From 13c4d39657e5296abdced9fcfd6d6c685ee498e2 Mon Sep 17 00:00:00 2001 From: ryandmaggio Date: Thu, 17 Mar 2022 19:25:38 +0000 Subject: [PATCH 1/7] actually the updated one now --- bashlex/parser.py | 119 +++++++++++++++++++++++++++++++++------------- 1 file changed, 87 insertions(+), 32 deletions(-) diff --git a/bashlex/parser.py b/bashlex/parser.py index e6586c24..f4354a61 100644 --- a/bashlex/parser.py +++ b/bashlex/parser.py @@ -12,16 +12,6 @@ def _partsspan(parts): ('right', 'BAR', 'BAR_AND') ) -def handleNotImplemented(p, type): - if len(p) == 2: - raise NotImplementedError('type = {%s}, token = {%s}' % (type, p[1])) - else: - raise NotImplementedError('type = {%s}, token = {%s}, parts = {%s}' % (type, p[1], p[2])) - -def handleAssert(p, test): - if not test: - raise AssertionError('token = {%s}' % p[1]) - def p_inputunit(p): '''inputunit : simple_list simple_list_terminator | NEWLINE @@ -196,9 +186,9 @@ def p_command(p): if isinstance(p[1], ast.node): p[0] = p[1] if len(p) == 3: - handleAssert(p, p[0].kind == 'compound') + assert p[0].kind == 'compound' p[0].redirects.extend(p[2]) - handleAssert(p, p[0].pos[0] < p[0].redirects[-1].pos[1]) + assert p[0].pos[0] < p[0].redirects[-1].pos[1] p[0].pos = (p[0].pos[0], p[0].redirects[-1].pos[1]) else: p[0] = ast.node(kind='command', parts=p[1], pos=_partsspan(p[1])) @@ -219,7 +209,7 @@ def p_shell_command(p): p[0] = p[1] else: # while or until - handleAssert(p, p[2].kind == 'list') + assert p[2].kind == 'list' parts = _makeparts(p) kind = parts[0].word @@ -229,7 +219,7 @@ def p_shell_command(p): list=[ast.node(kind=kind, parts=parts, pos=_partsspan(parts))], pos=_partsspan(parts)) - handleAssert(p, p[0].kind == 'compound') + assert p[0].kind == 'compound' def _makeparts(p): parts = [] @@ -278,7 +268,7 @@ def p_arith_for_command(p): | FOR ARITH_FOR_EXPRS list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY | FOR ARITH_FOR_EXPRS DO compound_list DONE | FOR ARITH_FOR_EXPRS LEFT_CURLY compound_list RIGHT_CURLY''' - handleNotImplemented(p, 'arithmetic for') + raise NotImplementedError('arithmetic for') def p_select_command(p): '''select_command : SELECT WORD newline_list DO list DONE @@ -287,13 +277,26 @@ def p_select_command(p): | SELECT WORD SEMICOLON newline_list LEFT_CURLY list RIGHT_CURLY | SELECT WORD newline_list IN word_list list_terminator newline_list DO list DONE | SELECT WORD newline_list IN word_list list_terminator newline_list LEFT_CURLY list RIGHT_CURLY''' - handleNotImplemented(p, 'select command') + raise NotImplementedError('select command') def p_case_command(p): '''case_command : CASE WORD newline_list IN newline_list ESAC | CASE WORD newline_list IN case_clause_sequence newline_list ESAC | CASE WORD newline_list IN case_clause ESAC''' - handleNotImplemented(p, 'case command') + parts = _makeparts(p) + rparts = [] + for i, e in enumerate(parts): + print(e) + if isinstance(e, list): + rparts.extend(e) + else: + rparts.append(e) + p[0] = ast.node(kind='compound', + redirects=[], + #list=[ast.node(kind='compound', list=rparts, pos=_partsspan(rparts))], + list=rparts, + pos=_partsspan(rparts)) + #raise NotImplementedError ('case command') def p_function_def(p): '''function_def : WORD LEFT_PAREN RIGHT_PAREN newline_list function_body @@ -309,12 +312,12 @@ def p_function_def(p): def p_function_body(p): '''function_body : shell_command | shell_command redirection_list''' - handleAssert(p, p[1].kind == 'compound') + assert p[1].kind == 'compound' p[0] = p[1] if len(p) == 3: p[0].redirects.extend(p[2]) - handleAssert(p, p[0].pos[0] < p[0].redirects[-1].pos[1]) + assert p[0].pos[0] < p[0].redirects[-1].pos[1] p[0].pos = (p[0].pos[0], p[0].redirects[-1].pos[1]) def p_subshell(p): @@ -331,7 +334,7 @@ def p_coproc(p): | COPROC WORD shell_command | COPROC WORD shell_command redirection_list | COPROC simple_command''' - handleNotImplemented(p, 'coproc') + raise NotImplementedError('coproc') def p_if_command(p): '''if_command : IF compound_list THEN compound_list FI @@ -356,11 +359,11 @@ def p_group_command(p): def p_arith_command(p): '''arith_command : ARITH_CMD''' - handleNotImplemented(p, 'arithmetic command') + raise NotImplementedError('arithmetic command') def p_cond_command(p): '''cond_command : COND_START COND_CMD COND_END''' - handleNotImplemented(p, 'cond command') + raise NotImplementedError('cond command') def p_elif_clause(p): '''elif_clause : ELIF compound_list THEN compound_list @@ -377,14 +380,32 @@ def p_elif_clause(p): def p_case_clause(p): '''case_clause : pattern_list | case_clause_sequence pattern_list''' - handleNotImplemented(p, 'case clause') + + if len(p) == 2: + p[0]=p[1] + else: + p[0] = p[2] + p[0].append(p[1]) + #raise NotImplementedError('case clause') + def p_pattern_list(p): '''pattern_list : newline_list pattern RIGHT_PAREN compound_list | newline_list pattern RIGHT_PAREN newline_list | newline_list LEFT_PAREN pattern RIGHT_PAREN compound_list | newline_list LEFT_PAREN pattern RIGHT_PAREN newline_list''' - handleNotImplemented(p, 'pattern list') + + parts = _makeparts(p) + print("*"*80) + print("PATTERN_LIST") + for i, e in enumerate(parts): + print("Part %d"%i) + print(e) + print("*"*80) + p[0]=parts + + #raise NotImplementedError('pattern list') + def p_case_clause_sequence(p): '''case_clause_sequence : pattern_list SEMI_SEMI @@ -393,12 +414,25 @@ def p_case_clause_sequence(p): | case_clause_sequence pattern_list SEMI_AND | pattern_list SEMI_SEMI_AND | case_clause_sequence pattern_list SEMI_SEMI_AND''' - handleNotImplemented(p, 'case clause') + + if len(p) == 3: + p[0]=p[1] + else: + p[0] = p[2] + p[0].append(p[1]) + + #raise NotImplementedError('case clause') def p_pattern(p): '''pattern : WORD | pattern BAR WORD''' - handleNotImplemented(p, 'pattern') + parserobj = p.context + if len(p) == 2: + p[0] = [_expandword(parserobj, p.slice[1])] + else: + p[0] = p[1] + p[0].append(_expandword(parserobj, p.slice[3])) + #raise NotImplementedError('pattern') def p_list(p): '''list : newline_list list0''' @@ -531,7 +565,7 @@ def p_timespec(p): '''timespec : TIME | TIME TIMEOPT | TIME TIMEOPT TIMEIGN''' - handleNotImplemented(p, 'time command') + raise NotImplementedError('time command') def p_empty(p): '''empty :''' @@ -541,9 +575,14 @@ def p_error(p): assert isinstance(p, tokenizer.token) if p.ttype == tokenizer.tokentype.EOF: - raise errors.ParsingError('unexpected EOF', - p.lexer.source, - len(p.lexer.source)) + print(p) + parts = _makeparts([p]) + #return ast.node(kind='operator', parts=parts, pos=parts.lexspan(1)) + #return ast.node(kind='reservedword', word=p, pos=0) + pass + #raise errors.ParsingError('unexpected EOF', + # p.lexer.source, + # len(p.lexer.source)) else: raise errors.ParsingError('unexpected token %r' % p.value, p.lexer.source, p.lexpos) @@ -582,6 +621,7 @@ def get_correction_rightparen_states(): def parsesingle(s, strictmode=True, expansionlimit=None, convertpos=False): '''like parse, but only consumes a single top level node, e.g. parsing 'a\nb' will only return a node for 'a', leaving b unparsed''' + print("*"*80 + "\nPARSESINGLE\n" + s + '\n' + "*"*80) p = _parser(s, strictmode=strictmode, expansionlimit=expansionlimit) tree = p.parse() if convertpos: @@ -607,8 +647,10 @@ def parse(s, strictmode=True, expansionlimit=None, convertpos=False): expansionlimit is used to limit the amount of recursive parsing done due to command substitutions found during word expansion. ''' + print("*"*80 + "\nPARSE\n" + s + '\n' + "*"*80) p = _parser(s, strictmode=strictmode, expansionlimit=expansionlimit) parts = [p.parse()] + print("AFTER PARSE") class endfinder(ast.nodevisitor): def __init__(self): @@ -621,11 +663,17 @@ def visitheredoc(self, node, value): ef.visit(parts[-1]) index = max(parts[-1].pos[1], ef.end) + 1 while index < len(s): - part = _parser(s[index:], strictmode=strictmode).parse() + print("IN WHILE LOOP, IDX: %d"%index) + print(s[index:]) + print("____________________") + string = s[index:] + if s[index:].strip() == '': + string = '' + part = _parser(string, strictmode=strictmode).parse() if not isinstance(part, ast.node): break - + print("AFTER BREAK") ast.posshifter(index).visit(part) parts.append(part) ef = _endfinder() @@ -633,9 +681,12 @@ def visitheredoc(self, node, value): index = max(parts[-1].pos[1], ef.end) + 1 if convertpos: + print("IN CONVERT POS") for tree in parts: ast.posconverter(s).visit(tree) + print("AFTER EVERYTHING") + return parts def split(s): @@ -682,6 +733,10 @@ def __init__(self, s, strictmode=True, expansionlimit=None, tokenizerargs=None): strictmode=strictmode, **tokenizerargs) + print("SELF.S") + print(self.s) + print("TOKENS") + print(self.tok) self.redirstack = self.tok.redirstack def parse(self): From 1da291561976a387c828fc7bcc2b0db95535e628 Mon Sep 17 00:00:00 2001 From: ryandmaggio Date: Mon, 28 Mar 2022 20:49:23 +0000 Subject: [PATCH 2/7] Maybe-redy-to-deploy changes to parser, definitely not to tests --- bashlex/ast.py | 28 ++++++++++++++ bashlex/parser.py | 91 +++++++++++++++++--------------------------- tests/test_parser.py | 29 ++++++++++++++ 3 files changed, 91 insertions(+), 57 deletions(-) diff --git a/bashlex/ast.py b/bashlex/ast.py index 3d69edf9..f94bd5f2 100644 --- a/bashlex/ast.py +++ b/bashlex/ast.py @@ -92,6 +92,26 @@ def visit(self, n): dochild = self._visitnode(n, n.command) if dochild is None or dochild: self.visit(n.command) + elif k == 'case': + dochild = self._visitnode(n, n.parts) + if dochild is None or dochild: + for child in n.parts: + self.visit(child) + elif k == 'pattern_list': + dochild = self._visitnode(n, n.parts, None) + if dochild is None or dochild: + for child in n.parts: + self.visit(child) + elif k == 'case_clause_sequence': + dochild = self._visitnode(n, n.parts) + if dochild is None or dochild: + for child in n.parts: + self.visit(child) + elif k == 'pattern': + dochild = self._visitnode(n, n.parts) + if dochild is None or dochild: + for child in n.parts: + self.visit(child) else: raise ValueError('unknown node kind %r' % k) self.visitnodeend(n) @@ -130,6 +150,14 @@ def visitreservedword(self, n, word): pass def visitparameter(self, n, value): pass + def visitcase(self, n, parts): + pass + def visitcase_clause_sequence(self, n, parts, sequence): + pass + def visitpattern(self, n, parts): + pass + def visitpattern_list(self, n, parts, action): + pass def visittilde(self, n, value): pass def visitredirect(self, n, input, type, output, heredoc): diff --git a/bashlex/parser.py b/bashlex/parser.py index f4354a61..7af4618c 100644 --- a/bashlex/parser.py +++ b/bashlex/parser.py @@ -284,19 +284,13 @@ def p_case_command(p): | CASE WORD newline_list IN case_clause_sequence newline_list ESAC | CASE WORD newline_list IN case_clause ESAC''' parts = _makeparts(p) - rparts = [] - for i, e in enumerate(parts): - print(e) - if isinstance(e, list): - rparts.extend(e) - else: - rparts.append(e) p[0] = ast.node(kind='compound', - redirects=[], - #list=[ast.node(kind='compound', list=rparts, pos=_partsspan(rparts))], - list=rparts, - pos=_partsspan(rparts)) - #raise NotImplementedError ('case command') + redirects = [], + list = [ast.node(kind='case', + parts = parts, + pos = _partsspan(parts)) + ], + pos = _partsspan(parts)) def p_function_def(p): '''function_def : WORD LEFT_PAREN RIGHT_PAREN newline_list function_body @@ -386,8 +380,6 @@ def p_case_clause(p): else: p[0] = p[2] p[0].append(p[1]) - #raise NotImplementedError('case clause') - def p_pattern_list(p): '''pattern_list : newline_list pattern RIGHT_PAREN compound_list @@ -395,17 +387,25 @@ def p_pattern_list(p): | newline_list LEFT_PAREN pattern RIGHT_PAREN compound_list | newline_list LEFT_PAREN pattern RIGHT_PAREN newline_list''' - parts = _makeparts(p) - print("*"*80) - print("PATTERN_LIST") - for i, e in enumerate(parts): - print("Part %d"%i) - print(e) - print("*"*80) - p[0]=parts - - #raise NotImplementedError('pattern list') - + parserobj = p.context + parts = [] + action = None + if p.slice[2].type == "pattern": + patterns = p[2] + parts.extend(patterns) + rparen = ast.node(kind='reservedword', word=p[3], pos = p.lexspan(3)) + parts.append(rparen) + else: + lparen = ast.node(kind='reservedword', word=p[2], pos=p.lexspan(2)) + patterns = p[3] + rparen = ast.node(kind='reservedword', word=p[4], pos=p.lexspan(4)) + parts.extend([lparen, patterns, rparen]) + if p.slice[-1].type == "compound_list": + # for some reason, p[-1] does not give the "true" last element, do not know why + action = p[len(p)-1] + parts.append(action) + p[0] = ast.node(kind="pattern_list", + parts=parts, pos = _partsspan(parts)) def p_case_clause_sequence(p): '''case_clause_sequence : pattern_list SEMI_SEMI @@ -414,26 +414,23 @@ def p_case_clause_sequence(p): | case_clause_sequence pattern_list SEMI_AND | pattern_list SEMI_SEMI_AND | case_clause_sequence pattern_list SEMI_SEMI_AND''' - if len(p) == 3: p[0]=p[1] else: p[0] = p[2] - p[0].append(p[1]) - - #raise NotImplementedError('case clause') + p[0].parts.append(p[1]) def p_pattern(p): '''pattern : WORD | pattern BAR WORD''' + parserobj = p.context if len(p) == 2: p[0] = [_expandword(parserobj, p.slice[1])] else: p[0] = p[1] p[0].append(_expandword(parserobj, p.slice[3])) - #raise NotImplementedError('pattern') - + def p_list(p): '''list : newline_list list0''' p[0] = p[2] @@ -575,14 +572,9 @@ def p_error(p): assert isinstance(p, tokenizer.token) if p.ttype == tokenizer.tokentype.EOF: - print(p) - parts = _makeparts([p]) - #return ast.node(kind='operator', parts=parts, pos=parts.lexspan(1)) - #return ast.node(kind='reservedword', word=p, pos=0) - pass - #raise errors.ParsingError('unexpected EOF', - # p.lexer.source, - # len(p.lexer.source)) + raise errors.ParsingError('unexpected EOF', + p.lexer.source, + len(p.lexer.source)) else: raise errors.ParsingError('unexpected token %r' % p.value, p.lexer.source, p.lexpos) @@ -621,7 +613,6 @@ def get_correction_rightparen_states(): def parsesingle(s, strictmode=True, expansionlimit=None, convertpos=False): '''like parse, but only consumes a single top level node, e.g. parsing 'a\nb' will only return a node for 'a', leaving b unparsed''' - print("*"*80 + "\nPARSESINGLE\n" + s + '\n' + "*"*80) p = _parser(s, strictmode=strictmode, expansionlimit=expansionlimit) tree = p.parse() if convertpos: @@ -647,10 +638,8 @@ def parse(s, strictmode=True, expansionlimit=None, convertpos=False): expansionlimit is used to limit the amount of recursive parsing done due to command substitutions found during word expansion. ''' - print("*"*80 + "\nPARSE\n" + s + '\n' + "*"*80) p = _parser(s, strictmode=strictmode, expansionlimit=expansionlimit) parts = [p.parse()] - print("AFTER PARSE") class endfinder(ast.nodevisitor): def __init__(self): @@ -663,17 +652,12 @@ def visitheredoc(self, node, value): ef.visit(parts[-1]) index = max(parts[-1].pos[1], ef.end) + 1 while index < len(s): - print("IN WHILE LOOP, IDX: %d"%index) - print(s[index:]) - print("____________________") - string = s[index:] - if s[index:].strip() == '': - string = '' - part = _parser(string, strictmode=strictmode).parse() + + part = _parser(s[index:], strictmode=strictmode).parse() if not isinstance(part, ast.node): break - print("AFTER BREAK") + ast.posshifter(index).visit(part) parts.append(part) ef = _endfinder() @@ -681,12 +665,9 @@ def visitheredoc(self, node, value): index = max(parts[-1].pos[1], ef.end) + 1 if convertpos: - print("IN CONVERT POS") for tree in parts: ast.posconverter(s).visit(tree) - print("AFTER EVERYTHING") - return parts def split(s): @@ -733,10 +714,6 @@ def __init__(self, s, strictmode=True, expansionlimit=None, tokenizerargs=None): strictmode=strictmode, **tokenizerargs) - print("SELF.S") - print(self.s) - print("TOKENS") - print(self.tok) self.redirstack = self.tok.redirstack def parse(self): diff --git a/tests/test_parser.py b/tests/test_parser.py index 22e6758c..9fc292e9 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -67,6 +67,14 @@ def compoundnode(s, *parts, **kwargs): assert not kwargs return ast.node(kind='compound', s=s, list=list(parts), redirects=redirects) +def casenode(parts, s): + return(ast.node(kind='compound', + redirects=[], + list = [ast.ndoe(kind='case', parts=list(parts), s=s)]) + +def caseclausesequence(s, parts): + return ast.node(kind='caseclausesequence', parts=list(parts), s=s) + def procsubnode(s, command): return ast.node(kind='processsubstitution', s=s, command=command) @@ -1103,3 +1111,24 @@ def test_parameter_braces(self): ]) ), ) + def test_cases(self): + return + # s = """ + # case "$1" in + # 1) echo 1;; + # 2) + # # comment + # ( + # echo 2 + # echo 3 + # ) + # ;; + # 3) echo 3;; + # esac + # """ + # self.assertASTEquals(s, + # casenode(s, parts = [ + # reservedwordnode('case', 'case'), + # ]) + # ) + From 63431578356f0528379d654d594ccc9fb587358c Mon Sep 17 00:00:00 2001 From: ryandmaggio Date: Mon, 28 Mar 2022 21:10:56 +0000 Subject: [PATCH 3/7] brought the unchanged parts of parser up to date with current bashlex --- bashlex/parser.py | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/bashlex/parser.py b/bashlex/parser.py index 7af4618c..e324f07a 100644 --- a/bashlex/parser.py +++ b/bashlex/parser.py @@ -12,6 +12,16 @@ def _partsspan(parts): ('right', 'BAR', 'BAR_AND') ) +def handleNotImplemented(p, type): + if len(p) == 2: + raise NotImplementedError('type = {%s}, token = {%s}' % (type, p[1])) + else: + raise NotImplementedError('type = {%s}, token = {%s}, parts = {%s}' % (type, p[1], p[2])) + +def handleAssert(p, test): + if not test: + raise AssertionError('token = {%s}' % p[1]) + def p_inputunit(p): '''inputunit : simple_list simple_list_terminator | NEWLINE @@ -186,9 +196,9 @@ def p_command(p): if isinstance(p[1], ast.node): p[0] = p[1] if len(p) == 3: - assert p[0].kind == 'compound' + handleAssert(p, p[0].kind == 'compound') p[0].redirects.extend(p[2]) - assert p[0].pos[0] < p[0].redirects[-1].pos[1] + handleAssert(p, p[0].pos[0] < p[0].redirects[-1].pos[1]) p[0].pos = (p[0].pos[0], p[0].redirects[-1].pos[1]) else: p[0] = ast.node(kind='command', parts=p[1], pos=_partsspan(p[1])) @@ -209,7 +219,7 @@ def p_shell_command(p): p[0] = p[1] else: # while or until - assert p[2].kind == 'list' + handleAssert(p, p[2].kind == 'list') parts = _makeparts(p) kind = parts[0].word @@ -219,7 +229,7 @@ def p_shell_command(p): list=[ast.node(kind=kind, parts=parts, pos=_partsspan(parts))], pos=_partsspan(parts)) - assert p[0].kind == 'compound' + handleAssert(p, p[0].kind == 'compound') def _makeparts(p): parts = [] @@ -268,7 +278,7 @@ def p_arith_for_command(p): | FOR ARITH_FOR_EXPRS list_terminator newline_list LEFT_CURLY compound_list RIGHT_CURLY | FOR ARITH_FOR_EXPRS DO compound_list DONE | FOR ARITH_FOR_EXPRS LEFT_CURLY compound_list RIGHT_CURLY''' - raise NotImplementedError('arithmetic for') + handleNotImplemented(p, 'arithmetic for') def p_select_command(p): '''select_command : SELECT WORD newline_list DO list DONE @@ -277,20 +287,13 @@ def p_select_command(p): | SELECT WORD SEMICOLON newline_list LEFT_CURLY list RIGHT_CURLY | SELECT WORD newline_list IN word_list list_terminator newline_list DO list DONE | SELECT WORD newline_list IN word_list list_terminator newline_list LEFT_CURLY list RIGHT_CURLY''' - raise NotImplementedError('select command') + handleNotImplemented(p, 'select command') def p_case_command(p): '''case_command : CASE WORD newline_list IN newline_list ESAC | CASE WORD newline_list IN case_clause_sequence newline_list ESAC | CASE WORD newline_list IN case_clause ESAC''' - parts = _makeparts(p) - p[0] = ast.node(kind='compound', - redirects = [], - list = [ast.node(kind='case', - parts = parts, - pos = _partsspan(parts)) - ], - pos = _partsspan(parts)) + handleNotImplemented(p, 'case command') def p_function_def(p): '''function_def : WORD LEFT_PAREN RIGHT_PAREN newline_list function_body @@ -306,12 +309,12 @@ def p_function_def(p): def p_function_body(p): '''function_body : shell_command | shell_command redirection_list''' - assert p[1].kind == 'compound' + handleAssert(p, p[1].kind == 'compound') p[0] = p[1] if len(p) == 3: p[0].redirects.extend(p[2]) - assert p[0].pos[0] < p[0].redirects[-1].pos[1] + handleAssert(p, p[0].pos[0] < p[0].redirects[-1].pos[1]) p[0].pos = (p[0].pos[0], p[0].redirects[-1].pos[1]) def p_subshell(p): @@ -328,7 +331,7 @@ def p_coproc(p): | COPROC WORD shell_command | COPROC WORD shell_command redirection_list | COPROC simple_command''' - raise NotImplementedError('coproc') + handleNotImplemented(p, 'coproc') def p_if_command(p): '''if_command : IF compound_list THEN compound_list FI @@ -353,11 +356,11 @@ def p_group_command(p): def p_arith_command(p): '''arith_command : ARITH_CMD''' - raise NotImplementedError('arithmetic command') + handleNotImplemented(p, 'arithmetic command') def p_cond_command(p): '''cond_command : COND_START COND_CMD COND_END''' - raise NotImplementedError('cond command') + handleNotImplemented(p, 'cond command') def p_elif_clause(p): '''elif_clause : ELIF compound_list THEN compound_list @@ -430,7 +433,7 @@ def p_pattern(p): else: p[0] = p[1] p[0].append(_expandword(parserobj, p.slice[3])) - + def p_list(p): '''list : newline_list list0''' p[0] = p[2] @@ -562,7 +565,7 @@ def p_timespec(p): '''timespec : TIME | TIME TIMEOPT | TIME TIMEOPT TIMEIGN''' - raise NotImplementedError('time command') + handleNotImplemented(p, 'time command') def p_empty(p): '''empty :''' @@ -652,7 +655,6 @@ def visitheredoc(self, node, value): ef.visit(parts[-1]) index = max(parts[-1].pos[1], ef.end) + 1 while index < len(s): - part = _parser(s[index:], strictmode=strictmode).parse() if not isinstance(part, ast.node): From 84bee7697facba2fa7e93d500ae679b1b217c83b Mon Sep 17 00:00:00 2001 From: ryandmaggio Date: Mon, 4 Apr 2022 19:44:29 +0000 Subject: [PATCH 4/7] Maybe actually got the case statement parsing correct, wrote a test for it --- bashlex/ast.py | 2 +- bashlex/parser.py | 39 +++++++++++++++++++++-- tests/test_parser.py | 75 +++++++++++++++++++++++++++++++------------- 3 files changed, 90 insertions(+), 26 deletions(-) diff --git a/bashlex/ast.py b/bashlex/ast.py index f94bd5f2..fda85ba9 100644 --- a/bashlex/ast.py +++ b/bashlex/ast.py @@ -152,7 +152,7 @@ def visitparameter(self, n, value): pass def visitcase(self, n, parts): pass - def visitcase_clause_sequence(self, n, parts, sequence): + def visitcase_clause_sequence(self, n, parts): pass def visitpattern(self, n, parts): pass diff --git a/bashlex/parser.py b/bashlex/parser.py index e324f07a..1dd7e093 100644 --- a/bashlex/parser.py +++ b/bashlex/parser.py @@ -293,7 +293,12 @@ def p_case_command(p): '''case_command : CASE WORD newline_list IN newline_list ESAC | CASE WORD newline_list IN case_clause_sequence newline_list ESAC | CASE WORD newline_list IN case_clause ESAC''' - handleNotImplemented(p, 'case command') + parts = _makeparts(p) + p[0] = ast.node(kind='compound', + redirects = [], + list=[ast.node(kind='case', parts=parts, pos=_partsspan(parts))], + pos=_partsspan(parts)) + #handleNotImplemented(p, 'case command') def p_function_def(p): '''function_def : WORD LEFT_PAREN RIGHT_PAREN newline_list function_body @@ -381,8 +386,9 @@ def p_case_clause(p): if len(p) == 2: p[0]=p[1] else: - p[0] = p[2] - p[0].append(p[1]) + print("SWITCHAROO (small edition)") + p[0] = p[2] + p[0].append(p[1]) def p_pattern_list(p): '''pattern_list : newline_list pattern RIGHT_PAREN compound_list @@ -407,6 +413,10 @@ def p_pattern_list(p): # for some reason, p[-1] does not give the "true" last element, do not know why action = p[len(p)-1] parts.append(action) + print("NODE") + print(p) + print("ACTION:") + print(action) p[0] = ast.node(kind="pattern_list", parts=parts, pos = _partsspan(parts)) @@ -417,11 +427,34 @@ def p_case_clause_sequence(p): | case_clause_sequence pattern_list SEMI_AND | pattern_list SEMI_SEMI_AND | case_clause_sequence pattern_list SEMI_SEMI_AND''' + parts = _makeparts(p) + end = parts[len(parts)-1] if len(p) == 3: p[0]=p[1] + p[0].parts.append(end) else: + print("HIT SWITCHEROO\nP1") + print(p[1]) + print("P2") + print(p[2]) + i = 0 + for part in parts: + print("PARTS[%d]"%i) + print(part) + i+=1 p[0] = p[2] + print("END") + print(end) + p[0].parts.append(end) p[0].parts.append(p[1]) + #p1_parts = _makeparts(p[1]) + #p1 = ast.node(kind='case_clause_sequence', parts=[p[1]], redirects=[], pos=_partsspan(p1_parts)) + #p[0] = p[2] + #p[0].parts.append(p1) + #p[0] = ast.node(kind='case_clause_sequence', parts=[parts[1], parts[0]], redirects=[], pos=_partsspan(parts)) + + print("P[0]") + print(p[0]) def p_pattern(p): '''pattern : WORD diff --git a/tests/test_parser.py b/tests/test_parser.py index 9fc292e9..556ec8a0 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -67,14 +67,17 @@ def compoundnode(s, *parts, **kwargs): assert not kwargs return ast.node(kind='compound', s=s, list=list(parts), redirects=redirects) -def casenode(parts, s): +def casenode(s, *parts): return(ast.node(kind='compound', redirects=[], - list = [ast.ndoe(kind='case', parts=list(parts), s=s)]) + list = [ast.node(kind='case', parts=list(parts), s=s)])) -def caseclausesequence(s, parts): +def caseclausesequence(s, *parts): return ast.node(kind='caseclausesequence', parts=list(parts), s=s) +def patternlistnode(s, *parts): + return ast.node(kind='patternlist', parts=list(parts), s=s) + def procsubnode(s, command): return ast.node(kind='processsubstitution', s=s, command=command) @@ -1111,24 +1114,52 @@ def test_parameter_braces(self): ]) ), ) + def test_cases(self): - return - # s = """ - # case "$1" in - # 1) echo 1;; - # 2) - # # comment - # ( - # echo 2 - # echo 3 - # ) - # ;; - # 3) echo 3;; - # esac - # """ - # self.assertASTEquals(s, - # casenode(s, parts = [ - # reservedwordnode('case', 'case'), - # ]) - # ) + #return + s = """ + case "$1" in + 1) echo foo + ;; + *) + ( + echo bar + ) + ;; + esac + """ + self.assertASTEquals(s, + compoundnode(s, + casenode(s, + reservedwordnode('case', 'case'), + wordnode('$1', + parameternode('1', '1') + ), + reservedwordnode('in', 'in'), + patternlistnode('1) echo foo\n;;\n*)\n(\necho bar\n);;\n', + wordnode('*', '*'), + reservedwordnode(')', ')'), + compoundnode('(\necho bar\n)', + reservedwordnode('(', '('), + commandnode('echo bar', + wordnode('echo', 'echo'), + wordnode('bar', 'bar') + ), + reservedwordnode(')', ')') + ), + reservedwordnode(';;', ';;'), + patternlistnode('1) echo foo\n;;', + wordnode('1', '1'), + reservedwordnode(')', ')'), + commandnode('echo foo\n', + wordnode('echo', 'echo'), + wordnode('foo', 'foo') + ), + reservedwordnode(';;', ';;') + ) + ), + reservedwordnode('esac', 'esac') + ) + ) + ) From 660cfb946ee5d67d3fd4ff77f2f79b910aae22ee Mon Sep 17 00:00:00 2001 From: ryandmaggio Date: Mon, 4 Apr 2022 19:54:59 +0000 Subject: [PATCH 5/7] cleaned up edits --- bashlex/parser.py | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/bashlex/parser.py b/bashlex/parser.py index 1dd7e093..39271b6d 100644 --- a/bashlex/parser.py +++ b/bashlex/parser.py @@ -386,9 +386,8 @@ def p_case_clause(p): if len(p) == 2: p[0]=p[1] else: - print("SWITCHAROO (small edition)") p[0] = p[2] - p[0].append(p[1]) + p[0].parts.append(p[1]) def p_pattern_list(p): '''pattern_list : newline_list pattern RIGHT_PAREN compound_list @@ -396,7 +395,6 @@ def p_pattern_list(p): | newline_list LEFT_PAREN pattern RIGHT_PAREN compound_list | newline_list LEFT_PAREN pattern RIGHT_PAREN newline_list''' - parserobj = p.context parts = [] action = None if p.slice[2].type == "pattern": @@ -410,13 +408,8 @@ def p_pattern_list(p): rparen = ast.node(kind='reservedword', word=p[4], pos=p.lexspan(4)) parts.extend([lparen, patterns, rparen]) if p.slice[-1].type == "compound_list": - # for some reason, p[-1] does not give the "true" last element, do not know why action = p[len(p)-1] parts.append(action) - print("NODE") - print(p) - print("ACTION:") - print(action) p[0] = ast.node(kind="pattern_list", parts=parts, pos = _partsspan(parts)) @@ -433,28 +426,9 @@ def p_case_clause_sequence(p): p[0]=p[1] p[0].parts.append(end) else: - print("HIT SWITCHEROO\nP1") - print(p[1]) - print("P2") - print(p[2]) - i = 0 - for part in parts: - print("PARTS[%d]"%i) - print(part) - i+=1 p[0] = p[2] - print("END") - print(end) p[0].parts.append(end) p[0].parts.append(p[1]) - #p1_parts = _makeparts(p[1]) - #p1 = ast.node(kind='case_clause_sequence', parts=[p[1]], redirects=[], pos=_partsspan(p1_parts)) - #p[0] = p[2] - #p[0].parts.append(p1) - #p[0] = ast.node(kind='case_clause_sequence', parts=[parts[1], parts[0]], redirects=[], pos=_partsspan(parts)) - - print("P[0]") - print(p[0]) def p_pattern(p): '''pattern : WORD From 4172edb8b3fe128655c221517516b02b25126d26 Mon Sep 17 00:00:00 2001 From: ryandmaggio Date: Wed, 13 Apr 2022 19:54:45 +0000 Subject: [PATCH 6/7] Bypass raise NotImplemented for arithmetic expansion --- bashlex/subst.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bashlex/subst.py b/bashlex/subst.py index 2df60235..2f08e914 100644 --- a/bashlex/subst.py +++ b/bashlex/subst.py @@ -49,7 +49,9 @@ def _parsedolparen(parserobj, base, sindex): def _extractcommandsubst(parserobj, string, sindex, sxcommand=False): if string[sindex] == '(': - raise NotImplementedError('arithmetic expansion') + node, si = _parsedolparen(parserobj, string, sindex) + si += 1 + return ast.node(kind="commandsubstitution", command=node, pos=(sindex-2, si)), si #return _extractdelimitedstring(parserobj, string, sindex, '$(', '(', '(', sxcommand=True) else: node, si = _parsedolparen(parserobj, string, sindex) From fa7ed97c390996f34000413e5b8720bd2cbf545a Mon Sep 17 00:00:00 2001 From: ryandmaggio Date: Thu, 14 Apr 2022 14:26:18 +0000 Subject: [PATCH 7/7] added comments to the pattern_list parsing --- bashlex/parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bashlex/parser.py b/bashlex/parser.py index 39271b6d..03c562eb 100644 --- a/bashlex/parser.py +++ b/bashlex/parser.py @@ -298,7 +298,6 @@ def p_case_command(p): redirects = [], list=[ast.node(kind='case', parts=parts, pos=_partsspan(parts))], pos=_partsspan(parts)) - #handleNotImplemented(p, 'case command') def p_function_def(p): '''function_def : WORD LEFT_PAREN RIGHT_PAREN newline_list function_body @@ -397,16 +396,19 @@ def p_pattern_list(p): parts = [] action = None + # If we are in cases 1 or 2, we need to construct a reservedwrod node for right parentheses if p.slice[2].type == "pattern": patterns = p[2] parts.extend(patterns) rparen = ast.node(kind='reservedword', word=p[3], pos = p.lexspan(3)) parts.append(rparen) + # If we are in cases 3 or 4, we need to construct a reservedword node for left and right parentheses else: lparen = ast.node(kind='reservedword', word=p[2], pos=p.lexspan(2)) patterns = p[3] rparen = ast.node(kind='reservedword', word=p[4], pos=p.lexspan(4)) parts.extend([lparen, patterns, rparen]) + # If we are in cases 1 or 3, we need to include the compound_list node at the end if p.slice[-1].type == "compound_list": action = p[len(p)-1] parts.append(action)