diff options
-rw-r--r-- | CHANGES | 21 | ||||
-rw-r--r-- | README | 4 | ||||
-rw-r--r-- | doc/ply.html | 52 | ||||
-rw-r--r-- | example/ansic/clex.py | 2 | ||||
-rw-r--r-- | example/ansic/cparse.py | 2 | ||||
-rw-r--r-- | example/calc/calc.py | 2 | ||||
-rw-r--r-- | ply/yacc.py | 74 | ||||
-rw-r--r-- | test/calclex.py | 2 | ||||
-rw-r--r-- | test/testyacc.py | 18 | ||||
-rw-r--r-- | test/yacc_error5.py | 2 | ||||
-rw-r--r-- | test/yacc_error6.py | 80 | ||||
-rw-r--r-- | test/yacc_error7.py | 80 |
12 files changed, 291 insertions, 48 deletions
@@ -1,5 +1,26 @@ Version 3.5 --------------------- +04/26/12: beazley + Changes to functions available in panic mode error recovery. In previous versions + of PLY, the following global functions were available for use in the p_error() rule: + + yacc.errok() # Reset error state + yacc.token() # Get the next token + yacc.restart() # Reset the parsing stack + + The use of global variables was problematic for code involving multiple parsers + and frankly was a poor design overall. These functions have been moved to methods + of the parser instance created by the yacc() function. To make it easier to + obtain an instance of the parser in error handling, it is now passed as an optional + parameter to p_error() like this: + + def p_error(p, parser): + ... + parser.errok() + + *** POTENTIAL INCOMPATIBILITY *** The original global functions now issue a + DeprecationWarning. + 04/19/12: beazley Fixed some problems with line and position tracking and the use of error symbols. If you have a grammar rule involving an error rule like this: @@ -1,6 +1,6 @@ -PLY (Python Lex-Yacc) Version 3.4 +PLY (Python Lex-Yacc) Version 3.5 -Copyright (C) 2001-2011, +Copyright (C) 2001-2012, David M. Beazley (Dabeaz LLC) All rights reserved. diff --git a/doc/ply.html b/doc/ply.html index 0500dad..58da951 100644 --- a/doc/ply.html +++ b/doc/ply.html @@ -12,7 +12,7 @@ dave@dabeaz.com<br> </b> <p> -<b>PLY Version: 3.4</b> +<b>PLY Version: 3.5</b> <p> <!-- INDEX --> @@ -2449,8 +2449,12 @@ When a syntax error occurs, <tt>yacc.py</tt> performs the following steps: <ol> <li>On the first occurrence of an error, the user-defined <tt>p_error()</tt> function -is called with the offending token as an argument. However, if the syntax error is due to -reaching the end-of-file, <tt>p_error()</tt> is called with an argument of <tt>None</tt>. +is called with the offending token as an argument. 
However, if the syntax error is due to +reaching the end-of-file, <tt>p_error()</tt> is called with an + argument of <tt>None</tt>. +An optional second argument containing the instance of the parser +that's running is also passed to <tt>p_error()</tt> which may be + useful in panic-mode recovery described below. Afterwards, the parser enters an "error-recovery" mode in which it will not make future calls to <tt>p_error()</tt> until it has successfully shifted at least 3 tokens onto the parsing stack. @@ -2541,13 +2545,13 @@ parser in its initial state. <blockquote> <pre> -def p_error(p): +def p_error(p, parser): print "Whoa. You are seriously hosed." # Read ahead looking for a closing '}' - while 1: - tok = yacc.token() # Get the next token + while True: + tok = parser.token() # Get the next token if not tok or tok.type == 'RBRACE': break - yacc.restart() + parser.restart() </pre> </blockquote> @@ -2556,51 +2560,57 @@ This function simply discards the bad token and tells the parser that the error <blockquote> <pre> -def p_error(p): +def p_error(p, parser): print "Syntax error at token", p.type # Just discard the token and tell the parser it's okay. - yacc.errok() + parser.errok() </pre> </blockquote> <P> -Within the <tt>p_error()</tt> function, three functions are available to control the behavior -of the parser: +More information on these methods is as follows: +</p> + <p> <ul> -<li><tt>yacc.errok()</tt>. This resets the parser state so it doesn't think it's in error-recovery +<li><tt>parser.errok()</tt>. This resets the parser state so it doesn't think it's in error-recovery mode. This will prevent an <tt>error</tt> token from being generated and will reset the internal error counters so that the next syntax error will call <tt>p_error()</tt> again. <p> -<li><tt>yacc.token()</tt>. This returns the next token on the input stream. +<li><tt>parser.token()</tt>. This returns the next token on the input stream. <p> -<li><tt>yacc.restart()</tt>. 
This discards the entire parsing stack and resets the parser +<li><tt>parser.restart()</tt>. This discards the entire parsing stack and resets the parser to its initial state. </ul> -Note: these functions are only available when invoking <tt>p_error()</tt> and are not available -at any other time. - <p> To supply the next lookahead token to the parser, <tt>p_error()</tt> can return a token. This might be useful if trying to synchronize on special characters. For example: <blockquote> <pre> -def p_error(p): +def p_error(p, parser): # Read ahead looking for a terminating ";" - while 1: - tok = yacc.token() # Get the next token + while True: + tok = parser.token() # Get the next token if not tok or tok.type == 'SEMI': break - yacc.errok() + parser.errok() # Return SEMI to the parser as the next lookahead token return tok </pre> </blockquote> +<p> +<b>Compatibility note:</b> Defining <tt>p_error()</tt> with two +arguments was first supported in Ply-3.5 and is only needed if you +need to manipulate the associated parser during error handling. +A lot of PLY code uses a single argument which will continue to work +for now. 
+</p> + <H4><a name="ply_nn35"></a>6.8.3 Signaling an error from a production</H4> diff --git a/example/ansic/clex.py b/example/ansic/clex.py index 37fdd8e..4da3e1a 100644 --- a/example/ansic/clex.py +++ b/example/ansic/clex.py @@ -154,7 +154,7 @@ def t_error(t): print("Illegal character %s" % repr(t.value[0])) t.lexer.skip(1) -lexer = lex.lex(optimize=1) +lexer = lex.lex() if __name__ == "__main__": lex.runmain(lexer) diff --git a/example/ansic/cparse.py b/example/ansic/cparse.py index c9b9164..4db7680 100644 --- a/example/ansic/cparse.py +++ b/example/ansic/cparse.py @@ -854,7 +854,7 @@ def p_error(t): import profile # Build the grammar -yacc.yacc(method='LALR') +yacc.yacc(method='LALR',write_tables=False,debug=True) #profile.run("yacc.yacc(method='LALR')") diff --git a/example/calc/calc.py b/example/calc/calc.py index b923780..17df4e7 100644 --- a/example/calc/calc.py +++ b/example/calc/calc.py @@ -89,7 +89,7 @@ def p_expression_name(p): print("Undefined name '%s'" % p[1]) p[0] = 0 -def p_error(p): +def p_error(p,parser): if p: print("Syntax error at '%s'" % p.value) else: diff --git a/ply/yacc.py b/ply/yacc.py index a7593d4..41eeb03 100644 --- a/ply/yacc.py +++ b/ply/yacc.py @@ -158,6 +158,44 @@ def format_stack_entry(r): else: return "<%s @ 0x%x>" % (type(r).__name__,id(r)) +# Panic mode error recovery support. This feature is being reworked--much of the +# code here is to offer a deprecation/backwards compatible transition + +_errok = None +_token = None +_restart = None +_warnmsg = """PLY: Don't use global functions errok(), token(), and restart() in p_error(). +Instead, define p_error() with two arguments and invoke methods on the supplied parser instance: + + def p_error(p, parser): + ... + # Use parser.errok(), parser.token(), parser.restart() + ... 
+""" +import warnings +def errok(): + warnings.warn(_warnmsg) + return _errok() + +def restart(): + warnings.warn(_warnmsg) + return _restart() + +def token(): + warnings.warn(_warnmsg) + return _token() + +# Utility function to call the p_error() function with 1 or 2 arguments +def call_errorfunc(errorfunc,token,parser): + global _errok, _token, _restart + _errok = parser.errok + _token = parser.token + _restart = parser.restart + try: + return errorfunc(token,parser) + except TypeError: + return errorfunc(token) + #----------------------------------------------------------------------------- # === LR Parsing Engine === # @@ -311,6 +349,9 @@ class LRParser: else: get_token = tokenfunc + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token + # Set up the state and symbol stacks statestack = [ ] # Stack of parsing states @@ -510,15 +551,9 @@ class LRParser: if errtoken.type == "$end": errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart if errtoken and not hasattr(errtoken,'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. The @@ -630,6 +665,9 @@ class LRParser: else: get_token = tokenfunc + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token + # Set up the state and symbol stacks statestack = [ ] # Stack of parsing states @@ -791,14 +829,9 @@ class LRParser: if errtoken.type == '$end': errtoken = None # End of file! 
if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart if errtoken and not hasattr(errtoken,'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic @@ -910,6 +943,9 @@ class LRParser: else: get_token = tokenfunc + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token + # Set up the state and symbol stacks statestack = [ ] # Stack of parsing states @@ -1054,14 +1090,9 @@ class LRParser: if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart if errtoken and not hasattr(errtoken,'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic @@ -2899,8 +2930,9 @@ class ParserReflect(object): efile = func_code(self.error_func).co_filename self.files[efile] = 1 - if (func_code(self.error_func).co_argcount != 1+ismethod): - self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) + argcount = func_code(self.error_func).co_argcount - ismethod + if argcount not in (1,2): + self.log.error("%s:%d: p_error() requires 1 or 2 arguments",efile,eline) self.error = 1 # Get the tokens map diff --git a/test/calclex.py b/test/calclex.py index 302f0b0..030a986 100644 --- a/test/calclex.py +++ b/test/calclex.py @@ -43,7 +43,7 @@ def t_error(t): t.lexer.skip(1) # Build the lexer -lex.lex() +lexer = lex.lex() diff --git a/test/testyacc.py b/test/testyacc.py index cd2ab03..1a98b4f 100644 --- a/test/testyacc.py +++ 
b/test/testyacc.py @@ -184,6 +184,24 @@ class YaccErrorWarningTests(unittest.TestCase): "Assignment Error at 2:5 to 5:33\n" )) + def test_yacc_error6(self): + run_import("yacc_error6") + result = sys.stdout.getvalue() + self.assert_(check_expected(result, + "a=7\n" + "Line 3: Syntax error at '*'\n" + "c=21\n" + )) + + def test_yacc_error7(self): + run_import("yacc_error7") + result = sys.stdout.getvalue() + self.assert_(check_expected(result, + "a=7\n" + "Line 3: Syntax error at '*'\n" + "c=21\n" + )) + def test_yacc_inf(self): self.assertRaises(ply.yacc.YaccError,run_import,"yacc_inf") result = sys.stderr.getvalue() diff --git a/test/yacc_error5.py b/test/yacc_error5.py index 7cb538e..9eb0f85 100644 --- a/test/yacc_error5.py +++ b/test/yacc_error5.py @@ -78,6 +78,8 @@ def p_error(t): print("Syntax error at '%s'" % t.value) parser = yacc.yacc() +import calclex +calclex.lexer.lineno=1 parser.parse(""" a = 3 + (4*5) + diff --git a/test/yacc_error6.py b/test/yacc_error6.py new file mode 100644 index 0000000..20e8b24 --- /dev/null +++ b/test/yacc_error6.py @@ -0,0 +1,80 @@ +# ----------------------------------------------------------------------------- +# yacc_error6.py +# +# Panic mode recovery test +# ----------------------------------------------------------------------------- +import sys + +if ".." 
not in sys.path: sys.path.insert(0,"..") +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +def p_statements(t): + 'statements : statements statement' + pass + +def p_statements_1(t): + 'statements : statement' + pass + +def p_statement_assign(p): + 'statement : LPAREN NAME EQUALS expression RPAREN' + print("%s=%s" % (p[2],p[4])) + +def p_statement_expr(t): + 'statement : LPAREN expression RPAREN' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_error(p, parser): + if p: + print("Line %d: Syntax error at '%s'" % (p.lineno, p.value)) + # Scan ahead looking for a closing RPAREN token (or end of input) + while True: + tok = parser.token() + if not tok or tok.type == 'RPAREN': + break + if tok: + parser.restart() + return None + +parser = yacc.yacc() +import calclex +calclex.lexer.lineno=1 + +parser.parse(""" +(a = 3 + 4) +(b = 4 + * 5 - 6 + *) +(c = 10 + 11) +""") + + + + + + diff --git a/test/yacc_error7.py b/test/yacc_error7.py new file mode 100644 index 0000000..0e7c0a7 --- /dev/null +++ b/test/yacc_error7.py @@ -0,0 +1,80 @@ +# ----------------------------------------------------------------------------- +# yacc_error7.py +# +# Panic mode recovery test using deprecated functionality +# ----------------------------------------------------------------------------- +import sys + +if ".." 
not in sys.path: sys.path.insert(0,"..") +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +def p_statements(t): + 'statements : statements statement' + pass + +def p_statements_1(t): + 'statements : statement' + pass + +def p_statement_assign(p): + 'statement : LPAREN NAME EQUALS expression RPAREN' + print("%s=%s" % (p[2],p[4])) + +def p_statement_expr(t): + 'statement : LPAREN expression RPAREN' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_error(p, parser): + if p: + print("Line %d: Syntax error at '%s'" % (p.lineno, p.value)) + # Scan ahead looking for a closing RPAREN token (or end of input) + while True: + tok = yacc.token() + if not tok or tok.type == 'RPAREN': + break + if tok: + yacc.restart() + return None + +parser = yacc.yacc() +import calclex +calclex.lexer.lineno=1 + +parser.parse(""" +(a = 3 + 4) +(b = 4 + * 5 - 6 + *) +(c = 10 + 11) +""") + + + + + + |