summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGES21
-rw-r--r--README4
-rw-r--r--doc/ply.html52
-rw-r--r--example/ansic/clex.py2
-rw-r--r--example/ansic/cparse.py2
-rw-r--r--example/calc/calc.py2
-rw-r--r--ply/yacc.py74
-rw-r--r--test/calclex.py2
-rw-r--r--test/testyacc.py18
-rw-r--r--test/yacc_error5.py2
-rw-r--r--test/yacc_error6.py80
-rw-r--r--test/yacc_error7.py80
12 files changed, 291 insertions, 48 deletions
diff --git a/CHANGES b/CHANGES
index 99dc683..28da920 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,26 @@
Version 3.5
---------------------
+04/26/12: beazley
+ Changes to functions available in panic mode error recovery. In previous versions
+ of PLY, the following global functions were available for use in the p_error() rule:
+
+ yacc.errok() # Reset error state
+ yacc.token() # Get the next token
+ yacc.restart() # Reset the parsing stack
+
+ The use of global variables was problematic for code involving multiple parsers
+ and frankly was a poor design overall. These functions have been moved to methods
+ of the parser instance created by the yacc() function. To make it easier to
+ obtain an instance of the parser in error handling, it is now passed as an optional
+ parameter to p_error() like this:
+
+ def p_error(p, parser):
+ ...
+ parser.errok()
+
+ *** POTENTIAL INCOMPATIBILITY *** The original global functions now issue a
+ deprecation warning (emitted through warnings.warn()) when called.
+
04/19/12: beazley
Fixed some problems with line and position tracking and the use of error
symbols. If you have a grammar rule involving an error rule like this:
diff --git a/README b/README
index f384d1a..5d660bd 100644
--- a/README
+++ b/README
@@ -1,6 +1,6 @@
-PLY (Python Lex-Yacc) Version 3.4
+PLY (Python Lex-Yacc) Version 3.5
-Copyright (C) 2001-2011,
+Copyright (C) 2001-2012,
David M. Beazley (Dabeaz LLC)
All rights reserved.
diff --git a/doc/ply.html b/doc/ply.html
index 0500dad..58da951 100644
--- a/doc/ply.html
+++ b/doc/ply.html
@@ -12,7 +12,7 @@ dave@dabeaz.com<br>
</b>
<p>
-<b>PLY Version: 3.4</b>
+<b>PLY Version: 3.5</b>
<p>
<!-- INDEX -->
@@ -2449,8 +2449,12 @@ When a syntax error occurs, <tt>yacc.py</tt> performs the following steps:
<ol>
<li>On the first occurrence of an error, the user-defined <tt>p_error()</tt> function
-is called with the offending token as an argument. However, if the syntax error is due to
-reaching the end-of-file, <tt>p_error()</tt> is called with an argument of <tt>None</tt>.
+is called with the offending token as an argument. However, if the syntax error is due to
+reaching the end-of-file, <tt>p_error()</tt> is called with an
+ argument of <tt>None</tt>.
+An optional second argument containing the instance of the parser
+that's running is also passed to <tt>p_error()</tt> which may be
+ useful in panic-mode recovery described below.
Afterwards, the parser enters
an "error-recovery" mode in which it will not make future calls to <tt>p_error()</tt> until it
has successfully shifted at least 3 tokens onto the parsing stack.
@@ -2541,13 +2545,13 @@ parser in its initial state.
<blockquote>
<pre>
-def p_error(p):
+def p_error(p, parser):
print "Whoa. You are seriously hosed."
# Read ahead looking for a closing '}'
- while 1:
- tok = yacc.token() # Get the next token
+ while True:
+ tok = parser.token() # Get the next token
if not tok or tok.type == 'RBRACE': break
- yacc.restart()
+ parser.restart()
</pre>
</blockquote>
@@ -2556,51 +2560,57 @@ This function simply discards the bad token and tells the parser that the error
<blockquote>
<pre>
-def p_error(p):
+def p_error(p, parser):
print "Syntax error at token", p.type
# Just discard the token and tell the parser it's okay.
- yacc.errok()
+ parser.errok()
</pre>
</blockquote>
<P>
-Within the <tt>p_error()</tt> function, three functions are available to control the behavior
-of the parser:
+More information on these methods is as follows:
+</p>
+
<p>
<ul>
-<li><tt>yacc.errok()</tt>. This resets the parser state so it doesn't think it's in error-recovery
+<li><tt>parser.errok()</tt>. This resets the parser state so it doesn't think it's in error-recovery
mode. This will prevent an <tt>error</tt> token from being generated and will reset the internal
error counters so that the next syntax error will call <tt>p_error()</tt> again.
<p>
-<li><tt>yacc.token()</tt>. This returns the next token on the input stream.
+<li><tt>parser.token()</tt>. This returns the next token on the input stream.
<p>
-<li><tt>yacc.restart()</tt>. This discards the entire parsing stack and resets the parser
+<li><tt>parser.restart()</tt>. This discards the entire parsing stack and resets the parser
to its initial state.
</ul>
-Note: these functions are only available when invoking <tt>p_error()</tt> and are not available
-at any other time.
-
<p>
To supply the next lookahead token to the parser, <tt>p_error()</tt> can return a token. This might be
useful if trying to synchronize on special characters. For example:
<blockquote>
<pre>
-def p_error(p):
+def p_error(p, parser):
# Read ahead looking for a terminating ";"
- while 1:
- tok = yacc.token() # Get the next token
+ while True:
+ tok = parser.token() # Get the next token
if not tok or tok.type == 'SEMI': break
- yacc.errok()
+ parser.errok()
# Return SEMI to the parser as the next lookahead token
return tok
</pre>
</blockquote>
+<p>
+<b>Compatibility note:</b> Defining <tt>p_error()</tt> with two
+arguments was first supported in Ply-3.5 and is only needed if you
+need to manipulate the associated parser during error handling.
+A lot of PLY code uses a single argument which will continue to work
+for now.
+</p>
+
<H4><a name="ply_nn35"></a>6.8.3 Signaling an error from a production</H4>
diff --git a/example/ansic/clex.py b/example/ansic/clex.py
index 37fdd8e..4da3e1a 100644
--- a/example/ansic/clex.py
+++ b/example/ansic/clex.py
@@ -154,7 +154,7 @@ def t_error(t):
print("Illegal character %s" % repr(t.value[0]))
t.lexer.skip(1)
-lexer = lex.lex(optimize=1)
+lexer = lex.lex()
if __name__ == "__main__":
lex.runmain(lexer)
diff --git a/example/ansic/cparse.py b/example/ansic/cparse.py
index c9b9164..4db7680 100644
--- a/example/ansic/cparse.py
+++ b/example/ansic/cparse.py
@@ -854,7 +854,7 @@ def p_error(t):
import profile
# Build the grammar
-yacc.yacc(method='LALR')
+yacc.yacc(method='LALR',write_tables=False,debug=True)
#profile.run("yacc.yacc(method='LALR')")
diff --git a/example/calc/calc.py b/example/calc/calc.py
index b923780..17df4e7 100644
--- a/example/calc/calc.py
+++ b/example/calc/calc.py
@@ -89,7 +89,7 @@ def p_expression_name(p):
print("Undefined name '%s'" % p[1])
p[0] = 0
-def p_error(p):
+def p_error(p,parser):
if p:
print("Syntax error at '%s'" % p.value)
else:
diff --git a/ply/yacc.py b/ply/yacc.py
index a7593d4..41eeb03 100644
--- a/ply/yacc.py
+++ b/ply/yacc.py
@@ -158,6 +158,44 @@ def format_stack_entry(r):
else:
return "<%s @ 0x%x>" % (type(r).__name__,id(r))
+# Panic mode error recovery support. This feature is being reworked--much of the
+# code here is to offer a deprecation/backwards compatible transition
+
+_errok = None
+_token = None
+_restart = None
+_warnmsg = """PLY: Don't use global functions errok(), token(), and restart() in p_error().
+Instead, define p_error() with two arguments and invoke methods on the supplied parser instance:
+
+ def p_error(p, parser):
+ ...
+ # Use parser.errok(), parser.token(), parser.restart()
+ ...
+"""
+import warnings
+def errok():
+ warnings.warn(_warnmsg)
+ return _errok()
+
+def restart():
+ warnings.warn(_warnmsg)
+ return _restart()
+
+def token():
+ warnings.warn(_warnmsg)
+ return _token()
+
+# Utility function to call the p_error() function with 1 or 2 arguments
+def call_errorfunc(errorfunc,token,parser):
+ global _errok, _token, _restart
+ _errok = parser.errok
+ _token = parser.token
+ _restart = parser.restart
+ try:
+ return errorfunc(token,parser)
+ except TypeError:
+ return errorfunc(token)
+
#-----------------------------------------------------------------------------
# === LR Parsing Engine ===
#
@@ -311,6 +349,9 @@ class LRParser:
else:
get_token = tokenfunc
+ # Set the parser's token method (sometimes used in error recovery)
+ self.token = get_token
+
# Set up the state and symbol stacks
statestack = [ ] # Stack of parsing states
@@ -510,15 +551,9 @@ class LRParser:
if errtoken.type == "$end":
errtoken = None # End of file!
if self.errorfunc:
- global errok,token,restart
- errok = self.errok # Set some special functions available in error recovery
- token = get_token
- restart = self.restart
if errtoken and not hasattr(errtoken,'lexer'):
errtoken.lexer = lexer
- tok = self.errorfunc(errtoken)
- del errok, token, restart # Delete special functions
-
+ tok = call_errorfunc(self.errorfunc, errtoken, self)
if self.errorok:
# User must have done some kind of panic
# mode recovery on their own. The
@@ -630,6 +665,9 @@ class LRParser:
else:
get_token = tokenfunc
+ # Set the parser's token method (sometimes used in error recovery)
+ self.token = get_token
+
# Set up the state and symbol stacks
statestack = [ ] # Stack of parsing states
@@ -791,14 +829,9 @@ class LRParser:
if errtoken.type == '$end':
errtoken = None # End of file!
if self.errorfunc:
- global errok,token,restart
- errok = self.errok # Set some special functions available in error recovery
- token = get_token
- restart = self.restart
if errtoken and not hasattr(errtoken,'lexer'):
errtoken.lexer = lexer
- tok = self.errorfunc(errtoken)
- del errok, token, restart # Delete special functions
+ tok = call_errorfunc(self.errorfunc, errtoken, self)
if self.errorok:
# User must have done some kind of panic
@@ -910,6 +943,9 @@ class LRParser:
else:
get_token = tokenfunc
+ # Set the parser's token method (sometimes used in error recovery)
+ self.token = get_token
+
# Set up the state and symbol stacks
statestack = [ ] # Stack of parsing states
@@ -1054,14 +1090,9 @@ class LRParser:
if errtoken.type == '$end':
errtoken = None # End of file!
if self.errorfunc:
- global errok,token,restart
- errok = self.errok # Set some special functions available in error recovery
- token = get_token
- restart = self.restart
if errtoken and not hasattr(errtoken,'lexer'):
errtoken.lexer = lexer
- tok = self.errorfunc(errtoken)
- del errok, token, restart # Delete special functions
+ tok = call_errorfunc(self.errorfunc, errtoken, self)
if self.errorok:
# User must have done some kind of panic
@@ -2899,8 +2930,9 @@ class ParserReflect(object):
efile = func_code(self.error_func).co_filename
self.files[efile] = 1
- if (func_code(self.error_func).co_argcount != 1+ismethod):
- self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
+ argcount = func_code(self.error_func).co_argcount - ismethod
+ if argcount not in (1,2):
+ self.log.error("%s:%d: p_error() requires 1 or 2 arguments",efile,eline)
self.error = 1
# Get the tokens map
diff --git a/test/calclex.py b/test/calclex.py
index 302f0b0..030a986 100644
--- a/test/calclex.py
+++ b/test/calclex.py
@@ -43,7 +43,7 @@ def t_error(t):
t.lexer.skip(1)
# Build the lexer
-lex.lex()
+lexer = lex.lex()
diff --git a/test/testyacc.py b/test/testyacc.py
index cd2ab03..1a98b4f 100644
--- a/test/testyacc.py
+++ b/test/testyacc.py
@@ -184,6 +184,24 @@ class YaccErrorWarningTests(unittest.TestCase):
"Assignment Error at 2:5 to 5:33\n"
))
+ def test_yacc_error6(self):
+ run_import("yacc_error6")
+ result = sys.stdout.getvalue()
+ self.assert_(check_expected(result,
+ "a=7\n"
+ "Line 3: Syntax error at '*'\n"
+ "c=21\n"
+ ))
+
+ def test_yacc_error7(self):
+ run_import("yacc_error7")
+ result = sys.stdout.getvalue()
+ self.assert_(check_expected(result,
+ "a=7\n"
+ "Line 3: Syntax error at '*'\n"
+ "c=21\n"
+ ))
+
def test_yacc_inf(self):
self.assertRaises(ply.yacc.YaccError,run_import,"yacc_inf")
result = sys.stderr.getvalue()
diff --git a/test/yacc_error5.py b/test/yacc_error5.py
index 7cb538e..9eb0f85 100644
--- a/test/yacc_error5.py
+++ b/test/yacc_error5.py
@@ -78,6 +78,8 @@ def p_error(t):
print("Syntax error at '%s'" % t.value)
parser = yacc.yacc()
+import calclex
+calclex.lexer.lineno=1
parser.parse("""
a = 3 +
(4*5) +
diff --git a/test/yacc_error6.py b/test/yacc_error6.py
new file mode 100644
index 0000000..20e8b24
--- /dev/null
+++ b/test/yacc_error6.py
@@ -0,0 +1,80 @@
+# -----------------------------------------------------------------------------
+# yacc_error6.py
+#
+# Panic mode recovery test
+# -----------------------------------------------------------------------------
+import sys
+
+if ".." not in sys.path: sys.path.insert(0,"..")
+import ply.yacc as yacc
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+def p_statements(t):
+ 'statements : statements statement'
+ pass
+
+def p_statements_1(t):
+ 'statements : statement'
+ pass
+
+def p_statement_assign(p):
+ 'statement : LPAREN NAME EQUALS expression RPAREN'
+ print("%s=%s" % (p[2],p[4]))
+
+def p_statement_expr(t):
+ 'statement : LPAREN expression RPAREN'
+ print(t[1])
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_error(p, parser):
+ if p:
+ print("Line %d: Syntax error at '%s'" % (p.lineno, p.value))
+ # Scan ahead looking for an RPAREN token
+ while True:
+ tok = parser.token()
+ if not tok or tok.type == 'RPAREN':
+ break
+ if tok:
+ parser.restart()
+ return None
+
+parser = yacc.yacc()
+import calclex
+calclex.lexer.lineno=1
+
+parser.parse("""
+(a = 3 + 4)
+(b = 4 + * 5 - 6 + *)
+(c = 10 + 11)
+""")
+
+
+
+
+
+
diff --git a/test/yacc_error7.py b/test/yacc_error7.py
new file mode 100644
index 0000000..0e7c0a7
--- /dev/null
+++ b/test/yacc_error7.py
@@ -0,0 +1,80 @@
+# -----------------------------------------------------------------------------
+# yacc_error7.py
+#
+# Panic mode recovery test using deprecated functionality
+# -----------------------------------------------------------------------------
+import sys
+
+if ".." not in sys.path: sys.path.insert(0,"..")
+import ply.yacc as yacc
+
+from calclex import tokens
+
+# Parsing rules
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+def p_statements(t):
+ 'statements : statements statement'
+ pass
+
+def p_statements_1(t):
+ 'statements : statement'
+ pass
+
+def p_statement_assign(p):
+ 'statement : LPAREN NAME EQUALS expression RPAREN'
+ print("%s=%s" % (p[2],p[4]))
+
+def p_statement_expr(t):
+ 'statement : LPAREN expression RPAREN'
+ print(t[1])
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_error(p, parser):
+ if p:
+ print("Line %d: Syntax error at '%s'" % (p.lineno, p.value))
+ # Scan ahead looking for an RPAREN token
+ while True:
+ tok = yacc.token()
+ if not tok or tok.type == 'RPAREN':
+ break
+ if tok:
+ yacc.restart()
+ return None
+
+parser = yacc.yacc()
+import calclex
+calclex.lexer.lineno=1
+
+parser.parse("""
+(a = 3 + 4)
+(b = 4 + * 5 - 6 + *)
+(c = 10 + 11)
+""")
+
+
+
+
+
+