summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--  CHANGES                    31
-rw-r--r--  README                      4
-rw-r--r--  doc/ply.html               18
-rw-r--r--  example/ansic/lextab.py     8
-rw-r--r--  ply/lex.py                  2
-rw-r--r--  ply/yacc.py               192
6 files changed, 156 insertions, 99 deletions
diff --git a/CHANGES b/CHANGES
index 5f66625..6fb78b8 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,36 @@
Version 2.3
-----------------------------
+02/18/07: beazley
+ Performance improvements. Made some changes to the internal
+ table organization and LR parser to improve parsing performance.
+
+02/18/07: beazley
+ Automatic tracking of line number and position information must now be
+ enabled by a special flag to parse(). For example:
+
+ yacc.parse(data,tracking=True)
+
+ In many applications, it's just not that important to have the
+ parser automatically track all line numbers. By making this an
+ optional feature, it allows the parser to run significantly faster
+ (more than a 20% speed increase in many cases). Note: positional
+ information is always available for raw tokens---this change only
+ applies to positional information associated with nonterminal
+ grammar symbols.
+ *** POTENTIAL INCOMPATIBILITY ***
+
+02/18/07: beazley
+ Yacc no longer supports extended slices of grammar productions.
+ However, it does support regular slices. For example:
+
+ def p_foo(p):
+ '''foo: a b c d e'''
+ p[0] = p[1:3]
+
+ This change is a performance improvement to the parser--it streamlines
+ normal access to the grammar values since slices are now handled in
+ a __getslice__() method as opposed to __getitem__().
+
02/12/07: beazley
Fixed a bug in the handling of token names when combined with
start conditions. Bug reported by Todd O'Bryan.
diff --git a/README b/README
index 7d6678f..d5e26a5 100644
--- a/README
+++ b/README
@@ -1,8 +1,8 @@
-PLY (Python Lex-Yacc) Version 2.3 (February 13, 2007)
+PLY (Python Lex-Yacc) Version 2.3 (February 18, 2007)
David M. Beazley (dave@dabeaz.com)
-Copyright (C) 2001-2006 David M. Beazley
+Copyright (C) 2001-2007 David M. Beazley
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
diff --git a/doc/ply.html b/doc/ply.html
index 8ae8de4..ed4d56f 100644
--- a/doc/ply.html
+++ b/doc/ply.html
@@ -2434,8 +2434,17 @@ to discard huge portions of the input text to find a valid restart point.
<H3><a name="ply_nn33"></a>5.9 Line Number and Position Tracking</H3>
-<tt>yacc.py</tt> automatically tracks line numbers and positions for all of the grammar symbols and tokens it processes. To retrieve the line
-numbers, two functions are used in grammar rules:
+<tt>yacc.py</tt> can automatically track line numbers and positions for all of the grammar symbols and tokens it processes. However, this
+extra tracking requires extra processing and can significantly slow down parsing. Therefore, it must be enabled by passing the
+<tt>tracking=True</tt> option to <tt>yacc.parse()</tt>. For example:
+
+<blockquote>
+<pre>
+yacc.parse(data,tracking=True)
+</pre>
+</blockquote>
+
+Once enabled, line numbers can be retrieved using the following two functions in grammar rules:
<ul>
<li><tt>p.lineno(num)</tt>. Return the starting line number for symbol <em>num</em>
@@ -2485,6 +2494,11 @@ def p_expression(p):
Note: The <tt>lexspan()</tt> function only returns the range of values up to the start of the last grammar symbol.
+<p>
+Note: The <tt>lineno()</tt> and <tt>lexpos()</tt> methods can always be called to get positional information
+on raw tokens or terminals. This information is available regardless of whether or not the parser is tracking
+positional information for other grammar symbols.
+
<H3><a name="ply_nn34"></a>5.10 AST Construction</H3>
diff --git a/example/ansic/lextab.py b/example/ansic/lextab.py
index ce9804b..b994df1 100644
--- a/example/ansic/lextab.py
+++ b/example/ansic/lextab.py
@@ -1,8 +1,8 @@
-# lextab.py. This file automatically created by PLY (version 2.2). Don't edit!
-_lextokens = {'SHORT': None, 'SIGNED': None, 'TIMES': None, 'TYPEID': None, 'GT': None, 'ARROW': None, 'FCONST': None, 'CONST': None, 'GE': None, 'PERIOD': None, 'SEMI': None, 'REGISTER': None, 'ENUM': None, 'SIZEOF': None, 'COMMA': None, 'RBRACE': None, 'RPAREN': None, 'RSHIFTEQUAL': None, 'LT': None, 'OREQUAL': None, 'XOREQUAL': None, 'DOUBLE': None, 'LBRACE': None, 'STRUCT': None, 'LPAREN': None, 'PLUSEQUAL': None, 'LNOT': None, 'NOT': None, 'CONDOP': None, 'LE': None, 'FLOAT': None, 'GOTO': None, 'LOR': None, 'EQ': None, 'MOD': None, 'ICONST': None, 'LONG': None, 'PLUS': None, 'DIVIDE': None, 'WHILE': None, 'UNION': None, 'CHAR': None, 'SWITCH': None, 'DO': None, 'FOR': None, 'VOID': None, 'EXTERN': None, 'RETURN': None, 'MINUSEQUAL': None, 'ELSE': None, 'ANDEQUAL': None, 'BREAK': None, 'CCONST': None, 'INT': None, 'DIVEQUAL': None, 'DEFAULT': None, 'TIMESEQUAL': None, 'MINUS': None, 'OR': None, 'CONTINUE': None, 'IF': None, 'UNSIGNED': None, 'ID': None, 'MINUSMINUS': None, 'COLON': None, 'LSHIFTEQUAL': None, 'RBRACKET': None, 'VOLATILE': None, 'CASE': None, 'PLUSPLUS': None, 'RSHIFT': None, 'MODEQUAL': None, 'LAND': None, 'AND': None, 'ELLIPSIS': None, 'STATIC': None, 'LBRACKET': None, 'LSHIFT': None, 'NE': None, 'TYPEDEF': None, 'AUTO': None, 'XOR': None, 'EQUALS': None, 'SCONST': None}
+# lextab.py. This file automatically created by PLY (version 2.3). Don't edit!
+_lextokens = {'DO': None, 'SHORT': None, 'RETURN': None, 'RSHIFTEQUAL': None, 'DEFAULT': None, 'VOID': None, 'NE': None, 'CHAR': None, 'WHILE': None, 'LNOT': None, 'STATIC': None, 'SCONST': None, 'LSHIFT': None, 'EXTERN': None, 'CONST': None, 'SIZEOF': None, 'CONDOP': None, 'MINUS': None, 'DIVIDE': None, 'CASE': None, 'LAND': None, 'RPAREN': None, 'FCONST': None, 'SEMI': None, 'REGISTER': None, 'MODEQUAL': None, 'UNSIGNED': None, 'LONG': None, 'PLUSPLUS': None, 'SWITCH': None, 'LSHIFTEQUAL': None, 'PLUS': None, 'COMMA': None, 'LOR': None, 'PERIOD': None, 'CCONST': None, 'RBRACKET': None, 'TYPEDEF': None, 'GT': None, 'XOR': None, 'GOTO': None, 'FOR': None, 'UNION': None, 'AUTO': None, 'ENUM': None, 'EQUALS': None, 'DIVEQUAL': None, 'ELSE': None, 'PLUSEQUAL': None, 'GE': None, 'LE': None, 'ICONST': None, 'ARROW': None, 'ELLIPSIS': None, 'LPAREN': None, 'RBRACE': None, 'MINUSMINUS': None, 'TIMES': None, 'EQ': None, 'ID': None, 'IF': None, 'AND': None, 'TYPEID': None, 'LBRACE': None, 'STRUCT': None, 'RSHIFT': None, 'COLON': None, 'INT': None, 'DOUBLE': None, 'MINUSEQUAL': None, 'FLOAT': None, 'XOREQUAL': None, 'SIGNED': None, 'LT': None, 'BREAK': None, 'CONTINUE': None, 'VOLATILE': None, 'LBRACKET': None, 'NOT': None, 'ANDEQUAL': None, 'OREQUAL': None, 'TIMESEQUAL': None, 'OR': None, 'MOD': None}
_lexreflags = 0
_lexliterals = ''
_lexstateinfo = {'INITIAL': 'inclusive'}
-_lexstatere = {'INITIAL': [('(?P<t_NEWLINE>\\n+)|(?P<t_ID>[A-Za-z_][\\w_]*)|(?P<t_comment> /\\*(.|\\n)*?\\*/)|(?P<t_preprocessor>\\#(.)*?\\n)|(?P<t_FCONST>((\\d+)(\\.\\d+)(e(\\+|-)?(\\d+))? | (\\d+)e(\\+|-)?(\\d+))([lL]|[fF])?)|(?P<t_ICONST>\\d+([uU]|[lL]|[uU][lL]|[lL][uU])?)|(?P<t_CCONST>(L)?\\\'([^\\\\\\n]|(\\\\.))*?\\\')|(?P<t_SCONST>\\"([^\\\\\\n]|(\\\\.))*?\\")|(?P<t_ELLIPSIS>\\.\\.\\.)|(?P<t_LOR>\\|\\|)|(?P<t_PLUSPLUS>\\+\\+)|(?P<t_TIMESEQUAL>\\*=)|(?P<t_RSHIFTEQUAL>>>=)|(?P<t_OREQUAL>\\|=)|(?P<t_PLUSEQUAL>\\+=)|(?P<t_LSHIFTEQUAL><<=)|(?P<t_RBRACKET>\\])|(?P<t_MODEQUAL>%=)|(?P<t_XOREQUAL>^=)|(?P<t_LSHIFT><<)|(?P<t_TIMES>\\*)|(?P<t_LAND>&&)|(?P<t_MINUSMINUS>--)|(?P<t_NE>!=)|(?P<t_LPAREN>\\()|(?P<t_ANDEQUAL>&=)|(?P<t_RSHIFT>>>)|(?P<t_LBRACKET>\\[)|(?P<t_LBRACE>\\{)|(?P<t_OR>\\|)|(?P<t_RBRACE>\\})|(?P<t_ARROW>->)|(?P<t_PLUS>\\+)|(?P<t_CONDOP>\\?)|(?P<t_LE><=)|(?P<t_MINUSEQUAL>-=)|(?P<t_PERIOD>\\.)|(?P<t_DIVEQUAL>/=)|(?P<t_EQ>==)|(?P<t_GE>>=)|(?P<t_RPAREN>\\))|(?P<t_XOR>\\^)|(?P<t_SEMI>;)|(?P<t_AND>&)|(?P<t_NOT>~)|(?P<t_EQUALS>=)|(?P<t_MOD>%)|(?P<t_LT><)|(?P<t_MINUS>-)|(?P<t_LNOT>!)|(?P<t_DIVIDE>/)|(?P<t_COMMA>,)|(?P<t_GT>>)|(?P<t_COLON>:)', [None, ('t_NEWLINE', 'NEWLINE'), ('t_ID', 'ID'), ('t_comment', 'comment'), None, ('t_preprocessor', 'preprocessor'), None, (None, 'FCONST'), None, None, None, None, None, None, None, None, None, None, (None, 'ICONST'), None, (None, 'CCONST'), None, None, None, (None, 'SCONST'), None, None, (None, 'ELLIPSIS'), (None, 'LOR'), (None, 'PLUSPLUS'), (None, 'TIMESEQUAL'), (None, 'RSHIFTEQUAL'), (None, 'OREQUAL'), (None, 'PLUSEQUAL'), (None, 'LSHIFTEQUAL'), (None, 'RBRACKET'), (None, 'MODEQUAL'), (None, 'XOREQUAL'), (None, 'LSHIFT'), (None, 'TIMES'), (None, 'LAND'), (None, 'MINUSMINUS'), (None, 'NE'), (None, 'LPAREN'), (None, 'ANDEQUAL'), (None, 'RSHIFT'), (None, 'LBRACKET'), (None, 'LBRACE'), (None, 'OR'), (None, 'RBRACE'), (None, 'ARROW'), (None, 'PLUS'), (None, 'CONDOP'), (None, 'LE'), (None, 'MINUSEQUAL'), (None, 'PERIOD'), 
(None, 'DIVEQUAL'), (None, 'EQ'), (None, 'GE'), (None, 'RPAREN'), (None, 'XOR'), (None, 'SEMI'), (None, 'AND'), (None, 'NOT'), (None, 'EQUALS'), (None, 'MOD'), (None, 'LT'), (None, 'MINUS'), (None, 'LNOT'), (None, 'DIVIDE'), (None, 'COMMA'), (None, 'GT'), (None, 'COLON')])]}
-_lexstateignore = {'INITIAL': ' \t\f'}
+_lexstatere = {'INITIAL': [('(?P<t_NEWLINE>\\n+)|(?P<t_ID>[A-Za-z_][\\w_]*)|(?P<t_comment> /\\*(.|\\n)*?\\*/)|(?P<t_preprocessor>\\#(.)*?\\n)|(?P<t_FCONST>((\\d+)(\\.\\d+)(e(\\+|-)?(\\d+))? | (\\d+)e(\\+|-)?(\\d+))([lL]|[fF])?)|(?P<t_ICONST>\\d+([uU]|[lL]|[uU][lL]|[lL][uU])?)|(?P<t_CCONST>(L)?\\\'([^\\\\\\n]|(\\\\.))*?\\\')|(?P<t_SCONST>\\"([^\\\\\\n]|(\\\\.))*?\\")|(?P<t_ELLIPSIS>\\.\\.\\.)|(?P<t_PLUSPLUS>\\+\\+)|(?P<t_LOR>\\|\\|)|(?P<t_RSHIFTEQUAL>>>=)|(?P<t_TIMESEQUAL>\\*=)|(?P<t_LSHIFTEQUAL><<=)|(?P<t_PLUSEQUAL>\\+=)|(?P<t_OREQUAL>\\|=)|(?P<t_XOREQUAL>^=)|(?P<t_RBRACKET>\\])|(?P<t_LE><=)|(?P<t_PERIOD>\\.)|(?P<t_RSHIFT>>>)|(?P<t_LPAREN>\\()|(?P<t_NE>!=)|(?P<t_RBRACE>\\})|(?P<t_MINUSMINUS>--)|(?P<t_MINUSEQUAL>-=)|(?P<t_MODEQUAL>%=)|(?P<t_OR>\\|)|(?P<t_LBRACE>\\{)|(?P<t_ANDEQUAL>&=)|(?P<t_DIVEQUAL>/=)|(?P<t_ARROW>->)|(?P<t_TIMES>\\*)|(?P<t_CONDOP>\\?)|(?P<t_XOR>\\^)|(?P<t_LBRACKET>\\[)|(?P<t_GE>>=)|(?P<t_RPAREN>\\))|(?P<t_LAND>&&)|(?P<t_LSHIFT><<)|(?P<t_EQ>==)|(?P<t_PLUS>\\+)|(?P<t_NOT>~)|(?P<t_COMMA>,)|(?P<t_MOD>%)|(?P<t_DIVIDE>/)|(?P<t_LT><)|(?P<t_AND>&)|(?P<t_SEMI>;)|(?P<t_MINUS>-)|(?P<t_EQUALS>=)|(?P<t_COLON>:)|(?P<t_LNOT>!)|(?P<t_GT>>)', [None, ('t_NEWLINE', 'NEWLINE'), ('t_ID', 'ID'), ('t_comment', 'comment'), None, ('t_preprocessor', 'preprocessor'), None, (None, 'FCONST'), None, None, None, None, None, None, None, None, None, None, (None, 'ICONST'), None, (None, 'CCONST'), None, None, None, (None, 'SCONST'), None, None, (None, 'ELLIPSIS'), (None, 'PLUSPLUS'), (None, 'LOR'), (None, 'RSHIFTEQUAL'), (None, 'TIMESEQUAL'), (None, 'LSHIFTEQUAL'), (None, 'PLUSEQUAL'), (None, 'OREQUAL'), (None, 'XOREQUAL'), (None, 'RBRACKET'), (None, 'LE'), (None, 'PERIOD'), (None, 'RSHIFT'), (None, 'LPAREN'), (None, 'NE'), (None, 'RBRACE'), (None, 'MINUSMINUS'), (None, 'MINUSEQUAL'), (None, 'MODEQUAL'), (None, 'OR'), (None, 'LBRACE'), (None, 'ANDEQUAL'), (None, 'DIVEQUAL'), (None, 'ARROW'), (None, 'TIMES'), (None, 'CONDOP'), (None, 'XOR'), (None, 'LBRACKET'), (None, 'GE'), 
(None, 'RPAREN'), (None, 'LAND'), (None, 'LSHIFT'), (None, 'EQ'), (None, 'PLUS'), (None, 'NOT'), (None, 'COMMA'), (None, 'MOD'), (None, 'DIVIDE'), (None, 'LT'), (None, 'AND'), (None, 'SEMI'), (None, 'MINUS'), (None, 'EQUALS'), (None, 'COLON'), (None, 'LNOT'), (None, 'GT')])]}
+_lexstateignore = {'INITIAL': ' \t\x0c'}
_lexstateerrorf = {'INITIAL': 't_error'}
diff --git a/ply/lex.py b/ply/lex.py
index 7cdddb5..bb0a5d6 100644
--- a/ply/lex.py
+++ b/ply/lex.py
@@ -3,7 +3,7 @@
#
# Author: David M. Beazley (dave@dabeaz.com)
#
-# Copyright (C) 2001-2006, David M. Beazley
+# Copyright (C) 2001-2007, David M. Beazley
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
diff --git a/ply/yacc.py b/ply/yacc.py
index c53f085..395e3c0 100644
--- a/ply/yacc.py
+++ b/ply/yacc.py
@@ -3,7 +3,7 @@
#
# Author(s): David M. Beazley (dave@dabeaz.com)
#
-# Copyright (C) 2001-2006, David M. Beazley
+# Copyright (C) 2001-2007, David M. Beazley
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -72,6 +72,16 @@ import re, types, sys, cStringIO, md5, os.path
# Exception raised for yacc-related errors
class YaccError(Exception): pass
+# Available instance types. This is used when parsers are defined by a class.
+# it's a little funky because I want to preserve backwards compatibility
+# with Python 2.0 where types.ObjectType is undefined.
+
+try:
+ _INSTANCETYPE = (types.InstanceType, types.ObjectType)
+except AttributeError:
+ _INSTANCETYPE = types.InstanceType
+ class object: pass # Note: needed if no new-style classes present
+
#-----------------------------------------------------------------------------
# === LR Parsing Engine ===
#
@@ -89,7 +99,7 @@ class YaccError(Exception): pass
# .lexpos = Starting lex position
# .endlexpos = Ending lex position (optional, set automatically)
-class YaccSymbol:
+class YaccSymbol(object):
def __str__(self): return self.type
def __repr__(self): return str(self)
@@ -107,17 +117,16 @@ class YaccProduction:
self.slice = s
self.pbstack = []
self.stack = stack
-
def __getitem__(self,n):
- if type(n) == types.IntType:
- if n >= 0: return self.slice[n].value
- else: return self.stack[n].value
- else:
- return [s.value for s in self.slice[n.start:n.stop:n.step]]
+ if n >= 0: return self.slice[n].value
+ else: return self.stack[n].value
def __setitem__(self,n,v):
self.slice[n].value = v
+ def __getslice__(self,i,j):
+ return [s.value for s in self.slice[i:j]]
+
def __len__(self):
return len(self.slice)
@@ -168,7 +177,7 @@ class Parser:
self.method = "Unknown LR" # Table construction method used
def errok(self):
- self.errorcount = 0
+ self.errorok = 1
def restart(self):
del self.statestack[:]
@@ -177,28 +186,28 @@ class Parser:
sym.type = '$end'
self.symstack.append(sym)
self.statestack.append(0)
-
- def parse(self,input=None,lexer=None,debug=0):
+
+ def parse(self,input=None,lexer=None,debug=0,tracking=0):
lookahead = None # Current lookahead symbol
lookaheadstack = [ ] # Stack of lookahead symbols
actions = self.action # Local reference to action table
goto = self.goto # Local reference to goto table
prod = self.productions # Local reference to production list
pslice = YaccProduction(None) # Production object passed to grammar rules
- pslice.parser = self # Parser object
- self.errorcount = 0 # Used during error recovery
+ errorcount = 0 # Used during error recovery
# If no lexer was given, we will try to use the lex module
if not lexer:
import lex
lexer = lex.lexer
-
- pslice.lexer = lexer
+ pslice.lexer = lexer
+ pslice.parser = self
+
# If input was supplied, pass to lexer
if input:
lexer.input(input)
-
+
# Tokenize function
get_token = lexer.token
@@ -211,17 +220,18 @@ class Parser:
errtoken = None # Err token
# The start state is assumed to be (0,$end)
+
statestack.append(0)
sym = YaccSymbol()
sym.type = '$end'
symstack.append(sym)
-
+ state = 0
while 1:
# Get the next symbol on the input. If a lookahead symbol
# is already set, we just use that. Otherwise, we'll pull
# the next token off of the lookaheadstack or from the lexer
if debug > 1:
- print 'state', statestack[-1]
+ print 'state', state
if not lookahead:
if not lookaheadstack:
lookahead = get_token() # Get the next token
@@ -234,9 +244,8 @@ class Parser:
errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()
# Check the action table
- s = statestack[-1]
ltype = lookahead.type
- t = actions.get((s,ltype),None)
+ t = actions[state].get(ltype)
if debug > 1:
print 'action', t
@@ -248,15 +257,14 @@ class Parser:
sys.stderr.write("yacc: Parse error. EOF\n")
return
statestack.append(t)
+ state = t
if debug > 1:
sys.stderr.write("%-60s shift state %s\n" % (errorlead, t))
symstack.append(lookahead)
lookahead = None
# Decrease error count on successful shift
- if self.errorcount > 0:
- self.errorcount -= 1
-
+ if errorcount: errorcount -=1
continue
if t < 0:
@@ -275,20 +283,20 @@ class Parser:
if plen:
targ = symstack[-plen-1:]
targ[0] = sym
- try:
- sym.lineno = targ[1].lineno
- sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
- sym.lexpos = targ[1].lexpos
- sym.endlexpos = getattr(targ[-1],"endlexpos",targ[-1].lexpos)
- except AttributeError:
- sym.lineno = 0
+ if tracking:
+ t1 = targ[1]
+ sym.lineno = t1.lineno
+ sym.lexpos = t1.lexpos
+ t1 = targ[-1]
+ sym.endlineno = getattr(t1,"endlineno",t1.lineno)
+ sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
del symstack[-plen:]
del statestack[-plen:]
else:
sym.lineno = 0
targ = [ sym ]
pslice.slice = targ
- pslice.pbstack = []
+
# Call the grammar rule with our special slice object
p.func(pslice)
@@ -298,15 +306,16 @@ class Parser:
for _t in pslice.pbstack:
lookaheadstack.append(_t)
lookahead = None
+ pslice.pbstack = []
symstack.append(sym)
- statestack.append(goto[statestack[-1],pname])
+ state = goto[statestack[-1]][pname]
+ statestack.append(state)
continue
if t == 0:
n = symstack[-1]
return getattr(n,"value",None)
- sys.stderr.write(errorlead, "\n")
if t == None:
if debug:
@@ -321,8 +330,9 @@ class Parser:
# the user defined p_error() function if this is the
# first syntax error. This function is only called if
# errorcount == 0.
- if not self.errorcount:
- self.errorcount = error_count
+ if errorcount == 0 or self.errorok:
+ errorcount = error_count
+ self.errorok = 0
errtoken = lookahead
if errtoken.type == '$end':
errtoken = None # End of file!
@@ -334,7 +344,7 @@ class Parser:
tok = self.errorfunc(errtoken)
del errok, token, restart # Delete special functions
- if not self.errorcount:
+ if self.errorok:
# User must have done some kind of panic
# mode recovery on their own. The
# returned token is the next lookahead
@@ -354,7 +364,7 @@ class Parser:
return
else:
- self.errorcount = error_count
+ errorcount = error_count
# case 1: the statestack only has 1 entry on it. If we're in this state, the
# entire parse has been rolled back and we're completely hosed. The token is
@@ -1637,12 +1647,15 @@ def lr_parse_table(method):
if method == 'LALR':
add_lalr_lookaheads(C)
+
# Build the parser table, state by state
st = 0
for I in C:
# Loop over each production in I
actlist = [ ] # List of actions
-
+ st_action = { }
+ st_actionp = { }
+ st_goto = { }
if yaccdebug:
_vf.write("\nstate %d\n\n" % st)
for p in I:
@@ -1654,8 +1667,8 @@ def lr_parse_table(method):
if p.prod[-1] == ".":
if p.name == "S'":
# Start symbol. Accept!
- action[st,"$end"] = 0
- actionp[st,"$end"] = p
+ st_action["$end"] = 0
+ st_actionp["$end"] = p
else:
# We are at the end of a production. Reduce!
if method == 'LALR':
@@ -1664,25 +1677,25 @@ def lr_parse_table(method):
laheads = Follow[p.name]
for a in laheads:
actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
- r = action.get((st,a),None)
+ r = st_action.get(a,None)
if r is not None:
# Whoa. Have a shift/reduce or reduce/reduce conflict
if r > 0:
# Need to decide on shift or reduce here
# By default we favor shifting. Need to add
# some precedence rules here.
- sprec,slevel = Productions[actionp[st,a].number].prec
+ sprec,slevel = Productions[st_actionp[a].number].prec
rprec,rlevel = Precedence.get(a,('right',0))
if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
# We really need to reduce here.
- action[st,a] = -p.number
- actionp[st,a] = p
+ st_action[a] = -p.number
+ st_actionp[a] = p
if not slevel and not rlevel:
_vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
_vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
n_srconflict += 1
elif (slevel == rlevel) and (rprec == 'nonassoc'):
- action[st,a] = None
+ st_action[a] = None
else:
# Hmmm. Guess we'll keep the shift
if not rlevel:
@@ -1695,17 +1708,17 @@ def lr_parse_table(method):
oldp = Productions[-r]
pp = Productions[p.number]
if oldp.line > pp.line:
- action[st,a] = -p.number
- actionp[st,a] = p
+ st_action[a] = -p.number
+ st_actionp[a] = p
# sys.stderr.write("Reduce/reduce conflict in state %d\n" % st)
n_rrconflict += 1
- _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
- _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
+ _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, st_actionp[a].number, st_actionp[a]))
+ _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,st_actionp[a].number, st_actionp[a]))
else:
sys.stderr.write("Unknown conflict in state %d\n" % st)
else:
- action[st,a] = -p.number
- actionp[st,a] = p
+ st_action[a] = -p.number
+ st_actionp[a] = p
else:
i = p.lr_index
a = p.prod[i+1] # Get symbol right after the "."
@@ -1715,7 +1728,7 @@ def lr_parse_table(method):
if j >= 0:
# We are in a shift state
actlist.append((a,p,"shift and go to state %d" % j))
- r = action.get((st,a),None)
+ r = st_action.get(a,None)
if r is not None:
# Whoa have a shift/reduce or shift/shift conflict
if r > 0:
@@ -1726,18 +1739,18 @@ def lr_parse_table(method):
# - if precedence of reduce rule is higher, we reduce.
# - if precedence of reduce is same and left assoc, we reduce.
# - otherwise we shift
- rprec,rlevel = Productions[actionp[st,a].number].prec
+ rprec,rlevel = Productions[st_actionp[a].number].prec
sprec,slevel = Precedence.get(a,('right',0))
if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
# We decide to shift here... highest precedence to shift
- action[st,a] = j
- actionp[st,a] = p
+ st_action[a] = j
+ st_actionp[a] = p
if not rlevel:
n_srconflict += 1
_vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
_vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
elif (slevel == rlevel) and (rprec == 'nonassoc'):
- action[st,a] = None
+ st_action[a] = None
else:
# Hmmm. Guess we'll keep the reduce
if not slevel and not rlevel:
@@ -1748,8 +1761,8 @@ def lr_parse_table(method):
else:
sys.stderr.write("Unknown conflict in state %d\n" % st)
else:
- action[st,a] = j
- actionp[st,a] = p
+ st_action[a] = j
+ st_actionp[a] = p
except StandardError,e:
raise YaccError, "Hosed in lr_parse_table", e
@@ -1758,14 +1771,14 @@ def lr_parse_table(method):
if yaccdebug:
_actprint = { }
for a,p,m in actlist:
- if action.has_key((st,a)):
- if p is actionp[st,a]:
+ if st_action.has_key(a):
+ if p is st_actionp[a]:
_vf.write(" %-15s %s\n" % (a,m))
_actprint[(a,m)] = 1
_vf.write("\n")
for a,p,m in actlist:
- if action.has_key((st,a)):
- if p is not actionp[st,a]:
+ if st_action.has_key(a):
+ if p is not st_actionp[a]:
if not _actprint.has_key((a,m)):
_vf.write(" ! %-15s [ %s ]\n" % (a,m))
_actprint[(a,m)] = 1
@@ -1782,10 +1795,14 @@ def lr_parse_table(method):
g = lr0_goto(I,n)
j = _lr0_cidhash.get(id(g),-1)
if j >= 0:
- goto[st,n] = j
+ st_goto[n] = j
if yaccdebug:
_vf.write(" %-30s shift and go to state %d\n" % (n,j))
+ action[st] = st_action
+ actionp[st] = st_actionp
+ goto[st] = st_goto
+
st += 1
if yaccdebug:
@@ -1828,14 +1845,15 @@ _lr_signature = %s
# Factor out names to try and make smaller
if smaller:
items = { }
-
- for k,v in _lr_action.items():
- i = items.get(k[1])
- if not i:
- i = ([],[])
- items[k[1]] = i
- i[0].append(k[0])
- i[1].append(v)
+
+ for s,nd in _lr_action.items():
+ for name,v in nd.items():
+ i = items.get(name)
+ if not i:
+ i = ([],[])
+ items[name] = i
+ i[0].append(s)
+ i[1].append(v)
f.write("\n_lr_action_items = {")
for k,v in items.items():
@@ -1853,7 +1871,8 @@ _lr_signature = %s
_lr_action = { }
for _k, _v in _lr_action_items.items():
for _x,_y in zip(_v[0],_v[1]):
- _lr_action[(_x,_k)] = _y
+ if not _lr_action.has_key(_x): _lr_action[_x] = { }
+ _lr_action[_x][_k] = _y
del _lr_action_items
""")
@@ -1866,14 +1885,15 @@ del _lr_action_items
if smaller:
# Factor out names to try and make smaller
items = { }
-
- for k,v in _lr_goto.items():
- i = items.get(k[1])
- if not i:
- i = ([],[])
- items[k[1]] = i
- i[0].append(k[0])
- i[1].append(v)
+
+ for s,nd in _lr_goto.items():
+ for name,v in nd.items():
+ i = items.get(name)
+ if not i:
+ i = ([],[])
+ items[name] = i
+ i[0].append(s)
+ i[1].append(v)
f.write("\n_lr_goto_items = {")
for k,v in items.items():
@@ -1891,7 +1911,8 @@ del _lr_action_items
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
for _x,_y in zip(_v[0],_v[1]):
- _lr_goto[(_x,_k)] = _y
+ if not _lr_goto.has_key(_x): _lr_goto[_x] = { }
+ _lr_goto[_x][_k] = _y
del _lr_goto_items
""")
else:
@@ -1937,15 +1958,6 @@ def lr_read_tables(module=tab_module,optimize=0):
return 0
-# Available instance types. This is used when parsers are defined by a class.
-# it's a little funky because I want to preserve backwards compatibility
-# with Python 2.0 where types.ObjectType is undefined.
-
-try:
- _INSTANCETYPE = (types.InstanceType, types.ObjectType)
-except AttributeError:
- _INSTANCETYPE = types.InstanceType
-
# -----------------------------------------------------------------------------
# yacc(module)
#