diff options
-rw-r--r-- | CHANGES | 31 | ||||
-rw-r--r-- | README | 4 | ||||
-rw-r--r-- | doc/ply.html | 18 | ||||
-rw-r--r-- | example/ansic/lextab.py | 8 | ||||
-rw-r--r-- | ply/lex.py | 2 | ||||
-rw-r--r-- | ply/yacc.py | 192 |
6 files changed, 156 insertions, 99 deletions
@@ -1,5 +1,36 @@ Version 2.3 ----------------------------- +02/18/07: beazley + Performance improvements. Made some changes to the internal + table organization and LR parser to improve parsing performance. + +02/18/07: beazley + Automatic tracking of line number and position information must now be + enabled by a special flag to parse(). For example: + + yacc.parse(data,tracking=True) + + In many applications, it's just not that important to have the + parser automatically track all line numbers. By making this an + optional feature, it allows the parser to run significantly faster + (more than a 20% speed increase in many cases). Note: positional + information is always available for raw tokens---this change only + applies to positional information associated with nonterminal + grammar symbols. + *** POTENTIAL INCOMPATIBILITY *** + +02/18/07: beazley + Yacc no longer supports extended slices of grammar productions. + However, it does support regular slices. For example: + + def p_foo(p): + '''foo: a b c d e''' + p[0] = p[1:3] + + This change is a performance improvement to the parser--it streamlines + normal access to the grammar values since slices are now handled in + a __getslice__() method as opposed to __getitem__(). + 02/12/07: beazley Fixed a bug in the handling of token names when combined with start conditions. Bug reported by Todd O'Bryan. @@ -1,8 +1,8 @@ -PLY (Python Lex-Yacc) Version 2.3 (February 13, 2007) +PLY (Python Lex-Yacc) Version 2.3 (February 18, 2007) David M. Beazley (dave@dabeaz.com) -Copyright (C) 2001-2006 David M. Beazley +Copyright (C) 2001-2007 David M. Beazley This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public diff --git a/doc/ply.html b/doc/ply.html index 8ae8de4..ed4d56f 100644 --- a/doc/ply.html +++ b/doc/ply.html @@ -2434,8 +2434,17 @@ to discard huge portions of the input text to find a valid restart point. <H3><a name="ply_nn33"></a>5.9 Line Number and Position Tracking</H3> -<tt>yacc.py</tt> automatically tracks line numbers and positions for all of the grammar symbols and tokens it processes. To retrieve the line -numbers, two functions are used in grammar rules: +<tt>yacc.py</tt> can automatically track line numbers and positions for all of the grammar symbols and tokens it processes. However, this +extra tracking requires extra processing and can significantly slow down parsing. Therefore, it must be enabled by passing the +<tt>tracking=True</tt> option to <tt>yacc.parse()</tt>. For example: + +<blockquote> +<pre> +yacc.parse(data,tracking=True) +</pre> +</blockquote> + +Once enabled, line numbers can be retrieved using the following two functions in grammar rules: <ul> <li><tt>p.lineno(num)</tt>. Return the starting line number for symbol <em>num</em> @@ -2485,6 +2494,11 @@ def p_expression(p): Note: The <tt>lexspan()</tt> function only returns the range of values up the start of the last grammar symbol. +<p> +Note: The <tt>lineno()</tt> and <tt>lexpos()</tt> methods can always be called to get positional information +on raw tokens or terminals. This information is available regardless of whether or not the parser is tracking +positional information for other grammar symbols. + <H3><a name="ply_nn34"></a>5.10 AST Construction</H3> diff --git a/example/ansic/lextab.py b/example/ansic/lextab.py index ce9804b..b994df1 100644 --- a/example/ansic/lextab.py +++ b/example/ansic/lextab.py @@ -1,8 +1,8 @@ -# lextab.py. This file automatically created by PLY (version 2.2). Don't edit! -_lextokens = {'SHORT': None, 'SIGNED': None, 'TIMES': None, 'TYPEID': None, 'GT': None, 'ARROW': None, 'FCONST': None, 'CONST': None, 'GE': None, 'PERIOD': None, 'SEMI': None, 'REGISTER': None, 'ENUM': None, 'SIZEOF': None, 'COMMA': None, 'RBRACE': None, 'RPAREN': None, 'RSHIFTEQUAL': None, 'LT': None, 'OREQUAL': None, 'XOREQUAL': None, 'DOUBLE': None, 'LBRACE': None, 'STRUCT': None, 'LPAREN': None, 'PLUSEQUAL': None, 'LNOT': None, 'NOT': None, 'CONDOP': None, 'LE': None, 'FLOAT': None, 'GOTO': None, 'LOR': None, 'EQ': None, 'MOD': None, 'ICONST': None, 'LONG': None, 'PLUS': None, 'DIVIDE': None, 'WHILE': None, 'UNION': None, 'CHAR': None, 'SWITCH': None, 'DO': None, 'FOR': None, 'VOID': None, 'EXTERN': None, 'RETURN': None, 'MINUSEQUAL': None, 'ELSE': None, 'ANDEQUAL': None, 'BREAK': None, 'CCONST': None, 'INT': None, 'DIVEQUAL': None, 'DEFAULT': None, 'TIMESEQUAL': None, 'MINUS': None, 'OR': None, 'CONTINUE': None, 'IF': None, 'UNSIGNED': None, 'ID': None, 'MINUSMINUS': None, 'COLON': None, 'LSHIFTEQUAL': None, 'RBRACKET': None, 'VOLATILE': None, 'CASE': None, 'PLUSPLUS': None, 'RSHIFT': None, 'MODEQUAL': None, 'LAND': None, 'AND': None, 'ELLIPSIS': None, 'STATIC': None, 'LBRACKET': None, 'LSHIFT': None, 'NE': None, 'TYPEDEF': None, 'AUTO': None, 'XOR': None, 'EQUALS': None, 'SCONST': None} +# lextab.py. This file automatically created by PLY (version 2.3). Don't edit! +_lextokens = {'DO': None, 'SHORT': None, 'RETURN': None, 'RSHIFTEQUAL': None, 'DEFAULT': None, 'VOID': None, 'NE': None, 'CHAR': None, 'WHILE': None, 'LNOT': None, 'STATIC': None, 'SCONST': None, 'LSHIFT': None, 'EXTERN': None, 'CONST': None, 'SIZEOF': None, 'CONDOP': None, 'MINUS': None, 'DIVIDE': None, 'CASE': None, 'LAND': None, 'RPAREN': None, 'FCONST': None, 'SEMI': None, 'REGISTER': None, 'MODEQUAL': None, 'UNSIGNED': None, 'LONG': None, 'PLUSPLUS': None, 'SWITCH': None, 'LSHIFTEQUAL': None, 'PLUS': None, 'COMMA': None, 'LOR': None, 'PERIOD': None, 'CCONST': None, 'RBRACKET': None, 'TYPEDEF': None, 'GT': None, 'XOR': None, 'GOTO': None, 'FOR': None, 'UNION': None, 'AUTO': None, 'ENUM': None, 'EQUALS': None, 'DIVEQUAL': None, 'ELSE': None, 'PLUSEQUAL': None, 'GE': None, 'LE': None, 'ICONST': None, 'ARROW': None, 'ELLIPSIS': None, 'LPAREN': None, 'RBRACE': None, 'MINUSMINUS': None, 'TIMES': None, 'EQ': None, 'ID': None, 'IF': None, 'AND': None, 'TYPEID': None, 'LBRACE': None, 'STRUCT': None, 'RSHIFT': None, 'COLON': None, 'INT': None, 'DOUBLE': None, 'MINUSEQUAL': None, 'FLOAT': None, 'XOREQUAL': None, 'SIGNED': None, 'LT': None, 'BREAK': None, 'CONTINUE': None, 'VOLATILE': None, 'LBRACKET': None, 'NOT': None, 'ANDEQUAL': None, 'OREQUAL': None, 'TIMESEQUAL': None, 'OR': None, 'MOD': None} _lexreflags = 0 _lexliterals = '' _lexstateinfo = {'INITIAL': 'inclusive'} -_lexstatere = {'INITIAL': [('(?P<t_NEWLINE>\\n+)|(?P<t_ID>[A-Za-z_][\\w_]*)|(?P<t_comment> /\\*(.|\\n)*?\\*/)|(?P<t_preprocessor>\\#(.)*?\\n)|(?P<t_FCONST>((\\d+)(\\.\\d+)(e(\\+|-)?(\\d+))? | (\\d+)e(\\+|-)?(\\d+))([lL]|[fF])?)|(?P<t_ICONST>\\d+([uU]|[lL]|[uU][lL]|[lL][uU])?)|(?P<t_CCONST>(L)?\\\'([^\\\\\\n]|(\\\\.))*?\\\')|(?P<t_SCONST>\\"([^\\\\\\n]|(\\\\.))*?\\")|(?P<t_ELLIPSIS>\\.\\.\\.)|(?P<t_LOR>\\|\\|)|(?P<t_PLUSPLUS>\\+\\+)|(?P<t_TIMESEQUAL>\\*=)|(?P<t_RSHIFTEQUAL>>>=)|(?P<t_OREQUAL>\\|=)|(?P<t_PLUSEQUAL>\\+=)|(?P<t_LSHIFTEQUAL><<=)|(?P<t_RBRACKET>\\])|(?P<t_MODEQUAL>%=)|(?P<t_XOREQUAL>^=)|(?P<t_LSHIFT><<)|(?P<t_TIMES>\\*)|(?P<t_LAND>&&)|(?P<t_MINUSMINUS>--)|(?P<t_NE>!=)|(?P<t_LPAREN>\\()|(?P<t_ANDEQUAL>&=)|(?P<t_RSHIFT>>>)|(?P<t_LBRACKET>\\[)|(?P<t_LBRACE>\\{)|(?P<t_OR>\\|)|(?P<t_RBRACE>\\})|(?P<t_ARROW>->)|(?P<t_PLUS>\\+)|(?P<t_CONDOP>\\?)|(?P<t_LE><=)|(?P<t_MINUSEQUAL>-=)|(?P<t_PERIOD>\\.)|(?P<t_DIVEQUAL>/=)|(?P<t_EQ>==)|(?P<t_GE>>=)|(?P<t_RPAREN>\\))|(?P<t_XOR>\\^)|(?P<t_SEMI>;)|(?P<t_AND>&)|(?P<t_NOT>~)|(?P<t_EQUALS>=)|(?P<t_MOD>%)|(?P<t_LT><)|(?P<t_MINUS>-)|(?P<t_LNOT>!)|(?P<t_DIVIDE>/)|(?P<t_COMMA>,)|(?P<t_GT>>)|(?P<t_COLON>:)', [None, ('t_NEWLINE', 'NEWLINE'), ('t_ID', 'ID'), ('t_comment', 'comment'), None, ('t_preprocessor', 'preprocessor'), None, (None, 'FCONST'), None, None, None, None, None, None, None, None, None, None, (None, 'ICONST'), None, (None, 'CCONST'), None, None, None, (None, 'SCONST'), None, None, (None, 'ELLIPSIS'), (None, 'LOR'), (None, 'PLUSPLUS'), (None, 'TIMESEQUAL'), (None, 'RSHIFTEQUAL'), (None, 'OREQUAL'), (None, 'PLUSEQUAL'), (None, 'LSHIFTEQUAL'), (None, 'RBRACKET'), (None, 'MODEQUAL'), (None, 'XOREQUAL'), (None, 'LSHIFT'), (None, 'TIMES'), (None, 'LAND'), (None, 'MINUSMINUS'), (None, 'NE'), (None, 'LPAREN'), (None, 'ANDEQUAL'), (None, 'RSHIFT'), (None, 'LBRACKET'), (None, 'LBRACE'), (None, 'OR'), (None, 'RBRACE'), (None, 'ARROW'), (None, 'PLUS'), (None, 'CONDOP'), (None, 'LE'), (None, 'MINUSEQUAL'), (None, 'PERIOD'), (None, 'DIVEQUAL'), (None, 'EQ'), (None, 'GE'), (None, 'RPAREN'), (None, 'XOR'), (None, 'SEMI'), (None, 'AND'), (None, 'NOT'), (None, 'EQUALS'), (None, 'MOD'), (None, 'LT'), (None, 'MINUS'), (None, 'LNOT'), (None, 'DIVIDE'), (None, 'COMMA'), (None, 'GT'), (None, 'COLON')])]} -_lexstateignore = {'INITIAL': ' \t\f'} +_lexstatere = {'INITIAL': [('(?P<t_NEWLINE>\\n+)|(?P<t_ID>[A-Za-z_][\\w_]*)|(?P<t_comment> /\\*(.|\\n)*?\\*/)|(?P<t_preprocessor>\\#(.)*?\\n)|(?P<t_FCONST>((\\d+)(\\.\\d+)(e(\\+|-)?(\\d+))? | (\\d+)e(\\+|-)?(\\d+))([lL]|[fF])?)|(?P<t_ICONST>\\d+([uU]|[lL]|[uU][lL]|[lL][uU])?)|(?P<t_CCONST>(L)?\\\'([^\\\\\\n]|(\\\\.))*?\\\')|(?P<t_SCONST>\\"([^\\\\\\n]|(\\\\.))*?\\")|(?P<t_ELLIPSIS>\\.\\.\\.)|(?P<t_PLUSPLUS>\\+\\+)|(?P<t_LOR>\\|\\|)|(?P<t_RSHIFTEQUAL>>>=)|(?P<t_TIMESEQUAL>\\*=)|(?P<t_LSHIFTEQUAL><<=)|(?P<t_PLUSEQUAL>\\+=)|(?P<t_OREQUAL>\\|=)|(?P<t_XOREQUAL>^=)|(?P<t_RBRACKET>\\])|(?P<t_LE><=)|(?P<t_PERIOD>\\.)|(?P<t_RSHIFT>>>)|(?P<t_LPAREN>\\()|(?P<t_NE>!=)|(?P<t_RBRACE>\\})|(?P<t_MINUSMINUS>--)|(?P<t_MINUSEQUAL>-=)|(?P<t_MODEQUAL>%=)|(?P<t_OR>\\|)|(?P<t_LBRACE>\\{)|(?P<t_ANDEQUAL>&=)|(?P<t_DIVEQUAL>/=)|(?P<t_ARROW>->)|(?P<t_TIMES>\\*)|(?P<t_CONDOP>\\?)|(?P<t_XOR>\\^)|(?P<t_LBRACKET>\\[)|(?P<t_GE>>=)|(?P<t_RPAREN>\\))|(?P<t_LAND>&&)|(?P<t_LSHIFT><<)|(?P<t_EQ>==)|(?P<t_PLUS>\\+)|(?P<t_NOT>~)|(?P<t_COMMA>,)|(?P<t_MOD>%)|(?P<t_DIVIDE>/)|(?P<t_LT><)|(?P<t_AND>&)|(?P<t_SEMI>;)|(?P<t_MINUS>-)|(?P<t_EQUALS>=)|(?P<t_COLON>:)|(?P<t_LNOT>!)|(?P<t_GT>>)', [None, ('t_NEWLINE', 'NEWLINE'), ('t_ID', 'ID'), ('t_comment', 'comment'), None, ('t_preprocessor', 'preprocessor'), None, (None, 'FCONST'), None, None, None, None, None, None, None, None, None, None, (None, 'ICONST'), None, (None, 'CCONST'), None, None, None, (None, 'SCONST'), None, None, (None, 'ELLIPSIS'), (None, 'PLUSPLUS'), (None, 'LOR'), (None, 'RSHIFTEQUAL'), (None, 'TIMESEQUAL'), (None, 'LSHIFTEQUAL'), (None, 'PLUSEQUAL'), (None, 'OREQUAL'), (None, 'XOREQUAL'), (None, 'RBRACKET'), (None, 'LE'), (None, 'PERIOD'), (None, 'RSHIFT'), (None, 'LPAREN'), (None, 'NE'), (None, 'RBRACE'), (None, 'MINUSMINUS'), (None, 'MINUSEQUAL'), (None, 'MODEQUAL'), (None, 'OR'), (None, 'LBRACE'), (None, 'ANDEQUAL'), (None, 'DIVEQUAL'), (None, 'ARROW'), (None, 'TIMES'), (None, 'CONDOP'), (None, 'XOR'), (None, 'LBRACKET'), (None, 'GE'), (None, 'RPAREN'), (None, 'LAND'), (None, 'LSHIFT'), (None, 'EQ'), (None, 'PLUS'), (None, 'NOT'), (None, 'COMMA'), (None, 'MOD'), (None, 'DIVIDE'), (None, 'LT'), (None, 'AND'), (None, 'SEMI'), (None, 'MINUS'), (None, 'EQUALS'), (None, 'COLON'), (None, 'LNOT'), (None, 'GT')])]} +_lexstateignore = {'INITIAL': ' \t\x0c'} _lexstateerrorf = {'INITIAL': 't_error'} @@ -3,7 +3,7 @@ # # Author: David M. Beazley (dave@dabeaz.com) # -# Copyright (C) 2001-2006, David M. Beazley +# Copyright (C) 2001-2007, David M. Beazley # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public diff --git a/ply/yacc.py b/ply/yacc.py index c53f085..395e3c0 100644 --- a/ply/yacc.py +++ b/ply/yacc.py @@ -3,7 +3,7 @@ # # Author(s): David M. Beazley (dave@dabeaz.com) # -# Copyright (C) 2001-2006, David M. Beazley +# Copyright (C) 2001-2007, David M. Beazley # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -72,6 +72,16 @@ import re, types, sys, cStringIO, md5, os.path # Exception raised for yacc-related errors class YaccError(Exception): pass +# Available instance types. This is used when parsers are defined by a class. +# it's a little funky because I want to preserve backwards compatibility +# with Python 2.0 where types.ObjectType is undefined. + +try: + _INSTANCETYPE = (types.InstanceType, types.ObjectType) +except AttributeError: + _INSTANCETYPE = types.InstanceType + class object: pass # Note: needed if no new-style classes present + #----------------------------------------------------------------------------- # === LR Parsing Engine === # @@ -89,7 +99,7 @@ class YaccError(Exception): pass # .lexpos = Starting lex position # .endlexpos = Ending lex position (optional, set automatically) -class YaccSymbol: +class YaccSymbol(object): def __str__(self): return self.type def __repr__(self): return str(self) @@ -107,17 +117,16 @@ class YaccProduction: self.slice = s self.pbstack = [] self.stack = stack - def __getitem__(self,n): - if type(n) == types.IntType: - if n >= 0: return self.slice[n].value - else: return self.stack[n].value - else: - return [s.value for s in self.slice[n.start:n.stop:n.step]] + if n >= 0: return self.slice[n].value + else: return self.stack[n].value def __setitem__(self,n,v): self.slice[n].value = v + def __getslice__(self,i,j): + return [s.value for s in self.slice[i:j]] + def __len__(self): return len(self.slice) @@ -168,7 +177,7 @@ class Parser: self.method = "Unknown LR" # Table construction method used def errok(self): - self.errorcount = 0 + self.errorok = 1 def restart(self): del self.statestack[:] @@ -177,28 +186,28 @@ class Parser: sym.type = '$end' self.symstack.append(sym) self.statestack.append(0) - - def parse(self,input=None,lexer=None,debug=0): + + def parse(self,input=None,lexer=None,debug=0,tracking=0): lookahead = None # Current lookahead symbol lookaheadstack = [ ] # Stack of lookahead symbols actions = self.action # Local reference to action table goto = self.goto # Local reference to goto table prod = self.productions # Local reference to production list pslice = YaccProduction(None) # Production object passed to grammar rules - pslice.parser = self # Parser object - self.errorcount = 0 # Used during error recovery + errorcount = 0 # Used during error recovery # If no lexer was given, we will try to use the lex module if not lexer: import lex lexer = lex.lexer - - pslice.lexer = lexer + pslice.lexer = lexer + pslice.parser = self + # If input was supplied, pass to lexer if input: lexer.input(input) - + # Tokenize function get_token = lexer.token @@ -211,17 +220,18 @@ class Parser: errtoken = None # Err token # The start state is assumed to be (0,$end) + statestack.append(0) sym = YaccSymbol() sym.type = '$end' symstack.append(sym) - + state = 0 while 1: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer if debug > 1: - print 'state', statestack[-1] + print 'state', state if not lookahead: if not lookaheadstack: lookahead = get_token() # Get the next token @@ -234,9 +244,8 @@ class Parser: errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip() # Check the action table - s = statestack[-1] ltype = lookahead.type - t = actions.get((s,ltype),None) + t = actions[state].get(ltype) if debug > 1: print 'action', t @@ -248,15 +257,14 @@ class Parser: sys.stderr.write("yacc: Parse error. EOF\n") return statestack.append(t) + state = t if debug > 1: sys.stderr.write("%-60s shift state %s\n" % (errorlead, t)) symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if self.errorcount > 0: - self.errorcount -= 1 - + if errorcount: errorcount -=1 continue if t < 0: @@ -275,20 +283,20 @@ class Parser: if plen: targ = symstack[-plen-1:] targ[0] = sym - try: - sym.lineno = targ[1].lineno - sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno) - sym.lexpos = targ[1].lexpos - sym.endlexpos = getattr(targ[-1],"endlexpos",targ[-1].lexpos) - except AttributeError: - sym.lineno = 0 + if tracking: + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1,"endlineno",t1.lineno) + sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) del symstack[-plen:] del statestack[-plen:] else: sym.lineno = 0 targ = [ sym ] pslice.slice = targ - pslice.pbstack = [] + # Call the grammar rule with our special slice object p.func(pslice) @@ -298,15 +306,16 @@ class Parser: for _t in pslice.pbstack: lookaheadstack.append(_t) lookahead = None + pslice.pbstack = [] symstack.append(sym) - statestack.append(goto[statestack[-1],pname]) + state = goto[statestack[-1]][pname] + statestack.append(state) continue if t == 0: n = symstack[-1] return getattr(n,"value",None) - sys.stderr.write(errorlead, "\n") if t == None: if debug: @@ -321,8 +330,9 @@ class Parser: # the user defined p_error() function if this is the # first syntax error. This function is only called if # errorcount == 0. - if not self.errorcount: - self.errorcount = error_count + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = 0 errtoken = lookahead if errtoken.type == '$end': errtoken = None # End of file! @@ -334,7 +344,7 @@ class Parser: tok = self.errorfunc(errtoken) del errok, token, restart # Delete special functions - if not self.errorcount: + if self.errorok: # User must have done some kind of panic # mode recovery on their own. The # returned token is the next lookahead @@ -354,7 +364,7 @@ class Parser: return else: - self.errorcount = error_count + errorcount = error_count # case 1: the statestack only has 1 entry on it. If we're in this state, the # entire parse has been rolled back and we're completely hosed. The token is @@ -1637,12 +1647,15 @@ def lr_parse_table(method): if method == 'LALR': add_lalr_lookaheads(C) + # Build the parser table, state by state st = 0 for I in C: # Loop over each production in I actlist = [ ] # List of actions - + st_action = { } + st_actionp = { } + st_goto = { } if yaccdebug: _vf.write("\nstate %d\n\n" % st) for p in I: @@ -1654,8 +1667,8 @@ def lr_parse_table(method): if p.prod[-1] == ".": if p.name == "S'": # Start symbol. Accept! - action[st,"$end"] = 0 - actionp[st,"$end"] = p + st_action["$end"] = 0 + st_actionp["$end"] = p else: # We are at the end of a production. Reduce! if method == 'LALR': @@ -1664,25 +1677,25 @@ def lr_parse_table(method): laheads = Follow[p.name] for a in laheads: actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) - r = action.get((st,a),None) + r = st_action.get(a,None) if r is not None: # Whoa. Have a shift/reduce or reduce/reduce conflict if r > 0: # Need to decide on shift or reduce here # By default we favor shifting. Need to add # some precedence rules here. - sprec,slevel = Productions[actionp[st,a].number].prec + sprec,slevel = Productions[st_actionp[a].number].prec rprec,rlevel = Precedence.get(a,('right',0)) if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): # We really need to reduce here. - action[st,a] = -p.number - actionp[st,a] = p + st_action[a] = -p.number + st_actionp[a] = p if not slevel and not rlevel: _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) n_srconflict += 1 elif (slevel == rlevel) and (rprec == 'nonassoc'): - action[st,a] = None + st_action[a] = None else: # Hmmm. Guess we'll keep the shift if not rlevel: @@ -1695,17 +1708,17 @@ def lr_parse_table(method): oldp = Productions[-r] pp = Productions[p.number] if oldp.line > pp.line: - action[st,a] = -p.number - actionp[st,a] = p + st_action[a] = -p.number + st_actionp[a] = p # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st) n_rrconflict += 1 - _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a])) - _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a])) + _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, st_actionp[a].number, st_actionp[a])) + _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,st_actionp[a].number, st_actionp[a])) else: sys.stderr.write("Unknown conflict in state %d\n" % st) else: - action[st,a] = -p.number - actionp[st,a] = p + st_action[a] = -p.number + st_actionp[a] = p else: i = p.lr_index a = p.prod[i+1] # Get symbol right after the "." @@ -1715,7 +1728,7 @@ def lr_parse_table(method): if j >= 0: # We are in a shift state actlist.append((a,p,"shift and go to state %d" % j)) - r = action.get((st,a),None) + r = st_action.get(a,None) if r is not None: # Whoa have a shift/reduce or shift/shift conflict if r > 0: @@ -1726,18 +1739,18 @@ def lr_parse_table(method): # - if precedence of reduce rule is higher, we reduce. # - if precedence of reduce is same and left assoc, we reduce. # - otherwise we shift - rprec,rlevel = Productions[actionp[st,a].number].prec + rprec,rlevel = Productions[st_actionp[a].number].prec sprec,slevel = Precedence.get(a,('right',0)) if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')): # We decide to shift here... highest precedence to shift - action[st,a] = j - actionp[st,a] = p + st_action[a] = j + st_actionp[a] = p if not rlevel: n_srconflict += 1 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) elif (slevel == rlevel) and (rprec == 'nonassoc'): - action[st,a] = None + st_action[a] = None else: # Hmmm. Guess we'll keep the reduce if not slevel and not rlevel: @@ -1748,8 +1761,8 @@ def lr_parse_table(method): else: sys.stderr.write("Unknown conflict in state %d\n" % st) else: - action[st,a] = j - actionp[st,a] = p + st_action[a] = j + st_actionp[a] = p except StandardError,e: raise YaccError, "Hosed in lr_parse_table", e @@ -1758,14 +1771,14 @@ def lr_parse_table(method): if yaccdebug: _actprint = { } for a,p,m in actlist: - if action.has_key((st,a)): - if p is actionp[st,a]: + if st_action.has_key(a): + if p is st_actionp[a]: _vf.write(" %-15s %s\n" % (a,m)) _actprint[(a,m)] = 1 _vf.write("\n") for a,p,m in actlist: - if action.has_key((st,a)): - if p is not actionp[st,a]: + if st_action.has_key(a): + if p is not st_actionp[a]: if not _actprint.has_key((a,m)): _vf.write(" ! %-15s [ %s ]\n" % (a,m)) _actprint[(a,m)] = 1 @@ -1782,10 +1795,14 @@ def lr_parse_table(method): g = lr0_goto(I,n) j = _lr0_cidhash.get(id(g),-1) if j >= 0: - goto[st,n] = j + st_goto[n] = j if yaccdebug: _vf.write(" %-30s shift and go to state %d\n" % (n,j)) + action[st] = st_action + actionp[st] = st_actionp + goto[st] = st_goto + st += 1 if yaccdebug: @@ -1828,14 +1845,15 @@ _lr_signature = %s # Factor out names to try and make smaller if smaller: items = { } - - for k,v in _lr_action.items(): - i = items.get(k[1]) - if not i: - i = ([],[]) - items[k[1]] = i - i[0].append(k[0]) - i[1].append(v) + + for s,nd in _lr_action.items(): + for name,v in nd.items(): + i = items.get(name) + if not i: + i = ([],[]) + items[name] = i + i[0].append(s) + i[1].append(v) f.write("\n_lr_action_items = {") for k,v in items.items(): @@ -1853,7 +1871,8 @@ _lr_signature = %s _lr_action = { } for _k, _v in _lr_action_items.items(): for _x,_y in zip(_v[0],_v[1]): - _lr_action[(_x,_k)] = _y + if not _lr_action.has_key(_x): _lr_action[_x] = { } + _lr_action[_x][_k] = _y del _lr_action_items """) @@ -1866,14 +1885,15 @@ del _lr_action_items if smaller: # Factor out names to try and make smaller items = { } - - for k,v in _lr_goto.items(): - i = items.get(k[1]) - if not i: - i = ([],[]) - items[k[1]] = i - i[0].append(k[0]) - i[1].append(v) + + for s,nd in _lr_goto.items(): + for name,v in nd.items(): + i = items.get(name) + if not i: + i = ([],[]) + items[name] = i + i[0].append(s) + i[1].append(v) f.write("\n_lr_goto_items = {") for k,v in items.items(): @@ -1891,7 +1911,8 @@ del _lr_action_items _lr_goto = { } for _k, _v in _lr_goto_items.items(): for _x,_y in zip(_v[0],_v[1]): - _lr_goto[(_x,_k)] = _y + if not _lr_goto.has_key(_x): _lr_goto[_x] = { } + _lr_goto[_x][_k] = _y del _lr_goto_items """) else: @@ -1937,15 +1958,6 @@ def lr_read_tables(module=tab_module,optimize=0): return 0 -# Available instance types. This is used when parsers are defined by a class. -# it's a little funky because I want to preserve backwards compatibility -# with Python 2.0 where types.ObjectType is undefined. - -try: - _INSTANCETYPE = (types.InstanceType, types.ObjectType) -except AttributeError: - _INSTANCETYPE = types.InstanceType - # ----------------------------------------------------------------------------- # yacc(module) # |