diff options
author | David Beazley <dave@dabeaz.com> | 2008-05-04 15:46:44 +0000 |
---|---|---|
committer | David Beazley <dave@dabeaz.com> | 2008-05-04 15:46:44 +0000 |
commit | 73a258c24753f664577865239be7cd0a136e25b4 (patch) | |
tree | 2cd2568c4798d9fcc914ba0323bb7e54c15f8b1f | |
parent | 08efa8beab68f9b932a0eb92cc54166856f50a7a (diff) | |
download | ply-73a258c24753f664577865239be7cd0a136e25b4.tar.gz |
Various bug fixes and cleanup
-rw-r--r-- | CHANGES | 58 | ||||
-rw-r--r-- | doc/ply.html | 4 | ||||
-rw-r--r-- | example/ansic/clex.py | 2 | ||||
-rw-r--r-- | ply/lex.py | 32 | ||||
-rw-r--r-- | ply/yacc.py | 50 | ||||
-rw-r--r-- | test/lex_state_norule.py | 2 |
6 files changed, 96 insertions, 52 deletions
@@ -1,7 +1,41 @@ Version 2.4 ----------------------------- +05/04/08: beazley + lex() now has an outputdir that can specify the output directory for + tables when running in optimize mode. For example: + + lexer = lex.lex(optimize=True, lextab="ltab", outputdir="foo/bar") + + The behavior of specifying a table module and output directory is now + more aligned with the behavior of yacc(). + +05/04/08: beazley + [Issue 9] + Fixed filename bug when specifying the modulename in lex() and yacc(). + If you specified options such as the following: + + parser = yacc.yacc(tabmodule="foo.bar.parsetab",outputdir="foo/bar") + + yacc would create a file "foo.bar.parsetab.py" in the given directory. + Now, it simply generates a file "parsetab.py" in that directory. + Bug reported by cptbinho. + +05/04/08: beazley + Slight modification to lex() and yacc() to allow their table files + to be loaded from a previously loaded module. This might make + it easier to load the parsing tables from a complicated package + structure. For example: + + import foo.bar.spam.parsetab as parsetab + parser = yacc.yacc(tabmodule=parsetab) + + Note: lex and yacc will never regenerate the table file if used + in this form---you will get a warning message instead. + This idea was suggested by Brian Clapper. + + 04/28/08: beazley - Fixed a big with p_erro() functions being picked up correctly + Fixed a bug with p_error() functions being picked up correctly when running in yacc(optimize=1) mode. Patch contributed by Bart Whiteley. @@ -113,28 +147,6 @@ Version 2.4 would result in an error message about "No input given". Reported by Andrew Dalke. -03/13/07: beazley - Modified the LexToken objects returned by the lexer so that they support - comparisons. Comparisons can be performed against simple strings like - this: - - if tok == 'NUMBER': - ... - - In this case, the token type field is compared against the string. 
You - can also compare against a tuple, like this: - - if tok == ('ID','foo') - - In this case, the tuple is taken to be the token type and token value. - Last, but not least, two different tokens can be compared. Tokens are - equal if they have the same type and value. - - One useful aspect of this addition is that token sequences can be easily - compared. For example: - - if toks == ['NUMBER','+','NUMBER']: - Version 2.3 ----------------------------- 02/20/07: beazley diff --git a/doc/ply.html b/doc/ply.html index 8b64b67..ecffe04 100644 --- a/doc/ply.html +++ b/doc/ply.html @@ -2765,7 +2765,7 @@ each time it runs (which may take awhile depending on how large your grammar is) <blockquote> <pre> -yacc.parse(debug=1) +yacc.parse(debug=n) # Pick n > 1 for increased amounts of debugging </pre> </blockquote> @@ -2774,7 +2774,7 @@ yacc.parse(debug=1) <blockquote> <pre> -yacc.parse(debug=1, debugfile="debugging.out") +yacc.parse(debug=n, debugfile="debugging.out") # Pick n > 1 for increasing amount of debugging </pre> </blockquote> diff --git a/example/ansic/clex.py b/example/ansic/clex.py index 6b9d7e7..7dc98cd 100644 --- a/example/ansic/clex.py +++ b/example/ansic/clex.py @@ -142,7 +142,7 @@ t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\'' # Comments def t_comment(t): - r' /\*(.|\n)*?\*/' + r'/\*(.|\n)*?\*/' t.lineno += t.value.count('\n') # Preprocessor directive (ignored) @@ -24,7 +24,7 @@ __version__ = "2.4" -import re, sys, types, copy +import re, sys, types, copy, os # This regular expression is used to match valid token names _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') @@ -35,10 +35,10 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') # the existence of ObjectType. 
try: - _INSTANCETYPE = (types.InstanceType, types.ObjectType) + _INSTANCETYPE = (types.InstanceType, types.ObjectType) except AttributeError: - _INSTANCETYPE = types.InstanceType - class object: pass # Note: needed if no new-style classes present + _INSTANCETYPE = types.InstanceType + class object: pass # Note: needed if no new-style classes present # Exception thrown when invalid token encountered and no default error # handler is defined. @@ -70,13 +70,6 @@ class LexToken(object): def skip(self,n): self.lexer.skip(n) _SkipWarning("Calling t.skip() on a token is deprecated. Please use t.lexer.skip()") - def __cmp__(self,other): - if isinstance(other,(types.StringType,types.UnicodeType)): - return cmp(self.type,other) - elif isinstance(other,types.TupleType): - return cmp((self.type,self.value),other) - else: - return cmp((self.type,self.value),(other.type,other.value)) # ----------------------------------------------------------------------------- # Lexer class @@ -144,8 +137,12 @@ class Lexer(object): # ------------------------------------------------------------ # writetab() - Write lexer information to a table file # ------------------------------------------------------------ - def writetab(self,tabfile): - tf = open(tabfile+".py","w") + def writetab(self,tabfile,outputdir=""): + if isinstance(tabfile,types.ModuleType): + return + basetabfilename = tabfile.rsplit(".",1)[-1] + filename = os.path.join(outputdir,basetabfilename)+".py" + tf = open(filename,"w") tf.write("# %s.py. This file automatically created by PLY (version %s). 
Don't edit!\n" % (tabfile,__version__)) tf.write("_lextokens = %s\n" % repr(self.lextokens)) tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) @@ -175,7 +172,10 @@ class Lexer(object): # readtab() - Read lexer information from a tab file # ------------------------------------------------------------ def readtab(self,tabfile,fdict): - exec "import %s as lextab" % tabfile + if isinstance(tabfile,types.ModuleType): + lextab = tabfile + else: + exec "import %s as lextab" % tabfile self.lextokens = lextab._lextokens self.lexreflags = lextab._lexreflags self.lexliterals = lextab._lexliterals @@ -492,7 +492,7 @@ def _statetoken(s,names): # # Build all of the regular expression rules from definitions in the supplied module # ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0): +def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir=""): global lexer ldict = None stateinfo = { 'INITIAL' : 'inclusive'} @@ -825,7 +825,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # If in optimize mode, we write the lextab if lextab and optimize: - lexobj.writetab(lextab) + lexobj.writetab(lextab,outputdir) return lexobj diff --git a/ply/yacc.py b/ply/yacc.py index 63bc176..fb9b0b5 100644 --- a/ply/yacc.py +++ b/ply/yacc.py @@ -77,10 +77,10 @@ class YaccError(Exception): pass # with Python 2.0 where types.ObjectType is undefined. 
try: - _INSTANCETYPE = (types.InstanceType, types.ObjectType) + _INSTANCETYPE = (types.InstanceType, types.ObjectType) except AttributeError: - _INSTANCETYPE = types.InstanceType - class object: pass # Note: needed if no new-style classes present + _INSTANCETYPE = types.InstanceType + class object: pass # Note: needed if no new-style classes present #----------------------------------------------------------------------------- # === LR Parsing Engine === @@ -510,7 +510,7 @@ def validate_dict(d): # Initialize all of the global variables used during grammar construction def initialize_vars(): global Productions, Prodnames, Prodmap, Terminals - global Nonterminals, First, Follow, Precedence, LRitems + global Nonterminals, First, Follow, Precedence, UsedPrecedence, LRitems global Errorfunc, Signature, Requires Productions = [None] # A list of all of the productions. The first @@ -536,6 +536,10 @@ def initialize_vars(): Precedence = { } # Precedence rules for each terminal. Contains tuples of the # form ('right',level) or ('nonassoc', level) or ('left',level) + UsedPrecedence = { } # Precedence rules that were actually used by the grammar. + # This is only used to provide error checking and to generate + # a warning about unused precedence rules. + LRitems = [ ] # A list of all LR items for the grammar. These are the # productions with the "dot" like E -> E . PLUS E @@ -718,6 +722,7 @@ def add_production(f,file,line,prodname,syms): return -1 else: p.prec = prec + UsedPrecedence[precname] = 1 del p.prod[i] del p.prod[i] continue @@ -1050,6 +1055,21 @@ def add_precedence(plist): return error # ----------------------------------------------------------------------------- +# check_precedence() +# +# Checks the use of the Precedence tables. 
This makes sure all of the symbols +# are terminals or were used with %prec +# ----------------------------------------------------------------------------- + +def check_precedence(): + error = 0 + for precname in Precedence.keys(): + if not (Terminals.has_key(precname) or UsedPrecedence.has_key(precname)): + sys.stderr.write("yacc: Precedence rule '%s' defined for unknown symbol '%s'\n" % (Precedence[precname][0],precname)) + error += 1 + return error + +# ----------------------------------------------------------------------------- # augment_grammar() # # Compute the augmented grammar. This is just a rule S' -> start where start @@ -1852,7 +1872,12 @@ def lr_parse_table(method): # ----------------------------------------------------------------------------- def lr_write_tables(modulename=tab_module,outputdir=''): - filename = os.path.join(outputdir,modulename) + ".py" + if isinstance(modulename, types.ModuleType): + print >>sys.stderr, "Warning module %s is inconsistent with the grammar (ignored)" % modulename + return + + basemodulename = modulename.rsplit(".",1)[-1] + filename = os.path.join(outputdir,basemodulename) + ".py" try: f = open(filename,"w") @@ -1969,8 +1994,11 @@ del _lr_goto_items def lr_read_tables(module=tab_module,optimize=0): global _lr_action, _lr_goto, _lr_productions, _lr_method try: - exec "import %s as parsetab" % module - + if isinstance(module,types.ModuleType): + parsetab = module + else: + exec "import %s as parsetab" % module + if (optimize) or (Signature.digest() == parsetab._lr_signature): _lr_action = parsetab._lr_action _lr_goto = parsetab._lr_goto @@ -2179,6 +2207,10 @@ def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, otherfunc = [ldict[f] for f in ldict.keys() if (type(f) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')] + # Check precedence rules + if check_precedence(): + error = 1 + if error: raise YaccError,"Unable to construct parser." 
@@ -2234,11 +2266,11 @@ def yacc_cleanup(): del _lr_action, _lr_goto, _lr_method, _lr_goto_cache global Productions, Prodnames, Prodmap, Terminals - global Nonterminals, First, Follow, Precedence, LRitems + global Nonterminals, First, Follow, Precedence, UsedPrecedence, LRitems global Errorfunc, Signature, Requires del Productions, Prodnames, Prodmap, Terminals - del Nonterminals, First, Follow, Precedence, LRitems + del Nonterminals, First, Follow, Precedence, UsedPrecedence, LRitems del Errorfunc, Signature, Requires global _vf, _vfc diff --git a/test/lex_state_norule.py b/test/lex_state_norule.py index e48a319..2d15248 100644 --- a/test/lex_state_norule.py +++ b/test/lex_state_norule.py @@ -1,4 +1,4 @@ -# lex_state2.py +# lex_state_norule.py # # Declaration of a state for which no rules are defined |