author    David Beazley <dave@dabeaz.com>    2008-05-04 15:46:44 +0000
committer David Beazley <dave@dabeaz.com>    2008-05-04 15:46:44 +0000
commit    73a258c24753f664577865239be7cd0a136e25b4 (patch)
tree      2cd2568c4798d9fcc914ba0323bb7e54c15f8b1f
parent    08efa8beab68f9b932a0eb92cc54166856f50a7a (diff)
download  ply-73a258c24753f664577865239be7cd0a136e25b4.tar.gz
Various bug fixes and cleanup
-rw-r--r--  CHANGES                   58
-rw-r--r--  doc/ply.html               4
-rw-r--r--  example/ansic/clex.py      2
-rw-r--r--  ply/lex.py                32
-rw-r--r--  ply/yacc.py               50
-rw-r--r--  test/lex_state_norule.py   2
6 files changed, 96 insertions(+), 52 deletions(-)
diff --git a/CHANGES b/CHANGES
index 58e06f0..78712b9 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,7 +1,41 @@
Version 2.4
-----------------------------
+05/04/08: beazley
+ lex() now has an outputdir that can specify the output directory for
+ tables when running in optimize mode. For example:
+
+ lexer = lex.lex(optimize=True, lextab="ltab", outputdir="foo/bar")
+
+ The behavior of specifying a table module and output directory is
+ now aligned with the behavior of yacc().
+
+05/04/08: beazley
+ [Issue 9]
+ Fixed a filename bug when specifying the module name in lex() and yacc().
+ If you specified options such as the following:
+
+ parser = yacc.yacc(tabmodule="foo.bar.parsetab",outputdir="foo/bar")
+
+ yacc would create a file "foo.bar.parsetab.py" in the given directory.
+ Now, it simply generates a file "parsetab.py" in that directory.
+ Bug reported by cptbinho.
+
+05/04/08: beazley
+ Slight modification to lex() and yacc() to allow their table files
+ to be loaded from a previously loaded module. This might make
+ it easier to load the parsing tables from a complicated package
+ structure. For example:
+
+ import foo.bar.spam.parsetab as parsetab
+ parser = yacc.yacc(tabmodule=parsetab)
+
+ Note: lex and yacc will never regenerate the table file if used
+ in this form---you will get a warning message instead.
+ This idea suggested by Brian Clapper.
+
+
04/28/08: beazley
- Fixed a big with p_erro() functions being picked up correctly
+ Fixed a bug with p_error() functions being picked up correctly
when running in yacc(optimize=1) mode. Patch contributed by
Bart Whiteley.
@@ -113,28 +147,6 @@ Version 2.4
would result in an error message about "No input given". Reported
by Andrew Dalke.
-03/13/07: beazley
- Modified the LexToken objects returned by the lexer so that they support
- comparisons. Comparisons can be performed against simple strings like
- this:
-
- if tok == 'NUMBER':
- ...
-
- In this case, the token type field is compared against the string. You
- can also compare against a tuple, like this:
-
- if tok == ('ID','foo')
-
- In this case, the tuple is taken to be the token type and token value.
- Last, but not least, two different tokens can be compared. Tokens are
- equal if they have the same type and value.
-
- One useful aspect of this addition is that token sequences can be easily
- compared. For example:
-
- if toks == ['NUMBER','+','NUMBER']:
-
Version 2.3
-----------------------------
02/20/07: beazley
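For reference, the three 05/04/08 entries above combine into two ways of
controlling where the generated tables live. A minimal usage sketch,
assuming a package directory foo/bar/ exists and the calling module
defines the usual tokens and p_ rules (the names mirror the changelog
examples and are otherwise hypothetical):

    import ply.lex as lex
    import ply.yacc as yacc

    # Write the generated tables into an explicit directory (new in 2.4):
    lexer = lex.lex(optimize=True, lextab="ltab", outputdir="foo/bar")
    parser = yacc.yacc(tabmodule="parsetab", outputdir="foo/bar")

    # Or hand yacc() an already-imported table module; as noted above,
    # the tables are never regenerated in this form:
    import foo.bar.parsetab as parsetab
    parser = yacc.yacc(tabmodule=parsetab)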
diff --git a/doc/ply.html b/doc/ply.html
index 8b64b67..ecffe04 100644
--- a/doc/ply.html
+++ b/doc/ply.html
@@ -2765,7 +2765,7 @@ each time it runs (which may take awhile depending on how large your grammar is)
<blockquote>
<pre>
-yacc.parse(debug=1)
+yacc.parse(debug=n) # Pick n > 1 for increased amounts of debugging
</pre>
</blockquote>
@@ -2774,7 +2774,7 @@ yacc.parse(debug=1)
<blockquote>
<pre>
-yacc.parse(debug=1, debugfile="debugging.out")
+yacc.parse(debug=n, debugfile="debugging.out") # Pick n > 1 for increased amounts of debugging
</pre>
</blockquote>
diff --git a/example/ansic/clex.py b/example/ansic/clex.py
index 6b9d7e7..7dc98cd 100644
--- a/example/ansic/clex.py
+++ b/example/ansic/clex.py
@@ -142,7 +142,7 @@ t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
# Comments
def t_comment(t):
- r' /\*(.|\n)*?\*/'
+ r'/\*(.|\n)*?\*/'
t.lineno += t.value.count('\n')
# Preprocessor directive (ignored)
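The clex.py fix above removes a stray leading space from the rule's
docstring. In PLY, a function rule's docstring is the token's regular
expression, so the space forced a literal blank before every comment and
a "/*...*/" at the start of input could never match. A quick illustration
with plain re (a sketch, not part of the patch):

    import re

    broken = re.compile(r' /\*(.|\n)*?\*/')   # old pattern, leading space
    fixed  = re.compile(r'/\*(.|\n)*?\*/')    # patched pattern

    print(broken.match("/* comment */"))      # None -- a space is required first
    print(fixed.match("/* comment */"))       # matches the whole comment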
diff --git a/ply/lex.py b/ply/lex.py
index dd85bac..64ff9ce 100644
--- a/ply/lex.py
+++ b/ply/lex.py
@@ -24,7 +24,7 @@
__version__ = "2.4"
-import re, sys, types, copy
+import re, sys, types, copy, os
# This regular expression is used to match valid token names
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
@@ -35,10 +35,10 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
# the existence of ObjectType.
try:
- _INSTANCETYPE = (types.InstanceType, types.ObjectType)
+ _INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
- _INSTANCETYPE = types.InstanceType
- class object: pass # Note: needed if no new-style classes present
+ _INSTANCETYPE = types.InstanceType
+ class object: pass # Note: needed if no new-style classes present
# Exception thrown when invalid token encountered and no default error
# handler is defined.
@@ -70,13 +70,6 @@ class LexToken(object):
def skip(self,n):
self.lexer.skip(n)
_SkipWarning("Calling t.skip() on a token is deprecated. Please use t.lexer.skip()")
- def __cmp__(self,other):
- if isinstance(other,(types.StringType,types.UnicodeType)):
- return cmp(self.type,other)
- elif isinstance(other,types.TupleType):
- return cmp((self.type,self.value),other)
- else:
- return cmp((self.type,self.value),(other.type,other.value))
# -----------------------------------------------------------------------------
# Lexer class
@@ -144,8 +137,12 @@ class Lexer(object):
# ------------------------------------------------------------
# writetab() - Write lexer information to a table file
# ------------------------------------------------------------
- def writetab(self,tabfile):
- tf = open(tabfile+".py","w")
+ def writetab(self,tabfile,outputdir=""):
+ if isinstance(tabfile,types.ModuleType):
+ return
+ basetabfilename = tabfile.rsplit(".",1)[-1]
+ filename = os.path.join(outputdir,basetabfilename)+".py"
+ tf = open(filename,"w")
tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
tf.write("_lextokens = %s\n" % repr(self.lextokens))
tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
@@ -175,7 +172,10 @@ class Lexer(object):
# readtab() - Read lexer information from a tab file
# ------------------------------------------------------------
def readtab(self,tabfile,fdict):
- exec "import %s as lextab" % tabfile
+ if isinstance(tabfile,types.ModuleType):
+ lextab = tabfile
+ else:
+ exec "import %s as lextab" % tabfile
self.lextokens = lextab._lextokens
self.lexreflags = lextab._lexreflags
self.lexliterals = lextab._lexliterals
@@ -492,7 +492,7 @@ def _statetoken(s,names):
#
# Build all of the regular expression rules from definitions in the supplied module
# -----------------------------------------------------------------------------
-def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0):
+def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir=""):
global lexer
ldict = None
stateinfo = { 'INITIAL' : 'inclusive'}
@@ -825,7 +825,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now
# If in optimize mode, we write the lextab
if lextab and optimize:
- lexobj.writetab(lextab)
+ lexobj.writetab(lextab,outputdir)
return lexobj
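The new writetab() strips any package prefix from the table name before
joining it with outputdir, which is the actual fix for Issue 9. The same
computation in isolation (values here are hypothetical):

    import os

    tabfile, outputdir = "foo.bar.lextab", "foo/bar"
    basetabfilename = tabfile.rsplit(".", 1)[-1]    # -> "lextab"
    filename = os.path.join(outputdir, basetabfilename) + ".py"
    print(filename)    # -> foo/bar/lextab.py on POSIX systems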
diff --git a/ply/yacc.py b/ply/yacc.py
index 63bc176..fb9b0b5 100644
--- a/ply/yacc.py
+++ b/ply/yacc.py
@@ -77,10 +77,10 @@ class YaccError(Exception): pass
# with Python 2.0 where types.ObjectType is undefined.
try:
- _INSTANCETYPE = (types.InstanceType, types.ObjectType)
+ _INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
- _INSTANCETYPE = types.InstanceType
- class object: pass # Note: needed if no new-style classes present
+ _INSTANCETYPE = types.InstanceType
+ class object: pass # Note: needed if no new-style classes present
#-----------------------------------------------------------------------------
# === LR Parsing Engine ===
@@ -510,7 +510,7 @@ def validate_dict(d):
# Initialize all of the global variables used during grammar construction
def initialize_vars():
global Productions, Prodnames, Prodmap, Terminals
- global Nonterminals, First, Follow, Precedence, LRitems
+ global Nonterminals, First, Follow, Precedence, UsedPrecedence, LRitems
global Errorfunc, Signature, Requires
Productions = [None] # A list of all of the productions. The first
@@ -536,6 +536,10 @@ def initialize_vars():
Precedence = { } # Precedence rules for each terminal. Contains tuples of the
# form ('right',level) or ('nonassoc', level) or ('left',level)
+ UsedPrecedence = { } # Precedence rules that were actually used by the grammar.
+ # This is only used to provide error checking and to generate
+ # a warning about unused precedence rules.
+
LRitems = [ ] # A list of all LR items for the grammar. These are the
# productions with the "dot" like E -> E . PLUS E
@@ -718,6 +722,7 @@ def add_production(f,file,line,prodname,syms):
return -1
else:
p.prec = prec
+ UsedPrecedence[precname] = 1
del p.prod[i]
del p.prod[i]
continue
@@ -1050,6 +1055,21 @@ def add_precedence(plist):
return error
# -----------------------------------------------------------------------------
+# check_precedence()
+#
+# Checks the use of the Precedence tables. This makes sure all of the symbols
+# are terminals or were used with %prec
+# -----------------------------------------------------------------------------
+
+def check_precedence():
+ error = 0
+ for precname in Precedence.keys():
+ if not (Terminals.has_key(precname) or UsedPrecedence.has_key(precname)):
+ sys.stderr.write("yacc: Precedence rule '%s' defined for unknown symbol '%s'\n" % (Precedence[precname][0],precname))
+ error += 1
+ return error
+
+# -----------------------------------------------------------------------------
# augment_grammar()
#
# Compute the augmented grammar. This is just a rule S' -> start where start
@@ -1852,7 +1872,12 @@ def lr_parse_table(method):
# -----------------------------------------------------------------------------
def lr_write_tables(modulename=tab_module,outputdir=''):
- filename = os.path.join(outputdir,modulename) + ".py"
+ if isinstance(modulename, types.ModuleType):
+ print >>sys.stderr, "Warning: module %s is inconsistent with the grammar (ignored)" % modulename
+ return
+
+ basemodulename = modulename.rsplit(".",1)[-1]
+ filename = os.path.join(outputdir,basemodulename) + ".py"
try:
f = open(filename,"w")
@@ -1969,8 +1994,11 @@ del _lr_goto_items
def lr_read_tables(module=tab_module,optimize=0):
global _lr_action, _lr_goto, _lr_productions, _lr_method
try:
- exec "import %s as parsetab" % module
-
+ if isinstance(module,types.ModuleType):
+ parsetab = module
+ else:
+ exec "import %s as parsetab" % module
+
if (optimize) or (Signature.digest() == parsetab._lr_signature):
_lr_action = parsetab._lr_action
_lr_goto = parsetab._lr_goto
@@ -2179,6 +2207,10 @@ def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module,
otherfunc = [ldict[f] for f in ldict.keys()
if (type(f) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')]
+ # Check precedence rules
+ if check_precedence():
+ error = 1
+
if error:
raise YaccError,"Unable to construct parser."
@@ -2234,11 +2266,11 @@ def yacc_cleanup():
del _lr_action, _lr_goto, _lr_method, _lr_goto_cache
global Productions, Prodnames, Prodmap, Terminals
- global Nonterminals, First, Follow, Precedence, LRitems
+ global Nonterminals, First, Follow, Precedence, UsedPrecedence, LRitems
global Errorfunc, Signature, Requires
del Productions, Prodnames, Prodmap, Terminals
- del Nonterminals, First, Follow, Precedence, LRitems
+ del Nonterminals, First, Follow, Precedence, UsedPrecedence, LRitems
del Errorfunc, Signature, Requires
global _vf, _vfc
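The new check_precedence() pass flags precedence declarations that name
neither a known terminal nor a symbol used with %prec. A hedged sketch of
a grammar module that yacc() would now refuse to build (the UMINUS entry
and token names are hypothetical):

    import ply.yacc as yacc

    tokens = ('NUMBER', 'PLUS')

    precedence = (
        ('left', 'PLUS'),
        ('right', 'UMINUS'),    # declared but never used via %prec
    )

    def p_expr_plus(p):
        'expr : expr PLUS expr'
        p[0] = p[1] + p[3]

    def p_expr_num(p):
        'expr : NUMBER'
        p[0] = p[1]

    # yacc.yacc() now prints
    #     yacc: Precedence rule 'right' defined for unknown symbol 'UMINUS'
    # and raises YaccError instead of silently ignoring the unused rule.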
diff --git a/test/lex_state_norule.py b/test/lex_state_norule.py
index e48a319..2d15248 100644
--- a/test/lex_state_norule.py
+++ b/test/lex_state_norule.py
@@ -1,4 +1,4 @@
-# lex_state2.py
+# lex_state_norule.py
#
# Declaration of a state for which no rules are defined