author    David Beazley <dave@dabeaz.com>    2008-05-04 15:46:44 +0000
committer David Beazley <dave@dabeaz.com>    2008-05-04 15:46:44 +0000
commit    73a258c24753f664577865239be7cd0a136e25b4 (patch)
tree      2cd2568c4798d9fcc914ba0323bb7e54c15f8b1f
parent    08efa8beab68f9b932a0eb92cc54166856f50a7a (diff)
download  ply-73a258c24753f664577865239be7cd0a136e25b4.tar.gz
Various bug fixes and cleanup
-rw-r--r--  CHANGES                   58
-rw-r--r--  doc/ply.html               4
-rw-r--r--  example/ansic/clex.py      2
-rw-r--r--  ply/lex.py                32
-rw-r--r--  ply/yacc.py               50
-rw-r--r--  test/lex_state_norule.py   2
6 files changed, 96 insertions(+), 52 deletions(-)
diff --git a/CHANGES b/CHANGES
index 58e06f0..78712b9 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,7 +1,41 @@
Version 2.4
-----------------------------
+05/04/08: beazley
+ lex() now has an outputdir that can specify the output directory for
+ tables when running in optimize mode. For example:
+
+ lexer = lex.lex(optimize=True, lextab="ltab", outputdir="foo/bar")
+
+ The behavior of specifying a table module and output directory is
+ now aligned with the behavior of yacc().
+
+05/04/08: beazley
+ [Issue 9]
+ Fixed a filename bug when specifying the module name in lex() and yacc().
+ If you specified options such as the following:
+
+ parser = yacc.yacc(tabmodule="foo.bar.parsetab",outputdir="foo/bar")
+
+ yacc would create a file "foo.bar.parsetab.py" in the given directory.
+ Now, it simply generates a file "parsetab.py" in that directory.
+ Bug reported by cptbinho.
+
+05/04/08: beazley
+ Slight modification to lex() and yacc() to allow their table files
+ to be loaded from a previously loaded module. This might make
+ it easier to load the parsing tables from a complicated package
+ structure. For example:
+
+ import foo.bar.spam.parsetab as parsetab
+ parser = yacc.yacc(tabmodule=parsetab)
+
+ Note: lex and yacc will never regenerate the table file if used
+ in this form---you will get a warning message instead.
+ This idea suggested by Brian Clapper.
+
+
04/28/08: beazley
- Fixed a big with p_erro() functions being picked up correctly
+ Fixed a bug with p_error() functions being picked up correctly
when running in yacc(optimize=1) mode. Patch contributed by
Bart Whiteley.
@@ -113,28 +147,6 @@ Version 2.4
would result in an error message about "No input given". Reported
by Andrew Dalke.
-03/13/07: beazley
- Modified the LexToken objects returned by the lexer so that they support
- comparisons. Comparisons can be performed against simple strings like
- this:
-
- if tok == 'NUMBER':
- ...
-
- In this case, the token type field is compared against the string. You
- can also compare against a tuple, like this:
-
- if tok == ('ID','foo')
-
- In this case, the tuple is taken to be the token type and token value.
- Last, but not least, two different tokens can be compared. Tokens are
- equal if they have the same type and value.
-
- One useful aspect of this addition is that token sequences can be easily
- compared. For example:
-
- if toks == ['NUMBER','+','NUMBER']:
-
Version 2.3
-----------------------------
02/20/07: beazley
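For reference, the three 05/04/08 entries above combine into two ways of
controlling where the generated tables live. A minimal usage sketch,
assuming a package directory foo/bar/ exists and the calling module
defines the usual tokens and p_ rules (the names mirror the changelog
examples and are otherwise hypothetical):

    import ply.lex as lex
    import ply.yacc as yacc

    # Write the generated tables into an explicit directory (new in 2.4):
    lexer = lex.lex(optimize=True, lextab="ltab", outputdir="foo/bar")
    parser = yacc.yacc(tabmodule="parsetab", outputdir="foo/bar")

    # Or hand yacc() an already-imported table module; as noted above,
    # the tables are never regenerated in this form:
    import foo.bar.parsetab as parsetab
    parser = yacc.yacc(tabmodule=parsetab)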
diff --git a/doc/ply.html b/doc/ply.html
index 8b64b67..ecffe04 100644
--- a/doc/ply.html
+++ b/doc/ply.html
@@ -2765,7 +2765,7 @@ each time it runs (which may take awhile depending on how large your grammar is)
<blockquote>
<pre>
-yacc.parse(debug=1)
+yacc.parse(debug=n) # Pick n > 1 for increased amounts of debugging
</pre>
</blockquote>
@@ -2774,7 +2774,7 @@ yacc.parse(debug=1)
<blockquote>
<pre>
-yacc.parse(debug=1, debugfile="debugging.out")
+yacc.parse(debug=n, debugfile="debugging.out") # Pick n > 1 for increased amounts of debugging
</pre>
</blockquote>
diff --git a/example/ansic/clex.py b/example/ansic/clex.py
index 6b9d7e7..7dc98cd 100644
--- a/example/ansic/clex.py
+++ b/example/ansic/clex.py
@@ -142,7 +142,7 @@ t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
# Comments
def t_comment(t):
- r' /\*(.|\n)*?\*/'
+ r'/\*(.|\n)*?\*/'
t.lineno += t.value.count('\n')
# Preprocessor directive (ignored)
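The clex.py fix above removes a stray leading space from the rule's
docstring. In PLY, a function rule's docstring is the token's regular
expression, so the space forced a literal blank before every comment and
a "/*...*/" at the start of input could never match. A quick illustration
with plain re (a sketch, not part of the patch):

    import re

    broken = re.compile(r' /\*(.|\n)*?\*/')   # old pattern, leading space
    fixed  = re.compile(r'/\*(.|\n)*?\*/')    # patched pattern

    print(broken.match("/* comment */"))      # None -- a space is required first
    print(fixed.match("/* comment */"))       # matches the whole comment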
diff --git a/ply/lex.py b/ply/lex.py
index dd85bac..64ff9ce 100644
--- a/ply/lex.py
+++ b/ply/lex.py
@@ -24,7 +24,7 @@
__version__ = "2.4"
-import re, sys, types, copy
+import re, sys, types, copy, os
# This regular expression is used to match valid token names
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
@@ -35,10 +35,10 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
# the existence of ObjectType.
try:
- _INSTANCETYPE = (types.InstanceType, types.ObjectType)
+ _INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
- _INSTANCETYPE = types.InstanceType
- class object: pass # Note: needed if no new-style classes present
+ _INSTANCETYPE = types.InstanceType
+ class object: pass # Note: needed if no new-style classes present
# Exception thrown when invalid token encountered and no default error
# handler is defined.
@@ -70,13 +70,6 @@ class LexToken(object):
def skip(self,n):
self.lexer.skip(n)
_SkipWarning("Calling t.skip() on a token is deprecated. Please use t.lexer.skip()")
- def __cmp__(self,other):
- if isinstance(other,(types.StringType,types.UnicodeType)):
- return cmp(self.type,other)
- elif isinstance(other,types.TupleType):
- return cmp((self.type,self.value),other)
- else:
- return cmp((self.type,self.value),(other.type,other.value))
# -----------------------------------------------------------------------------
# Lexer class
@@ -144,8 +137,12 @@ class Lexer(object):
# ------------------------------------------------------------
# writetab() - Write lexer information to a table file
# ------------------------------------------------------------
- def writetab(self,tabfile):
- tf = open(tabfile+".py","w")
+ def writetab(self,tabfile,outputdir=""):
+ if isinstance(tabfile,types.ModuleType):
+ return
+ basetabfilename = tabfile.rsplit(".",1)[-1]
+ filename = os.path.join(outputdir,basetabfilename)+".py"
+ tf = open(filename,"w")
tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
tf.write("_lextokens = %s\n" % repr(self.lextokens))
tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
@@ -175,7 +172,10 @@ class Lexer(object):
# readtab() - Read lexer information from a tab file
# ------------------------------------------------------------
def readtab(self,tabfile,fdict):
- exec "import %s as lextab" % tabfile
+ if isinstance(tabfile,types.ModuleType):
+ lextab = tabfile
+ else:
+ exec "import %s as lextab" % tabfile
self.lextokens = lextab._lextokens
self.lexreflags = lextab._lexreflags
self.lexliterals = lextab._lexliterals
@@ -492,7 +492,7 @@ def _statetoken(s,names):
#
# Build all of the regular expression rules from definitions in the supplied module
# -----------------------------------------------------------------------------
-def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0):
+def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir=""):
global lexer
ldict = None
stateinfo = { 'INITIAL' : 'inclusive'}
@@ -825,7 +825,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now
# If in optimize mode, we write the lextab
if lextab and optimize:
- lexobj.writetab(lextab)
+ lexobj.writetab(lextab,outputdir)
return lexobj
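The new writetab() strips any package prefix from the table name before
joining it with outputdir, which is the actual fix for Issue 9. The same
computation in isolation (values here are hypothetical):

    import os

    tabfile, outputdir = "foo.bar.lextab", "foo/bar"
    basetabfilename = tabfile.rsplit(".", 1)[-1]    # -> "lextab"
    filename = os.path.join(outputdir, basetabfilename) + ".py"
    print(filename)    # -> foo/bar/lextab.py on POSIX systems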
diff --git a/ply/yacc.py b/ply/yacc.py
index 63bc176..fb9b0b5 100644
--- a/ply/yacc.py
+++ b/ply/yacc.py
@@ -77,10 +77,10 @@ class YaccError(Exception): pass
# with Python 2.0 where types.ObjectType is undefined.
try:
- _INSTANCETYPE = (types.InstanceType, types.ObjectType)
+ _INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
- _INSTANCETYPE = types.InstanceType
- class object: pass # Note: needed if no new-style classes present
+ _INSTANCETYPE = types.InstanceType
+ class object: pass # Note: needed if no new-style classes present
#-----------------------------------------------------------------------------
# === LR Parsing Engine ===
@@ -510,7 +510,7 @@ def validate_dict(d):
# Initialize all of the global variables used during grammar construction
def initialize_vars():
global Productions, Prodnames, Prodmap, Terminals
- global Nonterminals, First, Follow, Precedence, LRitems
+ global Nonterminals, First, Follow, Precedence, UsedPrecedence, LRitems
global Errorfunc, Signature, Requires
Productions = [None] # A list of all of the productions. The first
@@ -536,6 +536,10 @@ def initialize_vars():
Precedence = { } # Precedence rules for each terminal. Contains tuples of the
# form ('right',level) or ('nonassoc', level) or ('left',level)
+ UsedPrecedence = { } # Precedence rules that were actually used by the grammar.
+ # This is only used to provide error checking and to generate
+ # a warning about unused precedence rules.
+
LRitems = [ ] # A list of all LR items for the grammar. These are the
# productions with the "dot" like E -> E . PLUS E
@@ -718,6 +722,7 @@ def add_production(f,file,line,prodname,syms):
return -1
else:
p.prec = prec
+ UsedPrecedence[precname] = 1
del p.prod[i]
del p.prod[i]
continue
@@ -1050,6 +1055,21 @@ def add_precedence(plist):
return error
# -----------------------------------------------------------------------------
+# check_precedence()
+#
+# Checks the use of the Precedence tables. This makes sure all of the symbols
+# are terminals or were used with %prec
+# -----------------------------------------------------------------------------
+
+def check_precedence():
+ error = 0
+ for precname in Precedence.keys():
+ if not (Terminals.has_key(precname) or UsedPrecedence.has_key(precname)):
+ sys.stderr.write("yacc: Precedence rule '%s' defined for unknown symbol '%s'\n" % (Precedence[precname][0],precname))
+ error += 1
+ return error
+
+# -----------------------------------------------------------------------------
# augment_grammar()
#
# Compute the augmented grammar. This is just a rule S' -> start where start
@@ -1852,7 +1872,12 @@ def lr_parse_table(method):
# -----------------------------------------------------------------------------
def lr_write_tables(modulename=tab_module,outputdir=''):
- filename = os.path.join(outputdir,modulename) + ".py"
+ if isinstance(modulename, types.ModuleType):
+ print >>sys.stderr, "Warning: module %s is inconsistent with the grammar (ignored)" % modulename
+ return
+
+ basemodulename = modulename.rsplit(".",1)[-1]
+ filename = os.path.join(outputdir,basemodulename) + ".py"
try:
f = open(filename,"w")
@@ -1969,8 +1994,11 @@ del _lr_goto_items
def lr_read_tables(module=tab_module,optimize=0):
global _lr_action, _lr_goto, _lr_productions, _lr_method
try:
- exec "import %s as parsetab" % module
-
+ if isinstance(module,types.ModuleType):
+ parsetab = module
+ else:
+ exec "import %s as parsetab" % module
+
if (optimize) or (Signature.digest() == parsetab._lr_signature):
_lr_action = parsetab._lr_action
_lr_goto = parsetab._lr_goto
@@ -2179,6 +2207,10 @@ def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module,
otherfunc = [ldict[f] for f in ldict.keys()
if (type(f) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')]
+ # Check precedence rules
+ if check_precedence():
+ error = 1
+
if error:
raise YaccError,"Unable to construct parser."
@@ -2234,11 +2266,11 @@ def yacc_cleanup():
del _lr_action, _lr_goto, _lr_method, _lr_goto_cache
global Productions, Prodnames, Prodmap, Terminals
- global Nonterminals, First, Follow, Precedence, LRitems
+ global Nonterminals, First, Follow, Precedence, UsedPrecedence, LRitems
global Errorfunc, Signature, Requires
del Productions, Prodnames, Prodmap, Terminals
- del Nonterminals, First, Follow, Precedence, LRitems
+ del Nonterminals, First, Follow, Precedence, UsedPrecedence, LRitems
del Errorfunc, Signature, Requires
global _vf, _vfc
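The new check_precedence() pass flags precedence declarations that name
neither a known terminal nor a symbol used with %prec. A hedged sketch of
a grammar module that yacc() would now refuse to build (the UMINUS entry
and token names are hypothetical):

    import ply.yacc as yacc

    tokens = ('NUMBER', 'PLUS')

    precedence = (
        ('left', 'PLUS'),
        ('right', 'UMINUS'),    # declared but never used via %prec
    )

    def p_expr_plus(p):
        'expr : expr PLUS expr'
        p[0] = p[1] + p[3]

    def p_expr_num(p):
        'expr : NUMBER'
        p[0] = p[1]

    # yacc.yacc() now prints
    #     yacc: Precedence rule 'right' defined for unknown symbol 'UMINUS'
    # and raises YaccError instead of silently ignoring the unused rule.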
diff --git a/test/lex_state_norule.py b/test/lex_state_norule.py
index e48a319..2d15248 100644
--- a/test/lex_state_norule.py
+++ b/test/lex_state_norule.py
@@ -1,4 +1,4 @@
-# lex_state2.py
+# lex_state_norule.py
#
# Declaration of a state for which no rules are defined