summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- doc/ply.html 153
-rw-r--r-- example/BASIC/basiclex.py 2
-rw-r--r-- ply/lex.py 32
-rw-r--r-- ply/yacc.py 34
4 files changed, 178 insertions, 43 deletions
diff --git a/doc/ply.html b/doc/ply.html
index f9fe036..fca0966 100644
--- a/doc/ply.html
+++ b/doc/ply.html
@@ -72,10 +72,23 @@ dave@dabeaz.com<br>
<!-- INDEX -->
+<h2>Preface and Requirements</h2>
+<p>
+This document provides an overview of lexing and parsing with PLY.
+Given the intrinsic complexity of parsing, I would strongly advise
+that you read (or at least skim) this entire document before jumping
+into a big development project with PLY.
+</p>
-
-
+<p>
+PLY-3.0 is compatible with both Python 2 and Python 3. Be aware that
+Python 3 support is new and has not been extensively tested (although
+all of the examples and unit tests pass under Python 3.0). If you are
+using Python 2, you should try to use Python 2.4 or newer. Although PLY
+works with versions as far back as Python 2.2, some of its optional features
+require more modern library modules.
+</p>
<H2><a name="ply_nn1"></a>1. Introduction</H2>
@@ -392,11 +405,7 @@ converts the string into a Python integer.
<pre>
def t_NUMBER(t):
r'\d+'
- try:
- t.value = int(t.value)
- except ValueError:
- print "Number %s is too large!" % t.value
- t.value = 0
+ t.value = int(t.value)
return t
</pre>
</blockquote>
@@ -427,8 +436,8 @@ expressions in order of decreasing length, this problem is solved for rules defi
the order can be explicitly controlled since rules appearing first are checked first.
<p>
-To handle reserved words, it is usually easier to just match an identifier and do a special name lookup in a function
-like this:
+To handle reserved words, you should write a single rule to match an
+identifier and do a special name lookup in a function like this:
<blockquote>
<pre>
@@ -741,12 +750,16 @@ lexer = lex.lex(debug=1)
</pre>
</blockquote>
-This will result in a large amount of debugging information to be printed including all of the added rules and the master
-regular expressions.
+<p>
+This will produce various sorts of debugging information including all of the added rules,
+the master regular expressions used by the lexer, and tokens generated during lexing.
+</p>
+<p>
In addition, <tt>lex.py</tt> comes with a simple main function which
will either tokenize input read from standard input or from a file specified
on the command line. To use it, simply put this in your lexer:
+</p>
<blockquote>
<pre>
@@ -755,6 +768,9 @@ if __name__ == '__main__':
</pre>
</blockquote>
+Please refer to the "Advanced Debugging" section near the end for more details
+on debugging.
+
<H3><a name="ply_nn17"></a>3.14 Alternative specification of lexers</H3>
@@ -2990,16 +3006,7 @@ each time it runs (which may take awhile depending on how large your grammar is)
<blockquote>
<pre>
-yacc.parse(debug=n) # Pick n > 1 for increased amounts of debugging
-</pre>
-</blockquote>
-
-<p>
-<li>To redirect the debugging output to a filename of your choosing, use:
-
-<blockquote>
-<pre>
-yacc.parse(debug=n, debugfile="debugging.out") # Pick n > 1 for increasing amount of debugging
+yacc.parse(debug=1)
</pre>
</blockquote>
@@ -3117,9 +3124,107 @@ the tables without the need for doc strings.
<p>
Beware: running PLY in optimized mode disables a lot of error
checking. You should only do this when your project has stabilized
-and you don't need to do any debugging.
-
-<H2><a name="ply_nn39"></a>8. Where to go from here?</H2>
+and you don't need to do any debugging. One of the purposes of
+optimized mode is to substantially decrease the startup time of
+your compiler (by assuming that everything is already properly
+specified and works).
+
+<H2>8. Advanced Debugging</H2>
+
+<p>
+Debugging a compiler is typically not an easy task. PLY provides some
+advanced diagnostic capabilities through the use of Python's
+<tt>logging</tt> module. The next two sections describe this:
+
+<h3>8.1 Debugging the lex() and yacc() commands</h3>
+
+<p>
+Both the <tt>lex()</tt> and <tt>yacc()</tt> commands have a debugging
+mode that can be enabled using the <tt>debug</tt> flag. For example:
+
+<blockquote>
+<pre>
+lex.lex(debug=True)
+yacc.yacc(debug=True)
+</pre>
+</blockquote>
+
+Normally, the output produced by debugging is routed to either
+standard error or, in the case of <tt>yacc()</tt>, to a file
+<tt>parser.out</tt>. This output can be more carefully controlled
+by supplying a logging object. Here is an example that adds
+information about where different debugging messages are coming from:
+
+<blockquote>
+<pre>
+# Set up a logging object
+import logging
+logging.basicConfig(
+ level = logging.DEBUG,
+ filename = "parselog.txt",
+ filemode = "w",
+ format = "%(filename)10s:%(lineno)4d:%(message)s"
+)
+log = logging.getLogger()
+
+lex.lex(debug=True,debuglog=log)
+yacc.yacc(debug=True,debuglog=log)
+</pre>
+</blockquote>
+
+If you supply a custom logger, the amount of debugging
+information produced can be controlled by setting the logging level.
+Typically, debugging messages are issued at the <tt>DEBUG</tt>,
+<tt>INFO</tt>, or <tt>WARNING</tt> levels.
+
+<p>
+PLY's error messages and warnings are also produced using the logging
+interface. This can be controlled by passing a logging object
+using the <tt>errorlog</tt> parameter.
+
+<blockquote>
+<pre>
+lex.lex(errorlog=log)
+yacc.yacc(errorlog=log)
+</pre>
+</blockquote>
+
+If you want to completely silence warnings, you can either pass in a
+logging object with an appropriate filter level or use the <tt>NullLogger</tt>
+object defined in either <tt>lex</tt> or <tt>yacc</tt>. For example:
+
+<blockquote>
+<pre>
+yacc.yacc(errorlog=yacc.NullLogger())
+</pre>
+</blockquote>
+
+<h3>8.2 Run-time Debugging</h3>
+
+<p>
+To enable run-time debugging of a parser, use the <tt>debug</tt> option to parse. This
+option can either be an integer (which simply turns debugging on or off) or an instance
+of a logger object. For example:
+
+<blockquote>
+<pre>
+log = logging.getLogger()
+parser.parse(input,debug=log)
+</pre>
+</blockquote>
+
+If a logging object is passed, you can use its filtering level to control how much
+output gets generated. The <tt>INFO</tt> level is used to produce information
+about rule reductions. The <tt>DEBUG</tt> level will show information about the
+parsing stack, token shifts, and other details. The <tt>ERROR</tt> level shows information
+related to parsing errors.
+
+<p>
+For very complicated problems, you should pass in a logging object that
+redirects to a file where you can more easily inspect the output after
+execution.
+
+<H2><a name="ply_nn39"></a>9. Where to go from here?</H2>
The <tt>examples</tt> directory of the PLY distribution contains several simple examples. Please consult a
diff --git a/example/BASIC/basiclex.py b/example/BASIC/basiclex.py
index 4317e0f..3d27cde 100644
--- a/example/BASIC/basiclex.py
+++ b/example/BASIC/basiclex.py
@@ -54,7 +54,7 @@ def t_error(t):
print("Illegal character %s" % t.value[0])
t.lexer.skip(1)
-lex.lex()
+lex.lex(debug=0)
diff --git a/ply/lex.py b/ply/lex.py
index 18245c7..5ffed48 100644
--- a/ply/lex.py
+++ b/ply/lex.py
@@ -78,11 +78,8 @@ class PlyLogger(object):
def error(self,msg,*args,**kwargs):
self.f.write("ERROR: " + (msg % args) + "\n")
- def info(self,msg,*args,**kwargs):
- pass
-
- def debug(self,msg,*args,**kwargs):
- pass
+ info = critical
+ debug = critical
# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
@@ -289,7 +286,7 @@ class Lexer:
self.lexpos += n
# ------------------------------------------------------------
- # token() - Return the next token from the Lexer
+ # opttoken() - Return the next token from the Lexer
#
# Note: This function has been carefully implemented to be as fast
# as possible. Don't make changes unless you really know what
@@ -855,7 +852,7 @@ class LexerReflect(object):
#
# Build all of the regular expression rules from definitions in the supplied module
# -----------------------------------------------------------------------------
-def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,outputdir="",errorlog=None):
+def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,outputdir="", debuglog=None, errorlog=None):
global lexer
ldict = None
stateinfo = { 'INITIAL' : 'inclusive'}
@@ -866,6 +863,10 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,out
if errorlog is None:
errorlog = PlyLogger(sys.stderr)
+ if debug:
+ if debuglog is None:
+ debuglog = PlyLogger(sys.stderr)
+
# Get the module dictionary used for the lexer
if object: module = object
@@ -893,9 +894,11 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,out
except ImportError:
pass
- # Get the tokens, states, and literals variables (if any)
-
- states = ldict.get("states",None)
+ # Dump some basic debugging information
+ if debug:
+ debuglog.info("lex: tokens = %r", linfo.tokens)
+ debuglog.info("lex: literals = %r", linfo.literals)
+ debuglog.info("lex: states = %r", linfo.stateinfo)
# Build a dictionary of valid token names
lexobj.lextokens = { }
@@ -921,15 +924,22 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,out
line = func_code(f).co_firstlineno
file = func_code(f).co_filename
regex_list.append("(?P<%s>%s)" % (fname,f.__doc__))
+ if debug:
+ debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state)
# Now add all of the simple rules
for name,r in linfo.strsym[state]:
regex_list.append("(?P<%s>%s)" % (name,r))
+ if debug:
+ debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state)
regexs[state] = regex_list
# Build the master regular expressions
+ if debug:
+ debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")
+
for state in regexs:
lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames)
lexobj.lexstatere[state] = lexre
@@ -937,7 +947,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,out
lexobj.lexstaterenames[state] = re_names
if debug:
for i in range(len(re_text)):
- errorlog.debug("lex: state '%s'. regex[%d] = '%s'",state, i, re_text[i])
+ debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i])
# For inclusive states, we need to add the regular expressions from the INITIAL state
for state,stype in stateinfo.items():
diff --git a/ply/yacc.py b/ply/yacc.py
index a2b84c3..2de5ffe 100644
--- a/ply/yacc.py
+++ b/ply/yacc.py
@@ -71,6 +71,8 @@ error_count = 3 # Number of symbols that must be shifted to leave
yaccdevel = 0 # Set to True if developing yacc. This turns off optimized
# implementations of certain functions.
+resultlimit = 40 # Size limit of results when running in debug mode.
+
import re, types, sys, os.path
# Compatibility function for python 2.6/3.0
@@ -126,6 +128,23 @@ class NullLogger(object):
# Exception raised for yacc-related errors
class YaccError(Exception): pass
+# Format the result message that the parser produces when running in debug mode.
+def format_result(r):
+ repr_str = repr(r)
+ if len(repr_str) > resultlimit:
+ repr_str = repr_str[:resultlimit]+" ..."
+ result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
+ return result
+
+
+# Format stack entries when the parser is running in debug mode
+def format_stack_entry(r):
+ repr_str = repr(r)
+ if len(repr_str) < 16:
+ return repr_str
+ else:
+ return "<%s @ 0x%x>" % (type(r).__name__,id(r))
+
#-----------------------------------------------------------------------------
# === LR Parsing Engine ===
#
@@ -264,7 +283,7 @@ class LRParser:
if not lexer:
lex = load_ply_lex()
lexer = lex.lexer
-
+
# Set up the lexer and parser objects on pslice
pslice.lexer = lexer
pslice.parser = self
@@ -354,7 +373,7 @@ class LRParser:
# --! DEBUG
if plen:
- debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [_v.value for _v in symstack[-plen:]],-t)
+ debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
else:
debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
@@ -388,7 +407,7 @@ class LRParser:
del statestack[-plen:]
p.callable(pslice)
# --! DEBUG
- debug.info("Result : %r", pslice[0])
+ debug.info("Result : %s", format_result(pslice[0]))
# --! DEBUG
symstack.append(sym)
state = goto[statestack[-1]][pname]
@@ -427,7 +446,7 @@ class LRParser:
# Call the grammar rule with our special slice object
p.callable(pslice)
# --! DEBUG
- debug.info("Result : %r", pslice[0])
+ debug.info("Result : %s", format_result(pslice[0]))
# --! DEBUG
symstack.append(sym)
state = goto[statestack[-1]][pname]
@@ -449,7 +468,7 @@ class LRParser:
n = symstack[-1]
result = getattr(n,"value",None)
# --! DEBUG
- debug.info("Done : Returning %r", result)
+ debug.info("Done : Returning %s", format_result(result))
debug.info("PLY: PARSE DEBUG END")
# --! DEBUG
return result
@@ -2717,7 +2736,7 @@ class ParserReflect(object):
sig = crc32(f[3].encode('latin-1'),sig)
except (TypeError,ValueError):
pass
- return sig & 0xffffffff
+ return sig
# -----------------------------------------------------------------------------
# validate_file()
@@ -3109,7 +3128,8 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star
raise YaccError("Unable to build parser")
# Run the LRGeneratedTable on the grammar
- errorlog.debug("Generating %s tables", method)
+ if debug:
+ errorlog.debug("Generating %s tables", method)
lr = LRGeneratedTable(grammar,method,debuglog)