diff options
-rw-r--r-- | doc/ply.html | 153 | ||||
-rw-r--r-- | example/BASIC/basiclex.py | 2 | ||||
-rw-r--r-- | ply/lex.py | 32 | ||||
-rw-r--r-- | ply/yacc.py | 34 |
4 files changed, 178 insertions, 43 deletions
diff --git a/doc/ply.html b/doc/ply.html index f9fe036..fca0966 100644 --- a/doc/ply.html +++ b/doc/ply.html @@ -72,10 +72,23 @@ dave@dabeaz.com<br> <!-- INDEX --> +<h2>Preface and Requirements</h2> +<p> +This document provides an overview of lexing and parsing with PLY. +Given the intrinsic complexity of parsing, I would strongly advise +that you read (or at least skim) this entire document before jumping +into a big development project with PLY. +</p> - - +<p> +PLY-3.0 is compatible with both Python 2 and Python 3. Be aware that +Python 3 support is new and has not been extensively tested (although +all of the examples and unit tests pass under Python 3.0). If you are +using Python 2, you should try to use Python 2.4 or newer. Although PLY +works with versions as far back as Python 2.2, some of its optional features +require more modern library modules. +</p> <H2><a name="ply_nn1"></a>1. Introduction</H2> @@ -392,11 +405,7 @@ converts the string into a Python integer. <pre> def t_NUMBER(t): r'\d+' - try: - t.value = int(t.value) - except ValueError: - print "Number %s is too large!" % t.value - t.value = 0 + t.value = int(t.value) return t </pre> </blockquote> @@ -427,8 +436,8 @@ expressions in order of decreasing length, this problem is solved for rules defi the order can be explicitly controlled since rules appearing first are checked first. <p> -To handle reserved words, it is usually easier to just match an identifier and do a special name lookup in a function -like this: +To handle reserved words, you should write a single rule to match an +identifier and do a special name lookup in a function like this: <blockquote> <pre> @@ -741,12 +750,16 @@ lexer = lex.lex(debug=1) </pre> </blockquote> -This will result in a large amount of debugging information to be printed including all of the added rules and the master -regular expressions. 
+<p> +This will produce various sorts of debugging information including all of the added rules, +the master regular expressions used by the lexer, and tokens generated during lexing. +</p> +<p> In addition, <tt>lex.py</tt> comes with a simple main function which will either tokenize input read from standard input or from a file specified on the command line. To use it, simply put this in your lexer: +</p> <blockquote> <pre> @@ -755,6 +768,9 @@ if __name__ == '__main__': </pre> </blockquote> +Please refer to the "Debugging" section near the end for some more advanced details +of debugging. + <H3><a name="ply_nn17"></a>3.14 Alternative specification of lexers</H3> @@ -2990,16 +3006,7 @@ each time it runs (which may take awhile depending on how large your grammar is) <blockquote> <pre> -yacc.parse(debug=n) # Pick n > 1 for increased amounts of debugging -</pre> -</blockquote> - -<p> -<li>To redirect the debugging output to a filename of your choosing, use: - -<blockquote> -<pre> -yacc.parse(debug=n, debugfile="debugging.out") # Pick n > 1 for increasing amount of debugging +yacc.parse(debug=1) </pre> </blockquote> @@ -3117,9 +3124,107 @@ the tables without the need for doc strings. <p> Beware: running PLY in optimized mode disables a lot of error checking. You should only do this when your project has stabilized -and you don't need to do any debugging. - -<H2><a name="ply_nn39"></a>8. Where to go from here?</H2> +and you don't need to do any debugging. One of the purposes of +optimized mode is to substantially decrease the startup time of +your compiler (by assuming that everything is already properly +specified and works). + +<H2>8. Advanced Debugging</H2> + +<p> +Debugging a compiler is typically not an easy task. PLY provides some +advanced diagnostic capabilities through the use of Python's +<tt>logging</tt> module. 
The next two sections describe this: + +<h3>8.1 Debugging the lex() and yacc() commands</h3> + +<p> +Both the <tt>lex()</tt> and <tt>yacc()</tt> commands have a debugging +mode that can be enabled using the <tt>debug</tt> flag. For example: + +<blockquote> +<pre> +lex.lex(debug=True) +yacc.yacc(debug=True) +</pre> +</blockquote> + +Normally, the output produced by debugging is routed to either +standard error or, in the case of <tt>yacc()</tt>, to a file +<tt>parser.out</tt>. This output can be more carefully controlled +by supplying a logging object. Here is an example that adds +information about where different debugging messages are coming from: + +<blockquote> +<pre> +# Set up a logging object +import logging +logging.basicConfig( + level = logging.DEBUG, + filename = "parselog.txt", + filemode = "w", + format = "%(filename)10s:%(lineno)4d:%(message)s" +) +log = logging.getLogger() + +lex.lex(debug=True,debuglog=log) +yacc.yacc(debug=True,debuglog=log) +</pre> +</blockquote> + +If you supply a custom logger, the amount of debugging +information produced can be controlled by setting the logging level. +Typically, debugging messages are either issued at the <tt>DEBUG</tt>, +<tt>INFO</tt>, or <tt>WARNING</tt> levels. + +<p> +PLY's error messages and warnings are also produced using the logging +interface. This can be controlled by passing a logging object +using the <tt>errorlog</tt> parameter. + +<blockquote> +<pre> +lex.lex(errorlog=log) +yacc.yacc(errorlog=log) +</pre> +</blockquote> + +If you want to completely silence warnings, you can either pass in a +logging object with an appropriate filter level or use the <tt>NullLogger</tt> +object defined in either <tt>lex</tt> or <tt>yacc</tt>. For example: + +<blockquote> +<pre> +yacc.yacc(errorlog=yacc.NullLogger()) +</pre> +</blockquote> + +<h3>8.2 Run-time Debugging</h3> + +<p> +To enable run-time debugging of a parser, use the <tt>debug</tt> option to parse. 
This +option can either be an integer (which simply turns debugging on or off) or an instance +of a logger object. For example: + +<blockquote> +<pre> +log = logging.getLogger() +parser.parse(input,debug=log) +</pre> +</blockquote> + +If a logging object is passed, you can use its filtering level to control how much +output gets generated. The <tt>INFO</tt> level is used to produce information +about rule reductions. The <tt>DEBUG</tt> level will show information about the +parsing stack, token shifts, and other details. The <tt>ERROR</tt> level shows information +related to parsing errors. + +<p> +For very complicated problems, you should pass in a logging object that +redirects to a file where you can more easily inspect the output after +execution. + +<H2><a name="ply_nn39"></a>9. Where to go from here?</H2> The <tt>examples</tt> directory of the PLY distribution contains several simple examples. Please consult a diff --git a/example/BASIC/basiclex.py b/example/BASIC/basiclex.py index 4317e0f..3d27cde 100644 --- a/example/BASIC/basiclex.py +++ b/example/BASIC/basiclex.py @@ -54,7 +54,7 @@ def t_error(t): print("Illegal character %s" % t.value[0]) t.lexer.skip(1) -lex.lex() +lex.lex(debug=0) @@ -78,11 +78,8 @@ class PlyLogger(object): def error(self,msg,*args,**kwargs): self.f.write("ERROR: " + (msg % args) + "\n") - def info(self,msg,*args,**kwargs): - pass - - def debug(self,msg,*args,**kwargs): - pass + info = critical + debug = critical # Null logger is used when no output is generated. Does nothing. class NullLogger(object): @@ -289,7 +286,7 @@ class Lexer: self.lexpos += n # ------------------------------------------------------------ - # token() - Return the next token from the Lexer + # opttoken() - Return the next token from the Lexer # # Note: This function has been carefully implemented to be as fast # as possible. 
Don't make changes unless you really know what @@ -855,7 +852,7 @@ class LexerReflect(object): # # Build all of the regular expression rules from definitions in the supplied module # ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,outputdir="",errorlog=None): +def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,outputdir="", debuglog=None, errorlog=None): global lexer ldict = None stateinfo = { 'INITIAL' : 'inclusive'} @@ -866,6 +863,10 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,out if errorlog is None: errorlog = PlyLogger(sys.stderr) + if debug: + if debuglog is None: + debuglog = PlyLogger(sys.stderr) + # Get the module dictionary used for the lexer if object: module = object @@ -893,9 +894,11 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,out except ImportError: pass - # Get the tokens, states, and literals variables (if any) - - states = ldict.get("states",None) + # Dump some basic debugging information + if debug: + debuglog.info("lex: tokens = %r", linfo.tokens) + debuglog.info("lex: literals = %r", linfo.literals) + debuglog.info("lex: states = %r", linfo.stateinfo) # Build a dictionary of valid token names lexobj.lextokens = { } @@ -921,15 +924,22 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,out line = func_code(f).co_firstlineno file = func_code(f).co_filename regex_list.append("(?P<%s>%s)" % (fname,f.__doc__)) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state) # Now add all of the simple rules for name,r in linfo.strsym[state]: regex_list.append("(?P<%s>%s)" % (name,r)) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state) regexs[state] = regex_list # Build the master regular expressions + if debug: + debuglog.info("lex: ==== MASTER REGEXS FOLLOW 
====") + for state in regexs: lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames) lexobj.lexstatere[state] = lexre @@ -937,7 +947,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,out lexobj.lexstaterenames[state] = re_names if debug: for i in range(len(re_text)): - errorlog.debug("lex: state '%s'. regex[%d] = '%s'",state, i, re_text[i]) + debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i]) # For inclusive states, we need to add the regular expressions from the INITIAL state for state,stype in stateinfo.items(): diff --git a/ply/yacc.py b/ply/yacc.py index a2b84c3..2de5ffe 100644 --- a/ply/yacc.py +++ b/ply/yacc.py @@ -71,6 +71,8 @@ error_count = 3 # Number of symbols that must be shifted to leave yaccdevel = 0 # Set to True if developing yacc. This turns off optimized # implementations of certain functions. +resultlimit = 40 # Size limit of results when running in debug mode. + import re, types, sys, os.path # Compatibility function for python 2.6/3.0 @@ -126,6 +128,23 @@ class NullLogger(object): # Exception raised for yacc-related errors class YaccError(Exception): pass +# Format the result message that the parser produces when running in debug mode. +def format_result(r): + repr_str = repr(r) + if len(repr_str) > resultlimit: + repr_str = repr_str[:resultlimit]+" ..." 
+ result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str) + return result + + +# Format stack entries when the parser is running in debug mode +def format_stack_entry(r): + repr_str = repr(r) + if len(repr_str) < 16: + return repr_str + else: + return "<%s @ 0x%x>" % (type(r).__name__,id(r)) + #----------------------------------------------------------------------------- # === LR Parsing Engine === # @@ -264,7 +283,7 @@ class LRParser: if not lexer: lex = load_ply_lex() lexer = lex.lexer - + # Set up the lexer and parser objects on pslice pslice.lexer = lexer pslice.parser = self @@ -354,7 +373,7 @@ class LRParser: # --! DEBUG if plen: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [_v.value for _v in symstack[-plen:]],-t) + debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) else: debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) @@ -388,7 +407,7 @@ class LRParser: del statestack[-plen:] p.callable(pslice) # --! DEBUG - debug.info("Result : %r", pslice[0]) + debug.info("Result : %s", format_result(pslice[0])) # --! DEBUG symstack.append(sym) state = goto[statestack[-1]][pname] @@ -427,7 +446,7 @@ class LRParser: # Call the grammar rule with our special slice object p.callable(pslice) # --! DEBUG - debug.info("Result : %r", pslice[0]) + debug.info("Result : %s", format_result(pslice[0])) # --! DEBUG symstack.append(sym) state = goto[statestack[-1]][pname] @@ -449,7 +468,7 @@ class LRParser: n = symstack[-1] result = getattr(n,"value",None) # --! DEBUG - debug.info("Done : Returning %r", result) + debug.info("Done : Returning %s", format_result(result)) debug.info("PLY: PARSE DEBUG END") # --! 
DEBUG return result @@ -2717,7 +2736,7 @@ class ParserReflect(object): sig = crc32(f[3].encode('latin-1'),sig) except (TypeError,ValueError): pass - return sig & 0xffffffff + return sig # ----------------------------------------------------------------------------- # validate_file() @@ -3109,7 +3128,8 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star raise YaccError("Unable to build parser") # Run the LRGeneratedTable on the grammar - errorlog.debug("Generating %s tables", method) + if debug: + errorlog.debug("Generating %s tables", method) lr = LRGeneratedTable(grammar,method,debuglog) |