From 9b82bd0761afe0bf05040460137b68a2555c4eda Mon Sep 17 00:00:00 2001 From: David Beazley Date: Wed, 22 Apr 2015 13:39:16 -0500 Subject: Documentation updates --- README.md | 22 +-- doc/ply.html | 355 +++++++++++++++++++++++++++++------------------- example/calceof/calc.py | 115 ++++++++++++++++ setup.py | 2 +- 4 files changed, 342 insertions(+), 152 deletions(-) create mode 100644 example/calceof/calc.py diff --git a/README.md b/README.md index 58507a8..0d01f7a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ PLY (Python Lex-Yacc) Version 3.5 -Copyright (C) 2001-2012, +Copyright (C) 2001-2015, David M. Beazley (Dabeaz LLC) All rights reserved. @@ -112,7 +112,11 @@ book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown may also be useful. -A Google group for PLY can be found at +The GitHub page for PLY can be found at: + + https://github.com/dabeaz/ply + +An old and relatively inactive discussion group for PLY is found at: http://groups.google.com/group/ply-hack @@ -130,7 +134,7 @@ and testing a revised LALR(1) implementation for PLY-2.0. Special Note for PLY-3.0 ======================== PLY-3.0 the first PLY release to support Python 3. However, backwards -compatibility with Python 2.2 is still preserved. PLY provides dual +compatibility with Python 2.6 is still preserved. PLY provides dual Python 2/3 compatibility by restricting its implementation to a common subset of basic language features. You should not convert PLY using 2to3--it is not necessary and may in fact break the implementation. @@ -238,7 +242,7 @@ with variables. import ply.yacc as yacc yacc.yacc() - while 1: + while True: try: s = raw_input('calc > ') # use input() on Python 3 except EOFError: @@ -252,12 +256,10 @@ My goal with PLY is to simply have a decent lex/yacc implementation for Python. As a general rule, I don't spend huge amounts of time working on it unless I receive very specific bug reports and/or patches to fix problems. I also try to incorporate submitted feature -requests and enhancements into each new version. To contact me about -bugs and/or new features, please send email to dave@dabeaz.com. - -In addition there is a Google group for discussing PLY related issues at - - http://groups.google.com/group/ply-hack +requests and enhancements into each new version. Please visit the PLY +github page at https://github.com/dabeaz/ply to submit issues and pull +requests. To contact me about bugs and/or new features, please send +email to dave@dabeaz.com. -- Dave diff --git a/doc/ply.html b/doc/ply.html index 13113f6..95ab9d0 100644 --- a/doc/ply.html +++ b/doc/ply.html @@ -32,6 +32,7 @@ dave@dabeaz.com
  • Ignored characters
  • Literal characters
  • Error handling
+  • EOF Handling
  • Building and using the lexer
  • The @TOKEN decorator
  • Optimized mode @@ -58,6 +59,7 @@ dave@dabeaz.com
  • Recovery and resynchronization with error rules
  • Panic mode recovery
  • Signalling an error from a production
+  • When Do Syntax Errors Get Reported
  • General comments on error handling
  • Line Number and Position Tracking @@ -79,6 +81,7 @@ dave@dabeaz.com
    +

    1. Preface and Requirements

    @@ -91,7 +94,7 @@ into a big development project with PLY.

    PLY-3.5 is compatible with both Python 2 and Python 3. If you are using -Python 2, you should use Python 2.6 or newer. +Python 2, you have to use Python 2.6 or newer.

    2. Introduction

    @@ -107,19 +110,7 @@ relatively straightforward to use PLY.

    Early versions of PLY were developed to support an Introduction to -Compilers Course I taught in 2001 at the University of Chicago. In this course, -students built a fully functional compiler for a simple Pascal-like -language. Their compiler, implemented entirely in Python, had to -include lexical analysis, parsing, type checking, type inference, -nested scoping, and code generation for the SPARC processor. -Approximately 30 different compiler implementations were completed in -this course. Most of PLY's interface and operation has been influenced by common -usability problems encountered by students. Since 2001, PLY has -continued to be improved as feedback has been received from users. -PLY-3.0 represents a major refactoring of the original implementation -with an eye towards future enhancements. - -

    +Compilers Course I taught in 2001 at the University of Chicago. Since PLY was primarily developed as an instructional tool, you will find it to be fairly picky about token and grammar rule specification. In part, this @@ -145,13 +136,14 @@ used as a reference for PLY as the concepts are virtually identical.

    3. PLY Overview

    +

    PLY consists of two separate modules; lex.py and yacc.py, both of which are found in a Python package called ply. The lex.py module is used to break input text into a collection of tokens specified by a collection of regular expression rules. yacc.py is used to recognize language syntax that has -been specified in the form of a context free grammar. yacc.py uses LR parsing and generates its parsing tables -using either the LALR(1) (the default) or SLR table generation algorithms. +been specified in the form of a context free grammar. +

    The two tools are meant to work together. Specifically, @@ -167,7 +159,7 @@ simple one-pass compilers. Like its Unix counterpart, yacc.py provides most of the features you expect including extensive error checking, grammar validation, support for empty productions, error tokens, and ambiguity -resolution via precedence rules. In fact, everything that is possible in traditional yacc +resolution via precedence rules. In fact, almost everything that is possible in traditional yacc should be supported in PLY.
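In code form, the typical wiring between the two modules is only a few lines. The following is just a sketch: it assumes the token and grammar rules are defined in the same module, and the input string is only an illustration.

import ply.lex as lex
import ply.yacc as yacc

lexer = lex.lex()      # build the tokenizer from the t_* rules in this module
parser = yacc.yacc()   # build the parser from the p_* rules in this module
result = parser.parse("x = 3 * 4 + 5", lexer=lexer)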

    @@ -278,7 +270,7 @@ t_ignore = ' \t' # Error handling rule def t_error(t): - print "Illegal character '%s'" % t.value[0] + print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) # Build the lexer @@ -306,8 +298,9 @@ lexer.input(data) # Tokenize while True: tok = lexer.token() - if not tok: break # No more input - print tok + if not tok: + break # No more input + print(tok) @@ -334,7 +327,7 @@ Lexers also support the iteration protocol. So, you can write the above loop

     for tok in lexer:
    -    print tok
    +    print(tok)
     
    @@ -349,8 +342,9 @@ accessing these attributes: # Tokenize while True: tok = lexer.token() - if not tok: break # No more input - print tok.type, tok.value, tok.lineno, tok.lexpos + if not tok: + break # No more input + print(tok.type, tok.value, tok.lineno, tok.lexpos) @@ -363,10 +357,12 @@ token relative to the start of the input text.

    4.2 The tokens list

    +

    All lexers must provide a list tokens that defines all of the possible token names that can be produced by the lexer. This list is always required and is used to perform a variety of validation checks. The tokens list is also used by the yacc.py module to identify terminals. +

    In the example, the following code specified the token names: @@ -585,6 +581,15 @@ Although it is possible to define a regular expression rule for whitespace in a similar to t_newline(), the use of t_ignore provides substantially better lexing performance because it is handled as a special case and is checked in a much more efficient manner than the normal regular expression rules. +

    + +

    +The characters given in t_ignore are not ignored when such characters are part of +other regular expression patterns. For example, if you had a rule to capture quoted text, +that pattern can include the ignored characters (which will be captured in the normal way). The +main purpose of t_ignore is to ignore whitespace and other padding between the +tokens that you actually want to parse. +
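As a rough sketch of this point (the STRING rule and the sample input below are purely illustrative), whitespace is discarded between tokens but preserved whenever it is matched inside another pattern:

import ply.lex as lex

tokens = ('STRING', 'NAME')

t_ignore = ' \t'          # skipped between tokens only

def t_STRING(t):
    r'"[^"]*"'
    # Spaces inside the quotes are matched by this pattern and kept in t.value
    return t

t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'

def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

lexer = lex.lex()
lexer.input('alpha   "hello   world"   beta')
for tok in lexer:
    print(tok.type, tok.value)   # the quoted STRING still contains its spaces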

    4.8 Literal characters

    @@ -609,14 +614,38 @@ literals = "+-*/" A literal character is simply a single character that is returned "as is" when encountered by the lexer. Literals are checked after all of the defined regular expression rules. Thus, if a rule starts with one of the literal characters, it will always take precedence. +

    When a literal token is returned, both its type and value attributes are set to the character itself. For example, '+'. +

    + +

    +It's possible to write token functions that perform additional actions +when literals are matched. However, you'll need to set the token type +appropriately. For example: +

    + +
    +
    +literals = [ '{', '}' ]
    +
    +def t_lbrace(t):
    +    r'\{'
    +    t.type = '{'      # Set token type to the expected literal
    +    return t
    +
    +def t_rbrace(t):
    +    r'\}'
    +    t.type = '}'      # Set token type to the expected literal
    +    return t
    +
    +

    4.9 Error handling

    -Finally, the t_error() +The t_error() function is used to handle lexing errors that occur when illegal characters are detected. In this case, the t.value attribute contains the rest of the input string that has not been tokenized. In the example, the error function @@ -626,49 +655,67 @@ was defined as follows:

     # Error handling rule
     def t_error(t):
    -    print "Illegal character '%s'" % t.value[0]
    +    print("Illegal character '%s'" % t.value[0])
         t.lexer.skip(1)
     
    In this case, we simply print the offending character and skip ahead one character by calling t.lexer.skip(1). -

    4.10 Building and using the lexer

    +

    4.10 EOF Handling

-To build the lexer, the function lex.lex() is used. This function
-uses Python reflection (or introspection) to read the regular expression rules
-out of the calling context and build the lexer. Once the lexer has been built, two methods can
-be used to control the lexer.
+The t_eof() function is used to handle an end-of-file (EOF) condition in the input. As input, it
+receives a token whose type is 'eof', with the lineno and lexpos attributes set appropriately.
+The main use of this function is to provide more input to the lexer so that it can continue to parse. Here is an
+example of how this works:

    - +
    +
    +# EOF handling rule
    +def t_eof(t):
    +    # Get more input (Example)
    +    more = raw_input('... ')
    +    if more:
    +        t.lexer.input(more)
    +        return t.lexer.token()
    +    return None
    +
    +
    -The preferred way to use PLY is to invoke the above methods directly on the lexer object returned by the -lex() function. The legacy interface to PLY involves module-level functions lex.input() and lex.token(). -For example: +

+The EOF function should return the next available token (by calling t.lexer.token()) or None to
+indicate that no more data is available. Be aware that setting more input with the t.lexer.input() method does
+NOT reset the lexer state or the lineno attribute used for position tracking. The lexpos
+attribute is reset, so be aware of that if you're using it in error reporting.
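Another common use of t_eof() is feeding a large file to the lexer a piece at a time. The following is only a sketch: the sourcefile attribute is something we attach to the lexer ourselves (PLY does not define it), and it assumes that a line boundary is always a safe place to split tokens.

def t_eof(t):
    # Read one more line from a file object attached to the lexer
    line = t.lexer.sourcefile.readline()
    if line:
        t.lexer.input(line)
        return t.lexer.token()
    return None

lexer = lex.lex()
lexer.sourcefile = open('input.txt')        # our own attribute, not part of PLY
lexer.input(lexer.sourcefile.readline())    # prime the lexer with the first line

The same idea is what the later note on streaming input in the Miscellaneous Issues section has in mind.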

    + +

    4.11 Building and using the lexer

    + + +

    +To build the lexer, the function lex.lex() is used. For example:

    -lex.lex()
    -lex.input(sometext)
    -while 1:
    -    tok = lex.token()
    -    if not tok: break
    -    print tok
    +lexer = lex.lex()
     
    -

    -In this example, the module-level functions lex.input() and lex.token() are bound to the input() -and token() methods of the last lexer created by the lex module. This interface may go away at some point so -it's probably best not to use it. +

    This function +uses Python reflection (or introspection) to read the regular expression rules +out of the calling context and build the lexer. Once the lexer has been built, two methods can +be used to control the lexer. +

    + -

    4.11 The @TOKEN decorator

    +

    4.12 The @TOKEN decorator

    In some applications, you may want to define build tokens from as a series of @@ -700,22 +747,11 @@ def t_ID(t): -This will attach identifier to the docstring for t_ID() allowing lex.py to work normally. An alternative -approach this problem is to set the docstring directly like this: - -
    -
    -def t_ID(t):
    -    ...
    -
    -t_ID.__doc__ = identifier
    -
    -
    - -NOTE: Use of @TOKEN requires Python-2.4 or newer. If you're concerned about backwards compatibility with older -versions of Python, use the alternative approach of setting the docstring directly. +

    +This will attach identifier to the docstring for t_ID() allowing lex.py to work normally. +

    -

    4.12 Optimized mode

    +

    4.13 Optimized mode

    For improved performance, it may be desirable to use Python's @@ -732,8 +768,9 @@ lexer = lex.lex(optimize=1) Next, run Python in its normal operating mode. When you do -this, lex.py will write a file called lextab.py to -the current directory. This file contains all of the regular +this, lex.py will write a file called lextab.py in +the same directory as the module containing the lexer specification. +This file contains all of the regular expression rules and tables used during lexing. On subsequent executions, lextab.py will simply be imported to build the lexer. This @@ -742,6 +779,7 @@ works in Python's optimized mode.

    To change the name of the lexer-generated file, use the lextab keyword argument. For example: +

    @@ -749,10 +787,19 @@ lexer = lex.lex(optimize=1,lextab="footab")
     
    +

    To change the output directory of the file, use the outputdir keyword argument. For example: +

    + +
    +
    +lexer = lex.lex(optimize=1, outputdir="/some/directory")
    +
    +
    + When running in optimized mode, it is important to note that lex disables most error checking. Thus, this is really only recommended if you're sure everything is working correctly and you're ready to start releasing production code. -

    4.13 Debugging

    +

    4.14 Debugging

    For the purpose of debugging, you can run lex() in a debugging mode as follows: @@ -784,7 +831,7 @@ if __name__ == '__main__': Please refer to the "Debugging" section near the end for some more advanced details of debugging. -

    4.14 Alternative specification of lexers

    +

    4.15 Alternative specification of lexers

    As shown in the example, lexers are specified all within one Python module. If you want to @@ -835,7 +882,7 @@ t_ignore = ' \t' # Error handling rule def t_error(t): - print "Illegal character '%s'" % t.value[0] + print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) @@ -902,7 +949,7 @@ class MyLexer(object): # Error handling rule def t_error(self,t): - print "Illegal character '%s'" % t.value[0] + print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) # Build the lexer @@ -914,8 +961,9 @@ class MyLexer(object): self.lexer.input(data) while True: tok = self.lexer.token() - if not tok: break - print tok + if not tok: + break + print(tok) # Build the lexer and try it out m = MyLexer() @@ -933,7 +981,7 @@ PLY only works properly if the lexer actions are defined by bound-methods. When using the module option to lex(), PLY collects symbols from the underlying object using the dir() function. There is no direct access to the __dict__ attribute of the object supplied as a -module value. +module value.

    Finally, if you want to keep things nicely encapsulated, but don't want to use a @@ -979,7 +1027,7 @@ def MyLexer(): # Error handling rule def t_error(t): - print "Illegal character '%s'" % t.value[0] + print("Illegal character '%s'" % t.value[0]) t.lexer.skip(1) # Build the lexer from my environment and return it @@ -993,7 +1041,7 @@ define a single lexer per module (source file). There are extensive validation may falsely report error messages if you don't follow this rule.

    -

    4.15 Maintaining state

    +

    4.16 Maintaining state

    In your lexer, you may want to maintain a variety of state @@ -1090,7 +1138,7 @@ def MyLexer(): -

    4.16 Lexer cloning

    +

    4.17 Lexer cloning

    @@ -1115,7 +1163,7 @@ cloned lexers could be used to handle different input files.

    Creating a clone is different than calling lex.lex() in that -PLY doesn't regenerate any of the internal tables or regular expressions. So, +PLY doesn't regenerate any of the internal tables or regular expressions.

    Special considerations need to be made when cloning lexers that also @@ -1139,7 +1187,7 @@ important to emphasize that clone() is only meant to create a new lexer that reuses the regular expressions and environment of another lexer. If you need to make a totally new copy of a lexer, then call lex() again. -

    4.17 Internal lexer state

    +

    4.18 Internal lexer state

    A Lexer object lexer has a number of internal attributes that may be useful in certain @@ -1177,7 +1225,7 @@ current token. If you have written a regular expression that contains named gro Note: This attribute is only updated when tokens are defined and processed by functions. -

    4.18 Conditional lexing and start conditions

    +

    4.19 Conditional lexing and start conditions

    In advanced parsing applications, it may be useful to have different @@ -1254,8 +1302,8 @@ t_INITIAL_NUMBER = r'\d+'

    -States are also associated with the special t_ignore and t_error() declarations. For example, if a state treats -these differently, you can declare: +States are also associated with the special t_ignore, t_error(), and t_eof() declarations. For example, if a state treats +these differently, you can declare:

    @@ -1376,13 +1424,16 @@ However, if the closing right brace is encountered, the rule t_ccode_rbrace<
     position), stores it, and returns a token 'CCODE' containing all of that text.  When returning the token, the lexing state is restored back to its
     initial state.
     
    -

    4.19 Miscellaneous Issues

    +

    4.20 Miscellaneous Issues

  • The lexer requires input to be supplied as a single input string. Since most machines have more than enough memory, this rarely presents a performance concern. However, it means that the lexer currently can't be used with streaming data -such as open files or sockets. This limitation is primarily a side-effect of using the re module. You might be +able to work around this by implementing an appropriate def t_eof() end-of-file handling rule. The main complication +here is that you'll probably need to ensure that data is fed to the lexer in a way so that it doesn't split in the middle +of a token.

  • The lexer should work properly with both Unicode strings given as token and pattern matching rules as @@ -1606,7 +1657,7 @@ def p_factor_expr(p): # Error rule for syntax errors def p_error(p): - print "Syntax error in input!" + print("Syntax error in input!") # Build the parser parser = yacc.yacc() @@ -1618,7 +1669,7 @@ while True: break if not s: continue result = parser.parse(s) - print result + print(result)
  • @@ -1688,15 +1739,20 @@ calc > +

    Since table construction is relatively expensive (especially for large -grammars), the resulting parsing table is written to the current -directory in a file called parsetab.py. In addition, a +grammars), the resulting parsing table is written to +a file called parsetab.py. In addition, a debugging file called parser.out is created. On subsequent executions, yacc will reload the table from parsetab.py unless it has detected a change in the underlying grammar (in which case the tables and parsetab.py file are -regenerated). Note: The names of parser output files can be changed -if necessary. See the PLY Reference for details. +regenerated). Both of these files are written to the same directory +as the module in which the parser is specified. The output directory +can be changed by giving an outputdir keyword argument to yacc(). +The name of the parsetab module can also be changed using the +tabmodule keyword argument to yacc(). +
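For instance, a small sketch combining both options (the module name and directory here are arbitrary):

parser = yacc.yacc(tabmodule='calcparsetab', outputdir='/tmp/parser-cache')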

    If any errors are detected in your grammar specification, yacc.py will produce @@ -1891,7 +1947,7 @@ an argument to yacc(). For example:

    -yacc.yacc(start='foo')
    +parser = yacc.yacc(start='foo')
     
    @@ -2507,7 +2563,7 @@ To account for the possibility of a bad expression, you might write an additiona
     def p_statement_print_error(p):
          'statement : PRINT error SEMI'
    -     print "Syntax error in print statement. Bad expression"
    +     print("Syntax error in print statement. Bad expression")
     
     
    @@ -2531,7 +2587,7 @@ on the right in an error rule. For example:
     def p_statement_print_error(p):
         'statement : PRINT error'
    -    print "Syntax error in print statement. Bad expression"
    +    print("Syntax error in print statement. Bad expression")
     
    @@ -2553,11 +2609,16 @@ parser in its initial state.
     def p_error(p):
    -    print "Whoa. You are seriously hosed."
    +    print("Whoa. You are seriously hosed.")
    +    if not p:
    +        print("End of File!")
    +        return
    +
         # Read ahead looking for a closing '}'
         while True:
             tok = parser.token()             # Get the next token
    -        if not tok or tok.type == 'RBRACE': break
    +        if not tok or tok.type == 'RBRACE': 
    +            break
         parser.restart()
     
    @@ -2568,9 +2629,12 @@ This function simply discards the bad token and tells the parser that the error
     def p_error(p):
    -    print "Syntax error at token", p.type
    -    # Just discard the token and tell the parser it's okay.
    -    parser.errok()
    +    if p:
    +         print("Syntax error at token", p.type)
    +         # Just discard the token and tell the parser it's okay.
    +         parser.errok()
    +    else:
    +         print("Syntax error at EOF")
     
    @@ -2646,8 +2710,44 @@ raises SyntaxError.

    Note: This feature of PLY is meant to mimic the behavior of the YYERROR macro in yacc. +

    6.8.4 When Do Syntax Errors Get Reported

    -

    6.8.4 General comments on error handling

    + +

+In most cases, yacc will handle errors as soon as a bad input token is
+detected on the input. However, be aware that yacc may choose to
+delay error handling until after it has reduced one or more grammar
+rules first. This behavior might be unexpected, but it's related to
+special states in the underlying parsing table known as "defaulted
+states." A defaulted state is a parsing condition where the same
+grammar rule will be reduced regardless of what valid token
+comes next on the input. For such states, yacc chooses to go ahead
+and reduce the grammar rule without reading the next input
+token. If the next token is bad, yacc will eventually get around to reading it and
+report a syntax error. It's just a little unusual in that you might
+see some of your grammar rules firing immediately prior to the syntax
+error.

    + +

+Usually, the delayed error reporting with defaulted states is harmless
+(and there are other reasons for wanting PLY to behave in this way).
+However, if you need to turn this behavior off for some reason, you
+can clear the defaulted states table like this:

    + +
    +
    +parser = yacc.yacc()
    +parser.defaulted_states = {}
    +
    +
    + +

    +Disabling defaulted states is not recommended if your grammar makes use +of embedded actions as described in Section 6.11.

    + +

    6.8.5 General comments on error handling

    For normal types of languages, error recovery with error rules and resynchronization characters is probably the most reliable @@ -2730,7 +2830,7 @@ example: def p_bad_func(p): 'funccall : fname LPAREN error RPAREN' # Line number reported from LPAREN token - print "Bad function call at line", p.lineno(2) + print("Bad function call at line", p.lineno(2)) @@ -2861,7 +2961,7 @@ suppose you have a rule like this:
     def p_foo(p):
         "foo : A B C D"
    -    print "Parsed a foo", p[1],p[2],p[3],p[4]
    +    print("Parsed a foo", p[1],p[2],p[3],p[4])
     
    @@ -2877,12 +2977,12 @@ been parsed. To do this, write an empty rule like this:
     def p_foo(p):
         "foo : A seen_A B C D"
    -    print "Parsed a foo", p[1],p[3],p[4],p[5]
    -    print "seen_A returned", p[2]
    +    print("Parsed a foo", p[1],p[3],p[4],p[5])
    +    print("seen_A returned", p[2])
     
     def p_seen_A(p):
         "seen_A :"
    -    print "Saw an A = ", p[-1]   # Access grammar symbol to left
    +    print("Saw an A = ", p[-1])   # Access grammar symbol to left
         p[0] = some_value            # Assign value to seen_A
     
     
    @@ -2973,25 +3073,13 @@ might undo the operations performed in the embedded action
      -
    • The default parsing method is LALR. To use SLR instead, run yacc() as follows: - -
      -
      -yacc.yacc(method="SLR")
      -
      -
      -Note: LALR table generation takes approximately twice as long as SLR table generation. There is no -difference in actual parsing performance---the same code is used in both cases. LALR is preferred when working -with more complicated grammars since it is more powerful. - -

    • By default, yacc.py relies on lex.py for tokenizing. However, an alternative tokenizer can be supplied as follows:
      -yacc.parse(lexer=x)
+result = yacc.parse(lexer=x)
       
      in this case, x must be a Lexer object that minimally has a x.token() method for retrieving the next @@ -3003,7 +3091,7 @@ To disable this, use
      -yacc.yacc(debug=0)
      +parser = yacc.yacc(debug=False)
       
      @@ -3012,7 +3100,7 @@ yacc.yacc(debug=0)
      -yacc.yacc(tabmodule="foo")
      +parser = yacc.yacc(tabmodule="foo")
       
      @@ -3020,7 +3108,7 @@ yacc.yacc(tabmodule="foo")
    • To change the directory in which the parsetab.py file (and other output files) are written, use:
      -yacc.yacc(tabmodule="foo",outputdir="somedirectory")
      +parser = yacc.yacc(tabmodule="foo",outputdir="somedirectory")
       
      @@ -3028,7 +3116,7 @@ yacc.yacc(tabmodule="foo",outputdir="somedirectory")
    • To prevent yacc from generating any kind of parser table file, use:
      -yacc.yacc(write_tables=0)
      +parser = yacc.yacc(write_tables=False)
       
      @@ -3040,25 +3128,10 @@ each time it runs (which may take awhile depending on how large your grammar is)
      -yacc.parse(debug=1)     
      -
      -
      - -

      -

    • The yacc.yacc() function really returns a parser object. If you want to support multiple -parsers in the same application, do this: - -
      -
      -p = yacc.yacc()
      -...
      -p.parse()
+result = yacc.parse(debug=True)
       
      -Note: The function yacc.parse() is bound to the last parser that was generated.
    • - -

    • Since the generation of the LALR tables is relatively expensive, previously generated tables are cached and reused if possible. The decision to regenerate the tables is determined by taking an MD5 @@ -3066,8 +3139,8 @@ checksum of all grammar rules and precedence rules. Only in the event of a mism

      It should be noted that table generation is reasonably efficient, even for grammars that involve around a 100 rules -and several hundred states. For more complex languages such as C, table generation may take 30-60 seconds on a slow -machine. Please be patient.

    • +and several hundred states. +

    • Since LR parsing is driven by tables, the performance of the parser is largely independent of the @@ -3128,7 +3201,7 @@ the lexer object that triggered the rule. For example: def t_NUMBER(t): r'\d+' ... - print t.lexer # Show lexer object + print(t.lexer) # Show lexer object @@ -3140,8 +3213,8 @@ and parser objects respectively. def p_expr_plus(p): 'expr : expr PLUS expr' ... - print p.parser # Show parser object - print p.lexer # Show lexer object + print(p.parser) # Show parser object + print(p.lexer) # Show lexer object diff --git a/example/calceof/calc.py b/example/calceof/calc.py new file mode 100644 index 0000000..4a880fb --- /dev/null +++ b/example/calceof/calc.py @@ -0,0 +1,115 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. Asks the user for more input and +# demonstrates the use of the t_eof() rule. +# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0,"../..") + +if sys.version_info[0] >= 3: + raw_input = input + +tokens = ( + 'NAME','NUMBER', + ) + +literals = ['=','+','-','*','/', '(',')'] + +# Tokens + +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + t.value = int(t.value) + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_eof(t): + more = raw_input('... ') + if more: + t.lexer.input(more + '\n') + return t.lexer.token() + else: + return None + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +import ply.lex as lex +lex.lex() + +# Parsing rules + +precedence = ( + ('left','+','-'), + ('left','*','/'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(p): + 'statement : NAME "=" expression' + names[p[1]] = p[3] + +def p_statement_expr(p): + 'statement : expression' + print(p[1]) + +def p_expression_binop(p): + '''expression : expression '+' expression + | expression '-' expression + | expression '*' expression + | expression '/' expression''' + if p[2] == '+' : p[0] = p[1] + p[3] + elif p[2] == '-': p[0] = p[1] - p[3] + elif p[2] == '*': p[0] = p[1] * p[3] + elif p[2] == '/': p[0] = p[1] / p[3] + +def p_expression_uminus(p): + "expression : '-' expression %prec UMINUS" + p[0] = -p[2] + +def p_expression_group(p): + "expression : '(' expression ')'" + p[0] = p[2] + +def p_expression_number(p): + "expression : NUMBER" + p[0] = p[1] + +def p_expression_name(p): + "expression : NAME" + try: + p[0] = names[p[1]] + except LookupError: + print("Undefined name '%s'" % p[1]) + p[0] = 0 + +def p_error(p): + if p: + print("Syntax error at '%s'" % p.value) + else: + print("Syntax error at EOF") + +import ply.yacc as yacc +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: continue + yacc.parse(s+'\n') diff --git a/setup.py b/setup.py index 670cb3c..1e95122 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ PLY is extremely easy to use and provides very extensive error checking. It is compatible with both Python 2 and Python 3. """, license="""BSD""", - version = "3.4", + version = "3.5", author = "David Beazley", author_email = "dave@dabeaz.com", maintainer = "David Beazley", -- cgit v1.2.1