summaryrefslogtreecommitdiff
path: root/example
diff options
context:
space:
mode:
authorDavid Beazley <dave@dabeaz.com>2006-11-21 15:09:46 +0000
committerDavid Beazley <dave@dabeaz.com>2006-11-21 15:09:46 +0000
commit80c8c2d0a6e63745ff099b33ad2848b6fb17285a (patch)
tree36d223fc55a5649dc85a4f8a3fa9d17cb130c6c3 /example
downloadply-80c8c2d0a6e63745ff099b33ad2848b6fb17285a.tar.gz
initial checkin
Diffstat (limited to 'example')
-rw-r--r--example/BASIC/README79
-rw-r--r--example/BASIC/basic.py68
-rw-r--r--example/BASIC/basiclex.py74
-rw-r--r--example/BASIC/basinterp.py440
-rw-r--r--example/BASIC/basparse.py424
-rw-r--r--example/BASIC/dim.bas14
-rw-r--r--example/BASIC/func.bas5
-rw-r--r--example/BASIC/gcd.bas22
-rw-r--r--example/BASIC/gosub.bas13
-rw-r--r--example/BASIC/hello.bas4
-rw-r--r--example/BASIC/linear.bas17
-rw-r--r--example/BASIC/maxsin.bas12
-rw-r--r--example/BASIC/powers.bas13
-rw-r--r--example/BASIC/rand.bas4
-rw-r--r--example/BASIC/sales.bas20
-rw-r--r--example/BASIC/sears.bas18
-rw-r--r--example/BASIC/sqrt1.bas5
-rw-r--r--example/BASIC/sqrt2.bas4
-rw-r--r--example/GardenSnake/GardenSnake.py709
-rw-r--r--example/GardenSnake/README5
-rw-r--r--example/README10
-rw-r--r--example/ansic/README2
-rw-r--r--example/ansic/clex.py164
-rw-r--r--example/ansic/cparse.py863
-rw-r--r--example/ansic/lextab.py8
-rw-r--r--example/calc/calc.py105
-rwxr-xr-xexample/classcalc/calc.py152
-rwxr-xr-xexample/cleanup.sh2
-rw-r--r--example/hedit/hedit.py48
-rwxr-xr-xexample/newclasscalc/calc.py155
-rw-r--r--example/optcalc/README9
-rw-r--r--example/optcalc/calc.py113
-rw-r--r--example/unicalc/calc.py114
-rw-r--r--example/yply/README41
-rw-r--r--example/yply/ylex.py112
-rw-r--r--example/yply/yparse.py217
-rwxr-xr-xexample/yply/yply.py53
37 files changed, 4118 insertions, 0 deletions
diff --git a/example/BASIC/README b/example/BASIC/README
new file mode 100644
index 0000000..be24a30
--- /dev/null
+++ b/example/BASIC/README
@@ -0,0 +1,79 @@
+Inspired by a September 14, 2006 Salon article "Why Johnny Can't Code" by
+David Brin (http://www.salon.com/tech/feature/2006/09/14/basic/index.html),
+I thought that a fully working BASIC interpreter might be an interesting,
+if not questionable, PLY example. Uh, okay, so maybe it's just a bad idea,
+but in any case, here it is.
+
+In this example, you'll find a rough implementation of 1964 Dartmouth BASIC
+as described in the manual at:
+
+ http://www.bitsavers.org/pdf/dartmouth/BASIC_Oct64.pdf
+
+See also:
+
+ http://en.wikipedia.org/wiki/Dartmouth_BASIC
+
+This dialect is downright primitive---there are no string variables
+and no facilities for interactive input. Moreover, subroutines and functions
+are brain-dead even more than they usually are for BASIC. Of course,
+the GOTO statement is provided.
+
+Nevertheless, there are a few interesting aspects of this example:
+
+ - It illustrates a fully working interpreter including lexing, parsing,
+ and interpretation of instructions.
+
+ - The parser shows how to catch and report various kinds of parsing
+ errors in a more graceful way.
+
+ - The example both parses files (supplied on command line) and
+ interactive input entered line by line.
+
+ - It shows how you might represent parsed information. In this case,
+ each BASIC statement is encoded into a Python tuple containing the
+ statement type and parameters. These tuples are then stored in
+ a dictionary indexed by program line numbers.
+
+ - Even though it's just BASIC, the parser contains more than 80
+ rules and 150 parsing states. Thus, it's a little more meaty than
+ the calculator example.
+
+To use the example, run it as follows:
+
+ % python basic.py hello.bas
+ HELLO WORLD
+ %
+
+or use it interactively:
+
+ % python basic.py
+ [BASIC] 10 PRINT "HELLO WORLD"
+ [BASIC] 20 END
+ [BASIC] RUN
+ HELLO WORLD
+ [BASIC]
+
+The following files are defined:
+
+ basic.py - High level script that controls everything
+ basiclex.py - BASIC tokenizer
+ basparse.py - BASIC parser
+ basinterp.py - BASIC interpreter that runs parsed programs.
+
+In addition, a number of sample BASIC programs (.bas suffix) are
+provided. These were taken out of the Dartmouth manual.
+
+Disclaimer: I haven't spent a ton of time testing this and it's likely that
+I've skimped here and there on a few finer details (e.g., strictly enforcing
+variable naming rules). However, the interpreter seems to be able to run
+the examples in the BASIC manual.
+
+Have fun!
+
+-Dave
+
+
+
+
+
+
diff --git a/example/BASIC/basic.py b/example/BASIC/basic.py
new file mode 100644
index 0000000..6a2f489
--- /dev/null
+++ b/example/BASIC/basic.py
@@ -0,0 +1,68 @@
+# An implementation of Dartmouth BASIC (1964)
+#
+
+import sys
+sys.path.insert(0,"../..")
+
+import basiclex
+import basparse
+import basinterp
+
+# If a filename has been specified, we try to run it.
+# If a runtime error occurs, we bail out and enter
+# interactive mode below
+if len(sys.argv) == 2:
+ data = open(sys.argv[1]).read()
+ prog = basparse.parse(data)
+ if not prog: raise SystemExit
+ b = basinterp.BasicInterpreter(prog)
+ try:
+ b.run()
+ raise SystemExit
+ except RuntimeError:
+ pass
+
+else:
+ b = basinterp.BasicInterpreter({})
+
+# Interactive mode. This incrementally adds/deletes statements
+# from the program stored in the BasicInterpreter object. In
+# addition, special commands 'NEW','LIST',and 'RUN' are added.
+# Specifying a line number with no code deletes that line from
+# the program.
+
+while 1:
+ try:
+ line = raw_input("[BASIC] ")
+ except EOFError:
+ raise SystemExit
+ if not line: continue
+ line += "\n"
+ prog = basparse.parse(line)
+ if not prog: continue
+
+ keys = prog.keys()
+ if keys[0] > 0:
+ b.add_statements(prog)
+ else:
+ stat = prog[keys[0]]
+ if stat[0] == 'RUN':
+ try:
+ b.run()
+ except RuntimeError:
+ pass
+ elif stat[0] == 'LIST':
+ b.list()
+ elif stat[0] == 'BLANK':
+ b.del_line(stat[1])
+ elif stat[0] == 'NEW':
+ b.new()
+
+
+
+
+
+
+
+
+
diff --git a/example/BASIC/basiclex.py b/example/BASIC/basiclex.py
new file mode 100644
index 0000000..463ef9b
--- /dev/null
+++ b/example/BASIC/basiclex.py
@@ -0,0 +1,74 @@
+# An implementation of Dartmouth BASIC (1964)
+
+from ply import *
+
+keywords = (
+ 'LET','READ','DATA','PRINT','GOTO','IF','THEN','FOR','NEXT','TO','STEP',
+ 'END','STOP','DEF','GOSUB','DIM','REM','RETURN','RUN','LIST','NEW',
+)
+
+tokens = keywords + (
+ 'EQUALS','PLUS','MINUS','TIMES','DIVIDE','POWER',
+ 'LPAREN','RPAREN','LT','LE','GT','GE','NE',
+ 'COMMA','SEMI', 'INTEGER','FLOAT', 'STRING',
+ 'ID','NEWLINE'
+)
+
+t_ignore = ' \t'
+
+def t_REM(t):
+ r'REM .*'
+ return t
+
+def t_ID(t):
+ r'[A-Z][A-Z0-9]*'
+ if t.value in keywords:
+ t.type = t.value
+ return t
+
+t_EQUALS = r'='
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_TIMES = r'\*'
+t_POWER = r'\^'
+t_DIVIDE = r'/'
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+t_LT = r'<'
+t_LE = r'<='
+t_GT = r'>'
+t_GE = r'>='
+t_NE = r'<>'
+t_COMMA = r'\,'
+t_SEMI = r';'
+t_INTEGER = r'\d+'
+t_FLOAT = r'((\d*\.\d+)(E[\+-]?\d+)?|([1-9]\d*E[\+-]?\d+))'
+t_STRING = r'\".*?\"'
+
+def t_NEWLINE(t):
+ r'\n'
+ t.lexer.lineno += 1
+ return t
+
+def t_error(t):
+ print "Illegal character", t.value[0]
+ t.lexer.skip(1)
+
+lex.lex()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/example/BASIC/basinterp.py b/example/BASIC/basinterp.py
new file mode 100644
index 0000000..0252aa3
--- /dev/null
+++ b/example/BASIC/basinterp.py
@@ -0,0 +1,440 @@
+# This file provides the runtime support for running a basic program
+# Assumes the program has been parsed using basparse.py
+
+import sys
+import math
+import random
+
+class BasicInterpreter:
+
+ # Initialize the interpreter. prog is a dictionary
+ # containing (line,statement) mappings
+ def __init__(self,prog):
+ self.prog = prog
+
+ self.functions = { # Built-in function table
+ 'SIN' : lambda z: math.sin(self.eval(z)),
+ 'COS' : lambda z: math.cos(self.eval(z)),
+ 'TAN' : lambda z: math.tan(self.eval(z)),
+ 'ATN' : lambda z: math.atan(self.eval(z)),
+ 'EXP' : lambda z: math.exp(self.eval(z)),
+ 'ABS' : lambda z: abs(self.eval(z)),
+ 'LOG' : lambda z: math.log(self.eval(z)),
+ 'SQR' : lambda z: math.sqrt(self.eval(z)),
+ 'INT' : lambda z: int(self.eval(z)),
+ 'RND' : lambda z: random.random()
+ }
+
+ # Collect all data statements
+ def collect_data(self):
+ self.data = []
+ for lineno in self.stat:
+ if self.prog[lineno][0] == 'DATA':
+ self.data = self.data + self.prog[lineno][1]
+ self.dc = 0 # Initialize the data counter
+
+ # Check for end statements
+ def check_end(self):
+ has_end = 0
+ for lineno in self.stat:
+ if self.prog[lineno][0] == 'END' and not has_end:
+ has_end = lineno
+ if not has_end:
+ print "NO END INSTRUCTION"
+ self.error = 1
+ if has_end != lineno:
+ print "END IS NOT LAST"
+ self.error = 1
+
+ # Check loops
+ def check_loops(self):
+ for pc in range(len(self.stat)):
+ lineno = self.stat[pc]
+ if self.prog[lineno][0] == 'FOR':
+ forinst = self.prog[lineno]
+ loopvar = forinst[1]
+ for i in range(pc+1,len(self.stat)):
+ if self.prog[self.stat[i]][0] == 'NEXT':
+ nextvar = self.prog[self.stat[i]][1]
+ if nextvar != loopvar: continue
+ self.loopend[pc] = i
+ break
+ else:
+ print "FOR WITHOUT NEXT AT LINE" % self.stat[pc]
+ self.error = 1
+
+ # Evaluate an expression
+ def eval(self,expr):
+ etype = expr[0]
+ if etype == 'NUM': return expr[1]
+ elif etype == 'GROUP': return self.eval(expr[1])
+ elif etype == 'UNARY':
+ if expr[1] == '-': return -self.eval(expr[2])
+ elif etype == 'BINOP':
+ if expr[1] == '+': return self.eval(expr[2])+self.eval(expr[3])
+ elif expr[1] == '-': return self.eval(expr[2])-self.eval(expr[3])
+ elif expr[1] == '*': return self.eval(expr[2])*self.eval(expr[3])
+ elif expr[1] == '/': return float(self.eval(expr[2]))/self.eval(expr[3])
+ elif expr[1] == '^': return abs(self.eval(expr[2]))**self.eval(expr[3])
+ elif etype == 'VAR':
+ var,dim1,dim2 = expr[1]
+ if not dim1 and not dim2:
+ if self.vars.has_key(var):
+ return self.vars[var]
+ else:
+ print "UNDEFINED VARIABLE", var, "AT LINE", self.stat[self.pc]
+ raise RuntimeError
+ # May be a list lookup or a function evaluation
+ if dim1 and not dim2:
+ if self.functions.has_key(var):
+ # A function
+ return self.functions[var](dim1)
+ else:
+ # A list evaluation
+ if self.lists.has_key(var):
+ dim1val = self.eval(dim1)
+ if dim1val < 1 or dim1val > len(self.lists[var]):
+ print "LIST INDEX OUT OF BOUNDS AT LINE", self.stat[self.pc]
+ raise RuntimeError
+ return self.lists[var][dim1val-1]
+ if dim1 and dim2:
+ if self.tables.has_key(var):
+ dim1val = self.eval(dim1)
+ dim2val = self.eval(dim2)
+ if dim1val < 1 or dim1val > len(self.tables[var]) or dim2val < 1 or dim2val > len(self.tables[var][0]):
+ print "TABLE INDEX OUT OUT BOUNDS AT LINE", self.stat[self.pc]
+ raise RuntimeError
+ return self.tables[var][dim1val-1][dim2val-1]
+ print "UNDEFINED VARIABLE", var, "AT LINE", self.stat[self.pc]
+ raise RuntimeError
+
+ # Evaluate a relational expression
+ def releval(self,expr):
+ etype = expr[1]
+ lhs = self.eval(expr[2])
+ rhs = self.eval(expr[3])
+ if etype == '<':
+ if lhs < rhs: return 1
+ else: return 0
+
+ elif etype == '<=':
+ if lhs <= rhs: return 1
+ else: return 0
+
+ elif etype == '>':
+ if lhs > rhs: return 1
+ else: return 0
+
+ elif etype == '>=':
+ if lhs >= rhs: return 1
+ else: return 0
+
+ elif etype == '=':
+ if lhs == rhs: return 1
+ else: return 0
+
+ elif etype == '<>':
+ if lhs != rhs: return 1
+ else: return 0
+
+ # Assignment
+ def assign(self,target,value):
+ var, dim1, dim2 = target
+ if not dim1 and not dim2:
+ self.vars[var] = self.eval(value)
+ elif dim1 and not dim2:
+ # List assignment
+ dim1val = self.eval(dim1)
+ if not self.lists.has_key(var):
+ self.lists[var] = [0]*10
+
+ if dim1val > len(self.lists[var]):
+ print "DIMENSION TOO LARGE AT LINE", self.stat[self.pc]
+ raise RuntimeError
+ self.lists[var][dim1val-1] = self.eval(value)
+ elif dim1 and dim2:
+ dim1val = self.eval(dim1)
+ dim2val = self.eval(dim2)
+ if not self.tables.has_key(var):
+ temp = [0]*10
+ v = []
+ for i in range(10): v.append(temp[:])
+ self.tables[var] = v
+ # Variable already exists
+ if dim1val > len(self.tables[var]) or dim2val > len(self.tables[var][0]):
+ print "DIMENSION TOO LARGE AT LINE", self.stat[self.pc]
+ raise RuntimeError
+ self.tables[var][dim1val-1][dim2val-1] = self.eval(value)
+
+ # Change the current line number
+ def goto(self,linenum):
+ if not self.prog.has_key(linenum):
+ print "UNDEFINED LINE NUMBER %d AT LINE %d" % (linenum, self.stat[self.pc])
+ raise RuntimeError
+ self.pc = self.stat.index(linenum)
+
+ # Run it
+ def run(self):
+ self.vars = { } # All variables
+ self.lists = { } # List variables
+ self.tables = { } # Tables
+ self.loops = [ ] # Currently active loops
+ self.loopend= { } # Mapping saying where loops end
+ self.gosub = None # Gosub return point (if any)
+ self.error = 0 # Indicates program error
+
+ self.stat = self.prog.keys() # Ordered list of all line numbers
+ self.stat.sort()
+ self.pc = 0 # Current program counter
+
+ # Processing prior to running
+
+ self.collect_data() # Collect all of the data statements
+ self.check_end()
+ self.check_loops()
+
+ if self.error: raise RuntimeError
+
+ while 1:
+ line = self.stat[self.pc]
+ instr = self.prog[line]
+
+ op = instr[0]
+
+ # END and STOP statements
+ if op == 'END' or op == 'STOP':
+ break # We're done
+
+ # GOTO statement
+ elif op == 'GOTO':
+ newline = instr[1]
+ self.goto(newline)
+ continue
+
+ # PRINT statement
+ elif op == 'PRINT':
+ plist = instr[1]
+ out = ""
+ for label,val in plist:
+ if out:
+ out += ' '*(15 - (len(out) % 15))
+ out += label
+ if val:
+ if label: out += " "
+ eval = self.eval(val)
+ out += str(eval)
+ sys.stdout.write(out)
+ end = instr[2]
+ if not (end == ',' or end == ';'):
+ sys.stdout.write("\n")
+ if end == ',': sys.stdout.write(" "*(15-(len(out) % 15)))
+ if end == ';': sys.stdout.write(" "*(3-(len(out) % 3)))
+
+ # LET statement
+ elif op == 'LET':
+ target = instr[1]
+ value = instr[2]
+ self.assign(target,value)
+
+ # READ statement
+ elif op == 'READ':
+ for target in instr[1]:
+ if self.dc < len(self.data):
+ value = ('NUM',self.data[self.dc])
+ self.assign(target,value)
+ self.dc += 1
+ else:
+ # No more data. Program ends
+ return
+ elif op == 'IF':
+ relop = instr[1]
+ newline = instr[2]
+ if (self.releval(relop)):
+ self.goto(newline)
+ continue
+
+ elif op == 'FOR':
+ loopvar = instr[1]
+ initval = instr[2]
+ finval = instr[3]
+ stepval = instr[4]
+
+ # Check to see if this is a new loop
+ if not self.loops or self.loops[-1][0] != self.pc:
+ # Looks like a new loop. Make the initial assignment
+ newvalue = initval
+ self.assign((loopvar,None,None),initval)
+ if not stepval: stepval = ('NUM',1)
+ stepval = self.eval(stepval) # Evaluate step here
+ self.loops.append((self.pc,stepval))
+ else:
+ # It's a repeat of the previous loop
+ # Update the value of the loop variable according to the step
+ stepval = ('NUM',self.loops[-1][1])
+ newvalue = ('BINOP','+',('VAR',(loopvar,None,None)),stepval)
+
+ if self.loops[-1][1] < 0: relop = '>='
+ else: relop = '<='
+ if not self.releval(('RELOP',relop,newvalue,finval)):
+ # Loop is done. Jump to the NEXT
+ self.pc = self.loopend[self.pc]
+ self.loops.pop()
+ else:
+ self.assign((loopvar,None,None),newvalue)
+
+ elif op == 'NEXT':
+ if not self.loops:
+ print "NEXT WITHOUT FOR AT LINE",line
+ return
+
+ nextvar = instr[1]
+ self.pc = self.loops[-1][0]
+ loopinst = self.prog[self.stat[self.pc]]
+ forvar = loopinst[1]
+ if nextvar != forvar:
+ print "NEXT DOESN'T MATCH FOR AT LINE", line
+ return
+ continue
+ elif op == 'GOSUB':
+ newline = instr[1]
+ if self.gosub:
+ print "ALREADY IN A SUBROUTINE AT LINE", line
+ return
+ self.gosub = self.stat[self.pc]
+ self.goto(newline)
+ continue
+
+ elif op == 'RETURN':
+ if not self.gosub:
+ print "RETURN WITHOUT A GOSUB AT LINE",line
+ return
+ self.goto(self.gosub)
+ self.gosub = None
+
+ elif op == 'FUNC':
+ fname = instr[1]
+ pname = instr[2]
+ expr = instr[3]
+ def eval_func(pvalue,name=pname,self=self,expr=expr):
+ self.assign((pname,None,None),pvalue)
+ return self.eval(expr)
+ self.functions[fname] = eval_func
+
+ elif op == 'DIM':
+ for vname,x,y in instr[1]:
+ if y == 0:
+ # Single dimension variable
+ self.lists[vname] = [0]*x
+ else:
+ # Double dimension variable
+ temp = [0]*y
+ v = []
+ for i in range(x):
+ v.append(temp[:])
+ self.tables[vname] = v
+
+ self.pc += 1
+
+ # Utility functions for program listing
+ def expr_str(self,expr):
+ etype = expr[0]
+ if etype == 'NUM': return str(expr[1])
+ elif etype == 'GROUP': return "(%s)" % self.expr_str(expr[1])
+ elif etype == 'UNARY':
+ if expr[1] == '-': return "-"+str(expr[2])
+ elif etype == 'BINOP':
+ return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3]))
+ elif etype == 'VAR':
+ return self.var_str(expr[1])
+
+ def relexpr_str(self,expr):
+ return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3]))
+
+ def var_str(self,var):
+ varname,dim1,dim2 = var
+ if not dim1 and not dim2: return varname
+ if dim1 and not dim2: return "%s(%s)" % (varname, self.expr_str(dim1))
+ return "%s(%s,%s)" % (varname, self.expr_str(dim1),self.expr_str(dim2))
+
+ # Create a program listing
+ def list(self):
+ stat = self.prog.keys() # Ordered list of all line numbers
+ stat.sort()
+ for line in stat:
+ instr = self.prog[line]
+ op = instr[0]
+ if op in ['END','STOP','RETURN']:
+ print line, op
+ continue
+ elif op == 'REM':
+ print line, instr[1]
+ elif op == 'PRINT':
+ print line, op,
+ first = 1
+ for p in instr[1]:
+ if not first: print ",",
+ if p[0] and p[1]: print '"%s"%s' % (p[0],self.expr_str(p[1])),
+ elif p[1]: print self.expr_str(p[1]),
+ else: print '"%s"' % (p[0],),
+ first = 0
+ if instr[2]: print instr[2]
+ else: print
+ elif op == 'LET':
+ print line,"LET",self.var_str(instr[1]),"=",self.expr_str(instr[2])
+ elif op == 'READ':
+ print line,"READ",
+ first = 1
+ for r in instr[1]:
+ if not first: print ",",
+ print self.var_str(r),
+ first = 0
+ print ""
+ elif op == 'IF':
+ print line,"IF %s THEN %d" % (self.relexpr_str(instr[1]),instr[2])
+ elif op == 'GOTO' or op == 'GOSUB':
+ print line, op, instr[1]
+ elif op == 'FOR':
+ print line,"FOR %s = %s TO %s" % (instr[1],self.expr_str(instr[2]),self.expr_str(instr[3])),
+ if instr[4]: print "STEP %s" % (self.expr_str(instr[4])),
+ print
+ elif op == 'NEXT':
+ print line,"NEXT", instr[1]
+ elif op == 'FUNC':
+ print line,"DEF %s(%s) = %s" % (instr[1],instr[2],self.expr_str(instr[3]))
+ elif op == 'DIM':
+ print line,"DIM",
+ first = 1
+ for vname,x,y in instr[1]:
+ if not first: print ",",
+ first = 0
+ if y == 0:
+ print "%s(%d)" % (vname,x),
+ else:
+ print "%s(%d,%d)" % (vname,x,y),
+
+ print
+ elif op == 'DATA':
+ print line,"DATA",
+ first = 1
+ for v in instr[1]:
+ if not first: print ",",
+ first = 0
+ print v,
+ print
+
+ # Erase the current program
+ def new(self):
+ self.prog = {}
+
+ # Insert statements
+ def add_statements(self,prog):
+ for line,stat in prog.items():
+ self.prog[line] = stat
+
+ # Delete a statement
+ def del_line(self,lineno):
+ try:
+ del self.prog[lineno]
+ except KeyError:
+ pass
+
diff --git a/example/BASIC/basparse.py b/example/BASIC/basparse.py
new file mode 100644
index 0000000..79210ad
--- /dev/null
+++ b/example/BASIC/basparse.py
@@ -0,0 +1,424 @@
+# An implementation of Dartmouth BASIC (1964)
+#
+
+from ply import *
+import basiclex
+
+tokens = basiclex.tokens
+
+precedence = (
+ ('left', 'PLUS','MINUS'),
+ ('left', 'TIMES','DIVIDE'),
+ ('left', 'POWER'),
+ ('right','UMINUS')
+)
+
+#### A BASIC program is a series of statements. We represent the program as a
+#### dictionary of tuples indexed by line number.
+
+def p_program(p):
+ '''program : program statement
+ | statement'''
+
+ if len(p) == 2 and p[1]:
+ p[0] = { }
+ line,stat = p[1]
+ p[0][line] = stat
+ elif len(p) ==3:
+ p[0] = p[1]
+ if not p[0]: p[0] = { }
+ if p[2]:
+ line,stat = p[2]
+ p[0][line] = stat
+
+#### This catch-all rule is used for any catastrophic errors. In this case,
+#### we simply return nothing
+
+def p_program_error(p):
+ '''program : error'''
+ p[0] = None
+ p.parser.error = 1
+
+#### Format of all BASIC statements.
+
+def p_statement(p):
+ '''statement : INTEGER command NEWLINE'''
+ if isinstance(p[2],str):
+ print p[2],"AT LINE", p[1]
+ p[0] = None
+ p.parser.error = 1
+ else:
+ lineno = int(p[1])
+ p[0] = (lineno,p[2])
+
+#### Interactive statements.
+
+def p_statement_interactive(p):
+ '''statement : RUN NEWLINE
+ | LIST NEWLINE
+ | NEW NEWLINE'''
+ p[0] = (0, (p[1],0))
+
+#### Blank line number
+def p_statement_blank(p):
+ '''statement : INTEGER NEWLINE'''
+ p[0] = (0,('BLANK',int(p[1])))
+
+#### Error handling for malformed statements
+
+def p_statement_bad(p):
+ '''statement : INTEGER error NEWLINE'''
+ print "MALFORMED STATEMENT AT LINE", p[1]
+ p[0] = None
+ p.parser.error = 1
+
+#### Blank line
+
+def p_statement_newline(p):
+ '''statement : NEWLINE'''
+ p[0] = None
+
+#### LET statement
+
+def p_command_let(p):
+ '''command : LET variable EQUALS expr'''
+ p[0] = ('LET',p[2],p[4])
+
+def p_command_let_bad(p):
+ '''command : LET variable EQUALS error'''
+ p[0] = "BAD EXPRESSION IN LET"
+
+#### READ statement
+
+def p_command_read(p):
+ '''command : READ varlist'''
+ p[0] = ('READ',p[2])
+
+def p_command_read_bad(p):
+ '''command : READ error'''
+ p[0] = "MALFORMED VARIABLE LIST IN READ"
+
+#### DATA statement
+
+def p_command_data(p):
+ '''command : DATA numlist'''
+ p[0] = ('DATA',p[2])
+
+def p_command_data_bad(p):
+ '''command : DATA error'''
+ p[0] = "MALFORMED NUMBER LIST IN DATA"
+
+#### PRINT statement
+
+def p_command_print(p):
+ '''command : PRINT plist optend'''
+ p[0] = ('PRINT',p[2],p[3])
+
+def p_command_print_bad(p):
+ '''command : PRINT error'''
+ p[0] = "MALFORMED PRINT STATEMENT"
+
+#### Optional ending on PRINT. Either a comma (,) or semicolon (;)
+
+def p_optend(p):
+ '''optend : COMMA
+ | SEMI
+ |'''
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ p[0] = None
+
+#### PRINT statement with no arguments
+
+def p_command_print_empty(p):
+ '''command : PRINT'''
+ p[0] = ('PRINT',[],None)
+
+#### GOTO statement
+
+def p_command_goto(p):
+ '''command : GOTO INTEGER'''
+ p[0] = ('GOTO',int(p[2]))
+
+def p_command_goto_bad(p):
+ '''command : GOTO error'''
+ p[0] = "INVALID LINE NUMBER IN GOTO"
+
+#### IF-THEN statement
+
+def p_command_if(p):
+ '''command : IF relexpr THEN INTEGER'''
+ p[0] = ('IF',p[2],int(p[4]))
+
+def p_command_if_bad(p):
+ '''command : IF error THEN INTEGER'''
+ p[0] = "BAD RELATIONAL EXPRESSION"
+
+def p_command_if_bad2(p):
+ '''command : IF relexpr THEN error'''
+ p[0] = "INVALID LINE NUMBER IN THEN"
+
+#### FOR statement
+
+def p_command_for(p):
+ '''command : FOR ID EQUALS expr TO expr optstep'''
+ p[0] = ('FOR',p[2],p[4],p[6],p[7])
+
+def p_command_for_bad_initial(p):
+ '''command : FOR ID EQUALS error TO expr optstep'''
+ p[0] = "BAD INITIAL VALUE IN FOR STATEMENT"
+
+def p_command_for_bad_final(p):
+ '''command : FOR ID EQUALS expr TO error optstep'''
+ p[0] = "BAD FINAL VALUE IN FOR STATEMENT"
+
+def p_command_for_bad_step(p):
+ '''command : FOR ID EQUALS expr TO expr STEP error'''
+ p[0] = "MALFORMED STEP IN FOR STATEMENT"
+
+#### Optional STEP qualifier on FOR statement
+
+def p_optstep(p):
+ '''optstep : STEP expr
+ | empty'''
+ if len(p) == 3:
+ p[0] = p[2]
+ else:
+ p[0] = None
+
+#### NEXT statement
+
+def p_command_next(p):
+ '''command : NEXT ID'''
+
+ p[0] = ('NEXT',p[2])
+
+def p_command_next_bad(p):
+ '''command : NEXT error'''
+ p[0] = "MALFORMED NEXT"
+
+#### END statement
+
+def p_command_end(p):
+ '''command : END'''
+ p[0] = ('END',)
+
+#### REM statement
+
+def p_command_rem(p):
+ '''command : REM'''
+ p[0] = ('REM',p[1])
+
+#### STOP statement
+
+def p_command_stop(p):
+ '''command : STOP'''
+ p[0] = ('STOP',)
+
+#### DEF statement
+
+def p_command_def(p):
+ '''command : DEF ID LPAREN ID RPAREN EQUALS expr'''
+ p[0] = ('FUNC',p[2],p[4],p[7])
+
+def p_command_def_bad_rhs(p):
+ '''command : DEF ID LPAREN ID RPAREN EQUALS error'''
+ p[0] = "BAD EXPRESSION IN DEF STATEMENT"
+
+def p_command_def_bad_arg(p):
+ '''command : DEF ID LPAREN error RPAREN EQUALS expr'''
+ p[0] = "BAD ARGUMENT IN DEF STATEMENT"
+
+#### GOSUB statement
+
+def p_command_gosub(p):
+ '''command : GOSUB INTEGER'''
+ p[0] = ('GOSUB',int(p[2]))
+
+def p_command_gosub_bad(p):
+ '''command : GOSUB error'''
+ p[0] = "INVALID LINE NUMBER IN GOSUB"
+
+#### RETURN statement
+
+def p_command_return(p):
+ '''command : RETURN'''
+ p[0] = ('RETURN',)
+
+#### DIM statement
+
+def p_command_dim(p):
+ '''command : DIM dimlist'''
+ p[0] = ('DIM',p[2])
+
+def p_command_dim_bad(p):
+ '''command : DIM error'''
+ p[0] = "MALFORMED VARIABLE LIST IN DIM"
+
+#### List of variables supplied to DIM statement
+
+def p_dimlist(p):
+ '''dimlist : dimlist COMMA dimitem
+ | dimitem'''
+ if len(p) == 4:
+ p[0] = p[1]
+ p[0].append(p[3])
+ else:
+ p[0] = [p[1]]
+
+#### DIM items
+
+def p_dimitem_single(p):
+ '''dimitem : ID LPAREN INTEGER RPAREN'''
+ p[0] = (p[1],eval(p[3]),0)
+
+def p_dimitem_double(p):
+ '''dimitem : ID LPAREN INTEGER COMMA INTEGER RPAREN'''
+ p[0] = (p[1],eval(p[3]),eval(p[5]))
+
+#### Arithmetic expressions
+
+def p_expr_binary(p):
+ '''expr : expr PLUS expr
+ | expr MINUS expr
+ | expr TIMES expr
+ | expr DIVIDE expr
+ | expr POWER expr'''
+
+ p[0] = ('BINOP',p[2],p[1],p[3])
+
+def p_expr_number(p):
+ '''expr : INTEGER
+ | FLOAT'''
+ p[0] = ('NUM',eval(p[1]))
+
+def p_expr_variable(p):
+ '''expr : variable'''
+ p[0] = ('VAR',p[1])
+
+def p_expr_group(p):
+ '''expr : LPAREN expr RPAREN'''
+ p[0] = ('GROUP',p[2])
+
+def p_expr_unary(p):
+ '''expr : MINUS expr %prec UMINUS'''
+ p[0] = ('UNARY','-',p[2])
+
+#### Relational expressions
+
+def p_relexpr(p):
+ '''relexpr : expr LT expr
+ | expr LE expr
+ | expr GT expr
+ | expr GE expr
+ | expr EQUALS expr
+ | expr NE expr'''
+ p[0] = ('RELOP',p[2],p[1],p[3])
+
+#### Variables
+
+def p_variable(p):
+ '''variable : ID
+ | ID LPAREN expr RPAREN
+ | ID LPAREN expr COMMA expr RPAREN'''
+ if len(p) == 2:
+ p[0] = (p[1],None,None)
+ elif len(p) == 5:
+ p[0] = (p[1],p[3],None)
+ else:
+ p[0] = (p[1],p[3],p[5])
+
+#### Builds a list of variable targets as a Python list
+
+def p_varlist(p):
+ '''varlist : varlist COMMA variable
+ | variable'''
+ if len(p) > 2:
+ p[0] = p[1]
+ p[0].append(p[3])
+ else:
+ p[0] = [p[1]]
+
+
+#### Builds a list of numbers as a Python list
+
+def p_numlist(p):
+ '''numlist : numlist COMMA number
+ | number'''
+
+ if len(p) > 2:
+ p[0] = p[1]
+ p[0].append(p[3])
+ else:
+ p[0] = [p[1]]
+
+#### A number. May be an integer or a float
+
+def p_number(p):
+ '''number : INTEGER
+ | FLOAT'''
+ p[0] = eval(p[1])
+
+#### A signed number.
+
+def p_number_signed(p):
+ '''number : MINUS INTEGER
+ | MINUS FLOAT'''
+ p[0] = eval("-"+p[2])
+
+#### List of targets for a print statement
+#### Returns a list of tuples (label,expr)
+
+def p_plist(p):
+ '''plist : plist COMMA pitem
+ | pitem'''
+ if len(p) > 3:
+ p[0] = p[1]
+ p[0].append(p[3])
+ else:
+ p[0] = [p[1]]
+
+def p_item_string(p):
+ '''pitem : STRING'''
+ p[0] = (p[1][1:-1],None)
+
+def p_item_string_expr(p):
+ '''pitem : STRING expr'''
+ p[0] = (p[1][1:-1],p[2])
+
+def p_item_expr(p):
+ '''pitem : expr'''
+ p[0] = ("",p[1])
+
+#### Empty
+
+def p_empty(p):
+ '''empty : '''
+
+#### Catastrophic error handler
+def p_error(p):
+ if not p:
+ print "SYNTAX ERROR AT EOF"
+
+bparser = yacc.yacc()
+
+def parse(data):
+ bparser.error = 0
+ p = bparser.parse(data)
+ if bparser.error: return None
+ return p
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/example/BASIC/dim.bas b/example/BASIC/dim.bas
new file mode 100644
index 0000000..87bd95b
--- /dev/null
+++ b/example/BASIC/dim.bas
@@ -0,0 +1,14 @@
+5 DIM A(50,15)
+10 FOR I = 1 TO 50
+20 FOR J = 1 TO 15
+30 LET A(I,J) = I + J
+35 REM PRINT I,J, A(I,J)
+40 NEXT J
+50 NEXT I
+100 FOR I = 1 TO 50
+110 FOR J = 1 TO 15
+120 PRINT A(I,J),
+130 NEXT J
+140 PRINT
+150 NEXT I
+999 END
diff --git a/example/BASIC/func.bas b/example/BASIC/func.bas
new file mode 100644
index 0000000..447ee16
--- /dev/null
+++ b/example/BASIC/func.bas
@@ -0,0 +1,5 @@
+10 DEF FDX(X) = 2*X
+20 FOR I = 0 TO 100
+30 PRINT FDX(I)
+40 NEXT I
+50 END
diff --git a/example/BASIC/gcd.bas b/example/BASIC/gcd.bas
new file mode 100644
index 0000000..d0b7746
--- /dev/null
+++ b/example/BASIC/gcd.bas
@@ -0,0 +1,22 @@
+10 PRINT "A","B","C","GCD"
+20 READ A,B,C
+30 LET X = A
+40 LET Y = B
+50 GOSUB 200
+60 LET X = G
+70 LET Y = C
+80 GOSUB 200
+90 PRINT A, B, C, G
+100 GOTO 20
+110 DATA 60, 90, 120
+120 DATA 38456, 64872, 98765
+130 DATA 32, 384, 72
+200 LET Q = INT(X/Y)
+210 LET R = X - Q*Y
+220 IF R = 0 THEN 300
+230 LET X = Y
+240 LET Y = R
+250 GOTO 200
+300 LET G = Y
+310 RETURN
+999 END
diff --git a/example/BASIC/gosub.bas b/example/BASIC/gosub.bas
new file mode 100644
index 0000000..99737b1
--- /dev/null
+++ b/example/BASIC/gosub.bas
@@ -0,0 +1,13 @@
+100 LET X = 3
+110 GOSUB 400
+120 PRINT U, V, W
+200 LET X = 5
+210 GOSUB 400
+220 LET Z = U + 2*V + 3*W
+230 PRINT Z
+240 GOTO 999
+400 LET U = X*X
+410 LET V = X*X*X
+420 LET W = X*X*X*X + X*X*X + X*X + X
+430 RETURN
+999 END
diff --git a/example/BASIC/hello.bas b/example/BASIC/hello.bas
new file mode 100644
index 0000000..cc6f0b0
--- /dev/null
+++ b/example/BASIC/hello.bas
@@ -0,0 +1,4 @@
+5 REM HELLO WORLD PROGAM
+10 PRINT "HELLO WORLD"
+99 END
+
diff --git a/example/BASIC/linear.bas b/example/BASIC/linear.bas
new file mode 100644
index 0000000..56c0822
--- /dev/null
+++ b/example/BASIC/linear.bas
@@ -0,0 +1,17 @@
+1 REM ::: SOLVE A SYSTEM OF LINEAR EQUATIONS
+2 REM ::: A1*X1 + A2*X2 = B1
+3 REM ::: A3*X1 + A4*X2 = B2
+4 REM --------------------------------------
+10 READ A1, A2, A3, A4
+15 LET D = A1 * A4 - A3 * A2
+20 IF D = 0 THEN 65
+30 READ B1, B2
+37 LET X1 = (B1*A4 - B2*A2) / D
+42 LET X2 = (A1*B2 - A3*B1) / D
+55 PRINT X1, X2
+60 GOTO 30
+65 PRINT "NO UNIQUE SOLUTION"
+70 DATA 1, 2, 4
+80 DATA 2, -7, 5
+85 DATA 1, 3, 4, -7
+90 END
diff --git a/example/BASIC/maxsin.bas b/example/BASIC/maxsin.bas
new file mode 100644
index 0000000..b969015
--- /dev/null
+++ b/example/BASIC/maxsin.bas
@@ -0,0 +1,12 @@
+5 PRINT "X VALUE", "SINE", "RESOLUTION"
+10 READ D
+20 LET M = -1
+30 FOR X = 0 TO 3 STEP D
+40 IF SIN(X) <= M THEN 80
+50 LET X0 = X
+60 LET M = SIN(X)
+80 NEXT X
+85 PRINT X0, M, D
+90 GOTO 10
+100 DATA .1, .01, .001
+110 END
diff --git a/example/BASIC/powers.bas b/example/BASIC/powers.bas
new file mode 100644
index 0000000..a454dc3
--- /dev/null
+++ b/example/BASIC/powers.bas
@@ -0,0 +1,13 @@
+5 PRINT "THIS PROGRAM COMPUTES AND PRINTS THE NTH POWERS"
+6 PRINT "OF THE NUMBERS LESS THAN OR EQUAL TO N FOR VARIOUS"
+7 PRINT "N FROM 1 THROUGH 7"
+8 PRINT
+10 FOR N = 1 TO 7
+15 PRINT "N = "N
+20 FOR I = 1 TO N
+30 PRINT I^N,
+40 NEXT I
+50 PRINT
+60 PRINT
+70 NEXT N
+80 END
diff --git a/example/BASIC/rand.bas b/example/BASIC/rand.bas
new file mode 100644
index 0000000..4ff7a14
--- /dev/null
+++ b/example/BASIC/rand.bas
@@ -0,0 +1,4 @@
+10 FOR I = 1 TO 20
+20 PRINT INT(10*RND(0))
+30 NEXT I
+40 END
diff --git a/example/BASIC/sales.bas b/example/BASIC/sales.bas
new file mode 100644
index 0000000..a39aefb
--- /dev/null
+++ b/example/BASIC/sales.bas
@@ -0,0 +1,20 @@
+10 FOR I = 1 TO 3
+20 READ P(I)
+30 NEXT I
+40 FOR I = 1 TO 3
+50 FOR J = 1 TO 5
+60 READ S(I,J)
+70 NEXT J
+80 NEXT I
+90 FOR J = 1 TO 5
+100 LET S = 0
+110 FOR I = 1 TO 3
+120 LET S = S + P(I) * S(I,J)
+130 NEXT I
+140 PRINT "TOTAL SALES FOR SALESMAN"J, "$"S
+150 NEXT J
+200 DATA 1.25, 4.30, 2.50
+210 DATA 40, 20, 37, 29, 42
+220 DATA 10, 16, 3, 21, 8
+230 DATA 35, 47, 29, 16, 33
+300 END
diff --git a/example/BASIC/sears.bas b/example/BASIC/sears.bas
new file mode 100644
index 0000000..5ced397
--- /dev/null
+++ b/example/BASIC/sears.bas
@@ -0,0 +1,18 @@
+1 REM :: THIS PROGRAM COMPUTES HOW MANY TIMES YOU HAVE TO FOLD
+2 REM :: A PIECE OF PAPER SO THAT IT IS TALLER THAN THE
+3 REM :: SEARS TOWER.
+4 REM :: S = HEIGHT OF TOWER (METERS)
+5 REM :: T = THICKNESS OF PAPER (MILLIMETERS)
+10 LET S = 442
+20 LET T = 0.1
+30 REM CONVERT T TO METERS
+40 LET T = T * .001
+50 LET F = 1
+60 LET H = T
+100 IF H > S THEN 200
+120 LET H = 2 * H
+125 LET F = F + 1
+130 GOTO 100
+200 PRINT "NUMBER OF FOLDS ="F
+220 PRINT "FINAL HEIGHT ="H
+999 END
diff --git a/example/BASIC/sqrt1.bas b/example/BASIC/sqrt1.bas
new file mode 100644
index 0000000..6673a91
--- /dev/null
+++ b/example/BASIC/sqrt1.bas
@@ -0,0 +1,5 @@
+10 LET X = 0
+20 LET X = X + 1
+30 PRINT X, SQR(X)
+40 IF X < 100 THEN 20
+50 END
diff --git a/example/BASIC/sqrt2.bas b/example/BASIC/sqrt2.bas
new file mode 100644
index 0000000..862d85e
--- /dev/null
+++ b/example/BASIC/sqrt2.bas
@@ -0,0 +1,4 @@
+10 FOR X = 1 TO 100
+20 PRINT X, SQR(X)
+30 NEXT X
+40 END
diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py
new file mode 100644
index 0000000..2a7f45e
--- /dev/null
+++ b/example/GardenSnake/GardenSnake.py
@@ -0,0 +1,709 @@
+# GardenSnake - a parser generator demonstration program
+#
+# This implements a modified version of a subset of Python:
+# - only 'def', 'return' and 'if' statements
+# - 'if' only has 'then' clause (no elif nor else)
+# - single-quoted strings only, content in raw format
+# - numbers are decimal.Decimal instances (not integers or floats)
+# - no print statment; use the built-in 'print' function
+# - only < > == + - / * implemented (and unary + -)
+# - assignment and tuple assignment work
+# - no generators of any sort
+# - no ... well, no quite a lot
+
+# Why? I'm thinking about a new indentation-based configuration
+# language for a project and wanted to figure out how to do it. Once
+# I got that working I needed a way to test it out. My original AST
+# was dumb so I decided to target Python's AST and compile it into
+# Python code. Plus, it's pretty cool that it only took a day or so
+# from sitting down with Ply to having working code.
+
+# This uses David Beazley's Ply from http://www.dabeaz.com/ply/
+
+# This work is hereby released into the Public Domain. To view a copy of
+# the public domain dedication, visit
+# http://creativecommons.org/licenses/publicdomain/ or send a letter to
+# Creative Commons, 543 Howard Street, 5th Floor, San Francisco,
+# California, 94105, USA.
+#
+# Portions of this work are derived from Python's Grammar definition
+# and may be covered under the Python copyright and license
+#
+# Andrew Dalke / Dalke Scientific Software, LLC
+# 30 August 2006 / Cape Town, South Africa
+
+# Changelog:
+# 30 August - added link to CC license; removed the "swapcase" encoding
+
+# Modifications for inclusion in PLY distribution
+import sys
+sys.path.insert(0,"../..")
+from ply import *
+
+##### Lexer ######
+#import lex
+import decimal
+
+tokens = (
+ 'DEF',
+ 'IF',
+ 'NAME',
+ 'NUMBER', # Python decimals
+ 'STRING', # single quoted strings only; syntax of raw strings
+ 'LPAR',
+ 'RPAR',
+ 'COLON',
+ 'EQ',
+ 'ASSIGN',
+ 'LT',
+ 'GT',
+ 'PLUS',
+ 'MINUS',
+ 'MULT',
+ 'DIV',
+ 'RETURN',
+ 'WS',
+ 'NEWLINE',
+ 'COMMA',
+ 'SEMICOLON',
+ 'INDENT',
+ 'DEDENT',
+ 'ENDMARKER',
+ )
+
+#t_NUMBER = r'\d+'
+# taken from decmial.py but without the leading sign
+def t_NUMBER(t):
+ r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
+ t.value = decimal.Decimal(t.value)
+ return t
+
+def t_STRING(t):
+ r"'([^\\']+|\\'|\\\\)*'" # I think this is right ...
+ t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun
+ return t
+
+t_COLON = r':'
+t_EQ = r'=='
+t_ASSIGN = r'='
+t_LT = r'<'
+t_GT = r'>'
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_MULT = r'\*'
+t_DIV = r'/'
+t_COMMA = r','
+t_SEMICOLON = r';'
+
+# Ply nicely documented how to do this.
+
+RESERVED = {
+ "def": "DEF",
+ "if": "IF",
+ "return": "RETURN",
+ }
+
+def t_NAME(t):
+ r'[a-zA-Z_][a-zA-Z0-9_]*'
+ t.type = RESERVED.get(t.value, "NAME")
+ return t
+
+# Putting this before t_WS let it consume lines with only comments in
+# them so the latter code never sees the WS part. Not consuming the
+# newline. Needed for "if 1: #comment"
+def t_comment(t):
+ r"[ ]*\043[^\n]*" # \043 is '#'
+ pass
+
+
+# Whitespace
+def t_WS(t):
+ r' [ ]+ '
+ if t.lexer.at_line_start and t.lexer.paren_count == 0:
+ return t
+
+# Don't generate newline tokens when inside of parenthesis, eg
+# a = (1,
+# 2, 3)
+def t_newline(t):
+ r'\n+'
+ t.lexer.lineno += len(t.value)
+ t.type = "NEWLINE"
+ if t.lexer.paren_count == 0:
+ return t
+
+def t_LPAR(t):
+ r'\('
+ t.lexer.paren_count += 1
+ return t
+
+def t_RPAR(t):
+ r'\)'
+ # check for underflow? should be the job of the parser
+ t.lexer.paren_count -= 1
+ return t
+
+
+def t_error(t):
+ raise SyntaxError("Unknown symbol %r" % (t.value[0],))
+ print "Skipping", repr(t.value[0])
+ t.lexer.skip(1)
+
+## I implemented INDENT / DEDENT generation as a post-processing filter
+
+# The original lex token stream contains WS and NEWLINE characters.
+# WS will only occur before any other tokens on a line.
+
+# I have three filters. One tags tokens by adding two attributes.
+# "must_indent" is True if the token must be indented from the
+# previous code. The other is "at_line_start" which is True for WS
+# and the first non-WS/non-NEWLINE on a line. It flags the check so
+# see if the new line has changed indication level.
+
+# Python's syntax has three INDENT states
+# 0) no colon hence no need to indent
+# 1) "if 1: go()" - simple statements have a COLON but no need for an indent
+# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent
+NO_INDENT = 0
+MAY_INDENT = 1
+MUST_INDENT = 2
+
+# only care about whitespace at the start of a line
+def track_tokens_filter(lexer, tokens):
+ lexer.at_line_start = at_line_start = True
+ indent = NO_INDENT
+ saw_colon = False
+ for token in tokens:
+ token.at_line_start = at_line_start
+
+ if token.type == "COLON":
+ at_line_start = False
+ indent = MAY_INDENT
+ token.must_indent = False
+
+ elif token.type == "NEWLINE":
+ at_line_start = True
+ if indent == MAY_INDENT:
+ indent = MUST_INDENT
+ token.must_indent = False
+
+ elif token.type == "WS":
+ assert token.at_line_start == True
+ at_line_start = True
+ token.must_indent = False
+
+ else:
+ # A real token; only indent after COLON NEWLINE
+ if indent == MUST_INDENT:
+ token.must_indent = True
+ else:
+ token.must_indent = False
+ at_line_start = False
+ indent = NO_INDENT
+
+ yield token
+ lexer.at_line_start = at_line_start
+
+def _new_token(type, lineno):
+ tok = lex.LexToken()
+ tok.type = type
+ tok.value = None
+ tok.lineno = lineno
+ return tok
+
+# Synthesize a DEDENT tag
+def DEDENT(lineno):
+ return _new_token("DEDENT", lineno)
+
+# Synthesize an INDENT tag
+def INDENT(lineno):
+ return _new_token("INDENT", lineno)
+
+
+# Track the indentation level and emit the right INDENT / DEDENT events.
+def indentation_filter(tokens):
+ # A stack of indentation levels; will never pop item 0
+ levels = [0]
+ token = None
+ depth = 0
+ prev_was_ws = False
+ for token in tokens:
+## if 1:
+## print "Process", token,
+## if token.at_line_start:
+## print "at_line_start",
+## if token.must_indent:
+## print "must_indent",
+## print
+
+ # WS only occurs at the start of the line
+ # There may be WS followed by NEWLINE so
+ # only track the depth here. Don't indent/dedent
+ # until there's something real.
+ if token.type == "WS":
+ assert depth == 0
+ depth = len(token.value)
+ prev_was_ws = True
+ # WS tokens are never passed to the parser
+ continue
+
+ if token.type == "NEWLINE":
+ depth = 0
+ if prev_was_ws or token.at_line_start:
+ # ignore blank lines
+ continue
+ # pass the other cases on through
+ yield token
+ continue
+
+ # then it must be a real token (not WS, not NEWLINE)
+ # which can affect the indentation level
+
+ prev_was_ws = False
+ if token.must_indent:
+ # The current depth must be larger than the previous level
+ if not (depth > levels[-1]):
+ raise IndentationError("expected an indented block")
+
+ levels.append(depth)
+ yield INDENT(token.lineno)
+
+ elif token.at_line_start:
+ # Must be on the same level or one of the previous levels
+ if depth == levels[-1]:
+ # At the same level
+ pass
+ elif depth > levels[-1]:
+ raise IndentationError("indentation increase but not in new block")
+ else:
+ # Back up; but only if it matches a previous level
+ try:
+ i = levels.index(depth)
+ except ValueError:
+ raise IndentationError("inconsistent indentation")
+ for _ in range(i+1, len(levels)):
+ yield DEDENT(token.lineno)
+ levels.pop()
+
+ yield token
+
+ ### Finished processing ###
+
+ # Must dedent any remaining levels
+ if len(levels) > 1:
+ assert token is not None
+ for _ in range(1, len(levels)):
+ yield DEDENT(token.lineno)
+
+
+# The top-level filter adds an ENDMARKER, if requested.
+# Python's grammar uses it.
+def filter(lexer, add_endmarker = True):
+ token = None
+ tokens = iter(lexer.token, None)
+ tokens = track_tokens_filter(lexer, tokens)
+ for token in indentation_filter(tokens):
+ yield token
+
+ if add_endmarker:
+ lineno = 1
+ if token is not None:
+ lineno = token.lineno
+ yield _new_token("ENDMARKER", lineno)
+
+# Combine Ply and my filters into a new lexer
+
+class IndentLexer(object):
+ def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
+ self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags)
+ self.token_stream = None
+ def input(self, s, add_endmarker=True):
+ self.lexer.paren_count = 0
+ self.lexer.input(s)
+ self.token_stream = filter(self.lexer, add_endmarker)
+ def token(self):
+ try:
+ return self.token_stream.next()
+ except StopIteration:
+ return None
+
+########## Parser (tokens -> AST) ######
+
+# also part of Ply
+#import yacc
+
+# I use the Python AST
+from compiler import ast
+
+# Helper function
+def Assign(left, right):
+ names = []
+ if isinstance(left, ast.Name):
+ # Single assignment on left
+ return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)
+ elif isinstance(left, ast.Tuple):
+ # List of things - make sure they are Name nodes
+ names = []
+ for child in left.getChildren():
+ if not isinstance(child, ast.Name):
+ raise SyntaxError("that assignment not supported")
+ names.append(child.name)
+ ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
+ return ast.Assign([ast.AssTuple(ass_list)], right)
+ else:
+ raise SyntaxError("Can't do that yet")
+
+
+# The grammar comments come from Python's Grammar/Grammar file
+
+## NB: compound_stmt in single_input is followed by extra NEWLINE!
+# file_input: (NEWLINE | stmt)* ENDMARKER
+def p_file_input_end(p):
+ """file_input_end : file_input ENDMARKER"""
+ p[0] = ast.Stmt(p[1])
+def p_file_input(p):
+ """file_input : file_input NEWLINE
+ | file_input stmt
+ | NEWLINE
+ | stmt"""
+ if isinstance(p[len(p)-1], basestring):
+ if len(p) == 3:
+ p[0] = p[1]
+ else:
+ p[0] = [] # p == 2 --> only a blank line
+ else:
+ if len(p) == 3:
+ p[0] = p[1] + p[2]
+ else:
+ p[0] = p[1]
+
+
+# funcdef: [decorators] 'def' NAME parameters ':' suite
+# ignoring decorators
+def p_funcdef(p):
+ "funcdef : DEF NAME parameters COLON suite"
+ p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5])
+
+# parameters: '(' [varargslist] ')'
+def p_parameters(p):
+ """parameters : LPAR RPAR
+ | LPAR varargslist RPAR"""
+ if len(p) == 3:
+ p[0] = []
+ else:
+ p[0] = p[2]
+
+
+# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) |
+# highly simplified
+def p_varargslist(p):
+ """varargslist : varargslist COMMA NAME
+ | NAME"""
+ if len(p) == 4:
+ p[0] = p[1] + p[3]
+ else:
+ p[0] = [p[1]]
+
+# stmt: simple_stmt | compound_stmt
+def p_stmt_simple(p):
+ """stmt : simple_stmt"""
+ # simple_stmt is a list
+ p[0] = p[1]
+
+def p_stmt_compound(p):
+ """stmt : compound_stmt"""
+ p[0] = [p[1]]
+
+# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+def p_simple_stmt(p):
+ """simple_stmt : small_stmts NEWLINE
+ | small_stmts SEMICOLON NEWLINE"""
+ p[0] = p[1]
+
+def p_small_stmts(p):
+ """small_stmts : small_stmts SEMICOLON small_stmt
+ | small_stmt"""
+ if len(p) == 4:
+ p[0] = p[1] + [p[3]]
+ else:
+ p[0] = [p[1]]
+
+# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
+# import_stmt | global_stmt | exec_stmt | assert_stmt
+def p_small_stmt(p):
+ """small_stmt : flow_stmt
+ | expr_stmt"""
+ p[0] = p[1]
+
+# expr_stmt: testlist (augassign (yield_expr|testlist) |
+# ('=' (yield_expr|testlist))*)
+# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+# '<<=' | '>>=' | '**=' | '//=')
+def p_expr_stmt(p):
+ """expr_stmt : testlist ASSIGN testlist
+ | testlist """
+ if len(p) == 2:
+ # a list of expressions
+ p[0] = ast.Discard(p[1])
+ else:
+ p[0] = Assign(p[1], p[3])
+
+def p_flow_stmt(p):
+ "flow_stmt : return_stmt"
+ p[0] = p[1]
+
+# return_stmt: 'return' [testlist]
+def p_return_stmt(p):
+ "return_stmt : RETURN testlist"
+ p[0] = ast.Return(p[2])
+
+
+def p_compound_stmt(p):
+ """compound_stmt : if_stmt
+ | funcdef"""
+ p[0] = p[1]
+
+def p_if_stmt(p):
+ 'if_stmt : IF test COLON suite'
+ p[0] = ast.If([(p[2], p[4])], None)
+
+def p_suite(p):
+ """suite : simple_stmt
+ | NEWLINE INDENT stmts DEDENT"""
+ if len(p) == 2:
+ p[0] = ast.Stmt(p[1])
+ else:
+ p[0] = ast.Stmt(p[3])
+
+
+def p_stmts(p):
+ """stmts : stmts stmt
+ | stmt"""
+ if len(p) == 3:
+ p[0] = p[1] + p[2]
+ else:
+ p[0] = p[1]
+
+## No using Python's approach because Ply supports precedence
+
+# comparison: expr (comp_op expr)*
+# arith_expr: term (('+'|'-') term)*
+# term: factor (('*'|'/'|'%'|'//') factor)*
+# factor: ('+'|'-'|'~') factor | power
+# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+
+def make_lt_compare((left, right)):
+ return ast.Compare(left, [('<', right),])
+def make_gt_compare((left, right)):
+ return ast.Compare(left, [('>', right),])
+def make_eq_compare((left, right)):
+ return ast.Compare(left, [('==', right),])
+
+
+binary_ops = {
+ "+": ast.Add,
+ "-": ast.Sub,
+ "*": ast.Mul,
+ "/": ast.Div,
+ "<": make_lt_compare,
+ ">": make_gt_compare,
+ "==": make_eq_compare,
+}
+unary_ops = {
+ "+": ast.UnaryAdd,
+ "-": ast.UnarySub,
+ }
+precedence = (
+ ("left", "EQ", "GT", "LT"),
+ ("left", "PLUS", "MINUS"),
+ ("left", "MULT", "DIV"),
+ )
+
+def p_comparison(p):
+ """comparison : comparison PLUS comparison
+ | comparison MINUS comparison
+ | comparison MULT comparison
+ | comparison DIV comparison
+ | comparison LT comparison
+ | comparison EQ comparison
+ | comparison GT comparison
+ | PLUS comparison
+ | MINUS comparison
+ | power"""
+ if len(p) == 4:
+ p[0] = binary_ops[p[2]]((p[1], p[3]))
+ elif len(p) == 3:
+ p[0] = unary_ops[p[1]](p[2])
+ else:
+ p[0] = p[1]
+
+# power: atom trailer* ['**' factor]
+# trailers enables function calls. I only allow one level of calls
+# so this is 'trailer'
+def p_power(p):
+ """power : atom
+ | atom trailer"""
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ if p[2][0] == "CALL":
+ p[0] = ast.CallFunc(p[1], p[2][1], None, None)
+ else:
+ raise AssertionError("not implemented")
+
+def p_atom_name(p):
+ """atom : NAME"""
+ p[0] = ast.Name(p[1])
+
+def p_atom_number(p):
+ """atom : NUMBER
+ | STRING"""
+ p[0] = ast.Const(p[1])
+
+def p_atom_tuple(p):
+ """atom : LPAR testlist RPAR"""
+ p[0] = p[2]
+
+# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+def p_trailer(p):
+ "trailer : LPAR arglist RPAR"
+ p[0] = ("CALL", p[2])
+
+# testlist: test (',' test)* [',']
+# Contains shift/reduce error
+def p_testlist(p):
+ """testlist : testlist_multi COMMA
+ | testlist_multi """
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ # May need to promote singleton to tuple
+ if isinstance(p[1], list):
+ p[0] = p[1]
+ else:
+ p[0] = [p[1]]
+ # Convert into a tuple?
+ if isinstance(p[0], list):
+ p[0] = ast.Tuple(p[0])
+
+def p_testlist_multi(p):
+ """testlist_multi : testlist_multi COMMA test
+ | test"""
+ if len(p) == 2:
+ # singleton
+ p[0] = p[1]
+ else:
+ if isinstance(p[1], list):
+ p[0] = p[1] + [p[3]]
+ else:
+ # singleton -> tuple
+ p[0] = [p[1], p[3]]
+
+
+# test: or_test ['if' or_test 'else' test] | lambdef
+# as I don't support 'and', 'or', and 'not' this works down to 'comparison'
+def p_test(p):
+ "test : comparison"
+ p[0] = p[1]
+
+
+
+# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
+# XXX INCOMPLETE: this doesn't allow the trailing comma
+def p_arglist(p):
+ """arglist : arglist COMMA argument
+ | argument"""
+ if len(p) == 4:
+ p[0] = p[1] + [p[3]]
+ else:
+ p[0] = [p[1]]
+
+# argument: test [gen_for] | test '=' test # Really [keyword '='] test
+def p_argument(p):
+ "argument : test"
+ p[0] = p[1]
+
+def p_error(p):
+ #print "Error!", repr(p)
+ raise SyntaxError(p)
+
+
+class GardenSnakeParser(object):
+ def __init__(self, lexer = None):
+ if lexer is None:
+ lexer = IndentLexer()
+ self.lexer = lexer
+ self.parser = yacc.yacc(start="file_input_end")
+
+ def parse(self, code):
+ self.lexer.input(code)
+ result = self.parser.parse(lexer = self.lexer)
+ return ast.Module(None, result)
+
+
+###### Code generation ######
+
+from compiler import misc, syntax, pycodegen
+
+class GardenSnakeCompiler(object):
+ def __init__(self):
+ self.parser = GardenSnakeParser()
+ def compile(self, code, filename="<string>"):
+ tree = self.parser.parse(code)
+ #print tree
+ misc.set_filename(filename, tree)
+ syntax.check(tree)
+ gen = pycodegen.ModuleCodeGenerator(tree)
+ code = gen.getCode()
+ return code
+
+####### Test code #######
+
+compile = GardenSnakeCompiler().compile
+
+code = r"""
+
+print('LET\'S TRY THIS \\OUT')
+
+#Comment here
+def x(a):
+ print('called with',a)
+ if a == 1:
+ return 2
+ if a*2 > 10: return 999 / 4
+ # Another comment here
+
+ return a+2*3
+
+ints = (1, 2,
+ 3, 4,
+5)
+print('mutiline-expression', ints)
+
+t = 4+1/3*2+6*(9-5+1)
+print('predence test; should be 34+2/3:', t, t==(34+2/3))
+
+print('numbers', 1,2,3,4,5)
+if 1:
+ 8
+ a=9
+ print(x(a))
+
+print(x(1))
+print(x(2))
+print(x(8),'3')
+print('this is decimal', 1/5)
+print('BIG DECIMAL', 1.234567891234567e12345)
+
+"""
+
+# Set up the GardenSnake run-time environment
+def print_(*args):
+ print "-->", " ".join(map(str,args))
+
+globals()["print"] = print_
+
+compiled_code = compile(code)
+
+exec compiled_code in globals()
+print "Done"
diff --git a/example/GardenSnake/README b/example/GardenSnake/README
new file mode 100644
index 0000000..4d8be2d
--- /dev/null
+++ b/example/GardenSnake/README
@@ -0,0 +1,5 @@
+This example is Andrew Dalke's GardenSnake language. It shows how to process an
+indentation-like language like Python. Further details can be found here:
+
+http://dalkescientific.com/writings/diary/archive/2006/08/30/gardensnake_language.html
+
diff --git a/example/README b/example/README
new file mode 100644
index 0000000..63519b5
--- /dev/null
+++ b/example/README
@@ -0,0 +1,10 @@
+Simple examples:
+ calc - Simple calculator
+ classcalc - Simple calculate defined as a class
+
+Complex examples
+ ansic - ANSI C grammar from K&R
+ BASIC - A small BASIC interpreter
+ GardenSnake - A simple python-like language
+ yply - Converts Unix yacc files to PLY programs.
+
diff --git a/example/ansic/README b/example/ansic/README
new file mode 100644
index 0000000..e049d3b
--- /dev/null
+++ b/example/ansic/README
@@ -0,0 +1,2 @@
+This example is incomplete. Was going to specify an ANSI C parser.
+This is part of it.
diff --git a/example/ansic/clex.py b/example/ansic/clex.py
new file mode 100644
index 0000000..6b9d7e7
--- /dev/null
+++ b/example/ansic/clex.py
@@ -0,0 +1,164 @@
+# ----------------------------------------------------------------------
+# clex.py
+#
+# A lexer for ANSI C.
+# ----------------------------------------------------------------------
+
+import sys
+sys.path.insert(0,"../..")
+
+import ply.lex as lex
+
+# Reserved words
+reserved = (
+ 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
+ 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
+ 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
+ 'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
+ )
+
+tokens = reserved + (
+ # Literals (identifier, integer constant, float constant, string constant, char const)
+ 'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',
+
+ # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
+ 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
+ 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
+ 'LOR', 'LAND', 'LNOT',
+ 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
+
+ # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
+ 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
+ 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
+
+ # Increment/decrement (++,--)
+ 'PLUSPLUS', 'MINUSMINUS',
+
+ # Structure dereference (->)
+ 'ARROW',
+
+ # Conditional operator (?)
+ 'CONDOP',
+
+ # Delimeters ( ) [ ] { } , . ; :
+ 'LPAREN', 'RPAREN',
+ 'LBRACKET', 'RBRACKET',
+ 'LBRACE', 'RBRACE',
+ 'COMMA', 'PERIOD', 'SEMI', 'COLON',
+
+ # Ellipsis (...)
+ 'ELLIPSIS',
+ )
+
+# Completely ignored characters
+t_ignore = ' \t\x0c'
+
+# Newlines
+def t_NEWLINE(t):
+ r'\n+'
+ t.lexer.lineno += t.value.count("\n")
+
+# Operators
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_TIMES = r'\*'
+t_DIVIDE = r'/'
+t_MOD = r'%'
+t_OR = r'\|'
+t_AND = r'&'
+t_NOT = r'~'
+t_XOR = r'\^'
+t_LSHIFT = r'<<'
+t_RSHIFT = r'>>'
+t_LOR = r'\|\|'
+t_LAND = r'&&'
+t_LNOT = r'!'
+t_LT = r'<'
+t_GT = r'>'
+t_LE = r'<='
+t_GE = r'>='
+t_EQ = r'=='
+t_NE = r'!='
+
+# Assignment operators
+
+t_EQUALS = r'='
+t_TIMESEQUAL = r'\*='
+t_DIVEQUAL = r'/='
+t_MODEQUAL = r'%='
+t_PLUSEQUAL = r'\+='
+t_MINUSEQUAL = r'-='
+t_LSHIFTEQUAL = r'<<='
+t_RSHIFTEQUAL = r'>>='
+t_ANDEQUAL = r'&='
+t_OREQUAL = r'\|='
+t_XOREQUAL = r'^='
+
+# Increment/decrement
+t_PLUSPLUS = r'\+\+'
+t_MINUSMINUS = r'--'
+
+# ->
+t_ARROW = r'->'
+
+# ?
+t_CONDOP = r'\?'
+
+# Delimeters
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+t_LBRACKET = r'\['
+t_RBRACKET = r'\]'
+t_LBRACE = r'\{'
+t_RBRACE = r'\}'
+t_COMMA = r','
+t_PERIOD = r'\.'
+t_SEMI = r';'
+t_COLON = r':'
+t_ELLIPSIS = r'\.\.\.'
+
+# Identifiers and reserved words
+
+reserved_map = { }
+for r in reserved:
+ reserved_map[r.lower()] = r
+
+def t_ID(t):
+ r'[A-Za-z_][\w_]*'
+ t.type = reserved_map.get(t.value,"ID")
+ return t
+
+# Integer literal
+t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
+
+# Floating literal
+t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
+
+# String literal
+t_SCONST = r'\"([^\\\n]|(\\.))*?\"'
+
+# Character constant 'c' or L'c'
+t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
+
+# Comments
+def t_comment(t):
+ r' /\*(.|\n)*?\*/'
+ t.lineno += t.value.count('\n')
+
+# Preprocessor directive (ignored)
+def t_preprocessor(t):
+ r'\#(.)*?\n'
+ t.lineno += 1
+
+def t_error(t):
+ print "Illegal character %s" % repr(t.value[0])
+ t.lexer.skip(1)
+
+lexer = lex.lex(optimize=1)
+if __name__ == "__main__":
+ lex.runmain(lexer)
+
+
+
+
+
diff --git a/example/ansic/cparse.py b/example/ansic/cparse.py
new file mode 100644
index 0000000..b6a0c42
--- /dev/null
+++ b/example/ansic/cparse.py
@@ -0,0 +1,863 @@
+# -----------------------------------------------------------------------------
+# cparse.py
+#
+# Simple parser for ANSI C. Based on the grammar in K&R, 2nd Ed.
+# -----------------------------------------------------------------------------
+
+import sys
+import clex
+import ply.yacc as yacc
+
+# Get the token map
+tokens = clex.tokens
+
+# translation-unit:
+
+def p_translation_unit_1(t):
+ 'translation_unit : external_declaration'
+ pass
+
+def p_translation_unit_2(t):
+ 'translation_unit : translation_unit external_declaration'
+ pass
+
+# external-declaration:
+
+def p_external_declaration_1(t):
+ 'external_declaration : function_definition'
+ pass
+
+def p_external_declaration_2(t):
+ 'external_declaration : declaration'
+ pass
+
+# function-definition:
+
+def p_function_definition_1(t):
+ 'function_definition : declaration_specifiers declarator declaration_list compound_statement'
+ pass
+
+def p_function_definition_2(t):
+ 'function_definition : declarator declaration_list compound_statement'
+ pass
+
+def p_function_definition_3(t):
+ 'function_definition : declarator compound_statement'
+ pass
+
+def p_function_definition_4(t):
+ 'function_definition : declaration_specifiers declarator compound_statement'
+ pass
+
+# declaration:
+
+def p_declaration_1(t):
+ 'declaration : declaration_specifiers init_declarator_list SEMI'
+ pass
+
+def p_declaration_2(t):
+ 'declaration : declaration_specifiers SEMI'
+ pass
+
+# declaration-list:
+
+def p_declaration_list_1(t):
+ 'declaration_list : declaration'
+ pass
+
+def p_declaration_list_2(t):
+ 'declaration_list : declaration_list declaration '
+ pass
+
+# declaration-specifiers
+def p_declaration_specifiers_1(t):
+ 'declaration_specifiers : storage_class_specifier declaration_specifiers'
+ pass
+
+def p_declaration_specifiers_2(t):
+ 'declaration_specifiers : type_specifier declaration_specifiers'
+ pass
+
+def p_declaration_specifiers_3(t):
+ 'declaration_specifiers : type_qualifier declaration_specifiers'
+ pass
+
+def p_declaration_specifiers_4(t):
+ 'declaration_specifiers : storage_class_specifier'
+ pass
+
+def p_declaration_specifiers_5(t):
+ 'declaration_specifiers : type_specifier'
+ pass
+
+def p_declaration_specifiers_6(t):
+ 'declaration_specifiers : type_qualifier'
+ pass
+
+# storage-class-specifier
+def p_storage_class_specifier(t):
+ '''storage_class_specifier : AUTO
+ | REGISTER
+ | STATIC
+ | EXTERN
+ | TYPEDEF
+ '''
+ pass
+
+# type-specifier:
+def p_type_specifier(t):
+ '''type_specifier : VOID
+ | CHAR
+ | SHORT
+ | INT
+ | LONG
+ | FLOAT
+ | DOUBLE
+ | SIGNED
+ | UNSIGNED
+ | struct_or_union_specifier
+ | enum_specifier
+ | TYPEID
+ '''
+ pass
+
+# type-qualifier:
+def p_type_qualifier(t):
+ '''type_qualifier : CONST
+ | VOLATILE'''
+ pass
+
+# struct-or-union-specifier
+
+def p_struct_or_union_specifier_1(t):
+ 'struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE'
+ pass
+
+def p_struct_or_union_specifier_2(t):
+ 'struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE'
+ pass
+
+def p_struct_or_union_specifier_3(t):
+ 'struct_or_union_specifier : struct_or_union ID'
+ pass
+
+# struct-or-union:
+def p_struct_or_union(t):
+ '''struct_or_union : STRUCT
+ | UNION
+ '''
+ pass
+
+# struct-declaration-list:
+
+def p_struct_declaration_list_1(t):
+ 'struct_declaration_list : struct_declaration'
+ pass
+
+def p_struct_declaration_list_2(t):
+ 'struct_declaration_list : struct_declarator_list struct_declaration'
+ pass
+
+# init-declarator-list:
+
+def p_init_declarator_list_1(t):
+ 'init_declarator_list : init_declarator'
+ pass
+
+def p_init_declarator_list_2(t):
+ 'init_declarator_list : init_declarator_list COMMA init_declarator'
+ pass
+
+# init-declarator
+
+def p_init_declarator_1(t):
+ 'init_declarator : declarator'
+ pass
+
+def p_init_declarator_2(t):
+ 'init_declarator : declarator EQUALS initializer'
+ pass
+
+# struct-declaration:
+
+def p_struct_declaration(t):
+ 'struct_declaration : specifier_qualifier_list struct_declarator_list SEMI'
+ pass
+
+# specifier-qualifier-list:
+
+def p_specifier_qualifier_list_1(t):
+ 'specifier_qualifier_list : type_specifier specifier_qualifier_list'
+ pass
+
+def p_specifier_qualifier_list_2(t):
+ 'specifier_qualifier_list : type_specifier'
+ pass
+
+def p_specifier_qualifier_list_3(t):
+ 'specifier_qualifier_list : type_qualifier specifier_qualifier_list'
+ pass
+
+def p_specifier_qualifier_list_4(t):
+ 'specifier_qualifier_list : type_qualifier'
+ pass
+
+# struct-declarator-list:
+
+def p_struct_declarator_list_1(t):
+ 'struct_declarator_list : struct_declarator'
+ pass
+
+def p_struct_declarator_list_2(t):
+ 'struct_declarator_list : struct_declarator_list COMMA struct_declarator'
+ pass
+
+# struct-declarator:
+
+def p_struct_declarator_1(t):
+ 'struct_declarator : declarator'
+ pass
+
+def p_struct_declarator_2(t):
+ 'struct_declarator : declarator COLON constant_expression'
+ pass
+
+def p_struct_declarator_3(t):
+ 'struct_declarator : COLON constant_expression'
+ pass
+
+# enum-specifier:
+
+def p_enum_specifier_1(t):
+ 'enum_specifier : ENUM ID LBRACE enumerator_list RBRACE'
+ pass
+
+def p_enum_specifier_2(t):
+ 'enum_specifier : ENUM LBRACE enumerator_list RBRACE'
+ pass
+
+def p_enum_specifier_3(t):
+ 'enum_specifier : ENUM ID'
+ pass
+
+# enumerator_list:
+def p_enumerator_list_1(t):
+ 'enumerator_list : enumerator'
+ pass
+
+def p_enumerator_list_2(t):
+ 'enumerator_list : enumerator_list COMMA enumerator'
+ pass
+
+# enumerator:
+def p_enumerator_1(t):
+ 'enumerator : ID'
+ pass
+
+def p_enumerator_2(t):
+ 'enumerator : ID EQUALS constant_expression'
+ pass
+
+# declarator:
+
+def p_declarator_1(t):
+ 'declarator : pointer direct_declarator'
+ pass
+
+def p_declarator_2(t):
+ 'declarator : direct_declarator'
+ pass
+
+# direct-declarator:
+
+def p_direct_declarator_1(t):
+ 'direct_declarator : ID'
+ pass
+
+def p_direct_declarator_2(t):
+ 'direct_declarator : LPAREN declarator RPAREN'
+ pass
+
+def p_direct_declarator_3(t):
+ 'direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET'
+ pass
+
+def p_direct_declarator_4(t):
+ 'direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN '
+ pass
+
+def p_direct_declarator_5(t):
+ 'direct_declarator : direct_declarator LPAREN identifier_list RPAREN '
+ pass
+
+def p_direct_declarator_6(t):
+ 'direct_declarator : direct_declarator LPAREN RPAREN '
+ pass
+
+# pointer:
+def p_pointer_1(t):
+ 'pointer : TIMES type_qualifier_list'
+ pass
+
+def p_pointer_2(t):
+ 'pointer : TIMES'
+ pass
+
+def p_pointer_3(t):
+ 'pointer : TIMES type_qualifier_list pointer'
+ pass
+
+def p_pointer_4(t):
+ 'pointer : TIMES pointer'
+ pass
+
+# type-qualifier-list:
+
+def p_type_qualifier_list_1(t):
+ 'type_qualifier_list : type_qualifier'
+ pass
+
+def p_type_qualifier_list_2(t):
+ 'type_qualifier_list : type_qualifier_list type_qualifier'
+ pass
+
+# parameter-type-list:
+
+def p_parameter_type_list_1(t):
+ 'parameter_type_list : parameter_list'
+ pass
+
+def p_parameter_type_list_2(t):
+ 'parameter_type_list : parameter_list COMMA ELLIPSIS'
+ pass
+
+# parameter-list:
+
+def p_parameter_list_1(t):
+ 'parameter_list : parameter_declaration'
+ pass
+
+def p_parameter_list_2(t):
+ 'parameter_list : parameter_list COMMA parameter_declaration'
+ pass
+
+# parameter-declaration:
+def p_parameter_declaration_1(t):
+ 'parameter_declaration : declaration_specifiers declarator'
+ pass
+
+def p_parameter_declaration_2(t):
+ 'parameter_declaration : declaration_specifiers abstract_declarator_opt'
+ pass
+
+# identifier-list:
+def p_identifier_list_1(t):
+ 'identifier_list : ID'
+ pass
+
+def p_identifier_list_2(t):
+ 'identifier_list : identifier_list COMMA ID'
+ pass
+
+# initializer:
+
+def p_initializer_1(t):
+ 'initializer : assignment_expression'
+ pass
+
+def p_initializer_2(t):
+ '''initializer : LBRACE initializer_list RBRACE
+ | LBRACE initializer_list COMMA RBRACE'''
+ pass
+
+# initializer-list:
+
+def p_initializer_list_1(t):
+ 'initializer_list : initializer'
+ pass
+
+def p_initializer_list_2(t):
+ 'initializer_list : initializer_list COMMA initializer'
+ pass
+
+# type-name:
+
+def p_type_name(t):
+ 'type_name : specifier_qualifier_list abstract_declarator_opt'
+ pass
+
+def p_abstract_declarator_opt_1(t):
+ 'abstract_declarator_opt : empty'
+ pass
+
+def p_abstract_declarator_opt_2(t):
+ 'abstract_declarator_opt : abstract_declarator'
+ pass
+
+# abstract-declarator:
+
+def p_abstract_declarator_1(t):
+ 'abstract_declarator : pointer '
+ pass
+
+def p_abstract_declarator_2(t):
+ 'abstract_declarator : pointer direct_abstract_declarator'
+ pass
+
+def p_abstract_declarator_3(t):
+ 'abstract_declarator : direct_abstract_declarator'
+ pass
+
+# direct-abstract-declarator:
+
+def p_direct_abstract_declarator_1(t):
+ 'direct_abstract_declarator : LPAREN abstract_declarator RPAREN'
+ pass
+
+def p_direct_abstract_declarator_2(t):
+ 'direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET'
+ pass
+
+def p_direct_abstract_declarator_3(t):
+ 'direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET'
+ pass
+
+def p_direct_abstract_declarator_4(t):
+ 'direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN'
+ pass
+
+def p_direct_abstract_declarator_5(t):
+ 'direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN'
+ pass
+
+# Optional fields in abstract declarators
+
+def p_constant_expression_opt_1(t):
+ 'constant_expression_opt : empty'
+ pass
+
+def p_constant_expression_opt_2(t):
+ 'constant_expression_opt : constant_expression'
+ pass
+
+def p_parameter_type_list_opt_1(t):
+ 'parameter_type_list_opt : empty'
+ pass
+
+def p_parameter_type_list_opt_2(t):
+ 'parameter_type_list_opt : parameter_type_list'
+ pass
+
+# statement:
+
+def p_statement(t):
+ '''
+ statement : labeled_statement
+ | expression_statement
+ | compound_statement
+ | selection_statement
+ | iteration_statement
+ | jump_statement
+ '''
+ pass
+
+# labeled-statement:
+
+def p_labeled_statement_1(t):
+ 'labeled_statement : ID COLON statement'
+ pass
+
+def p_labeled_statement_2(t):
+ 'labeled_statement : CASE constant_expression COLON statement'
+ pass
+
+def p_labeled_statement_3(t):
+ 'labeled_statement : DEFAULT COLON statement'
+ pass
+
+# expression-statement:
+def p_expression_statement(t):
+ 'expression_statement : expression_opt SEMI'
+ pass
+
+# compound-statement:
+
+def p_compound_statement_1(t):
+ 'compound_statement : LBRACE declaration_list statement_list RBRACE'
+ pass
+
+def p_compound_statement_2(t):
+ 'compound_statement : LBRACE statement_list RBRACE'
+ pass
+
+def p_compound_statement_3(t):
+ 'compound_statement : LBRACE declaration_list RBRACE'
+ pass
+
+def p_compound_statement_4(t):
+ 'compound_statement : LBRACE RBRACE'
+ pass
+
+# statement-list:
+
+def p_statement_list_1(t):
+ 'statement_list : statement'
+ pass
+
+def p_statement_list_2(t):
+ 'statement_list : statement_list statement'
+ pass
+
+# selection-statement
+
+def p_selection_statement_1(t):
+ 'selection_statement : IF LPAREN expression RPAREN statement'
+ pass
+
+def p_selection_statement_2(t):
+ 'selection_statement : IF LPAREN expression RPAREN statement ELSE statement '
+ pass
+
+def p_selection_statement_3(t):
+ 'selection_statement : SWITCH LPAREN expression RPAREN statement '
+ pass
+
+# iteration_statement:
+
+def p_iteration_statement_1(t):
+ 'iteration_statement : WHILE LPAREN expression RPAREN statement'
+ pass
+
+def p_iteration_statement_2(t):
+ 'iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement '
+ pass
+
+def p_iteration_statement_3(t):
+ 'iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI'
+ pass
+
+# jump_statement:
+
+def p_jump_statement_1(t):
+ 'jump_statement : GOTO ID SEMI'
+ pass
+
+def p_jump_statement_2(t):
+ 'jump_statement : CONTINUE SEMI'
+ pass
+
+def p_jump_statement_3(t):
+ 'jump_statement : BREAK SEMI'
+ pass
+
+def p_jump_statement_4(t):
+ 'jump_statement : RETURN expression_opt SEMI'
+ pass
+
+def p_expression_opt_1(t):
+ 'expression_opt : empty'
+ pass
+
+def p_expression_opt_2(t):
+ 'expression_opt : expression'
+ pass
+
+# expression:
+def p_expression_1(t):
+ 'expression : assignment_expression'
+ pass
+
+def p_expression_2(t):
+ 'expression : expression COMMA assignment_expression'
+ pass
+
+# assigment_expression:
+def p_assignment_expression_1(t):
+ 'assignment_expression : conditional_expression'
+ pass
+
+def p_assignment_expression_2(t):
+ 'assignment_expression : unary_expression assignment_operator assignment_expression'
+ pass
+
+# assignment_operator:
+def p_assignment_operator(t):
+ '''
+ assignment_operator : EQUALS
+ | TIMESEQUAL
+ | DIVEQUAL
+ | MODEQUAL
+ | PLUSEQUAL
+ | MINUSEQUAL
+ | LSHIFTEQUAL
+ | RSHIFTEQUAL
+ | ANDEQUAL
+ | OREQUAL
+ | XOREQUAL
+ '''
+ pass
+
+# conditional-expression
+def p_conditional_expression_1(t):
+ 'conditional_expression : logical_or_expression'
+ pass
+
+def p_conditional_expression_2(t):
+ 'conditional_expression : logical_or_expression CONDOP expression COLON conditional_expression '
+ pass
+
+# constant-expression
+
+def p_constant_expression(t):
+ 'constant_expression : conditional_expression'
+ pass
+
+# logical-or-expression
+
+def p_logical_or_expression_1(t):
+ 'logical_or_expression : logical_and_expression'
+ pass
+
+def p_logical_or_expression_2(t):
+ 'logical_or_expression : logical_or_expression LOR logical_and_expression'
+ pass
+
+# logical-and-expression
+
+def p_logical_and_expression_1(t):
+ 'logical_and_expression : inclusive_or_expression'
+ pass
+
+def p_logical_and_expression_2(t):
+ 'logical_and_expression : logical_and_expression LAND inclusive_or_expression'
+ pass
+
+# inclusive-or-expression:
+
+def p_inclusive_or_expression_1(t):
+ 'inclusive_or_expression : exclusive_or_expression'
+ pass
+
+def p_inclusive_or_expression_2(t):
+ 'inclusive_or_expression : inclusive_or_expression OR exclusive_or_expression'
+ pass
+
+# exclusive-or-expression:
+
+def p_exclusive_or_expression_1(t):
+ 'exclusive_or_expression : and_expression'
+ pass
+
+def p_exclusive_or_expression_2(t):
+ 'exclusive_or_expression : exclusive_or_expression XOR and_expression'
+ pass
+
+# AND-expression
+
+def p_and_expression_1(t):
+ 'and_expression : equality_expression'
+ pass
+
+def p_and_expression_2(t):
+ 'and_expression : and_expression AND equality_expression'
+ pass
+
+
+# equality-expression:
+def p_equality_expression_1(t):
+ 'equality_expression : relational_expression'
+ pass
+
+def p_equality_expression_2(t):
+ 'equality_expression : equality_expression EQ relational_expression'
+ pass
+
+def p_equality_expression_3(t):
+ 'equality_expression : equality_expression NE relational_expression'
+ pass
+
+
+# relational-expression:
+def p_relational_expression_1(t):
+ 'relational_expression : shift_expression'
+ pass
+
+def p_relational_expression_2(t):
+ 'relational_expression : relational_expression LT shift_expression'
+ pass
+
+def p_relational_expression_3(t):
+ 'relational_expression : relational_expression GT shift_expression'
+ pass
+
+def p_relational_expression_4(t):
+ 'relational_expression : relational_expression LE shift_expression'
+ pass
+
+def p_relational_expression_5(t):
+ 'relational_expression : relational_expression GE shift_expression'
+ pass
+
+# shift-expression
+
+def p_shift_expression_1(t):
+ 'shift_expression : additive_expression'
+ pass
+
+def p_shift_expression_2(t):
+ 'shift_expression : shift_expression LSHIFT additive_expression'
+ pass
+
+def p_shift_expression_3(t):
+ 'shift_expression : shift_expression RSHIFT additive_expression'
+ pass
+
+# additive-expression
+
+def p_additive_expression_1(t):
+ 'additive_expression : multiplicative_expression'
+ pass
+
+def p_additive_expression_2(t):
+ 'additive_expression : additive_expression PLUS multiplicative_expression'
+ pass
+
+def p_additive_expression_3(t):
+ 'additive_expression : additive_expression MINUS multiplicative_expression'
+ pass
+
+# multiplicative-expression
+
+def p_multiplicative_expression_1(t):
+ 'multiplicative_expression : cast_expression'
+ pass
+
+def p_multiplicative_expression_2(t):
+ 'multiplicative_expression : multiplicative_expression TIMES cast_expression'
+ pass
+
+def p_multiplicative_expression_3(t):
+ 'multiplicative_expression : multiplicative_expression DIVIDE cast_expression'
+ pass
+
+def p_multiplicative_expression_4(t):
+ 'multiplicative_expression : multiplicative_expression MOD cast_expression'
+ pass
+
+# cast-expression:
+
+def p_cast_expression_1(t):
+ 'cast_expression : unary_expression'
+ pass
+
+def p_cast_expression_2(t):
+ 'cast_expression : LPAREN type_name RPAREN cast_expression'
+ pass
+
+# unary-expression:
+def p_unary_expression_1(t):
+ 'unary_expression : postfix_expression'
+ pass
+
+def p_unary_expression_2(t):
+ 'unary_expression : PLUSPLUS unary_expression'
+ pass
+
+def p_unary_expression_3(t):
+ 'unary_expression : MINUSMINUS unary_expression'
+ pass
+
+def p_unary_expression_4(t):
+ 'unary_expression : unary_operator cast_expression'
+ pass
+
+def p_unary_expression_5(t):
+ 'unary_expression : SIZEOF unary_expression'
+ pass
+
+def p_unary_expression_6(t):
+ 'unary_expression : SIZEOF LPAREN type_name RPAREN'
+ pass
+
+#unary-operator
+def p_unary_operator(t):
+ '''unary_operator : AND
+ | TIMES
+ | PLUS
+ | MINUS
+ | NOT
+ | LNOT '''
+ pass
+
+# postfix-expression:
+def p_postfix_expression_1(t):
+ 'postfix_expression : primary_expression'
+ pass
+
+def p_postfix_expression_2(t):
+ 'postfix_expression : postfix_expression LBRACKET expression RBRACKET'
+ pass
+
+def p_postfix_expression_3(t):
+ 'postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN'
+ pass
+
+def p_postfix_expression_4(t):
+ 'postfix_expression : postfix_expression LPAREN RPAREN'
+ pass
+
+def p_postfix_expression_5(t):
+ 'postfix_expression : postfix_expression PERIOD ID'
+ pass
+
+def p_postfix_expression_6(t):
+ 'postfix_expression : postfix_expression ARROW ID'
+ pass
+
+def p_postfix_expression_7(t):
+ 'postfix_expression : postfix_expression PLUSPLUS'
+ pass
+
+def p_postfix_expression_8(t):
+ 'postfix_expression : postfix_expression MINUSMINUS'
+ pass
+
+# primary-expression:
+def p_primary_expression(t):
+ '''primary_expression : ID
+ | constant
+ | SCONST
+ | LPAREN expression RPAREN'''
+ pass
+
+# argument-expression-list:
+def p_argument_expression_list(t):
+ '''argument_expression_list : assignment_expression
+ | argument_expression_list COMMA assignment_expression'''
+ pass
+
+# constant:
+def p_constant(t):
+ '''constant : ICONST
+ | FCONST
+ | CCONST'''
+ pass
+
+
+def p_empty(t):
+ 'empty : '
+ pass
+
+def p_error(t):
+ print "Whoa. We're hosed"
+
+import profile
+# Build the grammar
+
+yacc.yacc(method='LALR')
+
+#profile.run("yacc.yacc(method='LALR')")
+
+
+
+
diff --git a/example/ansic/lextab.py b/example/ansic/lextab.py
new file mode 100644
index 0000000..ce9804b
--- /dev/null
+++ b/example/ansic/lextab.py
@@ -0,0 +1,8 @@
+# lextab.py. This file automatically created by PLY (version 2.2). Don't edit!
+_lextokens = {'SHORT': None, 'SIGNED': None, 'TIMES': None, 'TYPEID': None, 'GT': None, 'ARROW': None, 'FCONST': None, 'CONST': None, 'GE': None, 'PERIOD': None, 'SEMI': None, 'REGISTER': None, 'ENUM': None, 'SIZEOF': None, 'COMMA': None, 'RBRACE': None, 'RPAREN': None, 'RSHIFTEQUAL': None, 'LT': None, 'OREQUAL': None, 'XOREQUAL': None, 'DOUBLE': None, 'LBRACE': None, 'STRUCT': None, 'LPAREN': None, 'PLUSEQUAL': None, 'LNOT': None, 'NOT': None, 'CONDOP': None, 'LE': None, 'FLOAT': None, 'GOTO': None, 'LOR': None, 'EQ': None, 'MOD': None, 'ICONST': None, 'LONG': None, 'PLUS': None, 'DIVIDE': None, 'WHILE': None, 'UNION': None, 'CHAR': None, 'SWITCH': None, 'DO': None, 'FOR': None, 'VOID': None, 'EXTERN': None, 'RETURN': None, 'MINUSEQUAL': None, 'ELSE': None, 'ANDEQUAL': None, 'BREAK': None, 'CCONST': None, 'INT': None, 'DIVEQUAL': None, 'DEFAULT': None, 'TIMESEQUAL': None, 'MINUS': None, 'OR': None, 'CONTINUE': None, 'IF': None, 'UNSIGNED': None, 'ID': None, 'MINUSMINUS': None, 'COLON': None, 'LSHIFTEQUAL': None, 'RBRACKET': None, 'VOLATILE': None, 'CASE': None, 'PLUSPLUS': None, 'RSHIFT': None, 'MODEQUAL': None, 'LAND': None, 'AND': None, 'ELLIPSIS': None, 'STATIC': None, 'LBRACKET': None, 'LSHIFT': None, 'NE': None, 'TYPEDEF': None, 'AUTO': None, 'XOR': None, 'EQUALS': None, 'SCONST': None}
+_lexreflags = 0
+_lexliterals = ''
+_lexstateinfo = {'INITIAL': 'inclusive'}
+_lexstatere = {'INITIAL': [('(?P<t_NEWLINE>\\n+)|(?P<t_ID>[A-Za-z_][\\w_]*)|(?P<t_comment> /\\*(.|\\n)*?\\*/)|(?P<t_preprocessor>\\#(.)*?\\n)|(?P<t_FCONST>((\\d+)(\\.\\d+)(e(\\+|-)?(\\d+))? | (\\d+)e(\\+|-)?(\\d+))([lL]|[fF])?)|(?P<t_ICONST>\\d+([uU]|[lL]|[uU][lL]|[lL][uU])?)|(?P<t_CCONST>(L)?\\\'([^\\\\\\n]|(\\\\.))*?\\\')|(?P<t_SCONST>\\"([^\\\\\\n]|(\\\\.))*?\\")|(?P<t_ELLIPSIS>\\.\\.\\.)|(?P<t_LOR>\\|\\|)|(?P<t_PLUSPLUS>\\+\\+)|(?P<t_TIMESEQUAL>\\*=)|(?P<t_RSHIFTEQUAL>>>=)|(?P<t_OREQUAL>\\|=)|(?P<t_PLUSEQUAL>\\+=)|(?P<t_LSHIFTEQUAL><<=)|(?P<t_RBRACKET>\\])|(?P<t_MODEQUAL>%=)|(?P<t_XOREQUAL>^=)|(?P<t_LSHIFT><<)|(?P<t_TIMES>\\*)|(?P<t_LAND>&&)|(?P<t_MINUSMINUS>--)|(?P<t_NE>!=)|(?P<t_LPAREN>\\()|(?P<t_ANDEQUAL>&=)|(?P<t_RSHIFT>>>)|(?P<t_LBRACKET>\\[)|(?P<t_LBRACE>\\{)|(?P<t_OR>\\|)|(?P<t_RBRACE>\\})|(?P<t_ARROW>->)|(?P<t_PLUS>\\+)|(?P<t_CONDOP>\\?)|(?P<t_LE><=)|(?P<t_MINUSEQUAL>-=)|(?P<t_PERIOD>\\.)|(?P<t_DIVEQUAL>/=)|(?P<t_EQ>==)|(?P<t_GE>>=)|(?P<t_RPAREN>\\))|(?P<t_XOR>\\^)|(?P<t_SEMI>;)|(?P<t_AND>&)|(?P<t_NOT>~)|(?P<t_EQUALS>=)|(?P<t_MOD>%)|(?P<t_LT><)|(?P<t_MINUS>-)|(?P<t_LNOT>!)|(?P<t_DIVIDE>/)|(?P<t_COMMA>,)|(?P<t_GT>>)|(?P<t_COLON>:)', [None, ('t_NEWLINE', 'NEWLINE'), ('t_ID', 'ID'), ('t_comment', 'comment'), None, ('t_preprocessor', 'preprocessor'), None, (None, 'FCONST'), None, None, None, None, None, None, None, None, None, None, (None, 'ICONST'), None, (None, 'CCONST'), None, None, None, (None, 'SCONST'), None, None, (None, 'ELLIPSIS'), (None, 'LOR'), (None, 'PLUSPLUS'), (None, 'TIMESEQUAL'), (None, 'RSHIFTEQUAL'), (None, 'OREQUAL'), (None, 'PLUSEQUAL'), (None, 'LSHIFTEQUAL'), (None, 'RBRACKET'), (None, 'MODEQUAL'), (None, 'XOREQUAL'), (None, 'LSHIFT'), (None, 'TIMES'), (None, 'LAND'), (None, 'MINUSMINUS'), (None, 'NE'), (None, 'LPAREN'), (None, 'ANDEQUAL'), (None, 'RSHIFT'), (None, 'LBRACKET'), (None, 'LBRACE'), (None, 'OR'), (None, 'RBRACE'), (None, 'ARROW'), (None, 'PLUS'), (None, 'CONDOP'), (None, 'LE'), (None, 'MINUSEQUAL'), (None, 'PERIOD'), (None, 'DIVEQUAL'), (None, 'EQ'), (None, 'GE'), (None, 'RPAREN'), (None, 'XOR'), (None, 'SEMI'), (None, 'AND'), (None, 'NOT'), (None, 'EQUALS'), (None, 'MOD'), (None, 'LT'), (None, 'MINUS'), (None, 'LNOT'), (None, 'DIVIDE'), (None, 'COMMA'), (None, 'GT'), (None, 'COLON')])]}
+_lexstateignore = {'INITIAL': ' \t\f'}
+_lexstateerrorf = {'INITIAL': 't_error'}
diff --git a/example/calc/calc.py b/example/calc/calc.py
new file mode 100644
index 0000000..5bf5d5d
--- /dev/null
+++ b/example/calc/calc.py
@@ -0,0 +1,105 @@
+# -----------------------------------------------------------------------------
+# calc.py
+#
+# A simple calculator with variables. This is from O'Reilly's
+# "Lex and Yacc", p. 63.
+# -----------------------------------------------------------------------------
+
+import sys
+sys.path.insert(0,"../..")
+
+tokens = (
+ 'NAME','NUMBER',
+ )
+
+literals = ['=','+','-','*','/', '(',')']
+
+# Tokens
+
+t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+
+def t_NUMBER(t):
+ r'\d+'
+ try:
+ t.value = int(t.value)
+ except ValueError:
+ print "Integer value too large", t.value
+ t.value = 0
+ return t
+
+t_ignore = " \t"
+
+def t_newline(t):
+ r'\n+'
+ t.lexer.lineno += t.value.count("\n")
+
+def t_error(t):
+ print "Illegal character '%s'" % t.value[0]
+ t.lexer.skip(1)
+
+# Build the lexer
+import ply.lex as lex
+lex.lex()
+
+# Parsing rules
+
+precedence = (
+ ('left','+','-'),
+ ('left','*','/'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(p):
+ 'statement : NAME "=" expression'
+ names[p[1]] = p[3]
+
+def p_statement_expr(p):
+ 'statement : expression'
+ print p[1]
+
+def p_expression_binop(p):
+ '''expression : expression '+' expression
+ | expression '-' expression
+ | expression '*' expression
+ | expression '/' expression'''
+ if p[2] == '+' : p[0] = p[1] + p[3]
+ elif p[2] == '-': p[0] = p[1] - p[3]
+ elif p[2] == '*': p[0] = p[1] * p[3]
+ elif p[2] == '/': p[0] = p[1] / p[3]
+
+def p_expression_uminus(p):
+ "expression : '-' expression %prec UMINUS"
+ p[0] = -p[2]
+
+def p_expression_group(p):
+ "expression : '(' expression ')'"
+ p[0] = p[2]
+
+def p_expression_number(p):
+ "expression : NUMBER"
+ p[0] = p[1]
+
+def p_expression_name(p):
+ "expression : NAME"
+ try:
+ p[0] = names[p[1]]
+ except LookupError:
+ print "Undefined name '%s'" % p[1]
+ p[0] = 0
+
+def p_error(p):
+ print "Syntax error at '%s'" % p.value
+
+import ply.yacc as yacc
+yacc.yacc()
+
+while 1:
+ try:
+ s = raw_input('calc > ')
+ except EOFError:
+ break
+ if not s: continue
+ yacc.parse(s)
diff --git a/example/classcalc/calc.py b/example/classcalc/calc.py
new file mode 100755
index 0000000..7ec09a6
--- /dev/null
+++ b/example/classcalc/calc.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+
+# -----------------------------------------------------------------------------
+# calc.py
+#
+# A simple calculator with variables. This is from O'Reilly's
+# "Lex and Yacc", p. 63.
+#
+# Class-based example contributed to PLY by David McNab
+# -----------------------------------------------------------------------------
+
+import sys
+sys.path.insert(0,"../..")
+
+import readline
+import ply.lex as lex
+import ply.yacc as yacc
+import os
+
+class Parser:
+ """
+ Base class for a lexer/parser that has the rules defined as methods
+ """
+ tokens = ()
+ precedence = ()
+
+ def __init__(self, **kw):
+ self.debug = kw.get('debug', 0)
+ self.names = { }
+ try:
+ modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__
+ except:
+ modname = "parser"+"_"+self.__class__.__name__
+ self.debugfile = modname + ".dbg"
+ self.tabmodule = modname + "_" + "parsetab"
+ #print self.debugfile, self.tabmodule
+
+ # Build the lexer and parser
+ lex.lex(module=self, debug=self.debug)
+ yacc.yacc(module=self,
+ debug=self.debug,
+ debugfile=self.debugfile,
+ tabmodule=self.tabmodule)
+
+ def run(self):
+ while 1:
+ try:
+ s = raw_input('calc > ')
+ except EOFError:
+ break
+ if not s: continue
+ yacc.parse(s)
+
+
+class Calc(Parser):
+
+ tokens = (
+ 'NAME','NUMBER',
+ 'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS',
+ 'LPAREN','RPAREN',
+ )
+
+ # Tokens
+
+ t_PLUS = r'\+'
+ t_MINUS = r'-'
+ t_EXP = r'\*\*'
+ t_TIMES = r'\*'
+ t_DIVIDE = r'/'
+ t_EQUALS = r'='
+ t_LPAREN = r'\('
+ t_RPAREN = r'\)'
+ t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+
+ def t_NUMBER(self, t):
+ r'\d+'
+ try:
+ t.value = int(t.value)
+ except ValueError:
+ print "Integer value too large", t.value
+ t.value = 0
+ #print "parsed number %s" % repr(t.value)
+ return t
+
+ t_ignore = " \t"
+
+ def t_newline(self, t):
+ r'\n+'
+ t.lexer.lineno += t.value.count("\n")
+
+ def t_error(self, t):
+ print "Illegal character '%s'" % t.value[0]
+ t.lexer.skip(1)
+
+ # Parsing rules
+
+ precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('left', 'EXP'),
+ ('right','UMINUS'),
+ )
+
+ def p_statement_assign(self, p):
+ 'statement : NAME EQUALS expression'
+ self.names[p[1]] = p[3]
+
+ def p_statement_expr(self, p):
+ 'statement : expression'
+ print p[1]
+
+ def p_expression_binop(self, p):
+ """
+ expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression
+ | expression EXP expression
+ """
+ #print [repr(p[i]) for i in range(0,4)]
+ if p[2] == '+' : p[0] = p[1] + p[3]
+ elif p[2] == '-': p[0] = p[1] - p[3]
+ elif p[2] == '*': p[0] = p[1] * p[3]
+ elif p[2] == '/': p[0] = p[1] / p[3]
+ elif p[2] == '**': p[0] = p[1] ** p[3]
+
+ def p_expression_uminus(self, p):
+ 'expression : MINUS expression %prec UMINUS'
+ p[0] = -p[2]
+
+ def p_expression_group(self, p):
+ 'expression : LPAREN expression RPAREN'
+ p[0] = p[2]
+
+ def p_expression_number(self, p):
+ 'expression : NUMBER'
+ p[0] = p[1]
+
+ def p_expression_name(self, p):
+ 'expression : NAME'
+ try:
+ p[0] = self.names[p[1]]
+ except LookupError:
+ print "Undefined name '%s'" % p[1]
+ p[0] = 0
+
+ def p_error(self, p):
+ print "Syntax error at '%s'" % p.value
+
+if __name__ == '__main__':
+ calc = Calc()
+ calc.run()
diff --git a/example/cleanup.sh b/example/cleanup.sh
new file mode 100755
index 0000000..3e115f4
--- /dev/null
+++ b/example/cleanup.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+rm -f */*.pyc */parsetab.py */parser.out */*~ */*.class
diff --git a/example/hedit/hedit.py b/example/hedit/hedit.py
new file mode 100644
index 0000000..a3c58c7
--- /dev/null
+++ b/example/hedit/hedit.py
@@ -0,0 +1,48 @@
+# -----------------------------------------------------------------------------
+# hedit.py
+#
+# Paring of Fortran H Edit descriptions (Contributed by Pearu Peterson)
+#
+# These tokens can't be easily tokenized because they are of the following
+# form:
+#
+# nHc1...cn
+#
+# where n is a positive integer and c1 ... cn are characters.
+#
+# This example shows how to modify the state of the lexer to parse
+# such tokens
+# -----------------------------------------------------------------------------
+
+import sys
+sys.path.insert(0,"../..")
+
+
+tokens = (
+ 'H_EDIT_DESCRIPTOR',
+ )
+
+# Tokens
+t_ignore = " \t\n"
+
+def t_H_EDIT_DESCRIPTOR(t):
+ r"\d+H.*" # This grabs all of the remaining text
+ i = t.value.index('H')
+ n = eval(t.value[:i])
+
+ # Adjust the tokenizing position
+ t.lexer.lexpos -= len(t.value) - (i+1+n)
+
+ t.value = t.value[i+1:i+1+n]
+ return t
+
+def t_error(t):
+ print "Illegal character '%s'" % t.value[0]
+ t.lexer.skip(1)
+
+# Build the lexer
+import ply.lex as lex
+lex.lex()
+lex.runmain()
+
+
diff --git a/example/newclasscalc/calc.py b/example/newclasscalc/calc.py
new file mode 100755
index 0000000..b021b6b
--- /dev/null
+++ b/example/newclasscalc/calc.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+
+# -----------------------------------------------------------------------------
+# calc.py
+#
+# A simple calculator with variables. This is from O'Reilly's
+# "Lex and Yacc", p. 63.
+#
+# Class-based example contributed to PLY by David McNab.
+#
+# Modified to use new-style classes. Test case.
+# -----------------------------------------------------------------------------
+
+import sys
+sys.path.insert(0,"../..")
+
+import readline
+import ply.lex as lex
+import ply.yacc as yacc
+import os
+
+class Parser(object):
+ """
+ Base class for a lexer/parser that has the rules defined as methods
+ """
+ tokens = ()
+ precedence = ()
+
+
+ def __init__(self, **kw):
+ self.debug = kw.get('debug', 0)
+ self.names = { }
+ try:
+ modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__
+ except:
+ modname = "parser"+"_"+self.__class__.__name__
+ self.debugfile = modname + ".dbg"
+ self.tabmodule = modname + "_" + "parsetab"
+ #print self.debugfile, self.tabmodule
+
+ # Build the lexer and parser
+ lex.lex(module=self, debug=self.debug)
+ yacc.yacc(module=self,
+ debug=self.debug,
+ debugfile=self.debugfile,
+ tabmodule=self.tabmodule)
+
+ def run(self):
+ while 1:
+ try:
+ s = raw_input('calc > ')
+ except EOFError:
+ break
+ if not s: continue
+ yacc.parse(s)
+
+
+class Calc(Parser):
+
+ tokens = (
+ 'NAME','NUMBER',
+ 'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS',
+ 'LPAREN','RPAREN',
+ )
+
+ # Tokens
+
+ t_PLUS = r'\+'
+ t_MINUS = r'-'
+ t_EXP = r'\*\*'
+ t_TIMES = r'\*'
+ t_DIVIDE = r'/'
+ t_EQUALS = r'='
+ t_LPAREN = r'\('
+ t_RPAREN = r'\)'
+ t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+
+ def t_NUMBER(self, t):
+ r'\d+'
+ try:
+ t.value = int(t.value)
+ except ValueError:
+ print "Integer value too large", t.value
+ t.value = 0
+ #print "parsed number %s" % repr(t.value)
+ return t
+
+ t_ignore = " \t"
+
+ def t_newline(self, t):
+ r'\n+'
+ t.lexer.lineno += t.value.count("\n")
+
+ def t_error(self, t):
+ print "Illegal character '%s'" % t.value[0]
+ t.lexer.skip(1)
+
+ # Parsing rules
+
+ precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('left', 'EXP'),
+ ('right','UMINUS'),
+ )
+
+ def p_statement_assign(self, p):
+ 'statement : NAME EQUALS expression'
+ self.names[p[1]] = p[3]
+
+ def p_statement_expr(self, p):
+ 'statement : expression'
+ print p[1]
+
+ def p_expression_binop(self, p):
+ """
+ expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression
+ | expression EXP expression
+ """
+ #print [repr(p[i]) for i in range(0,4)]
+ if p[2] == '+' : p[0] = p[1] + p[3]
+ elif p[2] == '-': p[0] = p[1] - p[3]
+ elif p[2] == '*': p[0] = p[1] * p[3]
+ elif p[2] == '/': p[0] = p[1] / p[3]
+ elif p[2] == '**': p[0] = p[1] ** p[3]
+
+ def p_expression_uminus(self, p):
+ 'expression : MINUS expression %prec UMINUS'
+ p[0] = -p[2]
+
+ def p_expression_group(self, p):
+ 'expression : LPAREN expression RPAREN'
+ p[0] = p[2]
+
+ def p_expression_number(self, p):
+ 'expression : NUMBER'
+ p[0] = p[1]
+
+ def p_expression_name(self, p):
+ 'expression : NAME'
+ try:
+ p[0] = self.names[p[1]]
+ except LookupError:
+ print "Undefined name '%s'" % p[1]
+ p[0] = 0
+
+ def p_error(self, p):
+ print "Syntax error at '%s'" % p.value
+
+if __name__ == '__main__':
+ calc = Calc()
+ calc.run()
diff --git a/example/optcalc/README b/example/optcalc/README
new file mode 100644
index 0000000..6d196f0
--- /dev/null
+++ b/example/optcalc/README
@@ -0,0 +1,9 @@
+An example showing how to use Python optimized mode.
+To run:
+
+ - First run 'python calc.py'
+
+ - Then run 'python -OO calc.py'
+
+If working corretly, the second version should run the
+same way.
diff --git a/example/optcalc/calc.py b/example/optcalc/calc.py
new file mode 100644
index 0000000..325f67c
--- /dev/null
+++ b/example/optcalc/calc.py
@@ -0,0 +1,113 @@
+# -----------------------------------------------------------------------------
+# calc.py
+#
+# A simple calculator with variables. This is from O'Reilly's
+# "Lex and Yacc", p. 63.
+# -----------------------------------------------------------------------------
+
+import sys
+sys.path.insert(0,"../..")
+
+tokens = (
+ 'NAME','NUMBER',
+ 'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
+ 'LPAREN','RPAREN',
+ )
+
+# Tokens
+
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_TIMES = r'\*'
+t_DIVIDE = r'/'
+t_EQUALS = r'='
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+
+def t_NUMBER(t):
+ r'\d+'
+ try:
+ t.value = int(t.value)
+ except ValueError:
+ print "Integer value too large", t.value
+ t.value = 0
+ return t
+
+t_ignore = " \t"
+
+def t_newline(t):
+ r'\n+'
+ t.lexer.lineno += t.value.count("\n")
+
+def t_error(t):
+ print "Illegal character '%s'" % t.value[0]
+ t.lexer.skip(1)
+
+# Build the lexer
+import ply.lex as lex
+lex.lex(optimize=1)
+
+# Parsing rules
+
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(t):
+ 'statement : NAME EQUALS expression'
+ names[t[1]] = t[3]
+
+def p_statement_expr(t):
+ 'statement : expression'
+ print t[1]
+
+def p_expression_binop(t):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if t[2] == '+' : t[0] = t[1] + t[3]
+ elif t[2] == '-': t[0] = t[1] - t[3]
+ elif t[2] == '*': t[0] = t[1] * t[3]
+ elif t[2] == '/': t[0] = t[1] / t[3]
+ elif t[2] == '<': t[0] = t[1] < t[3]
+
+def p_expression_uminus(t):
+ 'expression : MINUS expression %prec UMINUS'
+ t[0] = -t[2]
+
+def p_expression_group(t):
+ 'expression : LPAREN expression RPAREN'
+ t[0] = t[2]
+
+def p_expression_number(t):
+ 'expression : NUMBER'
+ t[0] = t[1]
+
+def p_expression_name(t):
+ 'expression : NAME'
+ try:
+ t[0] = names[t[1]]
+ except LookupError:
+ print "Undefined name '%s'" % t[1]
+ t[0] = 0
+
+def p_error(t):
+ print "Syntax error at '%s'" % t.value
+
+import ply.yacc as yacc
+yacc.yacc(optimize=1)
+
+while 1:
+ try:
+ s = raw_input('calc > ')
+ except EOFError:
+ break
+ yacc.parse(s)
+
diff --git a/example/unicalc/calc.py b/example/unicalc/calc.py
new file mode 100644
index 0000000..7e60433
--- /dev/null
+++ b/example/unicalc/calc.py
@@ -0,0 +1,114 @@
+# -----------------------------------------------------------------------------
+# calc.py
+#
+# A simple calculator with variables. This is from O'Reilly's
+# "Lex and Yacc", p. 63.
+#
+# This example uses unicode strings for tokens, docstrings, and input.
+# -----------------------------------------------------------------------------
+
+import sys
+sys.path.insert(0,"../..")
+
+tokens = (
+ 'NAME','NUMBER',
+ 'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
+ 'LPAREN','RPAREN',
+ )
+
+# Tokens
+
+t_PLUS = ur'\+'
+t_MINUS = ur'-'
+t_TIMES = ur'\*'
+t_DIVIDE = ur'/'
+t_EQUALS = ur'='
+t_LPAREN = ur'\('
+t_RPAREN = ur'\)'
+t_NAME = ur'[a-zA-Z_][a-zA-Z0-9_]*'
+
+def t_NUMBER(t):
+ ur'\d+'
+ try:
+ t.value = int(t.value)
+ except ValueError:
+ print "Integer value too large", t.value
+ t.value = 0
+ return t
+
+t_ignore = u" \t"
+
+def t_newline(t):
+ ur'\n+'
+ t.lexer.lineno += t.value.count("\n")
+
+def t_error(t):
+ print "Illegal character '%s'" % t.value[0]
+ t.lexer.skip(1)
+
+# Build the lexer
+import ply.lex as lex
+lex.lex()
+
+# Parsing rules
+
+precedence = (
+ ('left','PLUS','MINUS'),
+ ('left','TIMES','DIVIDE'),
+ ('right','UMINUS'),
+ )
+
+# dictionary of names
+names = { }
+
+def p_statement_assign(p):
+ 'statement : NAME EQUALS expression'
+ names[p[1]] = p[3]
+
+def p_statement_expr(p):
+ 'statement : expression'
+ print p[1]
+
+def p_expression_binop(p):
+ '''expression : expression PLUS expression
+ | expression MINUS expression
+ | expression TIMES expression
+ | expression DIVIDE expression'''
+ if p[2] == u'+' : p[0] = p[1] + p[3]
+ elif p[2] == u'-': p[0] = p[1] - p[3]
+ elif p[2] == u'*': p[0] = p[1] * p[3]
+ elif p[2] == u'/': p[0] = p[1] / p[3]
+
+def p_expression_uminus(p):
+ 'expression : MINUS expression %prec UMINUS'
+ p[0] = -p[2]
+
+def p_expression_group(p):
+ 'expression : LPAREN expression RPAREN'
+ p[0] = p[2]
+
+def p_expression_number(p):
+ 'expression : NUMBER'
+ p[0] = p[1]
+
+def p_expression_name(p):
+ 'expression : NAME'
+ try:
+ p[0] = names[p[1]]
+ except LookupError:
+ print "Undefined name '%s'" % p[1]
+ p[0] = 0
+
+def p_error(p):
+ print "Syntax error at '%s'" % p.value
+
+import ply.yacc as yacc
+yacc.yacc()
+
+while 1:
+ try:
+ s = raw_input('calc > ')
+ except EOFError:
+ break
+ if not s: continue
+ yacc.parse(unicode(s))
diff --git a/example/yply/README b/example/yply/README
new file mode 100644
index 0000000..bfadf36
--- /dev/null
+++ b/example/yply/README
@@ -0,0 +1,41 @@
+yply.py
+
+This example implements a program yply.py that converts a UNIX-yacc
+specification file into a PLY-compatible program. To use, simply
+run it like this:
+
+ % python yply.py [-nocode] inputfile.y >myparser.py
+
+The output of this program is Python code. In the output,
+any C code in the original file is included, but is commented out.
+If you use the -nocode option, then all of the C code in the
+original file is just discarded.
+
+To use the resulting grammer with PLY, you'll need to edit the
+myparser.py file. Within this file, some stub code is included that
+can be used to test the construction of the parsing tables. However,
+you'll need to do more editing to make a workable parser.
+
+Disclaimer: This just an example I threw together in an afternoon.
+It might have some bugs. However, it worked when I tried it on
+a yacc-specified C++ parser containing 442 rules and 855 parsing
+states.
+
+Comments:
+
+1. This example does not parse specification files meant for lex/flex.
+ You'll need to specify the tokenizer on your own.
+
+2. This example shows a number of interesting PLY features including
+
+ - Parsing of literal text delimited by nested parentheses
+ - Some interaction between the parser and the lexer.
+ - Use of literals in the grammar specification
+ - One pass compilation. The program just emits the result,
+ there is no intermediate parse tree.
+
+3. This program could probably be cleaned up and enhanced a lot.
+ It would be great if someone wanted to work on this (hint).
+
+-Dave
+
diff --git a/example/yply/ylex.py b/example/yply/ylex.py
new file mode 100644
index 0000000..67d2354
--- /dev/null
+++ b/example/yply/ylex.py
@@ -0,0 +1,112 @@
+# lexer for yacc-grammars
+#
+# Author: David Beazley (dave@dabeaz.com)
+# Date : October 2, 2006
+
+import sys
+sys.path.append("../..")
+
+from ply import *
+
+tokens = (
+ 'LITERAL','SECTION','TOKEN','LEFT','RIGHT','PREC','START','TYPE','NONASSOC','UNION','CODE',
+ 'ID','QLITERAL','NUMBER',
+)
+
+states = (('code','exclusive'),)
+
+literals = [ ';', ',', '<', '>', '|',':' ]
+t_ignore = ' \t'
+
+t_TOKEN = r'%token'
+t_LEFT = r'%left'
+t_RIGHT = r'%right'
+t_NONASSOC = r'%nonassoc'
+t_PREC = r'%prec'
+t_START = r'%start'
+t_TYPE = r'%type'
+t_UNION = r'%union'
+t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*'
+t_QLITERAL = r'''(?P<quote>['"]).*?(?P=quote)'''
+t_NUMBER = r'\d+'
+
+def t_SECTION(t):
+ r'%%'
+ if getattr(t.lexer,"lastsection",0):
+ t.value = t.lexer.lexdata[t.lexpos+2:]
+ t.lexer.lexpos = len(t.lexer.lexdata)
+ else:
+ t.lexer.lastsection = 0
+ return t
+
+# Comments
+def t_ccomment(t):
+ r'/\*(.|\n)*?\*/'
+ t.lineno += t.value.count('\n')
+
+t_ignore_cppcomment = r'//.*'
+
+def t_LITERAL(t):
+ r'%\{(.|\n)*?%\}'
+ t.lexer.lineno += t.value.count("\n")
+ return t
+
+def t_NEWLINE(t):
+ r'\n'
+ t.lexer.lineno += 1
+
+def t_code(t):
+ r'\{'
+ t.lexer.codestart = t.lexpos
+ t.lexer.level = 1
+ t.lexer.begin('code')
+
+def t_code_ignore_string(t):
+ r'\"([^\\\n]|(\\.))*?\"'
+
+def t_code_ignore_char(t):
+ r'\'([^\\\n]|(\\.))*?\''
+
+def t_code_ignore_comment(t):
+ r'/\*(.|\n)*?\*/'
+
+def t_code_ignore_cppcom(t):
+ r'//.*'
+
+def t_code_lbrace(t):
+ r'\{'
+ t.lexer.level += 1
+
+def t_code_rbrace(t):
+ r'\}'
+ t.lexer.level -= 1
+ if t.lexer.level == 0:
+ t.type = 'CODE'
+ t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos+1]
+ t.lexer.begin('INITIAL')
+ t.lexer.lineno += t.value.count('\n')
+ return t
+
+t_code_ignore_nonspace = r'[^\s\}\'\"\{]+'
+t_code_ignore_whitespace = r'\s+'
+t_code_ignore = ""
+
+def t_code_error(t):
+ raise RuntimeError
+
+def t_error(t):
+ print "%d: Illegal character '%s'" % (t.lineno, t.value[0])
+ print t.value
+ t.lexer.skip(1)
+
+lex.lex()
+
+if __name__ == '__main__':
+ lex.runmain()
+
+
+
+
+
+
+
diff --git a/example/yply/yparse.py b/example/yply/yparse.py
new file mode 100644
index 0000000..ab5b884
--- /dev/null
+++ b/example/yply/yparse.py
@@ -0,0 +1,217 @@
+# parser for Unix yacc-based grammars
+#
+# Author: David Beazley (dave@dabeaz.com)
+# Date : October 2, 2006
+
+import ylex
+tokens = ylex.tokens
+
+from ply import *
+
+tokenlist = []
+preclist = []
+
+emit_code = 1
+
+def p_yacc(p):
+ '''yacc : defsection rulesection'''
+
+def p_defsection(p):
+ '''defsection : definitions SECTION
+ | SECTION'''
+ p.lexer.lastsection = 1
+ print "tokens = ", repr(tokenlist)
+ print
+ print "precedence = ", repr(preclist)
+ print
+ print "# -------------- RULES ----------------"
+ print
+
+def p_rulesection(p):
+ '''rulesection : rules SECTION'''
+
+ print "# -------------- RULES END ----------------"
+ print_code(p[2],0)
+
+def p_definitions(p):
+ '''definitions : definitions definition
+ | definition'''
+
+def p_definition_literal(p):
+ '''definition : LITERAL'''
+ print_code(p[1],0)
+
+def p_definition_start(p):
+ '''definition : START ID'''
+ print "start = '%s'" % p[2]
+
+def p_definition_token(p):
+ '''definition : toktype opttype idlist optsemi '''
+ for i in p[3]:
+ if i[0] not in "'\"":
+ tokenlist.append(i)
+ if p[1] == '%left':
+ preclist.append(('left',) + tuple(p[3]))
+ elif p[1] == '%right':
+ preclist.append(('right',) + tuple(p[3]))
+ elif p[1] == '%nonassoc':
+ preclist.append(('nonassoc',)+ tuple(p[3]))
+
+def p_toktype(p):
+ '''toktype : TOKEN
+ | LEFT
+ | RIGHT
+ | NONASSOC'''
+ p[0] = p[1]
+
+def p_opttype(p):
+ '''opttype : '<' ID '>'
+ | empty'''
+
+def p_idlist(p):
+ '''idlist : idlist optcomma tokenid
+ | tokenid'''
+ if len(p) == 2:
+ p[0] = [p[1]]
+ else:
+ p[0] = p[1]
+ p[1].append(p[3])
+
+def p_tokenid(p):
+ '''tokenid : ID
+ | ID NUMBER
+ | QLITERAL
+ | QLITERAL NUMBER'''
+ p[0] = p[1]
+
+def p_optsemi(p):
+ '''optsemi : ';'
+ | empty'''
+
+def p_optcomma(p):
+ '''optcomma : ','
+ | empty'''
+
+def p_definition_type(p):
+ '''definition : TYPE '<' ID '>' namelist optsemi'''
+ # type declarations are ignored
+
+def p_namelist(p):
+ '''namelist : namelist optcomma ID
+ | ID'''
+
+def p_definition_union(p):
+ '''definition : UNION CODE optsemi'''
+ # Union declarations are ignored
+
+def p_rules(p):
+ '''rules : rules rule
+ | rule'''
+ if len(p) == 2:
+ rule = p[1]
+ else:
+ rule = p[2]
+
+ # Print out a Python equivalent of this rule
+
+ embedded = [ ] # Embedded actions (a mess)
+ embed_count = 0
+
+ rulename = rule[0]
+ rulecount = 1
+ for r in rule[1]:
+ # r contains one of the rule possibilities
+ print "def p_%s_%d(p):" % (rulename,rulecount)
+ prod = []
+ prodcode = ""
+ for i in range(len(r)):
+ item = r[i]
+ if item[0] == '{': # A code block
+ if i == len(r) - 1:
+ prodcode = item
+ break
+ else:
+ # an embedded action
+ embed_name = "_embed%d_%s" % (embed_count,rulename)
+ prod.append(embed_name)
+ embedded.append((embed_name,item))
+ embed_count += 1
+ else:
+ prod.append(item)
+ print " '''%s : %s'''" % (rulename, " ".join(prod))
+ # Emit code
+ print_code(prodcode,4)
+ print
+ rulecount += 1
+
+ for e,code in embedded:
+ print "def p_%s(p):" % e
+ print " '''%s : '''" % e
+ print_code(code,4)
+ print
+
+def p_rule(p):
+ '''rule : ID ':' rulelist ';' '''
+ p[0] = (p[1],[p[3]])
+
+def p_rule2(p):
+ '''rule : ID ':' rulelist morerules ';' '''
+ p[4].insert(0,p[3])
+ p[0] = (p[1],p[4])
+
+def p_rule_empty(p):
+ '''rule : ID ':' ';' '''
+ p[0] = (p[1],[[]])
+
+def p_rule_empty2(p):
+ '''rule : ID ':' morerules ';' '''
+
+ p[3].insert(0,[])
+ p[0] = (p[1],p[3])
+
+def p_morerules(p):
+ '''morerules : morerules '|' rulelist
+ | '|' rulelist
+ | '|' '''
+
+ if len(p) == 2:
+ p[0] = [[]]
+ elif len(p) == 3:
+ p[0] = [p[2]]
+ else:
+ p[0] = p[1]
+ p[0].append(p[3])
+
+# print "morerules", len(p), p[0]
+
+def p_rulelist(p):
+ '''rulelist : rulelist ruleitem
+ | ruleitem'''
+
+ if len(p) == 2:
+ p[0] = [p[1]]
+ else:
+ p[0] = p[1]
+ p[1].append(p[2])
+
+def p_ruleitem(p):
+ '''ruleitem : ID
+ | QLITERAL
+ | CODE
+ | PREC'''
+ p[0] = p[1]
+
+def p_empty(p):
+ '''empty : '''
+
+def p_error(p):
+ pass
+
+yacc.yacc(debug=0)
+
+def print_code(code,indent):
+ if not emit_code: return
+ codelines = code.splitlines()
+ for c in codelines:
+ print "%s# %s" % (" "*indent,c)
+
diff --git a/example/yply/yply.py b/example/yply/yply.py
new file mode 100755
index 0000000..a439817
--- /dev/null
+++ b/example/yply/yply.py
@@ -0,0 +1,53 @@
+#!/usr/local/bin/python
+# yply.py
+#
+# Author: David Beazley (dave@dabeaz.com)
+# Date : October 2, 2006
+#
+# Converts a UNIX-yacc specification file into a PLY-compatible
+# specification. To use, simply do this:
+#
+# % python yply.py [-nocode] inputfile.y >myparser.py
+#
+# The output of this program is Python code. In the output,
+# any C code in the original file is included, but is commented.
+# If you use the -nocode option, then all of the C code in the
+# original file is discarded.
+#
+# Disclaimer: This just an example I threw together in an afternoon.
+# It might have some bugs. However, it worked when I tried it on
+# a yacc-specified C++ parser containing 442 rules and 855 parsing
+# states.
+#
+
+import sys
+sys.path.insert(0,"../..")
+
+import ylex
+import yparse
+
+from ply import *
+
+if len(sys.argv) == 1:
+ print "usage : yply.py [-nocode] inputfile"
+ raise SystemExit
+
+if len(sys.argv) == 3:
+ if sys.argv[1] == '-nocode':
+ yparse.emit_code = 0
+ else:
+ print "Unknown option '%s'" % sys.argv[1]
+ raise SystemExit
+ filename = sys.argv[2]
+else:
+ filename = sys.argv[1]
+
+yacc.parse(open(filename).read())
+
+print """
+if __name__ == '__main__':
+ from ply import *
+ yacc.yacc()
+"""
+
+