diff options
author | Paul McGuire <ptmcg@austin.rr.com> | 2019-07-13 13:36:36 -0500 |
---|---|---|
committer | Paul McGuire <ptmcg@austin.rr.com> | 2019-07-13 13:36:36 -0500 |
commit | 5a566b59170fb3fe705a7691806c4afd158df520 (patch) | |
tree | 7bf9c6077b3e0c03217b1c33055e1a7188cf5bcc | |
parent | 7d96e569a1b5f4505dac8d6f24c4b27562acf875 (diff) | |
download | pyparsing-git-5a566b59170fb3fe705a7691806c4afd158df520.tar.gz |
Update/cleanup code in examples
-rw-r--r-- | .travis.yml | 1 | ||||
-rw-r--r-- | examples/LAparser.py | 876 | ||||
-rw-r--r-- | examples/commasep.py | 7 | ||||
-rw-r--r-- | examples/eval_arith.py | 456 | ||||
-rw-r--r-- | examples/httpServerLogParser.py | 145 | ||||
-rw-r--r-- | examples/lucene_grammar.py | 657 | ||||
-rw-r--r-- | examples/protobuf_parser.py | 200 | ||||
-rw-r--r-- | examples/removeLineBreaks.py | 90 |
8 files changed, 1245 insertions, 1187 deletions
diff --git a/.travis.yml b/.travis.yml index 5f29e57..0d3fccf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,6 +25,7 @@ script: - PYTHONPATH=. python examples/sexpParser.py - PYTHONPATH=. python examples/oc.py - PYTHONPATH=. python examples/delta_time.py + - PYTHONPATH=. python examples/eval_arith.py after_success: - codecov diff --git a/examples/LAparser.py b/examples/LAparser.py index 41e8b4f..330b8f5 100644 --- a/examples/LAparser.py +++ b/examples/LAparser.py @@ -1,414 +1,462 @@ -"""
-Purpose: Linear Algebra Parser
-Based on: SimpleCalc.py example (author Paul McGuire) in pyparsing-1.3.3
-Author: Mike Ellis
-Copyright: Ellis & Grant, Inc. 2005
-License: You may freely use, modify, and distribute this software.
-Warranty: THIS SOFTWARE HAS NO WARRANTY WHATSOEVER. USE AT YOUR OWN RISK.
-Notes: Parses infix linear algebra (LA) notation for vectors, matrices, and scalars.
- Output is C code function calls. The parser can be run as an interactive
- interpreter or included as module to use for in-place substitution into C files
- containing LA equations.
-
- Supported operations are:
- OPERATION: INPUT OUTPUT
- Scalar addition: "a = b+c" "a=(b+c)"
- Scalar subtraction: "a = b-c" "a=(b-c)"
- Scalar multiplication: "a = b*c" "a=b*c"
- Scalar division: "a = b/c" "a=b/c"
- Scalar exponentiation: "a = b^c" "a=pow(b,c)"
- Vector scaling: "V3_a = V3_b * c" "vCopy(a,vScale(b,c))"
- Vector addition: "V3_a = V3_b + V3_c" "vCopy(a,vAdd(b,c))"
- Vector subtraction: "V3_a = V3_b - V3_c" "vCopy(a,vSubtract(b,c))"
- Vector dot product: "a = V3_b * V3_c" "a=vDot(b,c)"
- Vector outer product: "M3_a = V3_b @ V3_c" "a=vOuterProduct(b,c)"
- Vector magn. squared: "a = V3_b^Mag2" "a=vMagnitude2(b)"
- Vector magnitude: "a = V3_b^Mag" "a=sqrt(vMagnitude2(b))"
- Matrix scaling: "M3_a = M3_b * c" "mCopy(a,mScale(b,c))"
- Matrix addition: "M3_a = M3_b + M3_c" "mCopy(a,mAdd(b,c))"
- Matrix subtraction: "M3_a = M3_b - M3_c" "mCopy(a,mSubtract(b,c))"
- Matrix multiplication: "M3_a = M3_b * M3_c" "mCopy(a,mMultiply(b,c))"
- Matrix by vector mult.: "V3_a = M3_b * V3_c" "vCopy(a,mvMultiply(b,c))"
- Matrix inversion: "M3_a = M3_b^-1" "mCopy(a,mInverse(b))"
- Matrix transpose: "M3_a = M3_b^T" "mCopy(a,mTranspose(b))"
- Matrix determinant: "a = M3_b^Det" "a=mDeterminant(b)"
-
- The parser requires the expression to be an equation. Each non-scalar variable
- must be prefixed with a type tag, 'M3_' for 3x3 matrices and 'V3_' for 3-vectors.
- For proper compilation of the C code, the variables need to be declared without
- the prefix as float[3] for vectors and float[3][3] for matrices. The operations do
- not modify any variables on the right-hand side of the equation.
-
- Equations may include nested expressions within parentheses. The allowed binary
- operators are '+-*/^' for scalars, and '+-*^@' for vectors and matrices with the
- meanings defined in the table above.
-
- Specifying an improper combination of operands, e.g. adding a vector to a matrix,
- is detected by the parser and results in a Python TypeError Exception. The usual cause
- of this is omitting one or more tag prefixes. The parser knows nothing about a
- a variable's C declaration and relies entirely on the type tags. Errors in C
- declarations are not caught until compile time.
-
-Usage: To process LA equations embedded in source files, import this module and
- pass input and output file objects to the fprocess() function. You can
- can also invoke the parser from the command line, e.g. 'python LAparser.py',
- to run a small test suite and enter an interactive loop where you can enter
- LA equations and see the resulting C code.
-
-"""
-
-import re,sys
-from pyparsing import Word, alphas, ParseException, Literal, CaselessLiteral \
-, Combine, Optional, nums, Forward, ZeroOrMore, \
- StringEnd, alphanums
-
-# Debugging flag can be set to either "debug_flag=True" or "debug_flag=False"
-debug_flag=False
-
-#----------------------------------------------------------------------------
-# Variables that hold intermediate parsing results and a couple of
-# helper functions.
-exprStack = [] # Holds operators and operands parsed from input.
-targetvar = None # Holds variable name to left of '=' sign in LA equation.
-
-
-def _pushFirst( str, loc, toks ):
- if debug_flag: print("pushing ", toks[0], "str is ", str)
- exprStack.append( toks[0] )
-
-def _assignVar( str, loc, toks ):
- global targetvar
- targetvar = toks[0]
-
-#-----------------------------------------------------------------------------
-# The following statements define the grammar for the parser.
-
-point = Literal('.')
-e = CaselessLiteral('E')
-plusorminus = Literal('+') | Literal('-')
-number = Word(nums)
-integer = Combine( Optional(plusorminus) + number )
-floatnumber = Combine( integer +
- Optional( point + Optional(number) ) +
- Optional( e + integer )
- )
-
-lbracket = Literal("[")
-rbracket = Literal("]")
-ident = Forward()
-## The definition below treats array accesses as identifiers. This means your expressions
-## can include references to array elements, rows and columns, e.g., a = b[i] + 5.
-## Expressions within []'s are not presently supported, so a = b[i+1] will raise
-## a ParseException.
-ident = Combine(Word(alphas + '-',alphanums + '_') + \
- ZeroOrMore(lbracket + (Word(alphas + '-',alphanums + '_')|integer) + rbracket) \
- )
-
-plus = Literal( "+" )
-minus = Literal( "-" )
-mult = Literal( "*" )
-div = Literal( "/" )
-outer = Literal( "@" )
-lpar = Literal( "(" ).suppress()
-rpar = Literal( ")" ).suppress()
-addop = plus | minus
-multop = mult | div | outer
-expop = Literal( "^" )
-assignop = Literal( "=" )
-
-expr = Forward()
-atom = ( ( e | floatnumber | integer | ident ).setParseAction(_pushFirst) |
- ( lpar + expr.suppress() + rpar )
- )
-factor = Forward()
-factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( _pushFirst ) )
-
-term = factor + ZeroOrMore( ( multop + factor ).setParseAction( _pushFirst ) )
-expr << term + ZeroOrMore( ( addop + term ).setParseAction( _pushFirst ) )
-equation = (ident + assignop).setParseAction(_assignVar) + expr + StringEnd()
-
-# End of grammar definition
-#-----------------------------------------------------------------------------
-## The following are helper variables and functions used by the Binary Infix Operator
-## Functions described below.
-
-vprefix = 'V3_'
-vplen = len(vprefix)
-mprefix = 'M3_'
-mplen = len(mprefix)
-
-## We don't support unary negation for vectors and matrices
-class UnaryUnsupportedError(Exception): pass
-
-def _isvec(ident):
- if ident[0] == '-' and ident[1:vplen+1] == vprefix:
- raise UnaryUnsupportedError
- else: return ident[0:vplen] == vprefix
-
-def _ismat(ident):
- if ident[0] == '-' and ident[1:mplen+1] == mprefix:
- raise UnaryUnsupportedError
- else: return ident[0:mplen] == mprefix
-
-def _isscalar(ident): return not (_isvec(ident) or _ismat(ident))
-
-## Binary infix operator (BIO) functions. These are called when the stack evaluator
-## pops a binary operator like '+' or '*". The stack evaluator pops the two operand, a and b,
-## and calls the function that is mapped to the operator with a and b as arguments. Thus,
-## 'x + y' yields a call to addfunc(x,y). Each of the BIO functions checks the prefixes of its
-## arguments to determine whether the operand is scalar, vector, or matrix. This information
-## is used to generate appropriate C code. For scalars, this is essentially the input string, e.g.
-## 'a + b*5' as input yields 'a + b*5' as output. For vectors and matrices, the input is translated to
-## nested function calls, e.g. "V3_a + V3_b*5" yields "V3_vAdd(a,vScale(b,5)". Note that prefixes are
-## stripped from operands and function names within the argument list to the outer function and
-## the appropriate prefix is placed on the outer function for removal later as the stack evaluation
-## recurses toward the final assignment statement.
-
-def _addfunc(a,b):
- if _isscalar(a) and _isscalar(b): return "(%s+%s)"%(a,b)
- if _isvec(a) and _isvec(b): return "%svAdd(%s,%s)"%(vprefix,a[vplen:],b[vplen:])
- if _ismat(a) and _ismat(b): return "%smAdd(%s,%s)"%(mprefix,a[mplen:],b[mplen:])
- else: raise TypeError
-
-def _subfunc(a,b):
- if _isscalar(a) and _isscalar(b): return "(%s-%s)"%(a,b)
- if _isvec(a) and _isvec(b): return "%svSubtract(%s,%s)"%(vprefix,a[vplen:],b[vplen:])
- if _ismat(a) and _ismat(b): return "%smSubtract(%s,%s)"%(mprefix,a[mplen:],b[mplen:])
- else: raise TypeError
-
-def _mulfunc(a,b):
- if _isscalar(a) and _isscalar(b): return "%s*%s"%(a,b)
- if _isvec(a) and _isvec(b): return "vDot(%s,%s)"%(a[vplen:],b[vplen:])
- if _ismat(a) and _ismat(b): return "%smMultiply(%s,%s)"%(mprefix,a[mplen:],b[mplen:])
- if _ismat(a) and _isvec(b): return "%smvMultiply(%s,%s)"%(vprefix,a[mplen:],b[vplen:])
- if _ismat(a) and _isscalar(b): return "%smScale(%s,%s)"%(mprefix,a[mplen:],b)
- if _isvec(a) and _isscalar(b): return "%svScale(%s,%s)"%(vprefix,a[mplen:],b)
- else: raise TypeError
-
-def _outermulfunc(a,b):
- ## The '@' operator is used for the vector outer product.
- if _isvec(a) and _isvec(b):
- return "%svOuterProduct(%s,%s)"%(mprefix,a[vplen:],b[vplen:])
- else: raise TypeError
-
-def _divfunc(a,b):
- ## The '/' operator is used only for scalar division
- if _isscalar(a) and _isscalar(b): return "%s/%s"%(a,b)
- else: raise TypeError
-
-def _expfunc(a,b):
- ## The '^' operator is used for exponentiation on scalars and
- ## as a marker for unary operations on vectors and matrices.
- if _isscalar(a) and _isscalar(b): return "pow(%s,%s)"%(str(a),str(b))
- if _ismat(a) and b=='-1': return "%smInverse(%s)"%(mprefix,a[mplen:])
- if _ismat(a) and b=='T': return "%smTranspose(%s)"%(mprefix,a[mplen:])
- if _ismat(a) and b=='Det': return "mDeterminant(%s)"%(a[mplen:])
- if _isvec(a) and b=='Mag': return "sqrt(vMagnitude2(%s))"%(a[vplen:])
- if _isvec(a) and b=='Mag2': return "vMagnitude2(%s)"%(a[vplen:])
- else: raise TypeError
-
-def _assignfunc(a,b):
- ## The '=' operator is used for assignment
- if _isscalar(a) and _isscalar(b): return "%s=%s"%(a,b)
- if _isvec(a) and _isvec(b): return "vCopy(%s,%s)"%(a[vplen:],b[vplen:])
- if _ismat(a) and _ismat(b): return "mCopy(%s,%s)"%(a[mplen:],b[mplen:])
- else: raise TypeError
-
-## End of BIO func definitions
-##----------------------------------------------------------------------------
-
-# Map operator symbols to corresponding BIO funcs
-opn = { "+" : ( _addfunc ),
- "-" : ( _subfunc ),
- "*" : ( _mulfunc ),
- "@" : ( _outermulfunc ),
- "/" : ( _divfunc),
- "^" : ( _expfunc ), }
-
-
-##----------------------------------------------------------------------------
-# Recursive function that evaluates the expression stack
-def _evaluateStack( s ):
- op = s.pop()
- if op in "+-*/@^":
- op2 = _evaluateStack( s )
- op1 = _evaluateStack( s )
- result = opn[op]( op1, op2 )
- if debug_flag: print(result)
- return result
- else:
- return op
-
-##----------------------------------------------------------------------------
-# The parse function that invokes all of the above.
-def parse(input_string):
- """
- Accepts an input string containing an LA equation, e.g.,
- "M3_mymatrix = M3_anothermatrix^-1" returns C code function
- calls that implement the expression.
- """
-
- global exprStack
- global targetvar
-
- # Start with a blank exprStack and a blank targetvar
- exprStack = []
- targetvar=None
-
- if input_string != '':
- # try parsing the input string
- try:
- L=equation.parseString( input_string )
- except ParseException as err:
- print('Parse Failure', file=sys.stderr)
- print(err.line, file=sys.stderr)
- print(" "*(err.column-1) + "^", file=sys.stderr)
- print(err, file=sys.stderr)
- raise
-
- # show result of parsing the input string
- if debug_flag:
- print(input_string, "->", L)
- print("exprStack=", exprStack)
-
- # Evaluate the stack of parsed operands, emitting C code.
- try:
- result=_evaluateStack(exprStack)
- except TypeError:
- print("Unsupported operation on right side of '%s'.\nCheck for missing or incorrect tags on non-scalar operands."%input_string, file=sys.stderr)
- raise
- except UnaryUnsupportedError:
- print("Unary negation is not supported for vectors and matrices: '%s'"%input_string, file=sys.stderr)
- raise
-
- # Create final assignment and print it.
- if debug_flag: print("var=",targetvar)
- if targetvar != None:
- try:
- result = _assignfunc(targetvar,result)
- except TypeError:
- print("Left side tag does not match right side of '%s'"%input_string, file=sys.stderr)
- raise
- except UnaryUnsupportedError:
- print("Unary negation is not supported for vectors and matrices: '%s'"%input_string, file=sys.stderr)
- raise
-
- return result
- else:
- print("Empty left side in '%s'"%input_string, file=sys.stderr)
- raise TypeError
-
-##-----------------------------------------------------------------------------------
-def fprocess(infilep,outfilep):
- """
- Scans an input file for LA equations between double square brackets,
- e.g. [[ M3_mymatrix = M3_anothermatrix^-1 ]], and replaces the expression
- with a comment containing the equation followed by nested function calls
- that implement the equation as C code. A trailing semi-colon is appended.
- The equation within [[ ]] should NOT end with a semicolon as that will raise
- a ParseException. However, it is ok to have a semicolon after the right brackets.
-
- Other text in the file is unaltered.
-
- The arguments are file objects (NOT file names) opened for reading and
- writing, respectively.
- """
- pattern = r'\[\[\s*(.*?)\s*\]\]'
- eqn = re.compile(pattern,re.DOTALL)
- s = infilep.read()
- def parser(mo):
- ccode = parse(mo.group(1))
- return "/* %s */\n%s;\nLAParserBufferReset();\n"%(mo.group(1),ccode)
-
- content = eqn.sub(parser,s)
- outfilep.write(content)
-
-##-----------------------------------------------------------------------------------
-def test():
- """
- Tests the parsing of various supported expressions. Raises
- an AssertError if the output is not what is expected. Prints the
- input, expected output, and actual output for all tests.
- """
- print("Testing LAParser")
- testcases = [
- ("Scalar addition","a = b+c","a=(b+c)"),
- ("Vector addition","V3_a = V3_b + V3_c","vCopy(a,vAdd(b,c))"),
- ("Vector addition","V3_a=V3_b+V3_c","vCopy(a,vAdd(b,c))"),
- ("Matrix addition","M3_a = M3_b + M3_c","mCopy(a,mAdd(b,c))"),
- ("Matrix addition","M3_a=M3_b+M3_c","mCopy(a,mAdd(b,c))"),
- ("Scalar subtraction","a = b-c","a=(b-c)"),
- ("Vector subtraction","V3_a = V3_b - V3_c","vCopy(a,vSubtract(b,c))"),
- ("Matrix subtraction","M3_a = M3_b - M3_c","mCopy(a,mSubtract(b,c))"),
- ("Scalar multiplication","a = b*c","a=b*c"),
- ("Scalar division","a = b/c","a=b/c"),
- ("Vector multiplication (dot product)","a = V3_b * V3_c","a=vDot(b,c)"),
- ("Vector multiplication (outer product)","M3_a = V3_b @ V3_c","mCopy(a,vOuterProduct(b,c))"),
- ("Matrix multiplication","M3_a = M3_b * M3_c","mCopy(a,mMultiply(b,c))"),
- ("Vector scaling","V3_a = V3_b * c","vCopy(a,vScale(b,c))"),
- ("Matrix scaling","M3_a = M3_b * c","mCopy(a,mScale(b,c))"),
- ("Matrix by vector multiplication","V3_a = M3_b * V3_c","vCopy(a,mvMultiply(b,c))"),
- ("Scalar exponentiation","a = b^c","a=pow(b,c)"),
- ("Matrix inversion","M3_a = M3_b^-1","mCopy(a,mInverse(b))"),
- ("Matrix transpose","M3_a = M3_b^T","mCopy(a,mTranspose(b))"),
- ("Matrix determinant","a = M3_b^Det","a=mDeterminant(b)"),
- ("Vector magnitude squared","a = V3_b^Mag2","a=vMagnitude2(b)"),
- ("Vector magnitude","a = V3_b^Mag","a=sqrt(vMagnitude2(b))"),
- ("Complicated expression", "myscalar = (M3_amatrix * V3_bvector)^Mag + 5*(-xyz[i] + 2.03^2)","myscalar=(sqrt(vMagnitude2(mvMultiply(amatrix,bvector)))+5*(-xyz[i]+pow(2.03,2)))"),
- ("Complicated Multiline", "myscalar = \n(M3_amatrix * V3_bvector)^Mag +\n 5*(xyz + 2.03^2)","myscalar=(sqrt(vMagnitude2(mvMultiply(amatrix,bvector)))+5*(xyz+pow(2.03,2)))")
-
- ]
-
- for t in testcases:
- name,input,expected = t
- print(name)
- print(" %s input"%input)
- print(" %s expected"%expected)
- result = parse(input)
- print(" %s received"%result)
- print("")
- assert expected == result
-
- ##TODO: Write testcases with invalid expressions and test that the expected
- ## exceptions are raised.
-
- print("Tests completed!")
-##----------------------------------------------------------------------------
-## The following is executed only when this module is executed as
-## command line script. It runs a small test suite (see above)
-## and then enters an interactive loop where you
-## can enter expressions and see the resulting C code as output.
-
-if __name__ == '__main__':
- # run testcases
- test()
-
- # input_string
- input_string=''
-
- # Display instructions on how to use the program interactively
- interactiveusage = """
- Entering interactive mode:
- Type in an equation to be parsed or 'quit' to exit the program.
- Type 'debug on' to print parsing details as each string is processed.
- Type 'debug off' to stop printing parsing details
- """
- print(interactiveusage)
- input_string = input("> ")
-
- while input_string != 'quit':
- if input_string == "debug on":
- debug_flag = True
- elif input_string == "debug off":
- debug_flag = False
- else:
- try:
- print(parse(input_string))
- except Exception:
- pass
-
- # obtain new input string
- input_string = input("> ")
-
- # if user types 'quit' then say goodbye
- print("Good bye!")
+""" +Purpose: Linear Algebra Parser +Based on: SimpleCalc.py example (author Paul McGuire) in pyparsing-1.3.3 +Author: Mike Ellis +Copyright: Ellis & Grant, Inc. 2005 +License: You may freely use, modify, and distribute this software. +Warranty: THIS SOFTWARE HAS NO WARRANTY WHATSOEVER. USE AT YOUR OWN RISK. +Notes: Parses infix linear algebra (LA) notation for vectors, matrices, and scalars. + Output is C code function calls. The parser can be run as an interactive + interpreter or included as module to use for in-place substitution into C files + containing LA equations. + + Supported operations are: + OPERATION: INPUT OUTPUT + Scalar addition: "a = b+c" "a=(b+c)" + Scalar subtraction: "a = b-c" "a=(b-c)" + Scalar multiplication: "a = b*c" "a=b*c" + Scalar division: "a = b/c" "a=b/c" + Scalar exponentiation: "a = b^c" "a=pow(b,c)" + Vector scaling: "V3_a = V3_b * c" "vCopy(a,vScale(b,c))" + Vector addition: "V3_a = V3_b + V3_c" "vCopy(a,vAdd(b,c))" + Vector subtraction: "V3_a = V3_b - V3_c" "vCopy(a,vSubtract(b,c))" + Vector dot product: "a = V3_b * V3_c" "a=vDot(b,c)" + Vector outer product: "M3_a = V3_b @ V3_c" "a=vOuterProduct(b,c)" + Vector magn. squared: "a = V3_b^Mag2" "a=vMagnitude2(b)" + Vector magnitude: "a = V3_b^Mag" "a=sqrt(vMagnitude2(b))" + Matrix scaling: "M3_a = M3_b * c" "mCopy(a,mScale(b,c))" + Matrix addition: "M3_a = M3_b + M3_c" "mCopy(a,mAdd(b,c))" + Matrix subtraction: "M3_a = M3_b - M3_c" "mCopy(a,mSubtract(b,c))" + Matrix multiplication: "M3_a = M3_b * M3_c" "mCopy(a,mMultiply(b,c))" + Matrix by vector mult.: "V3_a = M3_b * V3_c" "vCopy(a,mvMultiply(b,c))" + Matrix inversion: "M3_a = M3_b^-1" "mCopy(a,mInverse(b))" + Matrix transpose: "M3_a = M3_b^T" "mCopy(a,mTranspose(b))" + Matrix determinant: "a = M3_b^Det" "a=mDeterminant(b)" + + The parser requires the expression to be an equation. Each non-scalar variable + must be prefixed with a type tag, 'M3_' for 3x3 matrices and 'V3_' for 3-vectors. + For proper compilation of the C code, the variables need to be declared without + the prefix as float[3] for vectors and float[3][3] for matrices. The operations do + not modify any variables on the right-hand side of the equation. + + Equations may include nested expressions within parentheses. The allowed binary + operators are '+-*/^' for scalars, and '+-*^@' for vectors and matrices with the + meanings defined in the table above. + + Specifying an improper combination of operands, e.g. adding a vector to a matrix, + is detected by the parser and results in a Python TypeError Exception. The usual cause + of this is omitting one or more tag prefixes. The parser knows nothing about a + a variable's C declaration and relies entirely on the type tags. Errors in C + declarations are not caught until compile time. + +Usage: To process LA equations embedded in source files, import this module and + pass input and output file objects to the fprocess() function. You can + can also invoke the parser from the command line, e.g. 'python LAparser.py', + to run a small test suite and enter an interactive loop where you can enter + LA equations and see the resulting C code. + +""" + +import re,sys +from pyparsing import Word, alphas, ParseException, Literal, CaselessLiteral \ +, Combine, Optional, nums, Forward, ZeroOrMore, \ + StringEnd, alphanums + +# Debugging flag can be set to either "debug_flag=True" or "debug_flag=False" +debug_flag=False + +#---------------------------------------------------------------------------- +# Variables that hold intermediate parsing results and a couple of +# helper functions. +exprStack = [] # Holds operators and operands parsed from input. +targetvar = None # Holds variable name to left of '=' sign in LA equation. + + +def _pushFirst( str, loc, toks ): + if debug_flag: print("pushing ", toks[0], "str is ", str) + exprStack.append( toks[0] ) + +def _assignVar( str, loc, toks ): + global targetvar + targetvar = toks[0] + +#----------------------------------------------------------------------------- +# The following statements define the grammar for the parser. + +point = Literal('.') +e = CaselessLiteral('E') +plusorminus = Literal('+') | Literal('-') +number = Word(nums) +integer = Combine( Optional(plusorminus) + number ) +floatnumber = Combine( integer + + Optional( point + Optional(number) ) + + Optional( e + integer ) + ) + +lbracket = Literal("[") +rbracket = Literal("]") +ident = Forward() +## The definition below treats array accesses as identifiers. This means your expressions +## can include references to array elements, rows and columns, e.g., a = b[i] + 5. +## Expressions within []'s are not presently supported, so a = b[i+1] will raise +## a ParseException. +ident = Combine(Word(alphas + '-',alphanums + '_') + \ + ZeroOrMore(lbracket + (Word(alphas + '-',alphanums + '_')|integer) + rbracket) \ + ) + +plus = Literal( "+" ) +minus = Literal( "-" ) +mult = Literal( "*" ) +div = Literal( "/" ) +outer = Literal( "@" ) +lpar = Literal( "(" ).suppress() +rpar = Literal( ")" ).suppress() +addop = plus | minus +multop = mult | div | outer +expop = Literal( "^" ) +assignop = Literal( "=" ) + +expr = Forward() +atom = ( ( e | floatnumber | integer | ident ).setParseAction(_pushFirst) | + ( lpar + expr.suppress() + rpar ) + ) +factor = Forward() +factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( _pushFirst ) ) + +term = factor + ZeroOrMore( ( multop + factor ).setParseAction( _pushFirst ) ) +expr << term + ZeroOrMore( ( addop + term ).setParseAction( _pushFirst ) ) +equation = (ident + assignop).setParseAction(_assignVar) + expr + StringEnd() + +# End of grammar definition +#----------------------------------------------------------------------------- +## The following are helper variables and functions used by the Binary Infix Operator +## Functions described below. + +vprefix = 'V3_' +vplen = len(vprefix) +mprefix = 'M3_' +mplen = len(mprefix) + +## We don't support unary negation for vectors and matrices +class UnaryUnsupportedError(Exception): pass + +def _isvec(ident): + if ident[0] == '-' and ident[1:vplen+1] == vprefix: + raise UnaryUnsupportedError + else: return ident[0:vplen] == vprefix + +def _ismat(ident): + if ident[0] == '-' and ident[1:mplen+1] == mprefix: + raise UnaryUnsupportedError + else: return ident[0:mplen] == mprefix + +def _isscalar(ident): return not (_isvec(ident) or _ismat(ident)) + +## Binary infix operator (BIO) functions. These are called when the stack evaluator +## pops a binary operator like '+' or '*". The stack evaluator pops the two operand, a and b, +## and calls the function that is mapped to the operator with a and b as arguments. Thus, +## 'x + y' yields a call to addfunc(x,y). Each of the BIO functions checks the prefixes of its +## arguments to determine whether the operand is scalar, vector, or matrix. This information +## is used to generate appropriate C code. For scalars, this is essentially the input string, e.g. +## 'a + b*5' as input yields 'a + b*5' as output. For vectors and matrices, the input is translated to +## nested function calls, e.g. "V3_a + V3_b*5" yields "V3_vAdd(a,vScale(b,5)". Note that prefixes are +## stripped from operands and function names within the argument list to the outer function and +## the appropriate prefix is placed on the outer function for removal later as the stack evaluation +## recurses toward the final assignment statement. + +def _addfunc(a,b): + if _isscalar(a) and _isscalar(b): return "(%s+%s)"%(a,b) + if _isvec(a) and _isvec(b): return "%svAdd(%s,%s)"%(vprefix,a[vplen:],b[vplen:]) + if _ismat(a) and _ismat(b): return "%smAdd(%s,%s)"%(mprefix,a[mplen:],b[mplen:]) + else: raise TypeError + +def _subfunc(a,b): + if _isscalar(a) and _isscalar(b): return "(%s-%s)"%(a,b) + if _isvec(a) and _isvec(b): return "%svSubtract(%s,%s)"%(vprefix,a[vplen:],b[vplen:]) + if _ismat(a) and _ismat(b): return "%smSubtract(%s,%s)"%(mprefix,a[mplen:],b[mplen:]) + else: raise TypeError + +def _mulfunc(a,b): + if _isscalar(a) and _isscalar(b): return "%s*%s"%(a,b) + if _isvec(a) and _isvec(b): return "vDot(%s,%s)"%(a[vplen:],b[vplen:]) + if _ismat(a) and _ismat(b): return "%smMultiply(%s,%s)"%(mprefix,a[mplen:],b[mplen:]) + if _ismat(a) and _isvec(b): return "%smvMultiply(%s,%s)"%(vprefix,a[mplen:],b[vplen:]) + if _ismat(a) and _isscalar(b): return "%smScale(%s,%s)"%(mprefix,a[mplen:],b) + if _isvec(a) and _isscalar(b): return "%svScale(%s,%s)"%(vprefix,a[mplen:],b) + else: raise TypeError + +def _outermulfunc(a,b): + ## The '@' operator is used for the vector outer product. + if _isvec(a) and _isvec(b): + return "%svOuterProduct(%s,%s)"%(mprefix,a[vplen:],b[vplen:]) + else: raise TypeError + +def _divfunc(a,b): + ## The '/' operator is used only for scalar division + if _isscalar(a) and _isscalar(b): return "%s/%s"%(a,b) + else: raise TypeError + +def _expfunc(a,b): + ## The '^' operator is used for exponentiation on scalars and + ## as a marker for unary operations on vectors and matrices. + if _isscalar(a) and _isscalar(b): return "pow(%s,%s)"%(str(a),str(b)) + if _ismat(a) and b=='-1': return "%smInverse(%s)"%(mprefix,a[mplen:]) + if _ismat(a) and b=='T': return "%smTranspose(%s)"%(mprefix,a[mplen:]) + if _ismat(a) and b=='Det': return "mDeterminant(%s)"%(a[mplen:]) + if _isvec(a) and b=='Mag': return "sqrt(vMagnitude2(%s))"%(a[vplen:]) + if _isvec(a) and b=='Mag2': return "vMagnitude2(%s)"%(a[vplen:]) + else: raise TypeError + +def _assignfunc(a,b): + ## The '=' operator is used for assignment + if _isscalar(a) and _isscalar(b): return "%s=%s"%(a,b) + if _isvec(a) and _isvec(b): return "vCopy(%s,%s)"%(a[vplen:],b[vplen:]) + if _ismat(a) and _ismat(b): return "mCopy(%s,%s)"%(a[mplen:],b[mplen:]) + else: raise TypeError + +## End of BIO func definitions +##---------------------------------------------------------------------------- + +# Map operator symbols to corresponding BIO funcs +opn = { "+" : ( _addfunc ), + "-" : ( _subfunc ), + "*" : ( _mulfunc ), + "@" : ( _outermulfunc ), + "/" : ( _divfunc), + "^" : ( _expfunc ), } + + +##---------------------------------------------------------------------------- +# Recursive function that evaluates the expression stack +def _evaluateStack( s ): + op = s.pop() + if op in "+-*/@^": + op2 = _evaluateStack( s ) + op1 = _evaluateStack( s ) + result = opn[op]( op1, op2 ) + if debug_flag: print(result) + return result + else: + return op + +##---------------------------------------------------------------------------- +# The parse function that invokes all of the above. +def parse(input_string): + """ + Accepts an input string containing an LA equation, e.g., + "M3_mymatrix = M3_anothermatrix^-1" returns C code function + calls that implement the expression. + """ + + global exprStack + global targetvar + + # Start with a blank exprStack and a blank targetvar + exprStack = [] + targetvar=None + + if input_string != '': + # try parsing the input string + try: + L=equation.parseString( input_string ) + except ParseException as err: + print('Parse Failure', file=sys.stderr) + print(err.line, file=sys.stderr) + print(" "*(err.column-1) + "^", file=sys.stderr) + print(err, file=sys.stderr) + raise + + # show result of parsing the input string + if debug_flag: + print(input_string, "->", L) + print("exprStack=", exprStack) + + # Evaluate the stack of parsed operands, emitting C code. + try: + result=_evaluateStack(exprStack) + except TypeError: + print("Unsupported operation on right side of '%s'.\nCheck for missing or incorrect tags on non-scalar operands."%input_string, file=sys.stderr) + raise + except UnaryUnsupportedError: + print("Unary negation is not supported for vectors and matrices: '%s'"%input_string, file=sys.stderr) + raise + + # Create final assignment and print it. + if debug_flag: print("var=",targetvar) + if targetvar != None: + try: + result = _assignfunc(targetvar,result) + except TypeError: + print("Left side tag does not match right side of '%s'"%input_string, file=sys.stderr) + raise + except UnaryUnsupportedError: + print("Unary negation is not supported for vectors and matrices: '%s'"%input_string, file=sys.stderr) + raise + + return result + else: + print("Empty left side in '%s'"%input_string, file=sys.stderr) + raise TypeError + +##----------------------------------------------------------------------------------- +def fprocess(infilep,outfilep): + """ + Scans an input file for LA equations between double square brackets, + e.g. [[ M3_mymatrix = M3_anothermatrix^-1 ]], and replaces the expression + with a comment containing the equation followed by nested function calls + that implement the equation as C code. A trailing semi-colon is appended. + The equation within [[ ]] should NOT end with a semicolon as that will raise + a ParseException. However, it is ok to have a semicolon after the right brackets. + + Other text in the file is unaltered. + + The arguments are file objects (NOT file names) opened for reading and + writing, respectively. + """ + pattern = r'\[\[\s*(.*?)\s*\]\]' + eqn = re.compile(pattern,re.DOTALL) + s = infilep.read() + def parser(mo): + ccode = parse(mo.group(1)) + return "/* %s */\n%s;\nLAParserBufferReset();\n"%(mo.group(1),ccode) + + content = eqn.sub(parser,s) + outfilep.write(content) + +##----------------------------------------------------------------------------------- +def test(): + """ + Tests the parsing of various supported expressions. Raises + an AssertError if the output is not what is expected. Prints the + input, expected output, and actual output for all tests. + """ + print("Testing LAParser") + testcases = [ + ("Scalar addition","a = b+c","a=(b+c)"), + ("Vector addition","V3_a = V3_b + V3_c","vCopy(a,vAdd(b,c))"), + ("Vector addition","V3_a=V3_b+V3_c","vCopy(a,vAdd(b,c))"), + ("Matrix addition","M3_a = M3_b + M3_c","mCopy(a,mAdd(b,c))"), + ("Matrix addition","M3_a=M3_b+M3_c","mCopy(a,mAdd(b,c))"), + ("Scalar subtraction","a = b-c","a=(b-c)"), + ("Vector subtraction","V3_a = V3_b - V3_c","vCopy(a,vSubtract(b,c))"), + ("Matrix subtraction","M3_a = M3_b - M3_c","mCopy(a,mSubtract(b,c))"), + ("Scalar multiplication","a = b*c","a=b*c"), + ("Scalar division","a = b/c","a=b/c"), + ("Vector multiplication (dot product)","a = V3_b * V3_c","a=vDot(b,c)"), + ("Vector multiplication (outer product)","M3_a = V3_b @ V3_c","mCopy(a,vOuterProduct(b,c))"), + ("Matrix multiplication","M3_a = M3_b * M3_c","mCopy(a,mMultiply(b,c))"), + ("Vector scaling","V3_a = V3_b * c","vCopy(a,vScale(b,c))"), + ("Matrix scaling","M3_a = M3_b * c","mCopy(a,mScale(b,c))"), + ("Matrix by vector multiplication","V3_a = M3_b * V3_c","vCopy(a,mvMultiply(b,c))"), + ("Scalar exponentiation","a = b^c","a=pow(b,c)"), + ("Matrix inversion","M3_a = M3_b^-1","mCopy(a,mInverse(b))"), + ("Matrix transpose","M3_a = M3_b^T","mCopy(a,mTranspose(b))"), + ("Matrix determinant","a = M3_b^Det","a=mDeterminant(b)"), + ("Vector magnitude squared","a = V3_b^Mag2","a=vMagnitude2(b)"), + ("Vector magnitude","a = V3_b^Mag","a=sqrt(vMagnitude2(b))"), + ("Complicated expression", "myscalar = (M3_amatrix * V3_bvector)^Mag + 5*(-xyz[i] + 2.03^2)","myscalar=(sqrt(vMagnitude2(mvMultiply(amatrix,bvector)))+5*(-xyz[i]+pow(2.03,2)))"), + ("Complicated Multiline", "myscalar = \n(M3_amatrix * V3_bvector)^Mag +\n 5*(xyz + 2.03^2)","myscalar=(sqrt(vMagnitude2(mvMultiply(amatrix,bvector)))+5*(xyz+pow(2.03,2)))") + + ] + + + all_passed = [True] + + def post_test(test, parsed): + + # copy exprStack to evaluate and clear before running next test + parsed_stack = exprStack[:] + exprStack.clear() + + name, testcase, expected = next(tc for tc in testcases if tc[1] == test) + + this_test_passed = False + try: + try: + result=_evaluateStack(parsed_stack) + except TypeError: + print("Unsupported operation on right side of '%s'.\nCheck for missing or incorrect tags on non-scalar operands."%input_string, file=sys.stderr) + raise + except UnaryUnsupportedError: + print("Unary negation is not supported for vectors and matrices: '%s'"%input_string, file=sys.stderr) + raise + + # Create final assignment and print it. + if debug_flag: print("var=",targetvar) + if targetvar != None: + try: + result = _assignfunc(targetvar,result) + except TypeError: + print("Left side tag does not match right side of '%s'"%input_string, file=sys.stderr) + raise + except UnaryUnsupportedError: + print("Unary negation is not supported for vectors and matrices: '%s'"%input_string, file=sys.stderr) + raise + + else: + print("Empty left side in '%s'"%input_string, file=sys.stderr) + raise TypeError + + parsed['result'] = result + parsed['passed'] = this_test_passed = result == expected + + finally: + all_passed[0] = all_passed[0] and this_test_passed + print('\n' + name) + + equation.runTests((t[1] for t in testcases), postParse=post_test) + + + ##TODO: Write testcases with invalid expressions and test that the expected + ## exceptions are raised. + + print("Tests completed!") + print("PASSED" if all_passed[0] else "FAILED") + assert all_passed[0] + +##---------------------------------------------------------------------------- +## The following is executed only when this module is executed as +## command line script. It runs a small test suite (see above) +## and then enters an interactive loop where you +## can enter expressions and see the resulting C code as output. + +if __name__ == '__main__': + + import sys + if not sys.flags.interactive: + # run testcases + test() + sys.exit(0) + + # input_string + input_string='' + + # Display instructions on how to use the program interactively + interactiveusage = """ + Entering interactive mode: + Type in an equation to be parsed or 'quit' to exit the program. + Type 'debug on' to print parsing details as each string is processed. + Type 'debug off' to stop printing parsing details + """ + print(interactiveusage) + input_string = input("> ") + + while input_string != 'quit': + if input_string == "debug on": + debug_flag = True + elif input_string == "debug off": + debug_flag = False + else: + try: + print(parse(input_string)) + except Exception: + pass + + # obtain new input string + input_string = input("> ") + + # if user types 'quit' then say goodbye + print("Good bye!") + import os + os._exit(0) + diff --git a/examples/commasep.py b/examples/commasep.py index eae6dc1..067647d 100644 --- a/examples/commasep.py +++ b/examples/commasep.py @@ -1,7 +1,7 @@ # commasep.py # # comma-separated list example, to illustrate the advantages of using -# the pyparsing commaSeparatedList as opposed to string.split(","): +# the pyparsing comma_separated_list as opposed to string.split(","): # - leading and trailing whitespace is implicitly trimmed from list elements # - list elements can be quoted strings, which can safely contain commas without breaking # into separate elements @@ -9,7 +9,8 @@ # Copyright (c) 2004-2016, Paul McGuire # -from pyparsing import commaSeparatedList +import pyparsing as pp +ppc = pp.pyparsing_common testData = [ "a,b,c,100.2,,3", @@ -20,4 +21,4 @@ testData = [ "", ] -commaSeparatedList.runTests(testData) +ppc.comma_separated_list.runTests(testData) diff --git a/examples/eval_arith.py b/examples/eval_arith.py index 133f6c2..0896c01 100644 --- a/examples/eval_arith.py +++ b/examples/eval_arith.py @@ -1,227 +1,229 @@ -# eval_arith.py
-#
-# Copyright 2009, 2011 Paul McGuire
-#
-# Expansion on the pyparsing example simpleArith.py, to include evaluation
-# of the parsed tokens.
-#
-# Added support for exponentiation, using right-to-left evaluation of
-# operands
-#
-from pyparsing import Word, nums, alphas, Combine, oneOf, \
- opAssoc, infixNotation, Literal
-
-class EvalConstant(object):
- "Class to evaluate a parsed constant or variable"
- vars_ = {}
- def __init__(self, tokens):
- self.value = tokens[0]
- def eval(self):
- if self.value in EvalConstant.vars_:
- return EvalConstant.vars_[self.value]
- else:
- return float(self.value)
-
-class EvalSignOp(object):
- "Class to evaluate expressions with a leading + or - sign"
- def __init__(self, tokens):
- self.sign, self.value = tokens[0]
- def eval(self):
- mult = {'+':1, '-':-1}[self.sign]
- return mult * self.value.eval()
-
-def operatorOperands(tokenlist):
- "generator to extract operators and operands in pairs"
- it = iter(tokenlist)
- while 1:
- try:
- yield (next(it), next(it))
- except StopIteration:
- break
-
-class EvalPowerOp(object):
- "Class to evaluate multiplication and division expressions"
- def __init__(self, tokens):
- self.value = tokens[0]
- def eval(self):
- res = self.value[-1].eval()
- for val in self.value[-3::-2]:
- res = val.eval()**res
- return res
-
-class EvalMultOp(object):
- "Class to evaluate multiplication and division expressions"
- def __init__(self, tokens):
- self.value = tokens[0]
- def eval(self):
- prod = self.value[0].eval()
- for op,val in operatorOperands(self.value[1:]):
- if op == '*':
- prod *= val.eval()
- if op == '/':
- prod /= val.eval()
- return prod
-
-class EvalAddOp(object):
- "Class to evaluate addition and subtraction expressions"
- def __init__(self, tokens):
- self.value = tokens[0]
- def eval(self):
- sum = self.value[0].eval()
- for op,val in operatorOperands(self.value[1:]):
- if op == '+':
- sum += val.eval()
- if op == '-':
- sum -= val.eval()
- return sum
-
-class EvalComparisonOp(object):
- "Class to evaluate comparison expressions"
- opMap = {
- "<" : lambda a,b : a < b,
- "<=" : lambda a,b : a <= b,
- ">" : lambda a,b : a > b,
- ">=" : lambda a,b : a >= b,
- "!=" : lambda a,b : a != b,
- "=" : lambda a,b : a == b,
- "LT" : lambda a,b : a < b,
- "LE" : lambda a,b : a <= b,
- "GT" : lambda a,b : a > b,
- "GE" : lambda a,b : a >= b,
- "NE" : lambda a,b : a != b,
- "EQ" : lambda a,b : a == b,
- "<>" : lambda a,b : a != b,
- }
- def __init__(self, tokens):
- self.value = tokens[0]
- def eval(self):
- val1 = self.value[0].eval()
- for op,val in operatorOperands(self.value[1:]):
- fn = EvalComparisonOp.opMap[op]
- val2 = val.eval()
- if not fn(val1,val2):
- break
- val1 = val2
- else:
- return True
- return False
-
-
-# define the parser
-integer = Word(nums)
-real = Combine(Word(nums) + "." + Word(nums))
-variable = Word(alphas,exact=1)
-operand = real | integer | variable
-
-signop = oneOf('+ -')
-multop = oneOf('* /')
-plusop = oneOf('+ -')
-expop = Literal('**')
-
-# use parse actions to attach EvalXXX constructors to sub-expressions
-operand.setParseAction(EvalConstant)
-arith_expr = infixNotation(operand,
- [
- (signop, 1, opAssoc.RIGHT, EvalSignOp),
- (expop, 2, opAssoc.LEFT, EvalPowerOp),
- (multop, 2, opAssoc.LEFT, EvalMultOp),
- (plusop, 2, opAssoc.LEFT, EvalAddOp),
- ])
-
-comparisonop = oneOf("< <= > >= != = <> LT GT LE GE EQ NE")
-comp_expr = infixNotation(arith_expr,
- [
- (comparisonop, 2, opAssoc.LEFT, EvalComparisonOp),
- ])
-
-def main():
- # sample expressions posted on comp.lang.python, asking for advice
- # in safely evaluating them
- rules=[
- '( A - B ) = 0',
- '(A + B + C + D + E + F + G + H + I) = J',
- '(A + B + C + D + E + F + G + H) = I',
- '(A + B + C + D + E + F) = G',
- '(A + B + C + D + E) = (F + G + H + I + J)',
- '(A + B + C + D + E) = (F + G + H + I)',
- '(A + B + C + D + E) = F',
- '(A + B + C + D) = (E + F + G + H)',
- '(A + B + C) = (D + E + F)',
- '(A + B) = (C + D + E + F)',
- '(A + B) = (C + D)',
- '(A + B) = (C - D + E - F - G + H + I + J)',
- '(A + B) = C',
- '(A + B) = 0',
- '(A+B+C+D+E) = (F+G+H+I+J)',
- '(A+B+C+D) = (E+F+G+H)',
- '(A+B+C+D)=(E+F+G+H)',
- '(A+B+C)=(D+E+F)',
- '(A+B)=(C+D)',
- '(A+B)=C',
- '(A-B)=C',
- '(A/(B+C))',
- '(B/(C+D))',
- '(G + H) = I',
- '-0.99 LE ((A+B+C)-(D+E+F+G)) LE 0.99',
- '-0.99 LE (A-(B+C)) LE 0.99',
- '-1000.00 LE A LE 0.00',
- '-5000.00 LE A LE 0.00',
- 'A < B',
- 'A < 7000',
- 'A = -(B)',
- 'A = C',
- 'A = 0',
- 'A GT 0',
- 'A GT 0.00',
- 'A GT 7.00',
- 'A LE B',
- 'A LT -1000.00',
- 'A LT -5000',
- 'A LT 0',
- 'A=(B+C+D)',
- 'A=B',
- 'I = (G + H)',
- '0.00 LE A LE 4.00',
- '4.00 LT A LE 7.00',
- '0.00 LE A LE 4.00 LE E > D',
- '2**2**(A+3)',
- ]
- vars_={'A': 0, 'B': 1.1, 'C': 2.2, 'D': 3.3, 'E': 4.4, 'F': 5.5, 'G':
- 6.6, 'H':7.7, 'I':8.8, 'J':9.9}
-
- # define tests from given rules
- tests = []
- for t in rules:
- t_orig = t
- t = t.replace("=","==")
- t = t.replace("EQ","==")
- t = t.replace("LE","<=")
- t = t.replace("GT",">")
- t = t.replace("LT","<")
- t = t.replace("GE",">=")
- t = t.replace("LE","<=")
- t = t.replace("NE","!=")
- t = t.replace("<>","!=")
- tests.append( (t_orig,eval(t,vars_)) )
-
- # copy vars_ to EvalConstant lookup dict
- EvalConstant.vars_ = vars_
- failed = 0
- for test,expected in tests:
- ret = comp_expr.parseString(test)[0]
- parsedvalue = ret.eval()
- print(test, expected, parsedvalue)
- if parsedvalue != expected:
- print("<<< FAIL")
- failed += 1
- else:
- print('')
-
- print('')
- if failed:
- print(failed, "tests FAILED")
- else:
- print("all tests PASSED")
-
-if __name__=='__main__':
- main()
+# eval_arith.py +# +# Copyright 2009, 2011 Paul McGuire +# +# Expansion on the pyparsing example simpleArith.py, to include evaluation +# of the parsed tokens. +# +# Added support for exponentiation, using right-to-left evaluation of +# operands +# +from pyparsing import Word, nums, alphas, Combine, oneOf, \ + opAssoc, infixNotation, Literal + +class EvalConstant(object): + "Class to evaluate a parsed constant or variable" + vars_ = {} + def __init__(self, tokens): + self.value = tokens[0] + def eval(self): + if self.value in EvalConstant.vars_: + return EvalConstant.vars_[self.value] + else: + return float(self.value) + +class EvalSignOp(object): + "Class to evaluate expressions with a leading + or - sign" + def __init__(self, tokens): + self.sign, self.value = tokens[0] + def eval(self): + mult = {'+':1, '-':-1}[self.sign] + return mult * self.value.eval() + +def operatorOperands(tokenlist): + "generator to extract operators and operands in pairs" + it = iter(tokenlist) + while 1: + try: + yield (next(it), next(it)) + except StopIteration: + break + +class EvalPowerOp(object): + "Class to evaluate multiplication and division expressions" + def __init__(self, tokens): + self.value = tokens[0] + def eval(self): + res = self.value[-1].eval() + for val in self.value[-3::-2]: + res = val.eval()**res + return res + +class EvalMultOp(object): + "Class to evaluate multiplication and division expressions" + def __init__(self, tokens): + self.value = tokens[0] + def eval(self): + prod = self.value[0].eval() + for op,val in operatorOperands(self.value[1:]): + if op == '*': + prod *= val.eval() + if op == '/': + prod /= val.eval() + return prod + +class EvalAddOp(object): + "Class to evaluate addition and subtraction expressions" + def __init__(self, tokens): + self.value = tokens[0] + def eval(self): + sum = self.value[0].eval() + for op,val in operatorOperands(self.value[1:]): + if op == '+': + sum += val.eval() + if op == '-': + sum -= val.eval() + return sum + +class EvalComparisonOp(object): + "Class to evaluate comparison expressions" + opMap = { + "<" : lambda a,b : a < b, + "<=" : lambda a,b : a <= b, + ">" : lambda a,b : a > b, + ">=" : lambda a,b : a >= b, + "!=" : lambda a,b : a != b, + "=" : lambda a,b : a == b, + "LT" : lambda a,b : a < b, + "LE" : lambda a,b : a <= b, + "GT" : lambda a,b : a > b, + "GE" : lambda a,b : a >= b, + "NE" : lambda a,b : a != b, + "EQ" : lambda a,b : a == b, + "<>" : lambda a,b : a != b, + } + def __init__(self, tokens): + self.value = tokens[0] + def eval(self): + val1 = self.value[0].eval() + for op,val in operatorOperands(self.value[1:]): + fn = EvalComparisonOp.opMap[op] + val2 = val.eval() + if not fn(val1,val2): + break + val1 = val2 + else: + return True + return False + + +# define the parser +integer = Word(nums) +real = Combine(Word(nums) + "." + Word(nums)) +variable = Word(alphas,exact=1) +operand = real | integer | variable + +signop = oneOf('+ -') +multop = oneOf('* /') +plusop = oneOf('+ -') +expop = Literal('**') + +# use parse actions to attach EvalXXX constructors to sub-expressions +operand.setParseAction(EvalConstant) +arith_expr = infixNotation(operand, + [ + (signop, 1, opAssoc.RIGHT, EvalSignOp), + (expop, 2, opAssoc.LEFT, EvalPowerOp), + (multop, 2, opAssoc.LEFT, EvalMultOp), + (plusop, 2, opAssoc.LEFT, EvalAddOp), + ]) + +comparisonop = oneOf("< <= > >= != = <> LT GT LE GE EQ NE") +comp_expr = infixNotation(arith_expr, + [ + (comparisonop, 2, opAssoc.LEFT, EvalComparisonOp), + ]) + +def main(): + # sample expressions posted on comp.lang.python, asking for advice + # in safely evaluating them + rules=[ + '( A - B ) = 0', + '(A + B + C + D + E + F + G + H + I) = J', + '(A + B + C + D + E + F + G + H) = I', + '(A + B + C + D + E + F) = G', + '(A + B + C + D + E) = (F + G + H + I + J)', + '(A + B + C + D + E) = (F + G + H + I)', + '(A + B + C + D + E) = F', + '(A + B + C + D) = (E + F + G + H)', + '(A + B + C) = (D + E + F)', + '(A + B) = (C + D + E + F)', + '(A + B) = (C + D)', + '(A + B) = (C - D + E - F - G + H + I + J)', + '(A + B) = C', + '(A + B) = 0', + '(A+B+C+D+E) = (F+G+H+I+J)', + '(A+B+C+D) = (E+F+G+H)', + '(A+B+C+D)=(E+F+G+H)', + '(A+B+C)=(D+E+F)', + '(A+B)=(C+D)', + '(A+B)=C', + '(A-B)=C', + '(A/(B+C))', + '(B/(C+D))', + '(G + H) = I', + '-0.99 LE ((A+B+C)-(D+E+F+G)) LE 0.99', + '-0.99 LE (A-(B+C)) LE 0.99', + '-1000.00 LE A LE 0.00', + '-5000.00 LE A LE 0.00', + 'A < B', + 'A < 7000', + 'A = -(B)', + 'A = C', + 'A = 0', + 'A GT 0', + 'A GT 0.00', + 'A GT 7.00', + 'A LE B', + 'A LT -1000.00', + 'A LT -5000', + 'A LT 0', + 'A=(B+C+D)', + 'A=B', + 'I = (G + H)', + '0.00 LE A LE 4.00', + '4.00 LT A LE 7.00', + '0.00 LE A LE 4.00 LE E > D', + '2**2**(A+3)', + ] + vars_={'A': 0, 'B': 1.1, 'C': 2.2, 'D': 3.3, 'E': 4.4, 'F': 5.5, 'G': + 6.6, 'H':7.7, 'I':8.8, 'J':9.9} + + # define tests from given rules + tests = [] + for t in rules: + t_orig = t + t = t.replace("=","==") + t = t.replace("EQ","==") + t = t.replace("LE","<=") + t = t.replace("GT",">") + t = t.replace("LT","<") + t = t.replace("GE",">=") + t = t.replace("LE","<=") + t = t.replace("NE","!=") + t = t.replace("<>","!=") + tests.append( (t_orig,eval(t,vars_)) ) + + # copy vars_ to EvalConstant lookup dict + EvalConstant.vars_ = vars_ + failed = 0 + for test,expected in tests: + ret = comp_expr.parseString(test)[0] + parsedvalue = ret.eval() + print(test, expected, parsedvalue) + if parsedvalue != expected: + print("<<< FAIL") + failed += 1 + else: + print('') + + print('') + if failed: + print(failed, "tests FAILED") + return 1 + else: + print("all tests PASSED") + return 0 + +if __name__=='__main__': + exit(main()) diff --git a/examples/httpServerLogParser.py b/examples/httpServerLogParser.py index 261cea3..b10678b 100644 --- a/examples/httpServerLogParser.py +++ b/examples/httpServerLogParser.py @@ -1,73 +1,72 @@ -# httpServerLogParser.py
-#
-# Copyright (c) 2016, Paul McGuire
-#
-"""
-Parser for HTTP server log output, of the form:
-
-195.146.134.15 - - [20/Jan/2003:08:55:36 -0800]
-"GET /path/to/page.html HTTP/1.0" 200 4649 "http://www.somedomain.com/020602/page.html"
-"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
-127.0.0.1 - u.surname@domain.com [12/Sep/2006:14:13:53 +0300]
-"GET /skins/monobook/external.png HTTP/1.0" 304 - "http://wiki.mysite.com/skins/monobook/main.css"
-"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6"
-
-You can then break it up as follows:
-IP ADDRESS - -
-Server Date / Time [SPACE]
-"GET /path/to/page
-HTTP/Type Request"
-Success Code
-Bytes Sent To Client
-Referer
-Client Software
-"""
-
-from pyparsing import alphas,nums, dblQuotedString, Combine, Word, Group, delimitedList, Suppress, removeQuotes
-import string
-
-def getCmdFields( s, l, t ):
- t["method"],t["requestURI"],t["protocolVersion"] = t[0].strip('"').split()
-
-logLineBNF = None
-def getLogLineBNF():
- global logLineBNF
-
- if logLineBNF is None:
- integer = Word( nums )
- ipAddress = delimitedList( integer, ".", combine=True )
-
- timeZoneOffset = Word("+-",nums)
- month = Word(string.uppercase, string.lowercase, exact=3)
- serverDateTime = Group( Suppress("[") +
- Combine( integer + "/" + month + "/" + integer +
- ":" + integer + ":" + integer + ":" + integer ) +
- timeZoneOffset +
- Suppress("]") )
-
- logLineBNF = ( ipAddress.setResultsName("ipAddr") +
- Suppress("-") +
- ("-" | Word( alphas+nums+"@._" )).setResultsName("auth") +
- serverDateTime.setResultsName("timestamp") +
- dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields) +
- (integer | "-").setResultsName("statusCode") +
- (integer | "-").setResultsName("numBytesSent") +
- dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes) +
- dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes) )
- return logLineBNF
-
-testdata = """
-195.146.134.15 - - [20/Jan/2003:08:55:36 -0800] "GET /path/to/page.html HTTP/1.0" 200 4649 "http://www.somedomain.com/020602/page.html" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
-111.111.111.11 - - [16/Feb/2004:04:09:49 -0800] "GET /ads/redirectads/336x280redirect.htm HTTP/1.1" 304 - "http://www.foobarp.org/theme_detail.php?type=vs&cat=0&mid=27512" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
-11.111.11.111 - - [16/Feb/2004:10:35:12 -0800] "GET /ads/redirectads/468x60redirect.htm HTTP/1.1" 200 541 "http://11.11.111.11/adframe.php?n=ad1f311a&what=zone:56" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1) Opera 7.20 [ru\"]"
-127.0.0.1 - u.surname@domain.com [12/Sep/2006:14:13:53 +0300] "GET /skins/monobook/external.png HTTP/1.0" 304 - "http://wiki.mysite.com/skins/monobook/main.css" "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6"
-"""
-for line in testdata.split("\n"):
- if not line: continue
- fields = getLogLineBNF().parseString(line)
- print(fields.dump())
- #~ print repr(fields)
- #~ for k in fields.keys():
- #~ print "fields." + k + " =", fields[k]
- print(fields.asXML("LOG"))
- print()
+# httpServerLogParser.py +# +# Copyright (c) 2016, Paul McGuire +# +""" +Parser for HTTP server log output, of the form: + +195.146.134.15 - - [20/Jan/2003:08:55:36 -0800] +"GET /path/to/page.html HTTP/1.0" 200 4649 "http://www.somedomain.com/020602/page.html" +"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)" +127.0.0.1 - u.surname@domain.com [12/Sep/2006:14:13:53 +0300] +"GET /skins/monobook/external.png HTTP/1.0" 304 - "http://wiki.mysite.com/skins/monobook/main.css" +"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6" + +You can then break it up as follows: +IP ADDRESS - - +Server Date / Time [SPACE] +"GET /path/to/page +HTTP/Type Request" +Success Code +Bytes Sent To Client +Referer +Client Software +""" + +from pyparsing import alphas,nums, dblQuotedString, Combine, Word, Group, delimitedList, Suppress, removeQuotes +import string + +def getCmdFields( s, l, t ): + t["method"],t["requestURI"],t["protocolVersion"] = t[0].strip('"').split() + +logLineBNF = None +def getLogLineBNF(): + global logLineBNF + + if logLineBNF is None: + integer = Word( nums ) + ipAddress = delimitedList( integer, ".", combine=True ) + + timeZoneOffset = Word("+-",nums) + month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3) + serverDateTime = Group( Suppress("[") + + Combine( integer + "/" + month + "/" + integer + + ":" + integer + ":" + integer + ":" + integer ) + + timeZoneOffset + + Suppress("]") ) + + logLineBNF = ( ipAddress.setResultsName("ipAddr") + + Suppress("-") + + ("-" | Word( alphas+nums+"@._" )).setResultsName("auth") + + serverDateTime.setResultsName("timestamp") + + dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields) + + (integer | "-").setResultsName("statusCode") + + (integer | "-").setResultsName("numBytesSent") + + dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes) + + dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes) ) + return logLineBNF + +testdata = """ +195.146.134.15 - - [20/Jan/2003:08:55:36 -0800] "GET /path/to/page.html HTTP/1.0" 200 4649 "http://www.somedomain.com/020602/page.html" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)" +111.111.111.11 - - [16/Feb/2004:04:09:49 -0800] "GET /ads/redirectads/336x280redirect.htm HTTP/1.1" 304 - "http://www.foobarp.org/theme_detail.php?type=vs&cat=0&mid=27512" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)" +11.111.11.111 - - [16/Feb/2004:10:35:12 -0800] "GET /ads/redirectads/468x60redirect.htm HTTP/1.1" 200 541 "http://11.11.111.11/adframe.php?n=ad1f311a&what=zone:56" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1) Opera 7.20 [ru\"]" +127.0.0.1 - u.surname@domain.com [12/Sep/2006:14:13:53 +0300] "GET /skins/monobook/external.png HTTP/1.0" 304 - "http://wiki.mysite.com/skins/monobook/main.css" "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6" +""" +for line in testdata.split("\n"): + if not line: continue + fields = getLogLineBNF().parseString(line) + print(fields.dump()) + #~ print repr(fields) + #~ for k in fields.keys(): + #~ print "fields." + k + " =", fields[k] + print() diff --git a/examples/lucene_grammar.py b/examples/lucene_grammar.py index 07eb319..bf92509 100644 --- a/examples/lucene_grammar.py +++ b/examples/lucene_grammar.py @@ -1,325 +1,332 @@ -#
-# lucene_grammar.py
-#
-# Copyright 2011, Paul McGuire
-#
-# implementation of Lucene grammar, as decribed
-# at http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/docs/queryparsersyntax.html
-#
-
-import pyparsing as pp
-from pyparsing import pyparsing_common as ppc
-pp.ParserElement.enablePackrat()
-
-COLON,LBRACK,RBRACK,LBRACE,RBRACE,TILDE,CARAT = map(pp.Literal,":[]{}~^")
-LPAR,RPAR = map(pp.Suppress,"()")
-and_, or_, not_, to_ = map(pp.CaselessKeyword, "AND OR NOT TO".split())
-keyword = and_ | or_ | not_ | to_
-
-expression = pp.Forward()
-
-valid_word = pp.Regex(r'([a-zA-Z0-9*_+.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&))+').setName("word")
-valid_word.setParseAction(
- lambda t : t[0].replace('\\\\',chr(127)).replace('\\','').replace(chr(127),'\\')
- )
-
-string = pp.QuotedString('"')
-
-required_modifier = pp.Literal("+")("required")
-prohibit_modifier = pp.Literal("-")("prohibit")
-integer = ppc.integer()
-proximity_modifier = pp.Group(TILDE + integer("proximity"))
-number = ppc.fnumber()
-fuzzy_modifier = TILDE + pp.Optional(number, default=0.5)("fuzzy")
-
-term = pp.Forward()
-field_name = valid_word().setName("fieldname")
-incl_range_search = pp.Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK)
-excl_range_search = pp.Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE)
-range_search = incl_range_search("incl_range") | excl_range_search("excl_range")
-boost = (CARAT + number("boost"))
-
-string_expr = pp.Group(string + proximity_modifier) | string
-word_expr = pp.Group(valid_word + fuzzy_modifier) | valid_word
-term << (pp.Optional(field_name("field") + COLON)
- + (word_expr | string_expr | range_search | pp.Group(LPAR + expression + RPAR))
- + pp.Optional(boost))
-term.setParseAction(lambda t:[t] if 'field' in t or 'boost' in t else None)
-
-expression << pp.infixNotation(term,
- [
- (required_modifier | prohibit_modifier, 1, pp.opAssoc.RIGHT),
- ((not_ | '!').setParseAction(lambda: "NOT"), 1, pp.opAssoc.RIGHT),
- ((and_ | '&&').setParseAction(lambda: "AND"), 2, pp.opAssoc.LEFT),
- (pp.Optional(or_ | '||').setParseAction(lambda: "OR"), 2, pp.opAssoc.LEFT),
- ])
-
-# test strings taken from grammar description doc, and TestQueryParser.java
-tests = r"""
- # Success tests
- a and b
- a and not b
- a and !b
- a && !b
- a&&!b
- name:a
- name:a and not title:b
- (a^100 c d f) and !z
- name:"blah de blah"
- title:(+return +"pink panther")
- title:"The Right Way" AND text:go
- title:"Do it right" AND right
- title:Do it right
- roam~
- roam~0.8
- "jakarta apache"~10
- mod_date:[20020101 TO 20030101]
- title:{Aida TO Carmen}
- jakarta apache
- jakarta^4 apache
- "jakarta apache"^4 "Apache Lucene"
- "jakarta apache" jakarta
- "jakarta apache" OR jakarta
- "jakarta apache" AND "Apache Lucene"
- +jakarta lucene
- "jakarta apache" NOT "Apache Lucene"
- "jakarta apache" -"Apache Lucene"
- (jakarta OR apache) AND website
- \(1+1\)\:2
- c\:\\windows
- (fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)
- (fieldX:xxxxx fieldy:xxxxxxxx)^2 AND (fieldx:the fieldy:foo)
- (fieldX:xxxxx~0.5 fieldy:xxxxxxxx)^2 AND (fieldx:the fieldy:foo)
- +term -term term
- foo:term AND field:anotherTerm
- germ term^2.0
- (term)^2.0
- (foo OR bar) AND (baz OR boo)
- +(apple \"steve jobs\") -(foo bar baz)
- +title:(dog OR cat) -author:\"bob dole\"
- a AND b
- +a +b
- (a AND b)
- c OR (a AND b)
- c (+a +b)
- a AND NOT b
- +a -b
- a AND -b
- a AND !b
- a && b
- a && ! b
- a OR b
- a b
- a || b
- a OR !b
- a -b
- a OR ! b
- a OR -b
- a - b
- a + b
- a ! b
- +foo:term +anotherterm
- hello
- term^2.0
- (germ term)^2.0
- term^2
- +(foo bar) +(baz boo)
- ((a OR b) AND NOT c) OR d
- (+(a b) -c) d
- field
- a&&b
- .NET
- term
- germ
- 3
- term 1.0 1 2
- term term1 term2
- term term term
- term*
- term*^2
- term*^2.0
- term~
- term~2.0
- term~0.7
- term~^3
- term~2.0^3.0
- term*germ
- term*germ^3
- term*germ^3.0
- term~1.1
- [A TO C]
- t*erm*
- *term*
- term term^3.0 term
- term stop^3.0 term
- term +stop term
- term -stop term
- drop AND (stop) AND roll
- +drop +roll
- term +(stop) term
- term -(stop) term
- drop AND stop AND roll
- term phrase term
- term (phrase1 phrase2) term
- term AND NOT phrase term
- +term -(phrase1 phrase2) term
- stop^3
- stop
- (stop)^3
- ((stop))^3
- (stop^3)
- ((stop)^3)
- (stop)
- ((stop))
- term +stop
- [ a TO z]
- [a TO z]
- [ a TO z ]
- { a TO z}
- {a TO z}
- { a TO z }
- { a TO z }^2.0
- {a TO z}^2.0
- [ a TO z] OR bar
- [a TO z] bar
- [ a TO z] AND bar
- +[a TO z] +bar
- ( bar blar { a TO z})
- bar blar {a TO z}
- gack ( bar blar { a TO z})
- gack (bar blar {a TO z})
- [* TO Z]
- [* TO z]
- [A TO *]
- [a TO *]
- [* TO *]
- [\* TO \*]
- \!blah
- \:blah
- blah
- \~blah
- \*blah
- a
- a-b:c
- a+b:c
- a\:b:c
- a\\b:c
- a:b-c
- a:b+c
- a:b\:c
- a:b\\c
- a:b-c*
- a:b+c*
- a:b\:c*
- a:b\\c*
- a:b-c~2.0
- a:b+c~2.0
- a:b\:c~
- a:b\\c~
- [a- TO a+]
- [ a\\ TO a\* ]
- c\:\\temp\\\~foo.txt
- abc
- XYZ
- (item:\\ item:ABCD\\)
- \*
- *
- \\
- \||
- \&&
- a\:b\:c
- a\\b\:c
- a\:b\\c
- a\:b\:c\*
- a\:b\\\\c\*
- a:b-c~
- a:b+c~
- a\:b\:c\~
- a\:b\\c\~
- +weltbank +worlbank
- +term +term +term
- term +term term
- term term +term
- term +term +term
- -term term term
- -term +term +term
- on
- on^1.0
- hello^2.0
- the^3
- the
- some phrase
- xunit~
- one two three
- A AND B OR C AND D
- +A +B +C +D
- foo:zoo*
- foo:zoo*^2
- zoo
- foo:*
- foo:*^2
- *:foo
- a:the OR a:foo
- a:woo OR a:the
- *:*
- (*:*)
- +*:* -*:*
- the wizard of ozzy
- """
-
-failtests = r"""
- # Failure tests
- field:term:with:colon some more terms
- (sub query)^5.0^2.0 plus more
- a:b:c
- a:b:c~
- a:b:c*
- a:b:c~2.0
- \+blah
- \-blah
- foo \|| bar
- foo \AND bar
- \a
- a\-b:c
- a\+b:c
- a\b:c
- a:b\-c
- a:b\+c
- a\-b\:c
- a\+b\:c
- a:b\c*
- a:b\-c~
- a:b\+c~
- a:b\c
- a:b\-c*
- a:b\+c*
- [ a\- TO a\+ ]
- [a\ TO a*]
- a\\\+b
- a\+b
- c:\temp\~foo.txt
- XY\
- a\u0062c
- a:b\c~2.0
- XY\u005a
- XY\u005A
- item:\ item:ABCD\
- \
- a\ or b
- a\:b\-c
- a\:b\+c
- a\:b\-c\*
- a\:b\+c\*
- a\:b\-c\~
- a\:b\+c\~
- a:b\c~
- [ a\ TO a* ]
- """
-
-success1, _ = expression.runTests(tests)
-success2, _ = expression.runTests(failtests, failureTests=True)
-
-print(("FAIL", "OK")[success1 and success2])
-
-if not (success1 and success2):
- raise Exception("failure in lucene grammar parser, check output")
+# +# lucene_grammar.py +# +# Copyright 2011, Paul McGuire +# +# implementation of Lucene grammar, as decribed +# at http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/docs/queryparsersyntax.html +# + +import pyparsing as pp +from pyparsing import pyparsing_common as ppc +pp.ParserElement.enablePackrat() + +COLON,LBRACK,RBRACK,LBRACE,RBRACE,TILDE,CARAT = map(pp.Literal,":[]{}~^") +LPAR,RPAR = map(pp.Suppress,"()") +and_, or_, not_, to_ = map(pp.CaselessKeyword, "AND OR NOT TO".split()) +keyword = and_ | or_ | not_ | to_ + +expression = pp.Forward() + +valid_word = pp.Regex(r'([a-zA-Z0-9*_+.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&))+').setName("word") +valid_word.setParseAction( + lambda t : t[0].replace('\\\\',chr(127)).replace('\\','').replace(chr(127),'\\') + ) + +string = pp.QuotedString('"') + +required_modifier = pp.Literal("+")("required") +prohibit_modifier = pp.Literal("-")("prohibit") +integer = ppc.integer() +proximity_modifier = pp.Group(TILDE + integer("proximity")) +number = ppc.fnumber() +fuzzy_modifier = TILDE + pp.Optional(number, default=0.5)("fuzzy") + +term = pp.Forward() +field_name = valid_word().setName("fieldname") +incl_range_search = pp.Group(LBRACK - term("lower") + to_ + term("upper") + RBRACK) +excl_range_search = pp.Group(LBRACE - term("lower") + to_ + term("upper") + RBRACE) +range_search = incl_range_search("incl_range") | excl_range_search("excl_range") +boost = (CARAT - number("boost")) + +string_expr = pp.Group(string + proximity_modifier) | string +word_expr = pp.Group(valid_word + fuzzy_modifier) | valid_word +term << (pp.Optional(field_name("field") + COLON) + + (word_expr | string_expr | range_search | pp.Group(LPAR + expression + RPAR)) + + pp.Optional(boost)) +term.setParseAction(lambda t:[t] if 'field' in t or 'boost' in t else None) + +expression << pp.infixNotation(term, + [ + (required_modifier | prohibit_modifier, 1, pp.opAssoc.RIGHT), + ((not_ | '!').setParseAction(lambda: "NOT"), 1, pp.opAssoc.RIGHT), + ((and_ | '&&').setParseAction(lambda: "AND"), 2, pp.opAssoc.LEFT), + (pp.Optional(or_ | '||').setParseAction(lambda: "OR"), 2, pp.opAssoc.LEFT), + ]) + +if __name__ == '__main__': + + # test strings taken from grammar description doc, and TestQueryParser.java + tests = r""" + # Success tests + a and b + a and not b + a and !b + a && !b + a&&!b + name:a + name:a and not title:b + (a^100 c d f) and !z + name:"blah de blah" + title:(+return +"pink panther") + title:"The Right Way" AND text:go + title:"Do it right" AND right + title:Do it right + roam~ + roam~0.8 + "jakarta apache"~10 + mod_date:[20020101 TO 20030101] + title:{Aida TO Carmen} + jakarta apache + jakarta^4 apache + "jakarta apache"^4 "Apache Lucene" + "jakarta apache" jakarta + "jakarta apache" OR jakarta + "jakarta apache" AND "Apache Lucene" + +jakarta lucene + "jakarta apache" NOT "Apache Lucene" + "jakarta apache" -"Apache Lucene" + (jakarta OR apache) AND website + \(1+1\)\:2 + c\:\\windows + (fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo) + (fieldX:xxxxx fieldy:xxxxxxxx)^2 AND (fieldx:the fieldy:foo) + (fieldX:xxxxx~0.5 fieldy:xxxxxxxx)^2 AND (fieldx:the fieldy:foo) + +term -term term + foo:term AND field:anotherTerm + germ term^2.0 + (term)^2.0 + (foo OR bar) AND (baz OR boo) + +(apple \"steve jobs\") -(foo bar baz) + +title:(dog OR cat) -author:\"bob dole\" + a AND b + +a +b + (a AND b) + c OR (a AND b) + c (+a +b) + a AND NOT b + +a -b + a AND -b + a AND !b + a && b + a && ! b + a OR b + a b + a || b + a OR !b + a -b + a OR ! b + a OR -b + a - b + a + b + a ! b + +foo:term +anotherterm + hello + term^2.0 + (germ term)^2.0 + term^2 + +(foo bar) +(baz boo) + ((a OR b) AND NOT c) OR d + (+(a b) -c) d + field + a&&b + .NET + term + germ + 3 + term 1.0 1 2 + term term1 term2 + term term term + term* + term*^2 + term*^2.0 + term~ + term~2.0 + term~0.7 + term~^3 + term~2.0^3.0 + term*germ + term*germ^3 + term*germ^3.0 + term~1.1 + [A TO C] + t*erm* + *term* + term term^3.0 term + term stop^3.0 term + term +stop term + term -stop term + drop AND (stop) AND roll + +drop +roll + term +(stop) term + term -(stop) term + drop AND stop AND roll + term phrase term + term (phrase1 phrase2) term + term AND NOT phrase term + +term -(phrase1 phrase2) term + stop^3 + stop + (stop)^3 + ((stop))^3 + (stop^3) + ((stop)^3) + (stop) + ((stop)) + term +stop + [ a TO z] + [a TO z] + [ a TO z ] + { a TO z} + {a TO z} + { a TO z } + { a TO z }^2.0 + {a TO z}^2.0 + [ a TO z] OR bar + [a TO z] bar + [ a TO z] AND bar + +[a TO z] +bar + ( bar blar { a TO z}) + bar blar {a TO z} + gack ( bar blar { a TO z}) + gack (bar blar {a TO z}) + [* TO Z] + [* TO z] + [A TO *] + [a TO *] + [* TO *] + [\* TO \*] + \!blah + \:blah + blah + \~blah + \*blah + a + a-b:c + a+b:c + a\:b:c + a\\b:c + a:b-c + a:b+c + a:b\:c + a:b\\c + a:b-c* + a:b+c* + a:b\:c* + a:b\\c* + a:b-c~2.0 + a:b+c~2.0 + a:b\:c~ + a:b\\c~ + [a- TO a+] + [ a\\ TO a\* ] + c\:\\temp\\\~foo.txt + abc + XYZ + (item:\\ item:ABCD\\) + \* + * + \\ + \|| + \&& + a\:b\:c + a\\b\:c + a\:b\\c + a\:b\:c\* + a\:b\\\\c\* + a:b-c~ + a:b+c~ + a\:b\:c\~ + a\:b\\c\~ + +weltbank +worlbank + +term +term +term + term +term term + term term +term + term +term +term + -term term term + -term +term +term + on + on^1.0 + hello^2.0 + the^3 + the + some phrase + xunit~ + one two three + A AND B OR C AND D + +A +B +C +D + foo:zoo* + foo:zoo*^2 + zoo + foo:* + foo:*^2 + *:foo + a:the OR a:foo + a:woo OR a:the + *:* + (*:*) + +*:* -*:* + the wizard of ozzy + """ + + failtests = r""" + # Failure tests + + # multiple ':'s in term + field:term:with:colon some more terms + + # multiple '^'s in term + (sub query)^5.0^2.0 plus more + a:b:c + a:b:c~ + a:b:c* + a:b:c~2.0 + \+blah + \-blah + foo \|| bar + foo \AND bar + \a + a\-b:c + a\+b:c + a\b:c + a:b\-c + a:b\+c + a\-b\:c + a\+b\:c + a:b\c* + a:b\-c~ + a:b\+c~ + a:b\c + a:b\-c* + a:b\+c* + [ a\- TO a\+ ] + [a\ TO a*] + a\\\+b + a\+b + c:\temp\~foo.txt + XY\ + a\u0062c + a:b\c~2.0 + XY\u005a + XY\u005A + item:\ item:ABCD\ + \ + a\ or b + a\:b\-c + a\:b\+c + a\:b\-c\* + a\:b\+c\* + a\:b\-c\~ + a\:b\+c\~ + a:b\c~ + [ a\ TO a* ] + """ + + success1, _ = expression.runTests(tests) + success2, _ = expression.runTests(failtests, failureTests=True) + + print("All tests:", ("FAIL", "OK")[success1 and success2]) + + if not (success1 and success2): + import sys + sys.exit(1) diff --git a/examples/protobuf_parser.py b/examples/protobuf_parser.py index 68a8f63..0b3e909 100644 --- a/examples/protobuf_parser.py +++ b/examples/protobuf_parser.py @@ -1,100 +1,100 @@ -# protobuf_parser.py
-#
-# simple parser for parsing protobuf .proto files
-#
-# Copyright 2010, Paul McGuire
-#
-
-from pyparsing import (Word, alphas, alphanums, Regex, Suppress, Forward,
- Group, oneOf, ZeroOrMore, Optional, delimitedList,
- restOfLine, quotedString, Dict)
-
-ident = Word(alphas+"_",alphanums+"_").setName("identifier")
-integer = Regex(r"[+-]?\d+")
-
-LBRACE,RBRACE,LBRACK,RBRACK,LPAR,RPAR,EQ,SEMI = map(Suppress,"{}[]()=;")
-
-kwds = """message required optional repeated enum extensions extends extend
- to package service rpc returns true false option import"""
-for kw in kwds.split():
- exec("{0}_ = Keyword('{1}')".format(kw.upper(), kw))
-
-messageBody = Forward()
-
-messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody("body") + RBRACE
-
-typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64
- fixed32 fixed64 sfixed32 sfixed64 bool string bytes""") | ident
-rvalue = integer | TRUE_ | FALSE_ | ident
-fieldDirective = LBRACK + Group(ident + EQ + rvalue) + RBRACK
-fieldDefn = (( REQUIRED_ | OPTIONAL_ | REPEATED_ )("fieldQualifier") -
- typespec("typespec") + ident("ident") + EQ + integer("fieldint") + ZeroOrMore(fieldDirective) + SEMI)
-
-# enumDefn ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
-enumDefn = ENUM_("typespec") - ident('name') + LBRACE + Dict( ZeroOrMore( Group(ident + EQ + integer + SEMI) ))('values') + RBRACE
-
-# extensionsDefn ::= 'extensions' integer 'to' integer ';'
-extensionsDefn = EXTENSIONS_ - integer + TO_ + integer + SEMI
-
-# messageExtension ::= 'extend' ident '{' messageBody '}'
-messageExtension = EXTEND_ - ident + LBRACE + messageBody + RBRACE
-
-# messageBody ::= { fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension }*
-messageBody << Group(ZeroOrMore( Group(fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension) ))
-
-# methodDefn ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';'
-methodDefn = (RPC_ - ident("methodName") +
- LPAR + Optional(ident("methodParam")) + RPAR +
- RETURNS_ + LPAR + Optional(ident("methodReturn")) + RPAR)
-
-# serviceDefn ::= 'service' ident '{' methodDefn* '}'
-serviceDefn = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(methodDefn)) + RBRACE
-
-# packageDirective ::= 'package' ident [ '.' ident]* ';'
-packageDirective = Group(PACKAGE_ - delimitedList(ident, '.', combine=True) + SEMI)
-
-comment = '//' + restOfLine
-
-importDirective = IMPORT_ - quotedString("importFileSpec") + SEMI
-
-optionDirective = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
-
-topLevelStatement = Group(messageDefn | messageExtension | enumDefn | serviceDefn | importDirective | optionDirective)
-
-parser = Optional(packageDirective) + ZeroOrMore(topLevelStatement)
-
-parser.ignore(comment)
-
-
-test1 = """message Person {
- required int32 id = 1;
- required string name = 2;
- optional string email = 3;
-}"""
-
-test2 = """package tutorial;
-
-message Person {
- required string name = 1;
- required int32 id = 2;
- optional string email = 3;
-
- enum PhoneType {
- MOBILE = 0;
- HOME = 1;
- WORK = 2;
- }
-
- message PhoneNumber {
- required string number = 1;
- optional PhoneType type = 2 [default = HOME];
- }
-
- repeated PhoneNumber phone = 4;
-}
-
-message AddressBook {
- repeated Person person = 1;
-}"""
-
-parser.runTests([test1, test2])
+# protobuf_parser.py +# +# simple parser for parsing protobuf .proto files +# +# Copyright 2010, Paul McGuire +# + +from pyparsing import (Word, alphas, alphanums, Regex, Suppress, Forward, + Keyword, Group, oneOf, ZeroOrMore, Optional, delimitedList, + restOfLine, quotedString, Dict) + +ident = Word(alphas+"_",alphanums+"_").setName("identifier") +integer = Regex(r"[+-]?\d+") + +LBRACE,RBRACE,LBRACK,RBRACK,LPAR,RPAR,EQ,SEMI = map(Suppress,"{}[]()=;") + +kwds = """message required optional repeated enum extensions extends extend + to package service rpc returns true false option import""" +for kw in kwds.split(): + exec("{0}_ = Keyword('{1}')".format(kw.upper(), kw)) + +messageBody = Forward() + +messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody("body") + RBRACE + +typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64 + fixed32 fixed64 sfixed32 sfixed64 bool string bytes""") | ident +rvalue = integer | TRUE_ | FALSE_ | ident +fieldDirective = LBRACK + Group(ident + EQ + rvalue) + RBRACK +fieldDefn = (( REQUIRED_ | OPTIONAL_ | REPEATED_ )("fieldQualifier") - + typespec("typespec") + ident("ident") + EQ + integer("fieldint") + ZeroOrMore(fieldDirective) + SEMI) + +# enumDefn ::= 'enum' ident '{' { ident '=' integer ';' }* '}' +enumDefn = ENUM_("typespec") - ident('name') + LBRACE + Dict( ZeroOrMore( Group(ident + EQ + integer + SEMI) ))('values') + RBRACE + +# extensionsDefn ::= 'extensions' integer 'to' integer ';' +extensionsDefn = EXTENSIONS_ - integer + TO_ + integer + SEMI + +# messageExtension ::= 'extend' ident '{' messageBody '}' +messageExtension = EXTEND_ - ident + LBRACE + messageBody + RBRACE + +# messageBody ::= { fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension }* +messageBody << Group(ZeroOrMore( Group(fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension) )) + +# methodDefn ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';' +methodDefn = (RPC_ - ident("methodName") + + LPAR + Optional(ident("methodParam")) + RPAR + + RETURNS_ + LPAR + Optional(ident("methodReturn")) + RPAR) + +# serviceDefn ::= 'service' ident '{' methodDefn* '}' +serviceDefn = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(methodDefn)) + RBRACE + +# packageDirective ::= 'package' ident [ '.' ident]* ';' +packageDirective = Group(PACKAGE_ - delimitedList(ident, '.', combine=True) + SEMI) + +comment = '//' + restOfLine + +importDirective = IMPORT_ - quotedString("importFileSpec") + SEMI + +optionDirective = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI + +topLevelStatement = Group(messageDefn | messageExtension | enumDefn | serviceDefn | importDirective | optionDirective) + +parser = Optional(packageDirective) + ZeroOrMore(topLevelStatement) + +parser.ignore(comment) + + +test1 = """message Person { + required int32 id = 1; + required string name = 2; + optional string email = 3; +}""" + +test2 = """package tutorial; + +message Person { + required string name = 1; + required int32 id = 2; + optional string email = 3; + + enum PhoneType { + MOBILE = 0; + HOME = 1; + WORK = 2; + } + + message PhoneNumber { + required string number = 1; + optional PhoneType type = 2 [default = HOME]; + } + + repeated PhoneNumber phone = 4; +} + +message AddressBook { + repeated Person person = 1; +}""" + +parser.runTests([test1, test2]) diff --git a/examples/removeLineBreaks.py b/examples/removeLineBreaks.py index df07fba..84bd33e 100644 --- a/examples/removeLineBreaks.py +++ b/examples/removeLineBreaks.py @@ -1,45 +1,45 @@ -# removeLineBreaks.py
-#
-# Demonstration of the pyparsing module, converting text files
-# with hard line-breaks to text files with line breaks only
-# between paragraphs. (Helps when converting downloads from Project
-# Gutenberg - https://www.gutenberg.org/ - to import to word processing apps
-# that can reformat paragraphs once hard line-breaks are removed.)
-#
-# Uses parse actions and transformString to remove unwanted line breaks,
-# and to double up line breaks between paragraphs.
-#
-# Copyright 2006, by Paul McGuire
-#
-from pyparsing import *
-
-# define an expression for the body of a line of text - use a parse action to reject any
-# empty lines
-def mustBeNonBlank(s,l,t):
- if not t[0]:
- raise ParseException(s,l,"line body can't be empty")
-lineBody = SkipTo(lineEnd).setParseAction(mustBeNonBlank)
-
-# now define a line with a trailing lineEnd, to be replaced with a space character
-textLine = lineBody + Suppress(lineEnd).setParseAction(replaceWith(" "))
-
-# define a paragraph, with a separating lineEnd, to be replaced with a double newline
-para = OneOrMore(textLine) + Suppress(lineEnd).setParseAction(replaceWith("\n\n"))
-
-
-# run a test
-test = """
- Now is the
- time for
- all
- good men
- to come to
-
- the aid of their
- country.
-"""
-print(para.transformString(test))
-
-# process an entire file
-z = para.transformString(file("Successful Methods of Public Speaking.txt").read())
-file("Successful Methods of Public Speaking(2).txt","w").write(z)
+# removeLineBreaks.py +# +# Demonstration of the pyparsing module, converting text files +# with hard line-breaks to text files with line breaks only +# between paragraphs. (Helps when converting downloads from Project +# Gutenberg - https://www.gutenberg.org/ - to import to word processing apps +# that can reformat paragraphs once hard line-breaks are removed.) +# +# Uses parse actions and transformString to remove unwanted line breaks, +# and to double up line breaks between paragraphs. +# +# Copyright 2006, by Paul McGuire +# +from pyparsing import * + +# define an expression for the body of a line of text - use a parse action to reject any +# empty lines +def mustBeNonBlank(s,l,t): + if not t[0]: + raise ParseException(s,l,"line body can't be empty") +lineBody = SkipTo(lineEnd).setParseAction(mustBeNonBlank) + +# now define a line with a trailing lineEnd, to be replaced with a space character +textLine = lineBody + Suppress(lineEnd).setParseAction(replaceWith(" ")) + +# define a paragraph, with a separating lineEnd, to be replaced with a double newline +para = OneOrMore(textLine) + Suppress(lineEnd).setParseAction(replaceWith("\n\n")) + + +# run a test +test = """ + Now is the + time for + all + good men + to come to + + the aid of their + country. +""" +print(para.transformString(test)) + +# process an entire file +z = para.transformString(open("Successful Methods of Public Speaking.txt").read()) +open("Successful Methods of Public Speaking(2).txt","w").write(z) |