Code updates to current idiomspyparsing_2.4.0

author: Paul McGuire <ptmcg@austin.rr.com> 2019-04-07 13:45:07 -0500
committer: Paul McGuire <ptmcg@austin.rr.com> 2019-04-07 13:45:07 -0500
commit: adf8dd00b736ba1934914e1db78528af02662b65 (patch)
tree: f673c526701163c1ce94608a2af3181a78bdf40f /examples/antlr_grammar.py
parent: 2a81eb83ff5450e85bd22bd2ed35e43938c18208 (diff)
download: pyparsing-git-adf8dd00b736ba1934914e1db78528af02662b65.tar.gz
1 files changed, 112 insertions, 53 deletions
diff --git a/examples/antlr_grammar.py b/examples/antlr_grammar.py
index 09ef264..c131cfb 100644
--- a/examples/antlr_grammar.py
+++ b/examples/antlr_grammar.py
@@ -7,49 +7,71 @@ Created on 4 sept. 2010
 
 Submitted by Luca DallOlio, September, 2010
 (Minor updates by Paul McGuire, June, 2012)
+(Code idiom updates by Paul McGuire, April, 2019)
 '''
-from pyparsing import Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, \
-    LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, \
-    cStyleComment, Regex, Forward, MatchFirst, And, oneOf, alphas, alphanums, \
-    delimitedList, ungroup
+from pyparsing import (Word, ZeroOrMore, printables, Suppress, OneOrMore, Group,
+    LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword,
+    cStyleComment, Regex, Forward, MatchFirst, And, oneOf, alphas, alphanums,
+    delimitedList, Char)
 
 # http://www.antlr.org/grammar/ANTLR/ANTLRv3.g
 
+QUOTE,APOS,EQ,LBRACK,RBRACK,LBRACE,RBRACE,LPAR,RPAR,ROOT,BANG,AT,TIL,SEMI,COLON,VERT = map(Suppress,
+                                                                                           '"\'=[]{}()^!@~;:|')
+BSLASH = Literal('\\')
+keywords = (SRC_, SCOPE_, OPTIONS_, TOKENS_, FRAGMENT, ID, LEXER, PARSER, GRAMMAR, TREE, CATCH, FINALLY,
+            THROWS, PROTECTED, PUBLIC, PRIVATE, ) = map(Keyword,
+    """src scope options tokens fragment id lexer parser grammar tree catch finally throws protected 
+       public private """.split())
+KEYWORD = MatchFirst(keywords)
+
 # Tokens
 EOL = Suppress(LineEnd()) # $
+SGL_PRINTABLE = Char(printables)
 singleTextString = originalTextFor(ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace()
 XDIGIT = hexnums
 INT = Word(nums)
-ESC = Literal('\\') + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | Word(printables, exact=1))
-LITERAL_CHAR = ESC | ~(Literal("'") | Literal('\\')) + Word(printables, exact=1)
-CHAR_LITERAL = Suppress("'") + LITERAL_CHAR + Suppress("'")
-STRING_LITERAL = Suppress("'") + Combine(OneOrMore(LITERAL_CHAR)) + Suppress("'")
+ESC = BSLASH + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | SGL_PRINTABLE)
+LITERAL_CHAR = ESC | ~(APOS | BSLASH) + SGL_PRINTABLE
+CHAR_LITERAL = APOS + LITERAL_CHAR + APOS
+STRING_LITERAL = APOS + Combine(OneOrMore(LITERAL_CHAR)) + APOS
 DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"'
-DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(Word(printables, exact=1)) + '>>'
+DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(SGL_PRINTABLE) + '>>'
 TOKEN_REF = Word(alphas.upper(), alphanums+'_')
 RULE_REF = Word(alphas.lower(), alphanums+'_')
-ACTION_ESC = (Suppress("\\") + Suppress("'")) | Suppress('\\"') | Suppress('\\') + (~(Literal("'") | Literal('"')) + Word(printables, exact=1))
-ACTION_CHAR_LITERAL = Suppress("'") + (ACTION_ESC | ~(Literal('\\') | Literal("'")) + Word(printables, exact=1)) + Suppress("'")
-ACTION_STRING_LITERAL = Suppress('"') + ZeroOrMore(ACTION_ESC | ~(Literal('\\') | Literal('"')) + Word(printables, exact=1)) + Suppress('"')
-SRC = Suppress('src') + ACTION_STRING_LITERAL("file") + INT("line")
+ACTION_ESC = (BSLASH.suppress() + APOS
+             | BSLASH.suppress()
+             | BSLASH.suppress() + (~(APOS | QUOTE) + SGL_PRINTABLE)
+              )
+ACTION_CHAR_LITERAL = (APOS + (ACTION_ESC | ~(BSLASH | APOS) + SGL_PRINTABLE) + APOS)
+ACTION_STRING_LITERAL = (QUOTE + ZeroOrMore(ACTION_ESC | ~(BSLASH | QUOTE) + SGL_PRINTABLE) + QUOTE)
+
+SRC = SRC_.suppress() + ACTION_STRING_LITERAL("file") + INT("line")
 id = TOKEN_REF | RULE_REF
 SL_COMMENT = Suppress('//') + Suppress('$ANTLR') + SRC | ZeroOrMore(~EOL + Word(printables)) + EOL
 ML_COMMENT = cStyleComment
 WS = OneOrMore(Suppress(' ') | Suppress('\t') | (Optional(Suppress('\r')) + Literal('\n')))
 WS_LOOP = ZeroOrMore(SL_COMMENT | ML_COMMENT)
 NESTED_ARG_ACTION = Forward()
-NESTED_ARG_ACTION << Suppress('[') + ZeroOrMore(NESTED_ARG_ACTION | ACTION_STRING_LITERAL | ACTION_CHAR_LITERAL) + Suppress(']')
+NESTED_ARG_ACTION << (LBRACK
+                      + ZeroOrMore(NESTED_ARG_ACTION
+                                   | ACTION_STRING_LITERAL
+                                   | ACTION_CHAR_LITERAL)
+                      + RBRACK)
 ARG_ACTION = NESTED_ARG_ACTION
 NESTED_ACTION = Forward()
-NESTED_ACTION << Suppress('{') + ZeroOrMore(NESTED_ACTION | SL_COMMENT | ML_COMMENT | ACTION_STRING_LITERAL | ACTION_CHAR_LITERAL) + Suppress('}')
+NESTED_ACTION << (LBRACE
+                  + ZeroOrMore(NESTED_ACTION
+                               | SL_COMMENT
+                               | ML_COMMENT
+                               | ACTION_STRING_LITERAL
+                               | ACTION_CHAR_LITERAL)
+                  + RBRACE)
 ACTION = NESTED_ACTION + Optional('?')
-SCOPE = Suppress('scope')
-OPTIONS = Suppress('options') + Suppress('{') # + WS_LOOP + Suppress('{')
-TOKENS = Suppress('tokens') + Suppress('{') # + WS_LOOP + Suppress('{')
-FRAGMENT = 'fragment';
-TREE_BEGIN = Suppress('^(')
-ROOT = Suppress('^')
-BANG = Suppress('!')
+SCOPE = SCOPE_.suppress()
+OPTIONS = OPTIONS_.suppress() + LBRACE # + WS_LOOP + Suppress('{')
+TOKENS = TOKENS_.suppress() + LBRACE # + WS_LOOP + Suppress('{')
+TREE_BEGIN = ROOT + LPAR
 RANGE = Suppress('..')
 REWRITE = Suppress('->')
 
@@ -58,26 +80,36 @@ REWRITE = Suppress('->')
 # Grammar heading
 optionValue = id | STRING_LITERAL | CHAR_LITERAL | INT | Literal('*').setName("s")
 
-option = Group(id("id") + Suppress('=') + optionValue("value"))("option")
-optionsSpec = OPTIONS + Group(OneOrMore(option + Suppress(';')))("options") + Suppress('}')
-tokenSpec = Group(TOKEN_REF("token_ref") + (Suppress('=') + (STRING_LITERAL | CHAR_LITERAL)("lit")))("token") + Suppress(';')
-tokensSpec = TOKENS + Group(OneOrMore(tokenSpec))("tokens") + Suppress('}')
-attrScope = Suppress('scope') + id + ACTION
-grammarType = Keyword('lexer') + Keyword('parser') + Keyword('tree')
-actionScopeName = id | Keyword('lexer')("l") | Keyword('parser')("p")
-action = Suppress('@') + Optional(actionScopeName + Suppress('::')) + id + ACTION
-
-grammarHeading = Optional(ML_COMMENT("ML_COMMENT")) + Optional(grammarType) + Suppress('grammar') + id("grammarName") + Suppress(';') + Optional(optionsSpec) + Optional(tokensSpec) + ZeroOrMore(attrScope) + ZeroOrMore(action)
-
-modifier = Keyword('protected') | Keyword('public') | Keyword('private') | Keyword('fragment')
-ruleAction = Suppress('@') + id + ACTION
-throwsSpec = Suppress('throws') + delimitedList(id)
-ruleScopeSpec = (Suppress('scope') + ACTION) | (Suppress('scope') + delimitedList(id) + Suppress(';')) | (Suppress('scope') + ACTION + Suppress('scope') + delimitedList(id) + Suppress(';'))
+option = Group(id("id") + EQ + optionValue("value"))("option")
+optionsSpec = OPTIONS + Group(OneOrMore(option + SEMI))("options") + RBRACE
+tokenSpec = Group(TOKEN_REF("token_ref")
+                  + (EQ + (STRING_LITERAL | CHAR_LITERAL)("lit")))("token") + SEMI
+tokensSpec = TOKENS + Group(OneOrMore(tokenSpec))("tokens") + RBRACE
+attrScope = SCOPE_.suppress() + id + ACTION
+grammarType = LEXER + PARSER + TREE
+actionScopeName = id | LEXER("l") | PARSER("p")
+action = AT + Optional(actionScopeName + Suppress('::')) + id + ACTION
+
+grammarHeading = (Optional(ML_COMMENT("ML_COMMENT"))
+                  + Optional(grammarType)
+                  + GRAMMAR
+                  + id("grammarName") + SEMI
+                  + Optional(optionsSpec)
+                  + Optional(tokensSpec)
+                  + ZeroOrMore(attrScope)
+                  + ZeroOrMore(action))
+
+modifier = PROTECTED | PUBLIC | PRIVATE | FRAGMENT
+ruleAction = AT + id + ACTION
+throwsSpec = THROWS.suppress() + delimitedList(id)
+ruleScopeSpec = ((SCOPE_.suppress() + ACTION)
+                 | (SCOPE_.suppress() + delimitedList(id) + SEMI)
+                 | (SCOPE_.suppress() + ACTION + SCOPE_.suppress() + delimitedList(id) + SEMI))
 unary_op = oneOf("^ !")
 notTerminal = CHAR_LITERAL | TOKEN_REF | STRING_LITERAL
 terminal = (CHAR_LITERAL | TOKEN_REF + Optional(ARG_ACTION) | STRING_LITERAL | '.') + Optional(unary_op)
 block = Forward()
-notSet = Suppress('~') + (notTerminal | block)
+notSet = TIL + (notTerminal | block)
 rangeNotPython = CHAR_LITERAL("c1") + RANGE + CHAR_LITERAL("c2")
 atom = Group((rangeNotPython + Optional(unary_op)("op"))
              | terminal
@@ -85,21 +117,44 @@ atom = Group((rangeNotPython + Optional(unary_op)("op"))
              | (RULE_REF + Optional(ARG_ACTION("arg")) + Optional(unary_op)("op"))
              )
 element = Forward()
-treeSpec = Suppress('^(') + element*(2,) + Suppress(')')
+treeSpec = ROOT + LPAR + element*(2,) + RPAR
 ebnfSuffix = oneOf("? * +")
 ebnf = block + Optional(ebnfSuffix("op") | '=>')
-elementNoOptionSpec = (id("result_name") + oneOf('= +=')("labelOp") + atom("atom") + Optional(ebnfSuffix)) | (id("result_name") + oneOf('= +=')("labelOp") + block + Optional(ebnfSuffix)) | atom("atom") + Optional(ebnfSuffix) | ebnf | ACTION | (treeSpec + Optional(ebnfSuffix)) # |   SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
-element << Group(elementNoOptionSpec)("element")
-alternative = Group(Group(OneOrMore(element))("elements")) # Do not ask me why group is needed twice... seems like the xml that you see is not always the real structure?
+elementNoOptionSpec = ((id("result_name")  + oneOf('= +=')("labelOp")  + atom("atom") + Optional(ebnfSuffix))
+                       | (id("result_name") + oneOf('= +=')("labelOp") + block + Optional(ebnfSuffix))
+                       | atom("atom") + Optional(ebnfSuffix)
+                       | ebnf
+                       | ACTION
+                       | (treeSpec + Optional(ebnfSuffix))
+                       ) # |   SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
+element <<= Group(elementNoOptionSpec)("element")
+# Do not ask me why group is needed twice... seems like the xml that you see is not always the real structure?
+alternative = Group(Group(OneOrMore(element))("elements"))
 rewrite = Optional(Literal('TODO REWRITE RULES TODO'))
-block << Suppress('(') + Optional(Optional(optionsSpec("opts")) + Suppress(':')) + Group(alternative('a1') + rewrite + Group(ZeroOrMore(Suppress('|') + alternative('a2') + rewrite))("alternatives"))("block") + Suppress(')')
-altList = alternative('a1') + rewrite + Group(ZeroOrMore(Suppress('|') + alternative('a2') + rewrite))("alternatives")
-exceptionHandler = Suppress('catch') + ARG_ACTION + ACTION
-finallyClause = Suppress('finally') + ACTION
+block <<= (LPAR
+           + Optional(Optional(optionsSpec("opts")) + COLON)
+           + Group(alternative('a1')
+                   + rewrite
+                   + Group(ZeroOrMore(VERT
+                                      + alternative('a2')
+                                      + rewrite))("alternatives"))("block")
+           + RPAR)
+altList = alternative('a1') + rewrite + Group(ZeroOrMore(VERT + alternative('a2') + rewrite))("alternatives")
+exceptionHandler = CATCH.suppress() + ARG_ACTION + ACTION
+finallyClause = FINALLY.suppress() + ACTION
 exceptionGroup = (OneOrMore(exceptionHandler) + Optional(finallyClause)) | finallyClause
 
-ruleHeading = Optional(ML_COMMENT)("ruleComment") + Optional(modifier)("modifier") + id("ruleName") + Optional("!") + Optional(ARG_ACTION("arg")) + Optional(Suppress('returns') + ARG_ACTION("rt")) + Optional(throwsSpec) + Optional(optionsSpec) + Optional(ruleScopeSpec) + ZeroOrMore(ruleAction)
-rule = Group(ruleHeading + Suppress(':') + altList + Suppress(';') + Optional(exceptionGroup))("rule")
+ruleHeading = (Optional(ML_COMMENT)("ruleComment")
+               + Optional(modifier)("modifier")
+               + id("ruleName")
+               + Optional("!")
+               + Optional(ARG_ACTION("arg"))
+               + Optional(Suppress('returns') + ARG_ACTION("rt"))
+               + Optional(throwsSpec)
+               + Optional(optionsSpec)
+               + Optional(ruleScopeSpec)
+               + ZeroOrMore(ruleAction))
+rule = Group(ruleHeading + COLON + altList + SEMI + Optional(exceptionGroup))("rule")
 
 grammarDef = grammarHeading + Group(OneOrMore(rule))("rules")
 
@@ -149,7 +204,7 @@ def __antlrAlternativeConverter(pyparsingRules, antlrAlternative):
         rule = Group(And(elementList))("anonymous_and")
     else:
         rule = elementList[0]
-    assert rule != None
+    assert rule is not None
     return rule
 
 def __antlrRuleConverter(pyparsingRules, antlrRule):
@@ -161,11 +216,13 @@ def __antlrRuleConverter(pyparsingRules, antlrRule):
 
 def antlrConverter(antlrGrammarTree):
     pyparsingRules = {}
+
     antlrTokens = {}
     for antlrToken in antlrGrammarTree.tokens:
         antlrTokens[antlrToken.token_ref] = antlrToken.lit
     for antlrTokenName, antlrToken in list(antlrTokens.items()):
         pyparsingRules[antlrTokenName] = Literal(antlrToken)
+
     antlrRules = {}
     for antlrRule in antlrGrammarTree.rules:
         antlrRules[antlrRule.ruleName] = antlrRule
@@ -173,12 +230,14 @@ def antlrConverter(antlrGrammarTree):
     for antlrRuleName, antlrRule in list(antlrRules.items()):
         pyparsingRule = __antlrRuleConverter(pyparsingRules, antlrRule)
         assert pyparsingRule != None
-        pyparsingRules[antlrRuleName] << pyparsingRule
+        pyparsingRules[antlrRuleName] <<= pyparsingRule
+
     return pyparsingRules
 
 if __name__ == "__main__":
 
-    text = """grammar SimpleCalc;
+    text = """\
+grammar SimpleCalc;
 
 options {
     language = Python;
@@ -216,8 +275,8 @@ fragment DIGIT    : '0'..'9' ;
 
     grammar().validate()
     antlrGrammarTree = grammar().parseString(text)
-    print(antlrGrammarTree.asXML("antlrGrammarTree"))
+    print(antlrGrammarTree.dump())
     pyparsingRules = antlrConverter(antlrGrammarTree)
     pyparsingRule = pyparsingRules["expr"]
     pyparsingTree = pyparsingRule.parseString("2 - 5 * 42 + 7 / 25")
-    print(pyparsingTree.asXML("pyparsingTree"))
+    print(pyparsingTree.dump())
author	Paul McGuire <ptmcg@austin.rr.com>	2019-04-07 13:45:07 -0500
committer	Paul McGuire <ptmcg@austin.rr.com>	2019-04-07 13:45:07 -0500
commit	adf8dd00b736ba1934914e1db78528af02662b65 (patch)
tree	f673c526701163c1ce94608a2af3181a78bdf40f /examples/antlr_grammar.py
parent	2a81eb83ff5450e85bd22bd2ed35e43938c18208 (diff)
download	pyparsing-git-adf8dd00b736ba1934914e1db78528af02662b65.tar.gz