summaryrefslogtreecommitdiff
path: root/examples/antlr_grammar.py
diff options
context:
space:
mode:
authorPaul McGuire <ptmcg@austin.rr.com>2019-04-07 13:45:07 -0500
committerPaul McGuire <ptmcg@austin.rr.com>2019-04-07 13:45:07 -0500
commitadf8dd00b736ba1934914e1db78528af02662b65 (patch)
treef673c526701163c1ce94608a2af3181a78bdf40f /examples/antlr_grammar.py
parent2a81eb83ff5450e85bd22bd2ed35e43938c18208 (diff)
downloadpyparsing-git-adf8dd00b736ba1934914e1db78528af02662b65.tar.gz
Code updates to current idiomspyparsing_2.4.0
Diffstat (limited to 'examples/antlr_grammar.py')
-rw-r--r--examples/antlr_grammar.py165
1 files changed, 112 insertions, 53 deletions
diff --git a/examples/antlr_grammar.py b/examples/antlr_grammar.py
index 09ef264..c131cfb 100644
--- a/examples/antlr_grammar.py
+++ b/examples/antlr_grammar.py
@@ -7,49 +7,71 @@ Created on 4 sept. 2010
Submitted by Luca DallOlio, September, 2010
(Minor updates by Paul McGuire, June, 2012)
+(Code idiom updates by Paul McGuire, April, 2019)
'''
-from pyparsing import Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, \
- LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, \
- cStyleComment, Regex, Forward, MatchFirst, And, oneOf, alphas, alphanums, \
- delimitedList, ungroup
+from pyparsing import (Word, ZeroOrMore, printables, Suppress, OneOrMore, Group,
+ LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword,
+ cStyleComment, Regex, Forward, MatchFirst, And, oneOf, alphas, alphanums,
+ delimitedList, Char)
# http://www.antlr.org/grammar/ANTLR/ANTLRv3.g
+QUOTE,APOS,EQ,LBRACK,RBRACK,LBRACE,RBRACE,LPAR,RPAR,ROOT,BANG,AT,TIL,SEMI,COLON,VERT = map(Suppress,
+ '"\'=[]{}()^!@~;:|')
+BSLASH = Literal('\\')
+keywords = (SRC_, SCOPE_, OPTIONS_, TOKENS_, FRAGMENT, ID, LEXER, PARSER, GRAMMAR, TREE, CATCH, FINALLY,
+ THROWS, PROTECTED, PUBLIC, PRIVATE, ) = map(Keyword,
+ """src scope options tokens fragment id lexer parser grammar tree catch finally throws protected
+ public private """.split())
+KEYWORD = MatchFirst(keywords)
+
# Tokens
EOL = Suppress(LineEnd()) # $
+SGL_PRINTABLE = Char(printables)
singleTextString = originalTextFor(ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace()
XDIGIT = hexnums
INT = Word(nums)
-ESC = Literal('\\') + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | Word(printables, exact=1))
-LITERAL_CHAR = ESC | ~(Literal("'") | Literal('\\')) + Word(printables, exact=1)
-CHAR_LITERAL = Suppress("'") + LITERAL_CHAR + Suppress("'")
-STRING_LITERAL = Suppress("'") + Combine(OneOrMore(LITERAL_CHAR)) + Suppress("'")
+ESC = BSLASH + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | SGL_PRINTABLE)
+LITERAL_CHAR = ESC | ~(APOS | BSLASH) + SGL_PRINTABLE
+CHAR_LITERAL = APOS + LITERAL_CHAR + APOS
+STRING_LITERAL = APOS + Combine(OneOrMore(LITERAL_CHAR)) + APOS
DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"'
-DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(Word(printables, exact=1)) + '>>'
+DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(SGL_PRINTABLE) + '>>'
TOKEN_REF = Word(alphas.upper(), alphanums+'_')
RULE_REF = Word(alphas.lower(), alphanums+'_')
-ACTION_ESC = (Suppress("\\") + Suppress("'")) | Suppress('\\"') | Suppress('\\') + (~(Literal("'") | Literal('"')) + Word(printables, exact=1))
-ACTION_CHAR_LITERAL = Suppress("'") + (ACTION_ESC | ~(Literal('\\') | Literal("'")) + Word(printables, exact=1)) + Suppress("'")
-ACTION_STRING_LITERAL = Suppress('"') + ZeroOrMore(ACTION_ESC | ~(Literal('\\') | Literal('"')) + Word(printables, exact=1)) + Suppress('"')
-SRC = Suppress('src') + ACTION_STRING_LITERAL("file") + INT("line")
+ACTION_ESC = (BSLASH.suppress() + APOS
+ | BSLASH.suppress()
+ | BSLASH.suppress() + (~(APOS | QUOTE) + SGL_PRINTABLE)
+ )
+ACTION_CHAR_LITERAL = (APOS + (ACTION_ESC | ~(BSLASH | APOS) + SGL_PRINTABLE) + APOS)
+ACTION_STRING_LITERAL = (QUOTE + ZeroOrMore(ACTION_ESC | ~(BSLASH | QUOTE) + SGL_PRINTABLE) + QUOTE)
+
+SRC = SRC_.suppress() + ACTION_STRING_LITERAL("file") + INT("line")
id = TOKEN_REF | RULE_REF
SL_COMMENT = Suppress('//') + Suppress('$ANTLR') + SRC | ZeroOrMore(~EOL + Word(printables)) + EOL
ML_COMMENT = cStyleComment
WS = OneOrMore(Suppress(' ') | Suppress('\t') | (Optional(Suppress('\r')) + Literal('\n')))
WS_LOOP = ZeroOrMore(SL_COMMENT | ML_COMMENT)
NESTED_ARG_ACTION = Forward()
-NESTED_ARG_ACTION << Suppress('[') + ZeroOrMore(NESTED_ARG_ACTION | ACTION_STRING_LITERAL | ACTION_CHAR_LITERAL) + Suppress(']')
+NESTED_ARG_ACTION << (LBRACK
+ + ZeroOrMore(NESTED_ARG_ACTION
+ | ACTION_STRING_LITERAL
+ | ACTION_CHAR_LITERAL)
+ + RBRACK)
ARG_ACTION = NESTED_ARG_ACTION
NESTED_ACTION = Forward()
-NESTED_ACTION << Suppress('{') + ZeroOrMore(NESTED_ACTION | SL_COMMENT | ML_COMMENT | ACTION_STRING_LITERAL | ACTION_CHAR_LITERAL) + Suppress('}')
+NESTED_ACTION << (LBRACE
+ + ZeroOrMore(NESTED_ACTION
+ | SL_COMMENT
+ | ML_COMMENT
+ | ACTION_STRING_LITERAL
+ | ACTION_CHAR_LITERAL)
+ + RBRACE)
ACTION = NESTED_ACTION + Optional('?')
-SCOPE = Suppress('scope')
-OPTIONS = Suppress('options') + Suppress('{') # + WS_LOOP + Suppress('{')
-TOKENS = Suppress('tokens') + Suppress('{') # + WS_LOOP + Suppress('{')
-FRAGMENT = 'fragment';
-TREE_BEGIN = Suppress('^(')
-ROOT = Suppress('^')
-BANG = Suppress('!')
+SCOPE = SCOPE_.suppress()
+OPTIONS = OPTIONS_.suppress() + LBRACE # + WS_LOOP + Suppress('{')
+TOKENS = TOKENS_.suppress() + LBRACE # + WS_LOOP + Suppress('{')
+TREE_BEGIN = ROOT + LPAR
RANGE = Suppress('..')
REWRITE = Suppress('->')
@@ -58,26 +80,36 @@ REWRITE = Suppress('->')
# Grammar heading
optionValue = id | STRING_LITERAL | CHAR_LITERAL | INT | Literal('*').setName("s")
-option = Group(id("id") + Suppress('=') + optionValue("value"))("option")
-optionsSpec = OPTIONS + Group(OneOrMore(option + Suppress(';')))("options") + Suppress('}')
-tokenSpec = Group(TOKEN_REF("token_ref") + (Suppress('=') + (STRING_LITERAL | CHAR_LITERAL)("lit")))("token") + Suppress(';')
-tokensSpec = TOKENS + Group(OneOrMore(tokenSpec))("tokens") + Suppress('}')
-attrScope = Suppress('scope') + id + ACTION
-grammarType = Keyword('lexer') + Keyword('parser') + Keyword('tree')
-actionScopeName = id | Keyword('lexer')("l") | Keyword('parser')("p")
-action = Suppress('@') + Optional(actionScopeName + Suppress('::')) + id + ACTION
-
-grammarHeading = Optional(ML_COMMENT("ML_COMMENT")) + Optional(grammarType) + Suppress('grammar') + id("grammarName") + Suppress(';') + Optional(optionsSpec) + Optional(tokensSpec) + ZeroOrMore(attrScope) + ZeroOrMore(action)
-
-modifier = Keyword('protected') | Keyword('public') | Keyword('private') | Keyword('fragment')
-ruleAction = Suppress('@') + id + ACTION
-throwsSpec = Suppress('throws') + delimitedList(id)
-ruleScopeSpec = (Suppress('scope') + ACTION) | (Suppress('scope') + delimitedList(id) + Suppress(';')) | (Suppress('scope') + ACTION + Suppress('scope') + delimitedList(id) + Suppress(';'))
+option = Group(id("id") + EQ + optionValue("value"))("option")
+optionsSpec = OPTIONS + Group(OneOrMore(option + SEMI))("options") + RBRACE
+tokenSpec = Group(TOKEN_REF("token_ref")
+ + (EQ + (STRING_LITERAL | CHAR_LITERAL)("lit")))("token") + SEMI
+tokensSpec = TOKENS + Group(OneOrMore(tokenSpec))("tokens") + RBRACE
+attrScope = SCOPE_.suppress() + id + ACTION
+grammarType = LEXER + PARSER + TREE
+actionScopeName = id | LEXER("l") | PARSER("p")
+action = AT + Optional(actionScopeName + Suppress('::')) + id + ACTION
+
+grammarHeading = (Optional(ML_COMMENT("ML_COMMENT"))
+ + Optional(grammarType)
+ + GRAMMAR
+ + id("grammarName") + SEMI
+ + Optional(optionsSpec)
+ + Optional(tokensSpec)
+ + ZeroOrMore(attrScope)
+ + ZeroOrMore(action))
+
+modifier = PROTECTED | PUBLIC | PRIVATE | FRAGMENT
+ruleAction = AT + id + ACTION
+throwsSpec = THROWS.suppress() + delimitedList(id)
+ruleScopeSpec = ((SCOPE_.suppress() + ACTION)
+ | (SCOPE_.suppress() + delimitedList(id) + SEMI)
+ | (SCOPE_.suppress() + ACTION + SCOPE_.suppress() + delimitedList(id) + SEMI))
unary_op = oneOf("^ !")
notTerminal = CHAR_LITERAL | TOKEN_REF | STRING_LITERAL
terminal = (CHAR_LITERAL | TOKEN_REF + Optional(ARG_ACTION) | STRING_LITERAL | '.') + Optional(unary_op)
block = Forward()
-notSet = Suppress('~') + (notTerminal | block)
+notSet = TIL + (notTerminal | block)
rangeNotPython = CHAR_LITERAL("c1") + RANGE + CHAR_LITERAL("c2")
atom = Group((rangeNotPython + Optional(unary_op)("op"))
| terminal
@@ -85,21 +117,44 @@ atom = Group((rangeNotPython + Optional(unary_op)("op"))
| (RULE_REF + Optional(ARG_ACTION("arg")) + Optional(unary_op)("op"))
)
element = Forward()
-treeSpec = Suppress('^(') + element*(2,) + Suppress(')')
+treeSpec = ROOT + LPAR + element*(2,) + RPAR
ebnfSuffix = oneOf("? * +")
ebnf = block + Optional(ebnfSuffix("op") | '=>')
-elementNoOptionSpec = (id("result_name") + oneOf('= +=')("labelOp") + atom("atom") + Optional(ebnfSuffix)) | (id("result_name") + oneOf('= +=')("labelOp") + block + Optional(ebnfSuffix)) | atom("atom") + Optional(ebnfSuffix) | ebnf | ACTION | (treeSpec + Optional(ebnfSuffix)) # | SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
-element << Group(elementNoOptionSpec)("element")
-alternative = Group(Group(OneOrMore(element))("elements")) # Do not ask me why group is needed twice... seems like the xml that you see is not always the real structure?
+elementNoOptionSpec = ((id("result_name") + oneOf('= +=')("labelOp") + atom("atom") + Optional(ebnfSuffix))
+ | (id("result_name") + oneOf('= +=')("labelOp") + block + Optional(ebnfSuffix))
+ | atom("atom") + Optional(ebnfSuffix)
+ | ebnf
+ | ACTION
+ | (treeSpec + Optional(ebnfSuffix))
+ ) # | SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
+element <<= Group(elementNoOptionSpec)("element")
+# Do not ask me why group is needed twice... seems like the xml that you see is not always the real structure?
+alternative = Group(Group(OneOrMore(element))("elements"))
rewrite = Optional(Literal('TODO REWRITE RULES TODO'))
-block << Suppress('(') + Optional(Optional(optionsSpec("opts")) + Suppress(':')) + Group(alternative('a1') + rewrite + Group(ZeroOrMore(Suppress('|') + alternative('a2') + rewrite))("alternatives"))("block") + Suppress(')')
-altList = alternative('a1') + rewrite + Group(ZeroOrMore(Suppress('|') + alternative('a2') + rewrite))("alternatives")
-exceptionHandler = Suppress('catch') + ARG_ACTION + ACTION
-finallyClause = Suppress('finally') + ACTION
+block <<= (LPAR
+ + Optional(Optional(optionsSpec("opts")) + COLON)
+ + Group(alternative('a1')
+ + rewrite
+ + Group(ZeroOrMore(VERT
+ + alternative('a2')
+ + rewrite))("alternatives"))("block")
+ + RPAR)
+altList = alternative('a1') + rewrite + Group(ZeroOrMore(VERT + alternative('a2') + rewrite))("alternatives")
+exceptionHandler = CATCH.suppress() + ARG_ACTION + ACTION
+finallyClause = FINALLY.suppress() + ACTION
exceptionGroup = (OneOrMore(exceptionHandler) + Optional(finallyClause)) | finallyClause
-ruleHeading = Optional(ML_COMMENT)("ruleComment") + Optional(modifier)("modifier") + id("ruleName") + Optional("!") + Optional(ARG_ACTION("arg")) + Optional(Suppress('returns') + ARG_ACTION("rt")) + Optional(throwsSpec) + Optional(optionsSpec) + Optional(ruleScopeSpec) + ZeroOrMore(ruleAction)
-rule = Group(ruleHeading + Suppress(':') + altList + Suppress(';') + Optional(exceptionGroup))("rule")
+ruleHeading = (Optional(ML_COMMENT)("ruleComment")
+ + Optional(modifier)("modifier")
+ + id("ruleName")
+ + Optional("!")
+ + Optional(ARG_ACTION("arg"))
+ + Optional(Suppress('returns') + ARG_ACTION("rt"))
+ + Optional(throwsSpec)
+ + Optional(optionsSpec)
+ + Optional(ruleScopeSpec)
+ + ZeroOrMore(ruleAction))
+rule = Group(ruleHeading + COLON + altList + SEMI + Optional(exceptionGroup))("rule")
grammarDef = grammarHeading + Group(OneOrMore(rule))("rules")
@@ -149,7 +204,7 @@ def __antlrAlternativeConverter(pyparsingRules, antlrAlternative):
rule = Group(And(elementList))("anonymous_and")
else:
rule = elementList[0]
- assert rule != None
+ assert rule is not None
return rule
def __antlrRuleConverter(pyparsingRules, antlrRule):
@@ -161,11 +216,13 @@ def __antlrRuleConverter(pyparsingRules, antlrRule):
def antlrConverter(antlrGrammarTree):
pyparsingRules = {}
+
antlrTokens = {}
for antlrToken in antlrGrammarTree.tokens:
antlrTokens[antlrToken.token_ref] = antlrToken.lit
for antlrTokenName, antlrToken in list(antlrTokens.items()):
pyparsingRules[antlrTokenName] = Literal(antlrToken)
+
antlrRules = {}
for antlrRule in antlrGrammarTree.rules:
antlrRules[antlrRule.ruleName] = antlrRule
@@ -173,12 +230,14 @@ def antlrConverter(antlrGrammarTree):
for antlrRuleName, antlrRule in list(antlrRules.items()):
pyparsingRule = __antlrRuleConverter(pyparsingRules, antlrRule)
assert pyparsingRule != None
- pyparsingRules[antlrRuleName] << pyparsingRule
+ pyparsingRules[antlrRuleName] <<= pyparsingRule
+
return pyparsingRules
if __name__ == "__main__":
- text = """grammar SimpleCalc;
+ text = """\
+grammar SimpleCalc;
options {
language = Python;
@@ -216,8 +275,8 @@ fragment DIGIT : '0'..'9' ;
grammar().validate()
antlrGrammarTree = grammar().parseString(text)
- print(antlrGrammarTree.asXML("antlrGrammarTree"))
+ print(antlrGrammarTree.dump())
pyparsingRules = antlrConverter(antlrGrammarTree)
pyparsingRule = pyparsingRules["expr"]
pyparsingTree = pyparsingRule.parseString("2 - 5 * 42 + 7 / 25")
- print(pyparsingTree.asXML("pyparsingTree"))
+ print(pyparsingTree.dump())