summary | refs | log | tree | commit | diff
path: root/examples/antlr_grammar.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/antlr_grammar.py')
-rw-r--r-- examples/antlr_grammar.py 322
1 files changed, 214 insertions, 108 deletions
diff --git a/examples/antlr_grammar.py b/examples/antlr_grammar.py
index c131cfb..49151ee 100644
--- a/examples/antlr_grammar.py
+++ b/examples/antlr_grammar.py
@@ -1,4 +1,4 @@
-'''
+"""
antlr_grammar.py
Created on 4 sept. 2010
@@ -8,196 +8,299 @@ Created on 4 sept. 2010
Submitted by Luca DallOlio, September, 2010
(Minor updates by Paul McGuire, June, 2012)
(Code idiom updates by Paul McGuire, April, 2019)
-'''
-from pyparsing import (Word, ZeroOrMore, printables, Suppress, OneOrMore, Group,
- LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword,
- cStyleComment, Regex, Forward, MatchFirst, And, oneOf, alphas, alphanums,
- delimitedList, Char)
+"""
+from pyparsing import (
+ Word,
+ ZeroOrMore,
+ printables,
+ Suppress,
+ OneOrMore,
+ Group,
+ LineEnd,
+ Optional,
+ White,
+ originalTextFor,
+ hexnums,
+ nums,
+ Combine,
+ Literal,
+ Keyword,
+ cStyleComment,
+ Regex,
+ Forward,
+ MatchFirst,
+ And,
+ oneOf,
+ alphas,
+ alphanums,
+ delimitedList,
+ Char,
+)
# http://www.antlr.org/grammar/ANTLR/ANTLRv3.g
-QUOTE,APOS,EQ,LBRACK,RBRACK,LBRACE,RBRACE,LPAR,RPAR,ROOT,BANG,AT,TIL,SEMI,COLON,VERT = map(Suppress,
- '"\'=[]{}()^!@~;:|')
-BSLASH = Literal('\\')
-keywords = (SRC_, SCOPE_, OPTIONS_, TOKENS_, FRAGMENT, ID, LEXER, PARSER, GRAMMAR, TREE, CATCH, FINALLY,
- THROWS, PROTECTED, PUBLIC, PRIVATE, ) = map(Keyword,
- """src scope options tokens fragment id lexer parser grammar tree catch finally throws protected
- public private """.split())
+(
+ QUOTE,
+ APOS,
+ EQ,
+ LBRACK,
+ RBRACK,
+ LBRACE,
+ RBRACE,
+ LPAR,
+ RPAR,
+ ROOT,
+ BANG,
+ AT,
+ TIL,
+ SEMI,
+ COLON,
+ VERT,
+) = map(Suppress, "\"'=[]{}()^!@~;:|")
+BSLASH = Literal("\\")
+keywords = (
+ SRC_,
+ SCOPE_,
+ OPTIONS_,
+ TOKENS_,
+ FRAGMENT,
+ ID,
+ LEXER,
+ PARSER,
+ GRAMMAR,
+ TREE,
+ CATCH,
+ FINALLY,
+ THROWS,
+ PROTECTED,
+ PUBLIC,
+ PRIVATE,
+) = map(
+ Keyword,
+ """src scope options tokens fragment id lexer parser grammar tree catch finally throws protected
+ public private """.split(),
+)
KEYWORD = MatchFirst(keywords)
# Tokens
-EOL = Suppress(LineEnd()) # $
+EOL = Suppress(LineEnd()) # $
SGL_PRINTABLE = Char(printables)
-singleTextString = originalTextFor(ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace()
+singleTextString = originalTextFor(
+ ZeroOrMore(~EOL + (White(" \t") | Word(printables)))
+).leaveWhitespace()
XDIGIT = hexnums
INT = Word(nums)
-ESC = BSLASH + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | SGL_PRINTABLE)
+ESC = BSLASH + (
+ oneOf(list(r"nrtbf\">" + "'")) | ("u" + Word(hexnums, exact=4)) | SGL_PRINTABLE
+)
LITERAL_CHAR = ESC | ~(APOS | BSLASH) + SGL_PRINTABLE
CHAR_LITERAL = APOS + LITERAL_CHAR + APOS
STRING_LITERAL = APOS + Combine(OneOrMore(LITERAL_CHAR)) + APOS
DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"'
-DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(SGL_PRINTABLE) + '>>'
-TOKEN_REF = Word(alphas.upper(), alphanums+'_')
-RULE_REF = Word(alphas.lower(), alphanums+'_')
-ACTION_ESC = (BSLASH.suppress() + APOS
- | BSLASH.suppress()
- | BSLASH.suppress() + (~(APOS | QUOTE) + SGL_PRINTABLE)
- )
-ACTION_CHAR_LITERAL = (APOS + (ACTION_ESC | ~(BSLASH | APOS) + SGL_PRINTABLE) + APOS)
-ACTION_STRING_LITERAL = (QUOTE + ZeroOrMore(ACTION_ESC | ~(BSLASH | QUOTE) + SGL_PRINTABLE) + QUOTE)
+DOUBLE_ANGLE_STRING_LITERAL = "<<" + ZeroOrMore(SGL_PRINTABLE) + ">>"
+TOKEN_REF = Word(alphas.upper(), alphanums + "_")
+RULE_REF = Word(alphas.lower(), alphanums + "_")
+ACTION_ESC = (
+ BSLASH.suppress() + APOS
+ | BSLASH.suppress()
+ | BSLASH.suppress() + (~(APOS | QUOTE) + SGL_PRINTABLE)
+)
+ACTION_CHAR_LITERAL = APOS + (ACTION_ESC | ~(BSLASH | APOS) + SGL_PRINTABLE) + APOS
+ACTION_STRING_LITERAL = (
+ QUOTE + ZeroOrMore(ACTION_ESC | ~(BSLASH | QUOTE) + SGL_PRINTABLE) + QUOTE
+)
SRC = SRC_.suppress() + ACTION_STRING_LITERAL("file") + INT("line")
id = TOKEN_REF | RULE_REF
-SL_COMMENT = Suppress('//') + Suppress('$ANTLR') + SRC | ZeroOrMore(~EOL + Word(printables)) + EOL
+SL_COMMENT = (
+ Suppress("//") + Suppress("$ANTLR") + SRC
+ | ZeroOrMore(~EOL + Word(printables)) + EOL
+)
ML_COMMENT = cStyleComment
-WS = OneOrMore(Suppress(' ') | Suppress('\t') | (Optional(Suppress('\r')) + Literal('\n')))
+WS = OneOrMore(
+ Suppress(" ") | Suppress("\t") | (Optional(Suppress("\r")) + Literal("\n"))
+)
WS_LOOP = ZeroOrMore(SL_COMMENT | ML_COMMENT)
NESTED_ARG_ACTION = Forward()
-NESTED_ARG_ACTION << (LBRACK
- + ZeroOrMore(NESTED_ARG_ACTION
- | ACTION_STRING_LITERAL
- | ACTION_CHAR_LITERAL)
- + RBRACK)
+NESTED_ARG_ACTION << (
+ LBRACK
+ + ZeroOrMore(NESTED_ARG_ACTION | ACTION_STRING_LITERAL | ACTION_CHAR_LITERAL)
+ + RBRACK
+)
ARG_ACTION = NESTED_ARG_ACTION
NESTED_ACTION = Forward()
-NESTED_ACTION << (LBRACE
- + ZeroOrMore(NESTED_ACTION
- | SL_COMMENT
- | ML_COMMENT
- | ACTION_STRING_LITERAL
- | ACTION_CHAR_LITERAL)
- + RBRACE)
-ACTION = NESTED_ACTION + Optional('?')
+NESTED_ACTION << (
+ LBRACE
+ + ZeroOrMore(
+ NESTED_ACTION
+ | SL_COMMENT
+ | ML_COMMENT
+ | ACTION_STRING_LITERAL
+ | ACTION_CHAR_LITERAL
+ )
+ + RBRACE
+)
+ACTION = NESTED_ACTION + Optional("?")
SCOPE = SCOPE_.suppress()
-OPTIONS = OPTIONS_.suppress() + LBRACE # + WS_LOOP + Suppress('{')
-TOKENS = TOKENS_.suppress() + LBRACE # + WS_LOOP + Suppress('{')
+OPTIONS = OPTIONS_.suppress() + LBRACE # + WS_LOOP + Suppress('{')
+TOKENS = TOKENS_.suppress() + LBRACE # + WS_LOOP + Suppress('{')
TREE_BEGIN = ROOT + LPAR
-RANGE = Suppress('..')
-REWRITE = Suppress('->')
+RANGE = Suppress("..")
+REWRITE = Suppress("->")
# General Parser Definitions
# Grammar heading
-optionValue = id | STRING_LITERAL | CHAR_LITERAL | INT | Literal('*').setName("s")
+optionValue = id | STRING_LITERAL | CHAR_LITERAL | INT | Literal("*").setName("s")
option = Group(id("id") + EQ + optionValue("value"))("option")
optionsSpec = OPTIONS + Group(OneOrMore(option + SEMI))("options") + RBRACE
-tokenSpec = Group(TOKEN_REF("token_ref")
- + (EQ + (STRING_LITERAL | CHAR_LITERAL)("lit")))("token") + SEMI
+tokenSpec = (
+ Group(TOKEN_REF("token_ref") + (EQ + (STRING_LITERAL | CHAR_LITERAL)("lit")))(
+ "token"
+ )
+ + SEMI
+)
tokensSpec = TOKENS + Group(OneOrMore(tokenSpec))("tokens") + RBRACE
attrScope = SCOPE_.suppress() + id + ACTION
grammarType = LEXER + PARSER + TREE
actionScopeName = id | LEXER("l") | PARSER("p")
-action = AT + Optional(actionScopeName + Suppress('::')) + id + ACTION
-
-grammarHeading = (Optional(ML_COMMENT("ML_COMMENT"))
- + Optional(grammarType)
- + GRAMMAR
- + id("grammarName") + SEMI
- + Optional(optionsSpec)
- + Optional(tokensSpec)
- + ZeroOrMore(attrScope)
- + ZeroOrMore(action))
+action = AT + Optional(actionScopeName + Suppress("::")) + id + ACTION
+
+grammarHeading = (
+ Optional(ML_COMMENT("ML_COMMENT"))
+ + Optional(grammarType)
+ + GRAMMAR
+ + id("grammarName")
+ + SEMI
+ + Optional(optionsSpec)
+ + Optional(tokensSpec)
+ + ZeroOrMore(attrScope)
+ + ZeroOrMore(action)
+)
modifier = PROTECTED | PUBLIC | PRIVATE | FRAGMENT
ruleAction = AT + id + ACTION
throwsSpec = THROWS.suppress() + delimitedList(id)
-ruleScopeSpec = ((SCOPE_.suppress() + ACTION)
- | (SCOPE_.suppress() + delimitedList(id) + SEMI)
- | (SCOPE_.suppress() + ACTION + SCOPE_.suppress() + delimitedList(id) + SEMI))
+ruleScopeSpec = (
+ (SCOPE_.suppress() + ACTION)
+ | (SCOPE_.suppress() + delimitedList(id) + SEMI)
+ | (SCOPE_.suppress() + ACTION + SCOPE_.suppress() + delimitedList(id) + SEMI)
+)
unary_op = oneOf("^ !")
notTerminal = CHAR_LITERAL | TOKEN_REF | STRING_LITERAL
-terminal = (CHAR_LITERAL | TOKEN_REF + Optional(ARG_ACTION) | STRING_LITERAL | '.') + Optional(unary_op)
+terminal = (
+ CHAR_LITERAL | TOKEN_REF + Optional(ARG_ACTION) | STRING_LITERAL | "."
+) + Optional(unary_op)
block = Forward()
notSet = TIL + (notTerminal | block)
rangeNotPython = CHAR_LITERAL("c1") + RANGE + CHAR_LITERAL("c2")
-atom = Group((rangeNotPython + Optional(unary_op)("op"))
- | terminal
- | (notSet + Optional(unary_op)("op"))
- | (RULE_REF + Optional(ARG_ACTION("arg")) + Optional(unary_op)("op"))
- )
+atom = Group(
+ (rangeNotPython + Optional(unary_op)("op"))
+ | terminal
+ | (notSet + Optional(unary_op)("op"))
+ | (RULE_REF + Optional(ARG_ACTION("arg")) + Optional(unary_op)("op"))
+)
element = Forward()
-treeSpec = ROOT + LPAR + element*(2,) + RPAR
+treeSpec = ROOT + LPAR + element * (2,) + RPAR
ebnfSuffix = oneOf("? * +")
-ebnf = block + Optional(ebnfSuffix("op") | '=>')
-elementNoOptionSpec = ((id("result_name") + oneOf('= +=')("labelOp") + atom("atom") + Optional(ebnfSuffix))
- | (id("result_name") + oneOf('= +=')("labelOp") + block + Optional(ebnfSuffix))
- | atom("atom") + Optional(ebnfSuffix)
- | ebnf
- | ACTION
- | (treeSpec + Optional(ebnfSuffix))
- ) # | SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
+ebnf = block + Optional(ebnfSuffix("op") | "=>")
+elementNoOptionSpec = (
+ (id("result_name") + oneOf("= +=")("labelOp") + atom("atom") + Optional(ebnfSuffix))
+ | (id("result_name") + oneOf("= +=")("labelOp") + block + Optional(ebnfSuffix))
+ | atom("atom") + Optional(ebnfSuffix)
+ | ebnf
+ | ACTION
+ | (treeSpec + Optional(ebnfSuffix))
+) # | SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
element <<= Group(elementNoOptionSpec)("element")
# Do not ask me why group is needed twice... seems like the xml that you see is not always the real structure?
alternative = Group(Group(OneOrMore(element))("elements"))
-rewrite = Optional(Literal('TODO REWRITE RULES TODO'))
-block <<= (LPAR
- + Optional(Optional(optionsSpec("opts")) + COLON)
- + Group(alternative('a1')
- + rewrite
- + Group(ZeroOrMore(VERT
- + alternative('a2')
- + rewrite))("alternatives"))("block")
- + RPAR)
-altList = alternative('a1') + rewrite + Group(ZeroOrMore(VERT + alternative('a2') + rewrite))("alternatives")
+rewrite = Optional(Literal("TODO REWRITE RULES TODO"))
+block <<= (
+ LPAR
+ + Optional(Optional(optionsSpec("opts")) + COLON)
+ + Group(
+ alternative("a1")
+ + rewrite
+ + Group(ZeroOrMore(VERT + alternative("a2") + rewrite))("alternatives")
+ )("block")
+ + RPAR
+)
+altList = (
+ alternative("a1")
+ + rewrite
+ + Group(ZeroOrMore(VERT + alternative("a2") + rewrite))("alternatives")
+)
exceptionHandler = CATCH.suppress() + ARG_ACTION + ACTION
finallyClause = FINALLY.suppress() + ACTION
exceptionGroup = (OneOrMore(exceptionHandler) + Optional(finallyClause)) | finallyClause
-ruleHeading = (Optional(ML_COMMENT)("ruleComment")
- + Optional(modifier)("modifier")
- + id("ruleName")
- + Optional("!")
- + Optional(ARG_ACTION("arg"))
- + Optional(Suppress('returns') + ARG_ACTION("rt"))
- + Optional(throwsSpec)
- + Optional(optionsSpec)
- + Optional(ruleScopeSpec)
- + ZeroOrMore(ruleAction))
+ruleHeading = (
+ Optional(ML_COMMENT)("ruleComment")
+ + Optional(modifier)("modifier")
+ + id("ruleName")
+ + Optional("!")
+ + Optional(ARG_ACTION("arg"))
+ + Optional(Suppress("returns") + ARG_ACTION("rt"))
+ + Optional(throwsSpec)
+ + Optional(optionsSpec)
+ + Optional(ruleScopeSpec)
+ + ZeroOrMore(ruleAction)
+)
rule = Group(ruleHeading + COLON + altList + SEMI + Optional(exceptionGroup))("rule")
grammarDef = grammarHeading + Group(OneOrMore(rule))("rules")
+
def grammar():
return grammarDef
+
def __antlrAlternativesConverter(pyparsingRules, antlrBlock):
rule = None
- if hasattr(antlrBlock, 'alternatives') and antlrBlock.alternatives != '' and len(antlrBlock.alternatives) > 0:
+ if (
+ hasattr(antlrBlock, "alternatives")
+ and antlrBlock.alternatives != ""
+ and len(antlrBlock.alternatives) > 0
+ ):
alternatives = []
alternatives.append(__antlrAlternativeConverter(pyparsingRules, antlrBlock.a1))
for alternative in antlrBlock.alternatives:
- alternatives.append(__antlrAlternativeConverter(pyparsingRules, alternative))
+ alternatives.append(
+ __antlrAlternativeConverter(pyparsingRules, alternative)
+ )
rule = MatchFirst(alternatives)("anonymous_or")
- elif hasattr(antlrBlock, 'a1') and antlrBlock.a1 != '':
+ elif hasattr(antlrBlock, "a1") and antlrBlock.a1 != "":
rule = __antlrAlternativeConverter(pyparsingRules, antlrBlock.a1)
else:
- raise Exception('Not yet implemented')
+ raise Exception("Not yet implemented")
assert rule != None
return rule
+
def __antlrAlternativeConverter(pyparsingRules, antlrAlternative):
elementList = []
for element in antlrAlternative.elements:
rule = None
- if hasattr(element.atom, 'c1') and element.atom.c1 != '':
- regex = r'['+str(element.atom.c1[0])+'-'+str(element.atom.c2[0]+']')
+ if hasattr(element.atom, "c1") and element.atom.c1 != "":
+ regex = r"[" + str(element.atom.c1[0]) + "-" + str(element.atom.c2[0] + "]")
rule = Regex(regex)("anonymous_regex")
- elif hasattr(element, 'block') and element.block != '':
+ elif hasattr(element, "block") and element.block != "":
rule = __antlrAlternativesConverter(pyparsingRules, element.block)
else:
ruleRef = element.atom[0]
assert ruleRef in pyparsingRules
rule = pyparsingRules[ruleRef](ruleRef)
- if hasattr(element, 'op') and element.op != '':
- if element.op == '+':
+ if hasattr(element, "op") and element.op != "":
+ if element.op == "+":
rule = Group(OneOrMore(rule))("anonymous_one_or_more")
- elif element.op == '*':
+ elif element.op == "*":
rule = Group(ZeroOrMore(rule))("anonymous_zero_or_more")
- elif element.op == '?':
+ elif element.op == "?":
rule = Optional(rule)
else:
- raise Exception('rule operator not yet implemented : ' + element.op)
+ raise Exception("rule operator not yet implemented : " + element.op)
rule = rule
elementList.append(rule)
if len(elementList) > 1:
@@ -207,6 +310,7 @@ def __antlrAlternativeConverter(pyparsingRules, antlrAlternative):
assert rule is not None
return rule
+
def __antlrRuleConverter(pyparsingRules, antlrRule):
rule = None
rule = __antlrAlternativesConverter(pyparsingRules, antlrRule)
@@ -214,6 +318,7 @@ def __antlrRuleConverter(pyparsingRules, antlrRule):
rule(antlrRule.ruleName)
return rule
+
def antlrConverter(antlrGrammarTree):
pyparsingRules = {}
@@ -226,7 +331,7 @@ def antlrConverter(antlrGrammarTree):
antlrRules = {}
for antlrRule in antlrGrammarTree.rules:
antlrRules[antlrRule.ruleName] = antlrRule
- pyparsingRules[antlrRule.ruleName] = Forward() # antlr is a top down grammar
+ pyparsingRules[antlrRule.ruleName] = Forward() # antlr is a top down grammar
for antlrRuleName, antlrRule in list(antlrRules.items()):
pyparsingRule = __antlrRuleConverter(pyparsingRules, antlrRule)
assert pyparsingRule != None
@@ -234,6 +339,7 @@ def antlrConverter(antlrGrammarTree):
return pyparsingRules
+
if __name__ == "__main__":
text = """\