Diffstat (limited to 'examples/antlr_grammar.py')
-rw-r--r-- | examples/antlr_grammar.py | 322 |
1 file changed, 214 insertions, 108 deletions
diff --git a/examples/antlr_grammar.py b/examples/antlr_grammar.py
index c131cfb..49151ee 100644
--- a/examples/antlr_grammar.py
+++ b/examples/antlr_grammar.py
@@ -1,4 +1,4 @@
-'''
+"""
 antlr_grammar.py
 
 Created on 4 sept. 2010
@@ -8,196 +8,299 @@ Created on 4 sept. 2010
 Submitted by Luca DallOlio, September, 2010
 (Minor updates by Paul McGuire, June, 2012)
 (Code idiom updates by Paul McGuire, April, 2019)
-'''
-from pyparsing import (Word, ZeroOrMore, printables, Suppress, OneOrMore, Group,
-    LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword,
-    cStyleComment, Regex, Forward, MatchFirst, And, oneOf, alphas, alphanums,
-    delimitedList, Char)
+"""
+from pyparsing import (
+    Word,
+    ZeroOrMore,
+    printables,
+    Suppress,
+    OneOrMore,
+    Group,
+    LineEnd,
+    Optional,
+    White,
+    originalTextFor,
+    hexnums,
+    nums,
+    Combine,
+    Literal,
+    Keyword,
+    cStyleComment,
+    Regex,
+    Forward,
+    MatchFirst,
+    And,
+    oneOf,
+    alphas,
+    alphanums,
+    delimitedList,
+    Char,
+)
 
 # http://www.antlr.org/grammar/ANTLR/ANTLRv3.g
-QUOTE,APOS,EQ,LBRACK,RBRACK,LBRACE,RBRACE,LPAR,RPAR,ROOT,BANG,AT,TIL,SEMI,COLON,VERT = map(Suppress,
-    '"\'=[]{}()^!@~;:|')
-BSLASH = Literal('\\')
-keywords = (SRC_, SCOPE_, OPTIONS_, TOKENS_, FRAGMENT, ID, LEXER, PARSER, GRAMMAR, TREE, CATCH, FINALLY,
-    THROWS, PROTECTED, PUBLIC, PRIVATE, ) = map(Keyword,
-    """src scope options tokens fragment id lexer parser grammar tree catch finally throws protected
-    public private """.split())
+(
+    QUOTE,
+    APOS,
+    EQ,
+    LBRACK,
+    RBRACK,
+    LBRACE,
+    RBRACE,
+    LPAR,
+    RPAR,
+    ROOT,
+    BANG,
+    AT,
+    TIL,
+    SEMI,
+    COLON,
+    VERT,
+) = map(Suppress, "\"'=[]{}()^!@~;:|")
+BSLASH = Literal("\\")
+keywords = (
+    SRC_,
+    SCOPE_,
+    OPTIONS_,
+    TOKENS_,
+    FRAGMENT,
+    ID,
+    LEXER,
+    PARSER,
+    GRAMMAR,
+    TREE,
+    CATCH,
+    FINALLY,
+    THROWS,
+    PROTECTED,
+    PUBLIC,
+    PRIVATE,
+) = map(
+    Keyword,
+    """src scope options tokens fragment id lexer parser grammar tree catch finally throws protected
+    public private """.split(),
+)
 KEYWORD = MatchFirst(keywords)
 
 # Tokens
-EOL = Suppress(LineEnd()) # $
+EOL = Suppress(LineEnd())  # $
 SGL_PRINTABLE = Char(printables)
-singleTextString = originalTextFor(ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace()
+singleTextString = originalTextFor(
+    ZeroOrMore(~EOL + (White(" \t") | Word(printables)))
+).leaveWhitespace()
 XDIGIT = hexnums
 INT = Word(nums)
-ESC = BSLASH + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | SGL_PRINTABLE)
+ESC = BSLASH + (
+    oneOf(list(r"nrtbf\">" + "'")) | ("u" + Word(hexnums, exact=4)) | SGL_PRINTABLE
+)
 LITERAL_CHAR = ESC | ~(APOS | BSLASH) + SGL_PRINTABLE
 CHAR_LITERAL = APOS + LITERAL_CHAR + APOS
 STRING_LITERAL = APOS + Combine(OneOrMore(LITERAL_CHAR)) + APOS
 DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"'
-DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(SGL_PRINTABLE) + '>>'
-TOKEN_REF = Word(alphas.upper(), alphanums+'_')
-RULE_REF = Word(alphas.lower(), alphanums+'_')
-ACTION_ESC = (BSLASH.suppress() + APOS
-              | BSLASH.suppress()
-              | BSLASH.suppress() + (~(APOS | QUOTE) + SGL_PRINTABLE)
-              )
-ACTION_CHAR_LITERAL = (APOS + (ACTION_ESC | ~(BSLASH | APOS) + SGL_PRINTABLE) + APOS)
-ACTION_STRING_LITERAL = (QUOTE + ZeroOrMore(ACTION_ESC | ~(BSLASH | QUOTE) + SGL_PRINTABLE) + QUOTE)
+DOUBLE_ANGLE_STRING_LITERAL = "<<" + ZeroOrMore(SGL_PRINTABLE) + ">>"
+TOKEN_REF = Word(alphas.upper(), alphanums + "_")
+RULE_REF = Word(alphas.lower(), alphanums + "_")
+ACTION_ESC = (
+    BSLASH.suppress() + APOS
+    | BSLASH.suppress()
+    | BSLASH.suppress() + (~(APOS | QUOTE) + SGL_PRINTABLE)
+)
+ACTION_CHAR_LITERAL = APOS + (ACTION_ESC | ~(BSLASH | APOS) + SGL_PRINTABLE) + APOS
+ACTION_STRING_LITERAL = (
+    QUOTE + ZeroOrMore(ACTION_ESC | ~(BSLASH | QUOTE) + SGL_PRINTABLE) + QUOTE
+)
 
 SRC = SRC_.suppress() + ACTION_STRING_LITERAL("file") + INT("line")
 id = TOKEN_REF | RULE_REF
-SL_COMMENT = Suppress('//') + Suppress('$ANTLR') + SRC | ZeroOrMore(~EOL + Word(printables)) + EOL
+SL_COMMENT = (
+    Suppress("//") + Suppress("$ANTLR") + SRC
+    | ZeroOrMore(~EOL + Word(printables)) + EOL
+)
 ML_COMMENT = cStyleComment
-WS = OneOrMore(Suppress(' ') | Suppress('\t') | (Optional(Suppress('\r')) + Literal('\n')))
+WS = OneOrMore(
+    Suppress(" ") | Suppress("\t") | (Optional(Suppress("\r")) + Literal("\n"))
+)
 WS_LOOP = ZeroOrMore(SL_COMMENT | ML_COMMENT)
 NESTED_ARG_ACTION = Forward()
-NESTED_ARG_ACTION << (LBRACK
-                      + ZeroOrMore(NESTED_ARG_ACTION
-                                   | ACTION_STRING_LITERAL
-                                   | ACTION_CHAR_LITERAL)
-                      + RBRACK)
+NESTED_ARG_ACTION << (
+    LBRACK
+    + ZeroOrMore(NESTED_ARG_ACTION | ACTION_STRING_LITERAL | ACTION_CHAR_LITERAL)
+    + RBRACK
+)
 ARG_ACTION = NESTED_ARG_ACTION
 NESTED_ACTION = Forward()
-NESTED_ACTION << (LBRACE
-                  + ZeroOrMore(NESTED_ACTION
-                               | SL_COMMENT
-                               | ML_COMMENT
-                               | ACTION_STRING_LITERAL
-                               | ACTION_CHAR_LITERAL)
-                  + RBRACE)
-ACTION = NESTED_ACTION + Optional('?')
+NESTED_ACTION << (
+    LBRACE
+    + ZeroOrMore(
+        NESTED_ACTION
+        | SL_COMMENT
+        | ML_COMMENT
+        | ACTION_STRING_LITERAL
+        | ACTION_CHAR_LITERAL
+    )
+    + RBRACE
+)
+ACTION = NESTED_ACTION + Optional("?")
 SCOPE = SCOPE_.suppress()
-OPTIONS = OPTIONS_.suppress() + LBRACE # + WS_LOOP + Suppress('{')
-TOKENS = TOKENS_.suppress() + LBRACE # + WS_LOOP + Suppress('{')
+OPTIONS = OPTIONS_.suppress() + LBRACE  # + WS_LOOP + Suppress('{')
+TOKENS = TOKENS_.suppress() + LBRACE  # + WS_LOOP + Suppress('{')
 TREE_BEGIN = ROOT + LPAR
-RANGE = Suppress('..')
-REWRITE = Suppress('->')
+RANGE = Suppress("..")
+REWRITE = Suppress("->")
 
 # General Parser Definitions
 
 # Grammar heading
-optionValue = id | STRING_LITERAL | CHAR_LITERAL | INT | Literal('*').setName("s")
+optionValue = id | STRING_LITERAL | CHAR_LITERAL | INT | Literal("*").setName("s")
 option = Group(id("id") + EQ + optionValue("value"))("option")
 optionsSpec = OPTIONS + Group(OneOrMore(option + SEMI))("options") + RBRACE
-tokenSpec = Group(TOKEN_REF("token_ref")
-                  + (EQ + (STRING_LITERAL | CHAR_LITERAL)("lit")))("token") + SEMI
+tokenSpec = (
+    Group(TOKEN_REF("token_ref") + (EQ + (STRING_LITERAL | CHAR_LITERAL)("lit")))(
+        "token"
+    )
+    + SEMI
+)
 tokensSpec = TOKENS + Group(OneOrMore(tokenSpec))("tokens") + RBRACE
 attrScope = SCOPE_.suppress() + id + ACTION
 grammarType = LEXER + PARSER + TREE
 actionScopeName = id | LEXER("l") | PARSER("p")
-action = AT + Optional(actionScopeName + Suppress('::')) + id + ACTION
-
-grammarHeading = (Optional(ML_COMMENT("ML_COMMENT"))
-                  + Optional(grammarType)
-                  + GRAMMAR
-                  + id("grammarName") + SEMI
-                  + Optional(optionsSpec)
-                  + Optional(tokensSpec)
-                  + ZeroOrMore(attrScope)
-                  + ZeroOrMore(action))
+action = AT + Optional(actionScopeName + Suppress("::")) + id + ACTION
+
+grammarHeading = (
+    Optional(ML_COMMENT("ML_COMMENT"))
+    + Optional(grammarType)
+    + GRAMMAR
+    + id("grammarName")
+    + SEMI
+    + Optional(optionsSpec)
+    + Optional(tokensSpec)
+    + ZeroOrMore(attrScope)
+    + ZeroOrMore(action)
+)
 
 modifier = PROTECTED | PUBLIC | PRIVATE | FRAGMENT
 ruleAction = AT + id + ACTION
 throwsSpec = THROWS.suppress() + delimitedList(id)
-ruleScopeSpec = ((SCOPE_.suppress() + ACTION)
-                 | (SCOPE_.suppress() + delimitedList(id) + SEMI)
-                 | (SCOPE_.suppress() + ACTION + SCOPE_.suppress() + delimitedList(id) + SEMI))
+ruleScopeSpec = (
+    (SCOPE_.suppress() + ACTION)
+    | (SCOPE_.suppress() + delimitedList(id) + SEMI)
+    | (SCOPE_.suppress() + ACTION + SCOPE_.suppress() + delimitedList(id) + SEMI)
+)
 unary_op = oneOf("^ !")
 notTerminal = CHAR_LITERAL | TOKEN_REF | STRING_LITERAL
-terminal = (CHAR_LITERAL | TOKEN_REF + Optional(ARG_ACTION) | STRING_LITERAL | '.') + Optional(unary_op)
+terminal = (
+    CHAR_LITERAL | TOKEN_REF + Optional(ARG_ACTION) | STRING_LITERAL | "."
+) + Optional(unary_op)
 block = Forward()
 notSet = TIL + (notTerminal | block)
 rangeNotPython = CHAR_LITERAL("c1") + RANGE + CHAR_LITERAL("c2")
-atom = Group((rangeNotPython + Optional(unary_op)("op"))
-             | terminal
-             | (notSet + Optional(unary_op)("op"))
-             | (RULE_REF + Optional(ARG_ACTION("arg")) + Optional(unary_op)("op"))
-             )
+atom = Group(
+    (rangeNotPython + Optional(unary_op)("op"))
+    | terminal
+    | (notSet + Optional(unary_op)("op"))
+    | (RULE_REF + Optional(ARG_ACTION("arg")) + Optional(unary_op)("op"))
+)
 element = Forward()
-treeSpec = ROOT + LPAR + element*(2,) + RPAR
+treeSpec = ROOT + LPAR + element * (2,) + RPAR
 ebnfSuffix = oneOf("? * +")
-ebnf = block + Optional(ebnfSuffix("op") | '=>')
-elementNoOptionSpec = ((id("result_name") + oneOf('= +=')("labelOp") + atom("atom") + Optional(ebnfSuffix))
-                       | (id("result_name") + oneOf('= +=')("labelOp") + block + Optional(ebnfSuffix))
-                       | atom("atom") + Optional(ebnfSuffix)
-                       | ebnf
-                       | ACTION
-                       | (treeSpec + Optional(ebnfSuffix))
-                       ) # | SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
+ebnf = block + Optional(ebnfSuffix("op") | "=>")
+elementNoOptionSpec = (
+    (id("result_name") + oneOf("= +=")("labelOp") + atom("atom") + Optional(ebnfSuffix))
+    | (id("result_name") + oneOf("= +=")("labelOp") + block + Optional(ebnfSuffix))
+    | atom("atom") + Optional(ebnfSuffix)
+    | ebnf
+    | ACTION
+    | (treeSpec + Optional(ebnfSuffix))
+)  # | SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
 element <<= Group(elementNoOptionSpec)("element")
 
 # Do not ask me why group is needed twice... seems like the xml that you see is not always the real structure?
 alternative = Group(Group(OneOrMore(element))("elements"))
-rewrite = Optional(Literal('TODO REWRITE RULES TODO'))
-block <<= (LPAR
-           + Optional(Optional(optionsSpec("opts")) + COLON)
-           + Group(alternative('a1')
-                   + rewrite
-                   + Group(ZeroOrMore(VERT
-                                      + alternative('a2')
-                                      + rewrite))("alternatives"))("block")
-           + RPAR)
-altList = alternative('a1') + rewrite + Group(ZeroOrMore(VERT + alternative('a2') + rewrite))("alternatives")
+rewrite = Optional(Literal("TODO REWRITE RULES TODO"))
+block <<= (
+    LPAR
+    + Optional(Optional(optionsSpec("opts")) + COLON)
+    + Group(
+        alternative("a1")
+        + rewrite
+        + Group(ZeroOrMore(VERT + alternative("a2") + rewrite))("alternatives")
+    )("block")
+    + RPAR
+)
+altList = (
+    alternative("a1")
+    + rewrite
+    + Group(ZeroOrMore(VERT + alternative("a2") + rewrite))("alternatives")
+)
 
 exceptionHandler = CATCH.suppress() + ARG_ACTION + ACTION
 finallyClause = FINALLY.suppress() + ACTION
 exceptionGroup = (OneOrMore(exceptionHandler) + Optional(finallyClause)) | finallyClause
 
-ruleHeading = (Optional(ML_COMMENT)("ruleComment")
-               + Optional(modifier)("modifier")
-               + id("ruleName")
-               + Optional("!")
-               + Optional(ARG_ACTION("arg"))
-               + Optional(Suppress('returns') + ARG_ACTION("rt"))
-               + Optional(throwsSpec)
-               + Optional(optionsSpec)
-               + Optional(ruleScopeSpec)
-               + ZeroOrMore(ruleAction))
+ruleHeading = (
+    Optional(ML_COMMENT)("ruleComment")
+    + Optional(modifier)("modifier")
+    + id("ruleName")
+    + Optional("!")
+    + Optional(ARG_ACTION("arg"))
+    + Optional(Suppress("returns") + ARG_ACTION("rt"))
+    + Optional(throwsSpec)
+    + Optional(optionsSpec)
+    + Optional(ruleScopeSpec)
+    + ZeroOrMore(ruleAction)
+)
 rule = Group(ruleHeading + COLON + altList + SEMI + Optional(exceptionGroup))("rule")
 
 grammarDef = grammarHeading + Group(OneOrMore(rule))("rules")
 
+
 def grammar():
     return grammarDef
 
+
 def __antlrAlternativesConverter(pyparsingRules, antlrBlock):
     rule = None
-    if hasattr(antlrBlock, 'alternatives') and antlrBlock.alternatives != '' and len(antlrBlock.alternatives) > 0:
+    if (
+        hasattr(antlrBlock, "alternatives")
+        and antlrBlock.alternatives != ""
+        and len(antlrBlock.alternatives) > 0
+    ):
         alternatives = []
         alternatives.append(__antlrAlternativeConverter(pyparsingRules, antlrBlock.a1))
         for alternative in antlrBlock.alternatives:
-            alternatives.append(__antlrAlternativeConverter(pyparsingRules, alternative))
+            alternatives.append(
+                __antlrAlternativeConverter(pyparsingRules, alternative)
+            )
         rule = MatchFirst(alternatives)("anonymous_or")
-    elif hasattr(antlrBlock, 'a1') and antlrBlock.a1 != '':
+    elif hasattr(antlrBlock, "a1") and antlrBlock.a1 != "":
         rule = __antlrAlternativeConverter(pyparsingRules, antlrBlock.a1)
     else:
-        raise Exception('Not yet implemented')
+        raise Exception("Not yet implemented")
     assert rule != None
     return rule
 
+
 def __antlrAlternativeConverter(pyparsingRules, antlrAlternative):
     elementList = []
     for element in antlrAlternative.elements:
         rule = None
-        if hasattr(element.atom, 'c1') and element.atom.c1 != '':
-            regex = r'['+str(element.atom.c1[0])+'-'+str(element.atom.c2[0]+']')
+        if hasattr(element.atom, "c1") and element.atom.c1 != "":
+            regex = r"[" + str(element.atom.c1[0]) + "-" + str(element.atom.c2[0] + "]")
             rule = Regex(regex)("anonymous_regex")
-        elif hasattr(element, 'block') and element.block != '':
+        elif hasattr(element, "block") and element.block != "":
             rule = __antlrAlternativesConverter(pyparsingRules, element.block)
         else:
             ruleRef = element.atom[0]
             assert ruleRef in pyparsingRules
             rule = pyparsingRules[ruleRef](ruleRef)
-        if hasattr(element, 'op') and element.op != '':
-            if element.op == '+':
+        if hasattr(element, "op") and element.op != "":
+            if element.op == "+":
                 rule = Group(OneOrMore(rule))("anonymous_one_or_more")
-            elif element.op == '*':
+            elif element.op == "*":
                 rule = Group(ZeroOrMore(rule))("anonymous_zero_or_more")
-            elif element.op == '?':
+            elif element.op == "?":
                 rule = Optional(rule)
             else:
-                raise Exception('rule operator not yet implemented : ' + element.op)
+                raise Exception("rule operator not yet implemented : " + element.op)
             rule = rule
         elementList.append(rule)
     if len(elementList) > 1:
@@ -207,6 +310,7 @@ def __antlrAlternativeConverter(pyparsingRules, antlrAlternative):
     assert rule is not None
     return rule
 
+
 def __antlrRuleConverter(pyparsingRules, antlrRule):
     rule = None
     rule = __antlrAlternativesConverter(pyparsingRules, antlrRule)
@@ -214,6 +318,7 @@ def __antlrRuleConverter(pyparsingRules, antlrRule):
     rule(antlrRule.ruleName)
     return rule
 
+
 def antlrConverter(antlrGrammarTree):
 
     pyparsingRules = {}
@@ -226,7 +331,7 @@ def antlrConverter(antlrGrammarTree):
     antlrRules = {}
    for antlrRule in antlrGrammarTree.rules:
         antlrRules[antlrRule.ruleName] = antlrRule
-        pyparsingRules[antlrRule.ruleName] = Forward() # antlr is a top down grammar
+        pyparsingRules[antlrRule.ruleName] = Forward()  # antlr is a top down grammar
     for antlrRuleName, antlrRule in list(antlrRules.items()):
         pyparsingRule = __antlrRuleConverter(pyparsingRules, antlrRule)
         assert pyparsingRule != None
@@ -234,6 +339,7 @@
 
     return pyparsingRules
 
+
 if __name__ == "__main__":
 
     text = """\
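
Note (not part of the diff above): a minimal sketch of how the converter defined in this example might be driven. The import path and the tiny two-rule ANTLR grammar are assumptions made purely for illustration; the file's own __main__ block (cut off above) presumably exercises it against its own sample grammar text.

# Hypothetical driver; assumes examples/antlr_grammar.py is importable from the current directory.
from antlr_grammar import grammar, antlrConverter

# Invented two-rule ANTLR grammar, used only to illustrate the round trip.
antlr_source = """\
grammar Digits;
number : DIGIT ;
DIGIT : '0'..'9' ;
"""

antlr_tree = grammar().parseString(antlr_source)  # parse the ANTLR grammar text itself
rules = antlrConverter(antlr_tree)  # dict of generated pyparsing rules, keyed by rule name
print(rules["number"].parseString("7"))  # run one converted rule against sample input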