summary refs log tree commit diff
path: root/src/examples/antlr_grammar.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/examples/antlr_grammar.py')
-rw-r--r-- src/examples/antlr_grammar.py 218
1 files changed, 218 insertions, 0 deletions
diff --git a/src/examples/antlr_grammar.py b/src/examples/antlr_grammar.py
new file mode 100644
index 0000000..b355ab5
--- /dev/null
+++ b/src/examples/antlr_grammar.py
@@ -0,0 +1,218 @@
+'''
+antlr_grammar.py
+
+Created on 4 sept. 2010
+
+@author: luca
+
+(Minor updates by Paul McGuire, June, 2012)
+'''
+from pyparsing import Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, \
+ LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, \
+ cStyleComment, Regex, Forward, MatchFirst, And, srange, oneOf, alphas, alphanums, \
+ delimitedList
+
+# http://www.antlr.org/grammar/ANTLR/ANTLRv3.g
+
+# Tokens
+# pyparsing counterparts of the lexer rules of the ANTLR v3 meta-grammar
+# referenced above; each name mirrors the rule it is modelled on.
+
+# End of line, suppressed from the results.
+EOL = Suppress(LineEnd()) # $
+# Original text of a run of blanks / printable words that stops before the end of line.
+singleTextString = originalTextFor(ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace()
+XDIGIT = hexnums # plain string of hex digit characters, not a parser element
+INT = Word(nums)
+# Backslash escape: one of n r t b f \ " > ', a 'u' plus four hex digits, or any single printable character.
+ESC = Literal('\\') + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | Word(printables, exact=1))
+# One character of a quoted literal: an escape, or any printable character except ' and \.
+LITERAL_CHAR = ESC | ~(Literal("'") | Literal('\\')) + Word(printables, exact=1)
+CHAR_LITERAL = Suppress("'") + LITERAL_CHAR + Suppress("'")
+STRING_LITERAL = Suppress("'") + Combine(OneOrMore(LITERAL_CHAR)) + Suppress("'")
+# NOTE(review): LITERAL_CHAR does not stop at '"', so the greedy ZeroOrMore appears to
+# swallow the closing quote; not referenced elsewhere in this listing - confirm before use.
+DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"'
+# NOTE(review): the single-character Word also matches '>', so the closing '>>' looks
+# unreachable; not referenced elsewhere in this listing - confirm before use.
+DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(Word(printables, exact=1)) + '>>'
+# Identifiers: token names start with an upper-case letter, rule names with a lower-case one.
+TOKEN_REF = Word(alphas.upper(), alphanums+'_')
+RULE_REF = Word(alphas.lower(), alphanums+'_')
+# Escapes and quoted literals as they appear inside embedded actions.
+ACTION_ESC = (Suppress("\\") + Suppress("'")) | Suppress('\\"') | Suppress('\\') + (~(Literal("'") | Literal('"')) + Word(printables, exact=1))
+ACTION_CHAR_LITERAL = Suppress("'") + (ACTION_ESC | ~(Literal('\\') | Literal("'")) + Word(printables, exact=1)) + Suppress("'")
+ACTION_STRING_LITERAL = Suppress('"') + ZeroOrMore(ACTION_ESC | ~(Literal('\\') | Literal('"')) + Word(printables, exact=1)) + Suppress('"')
+# 'src "file" line' marker that may follow '// $ANTLR' in a single-line comment.
+SRC = Suppress('src') + ACTION_STRING_LITERAL("file") + INT("line")
+id = TOKEN_REF | RULE_REF
+# Single-line comment.  The '//' prefix has to apply to BOTH alternatives: '+' binds
+# tighter than '|', so without the parentheses the second alternative would match
+# any line at all, commented or not.
+SL_COMMENT = Suppress('//') + (Suppress('$ANTLR') + SRC | ZeroOrMore(~EOL + Word(printables))) + EOL
+ML_COMMENT = cStyleComment
+WS = OneOrMore(Suppress(' ') | Suppress('\t') | (Optional(Suppress('\r')) + Literal('\n')))
+WS_LOOP = ZeroOrMore(SL_COMMENT | ML_COMMENT)
+# Bracketed argument action; Forward because brackets may nest.
+NESTED_ARG_ACTION = Forward()
+NESTED_ARG_ACTION << Suppress('[') + ZeroOrMore(NESTED_ARG_ACTION | ACTION_STRING_LITERAL | ACTION_CHAR_LITERAL) + Suppress(']')
+ARG_ACTION = NESTED_ARG_ACTION
+# Braced action; Forward because braces may nest.
+NESTED_ACTION = Forward()
+NESTED_ACTION << Suppress('{') + ZeroOrMore(NESTED_ACTION | SL_COMMENT | ML_COMMENT | ACTION_STRING_LITERAL | ACTION_CHAR_LITERAL) + Suppress('}')
+ACTION = NESTED_ACTION + Optional('?')
+SCOPE = Suppress('scope')
+OPTIONS = Suppress('options') + Suppress('{') # + WS_LOOP + Suppress('{')
+TOKENS = Suppress('tokens') + Suppress('{') # + WS_LOOP + Suppress('{')
+FRAGMENT = 'fragment'
+TREE_BEGIN = Suppress('^(')
+ROOT = Suppress('^')
+BANG = Suppress('!')
+RANGE = Suppress('..')
+REWRITE = Suppress('->')
+
+# General Parser Definitions
+
+# Grammar heading
+# Everything that may precede the rules: optional leading comment, grammar type
+# and name, options{} / tokens{} sections, global scopes and @actions.
+optionValue = id | STRING_LITERAL | CHAR_LITERAL | INT | Literal('*').setName("s")
+
+option = Group(id("id") + Suppress('=') + optionValue("value"))("option")
+optionsSpec = OPTIONS + Group(OneOrMore(option + Suppress(';')))("options") + Suppress('}')
+tokenSpec = Group(TOKEN_REF("token_ref") + (Suppress('=') + (STRING_LITERAL | CHAR_LITERAL)("lit")))("token") + Suppress(';')
+tokensSpec = TOKENS + Group(OneOrMore(tokenSpec))("tokens") + Suppress('}')
+attrScope = Suppress('scope') + id + ACTION
+# The grammar type is ONE of these keywords, hence alternatives ('|') rather than a
+# sequence ('+'); as a sequence it could only match the text "lexer parser tree".
+grammarType = Keyword('lexer') | Keyword('parser') | Keyword('tree')
+actionScopeName = id | Keyword('lexer')("l") | Keyword('parser')("p")
+action = Suppress('@') + Optional(actionScopeName + Suppress('::')) + id + ACTION
+
+grammarHeading = Optional(ML_COMMENT("ML_COMMENT")) + Optional(grammarType) + Suppress('grammar') + id("grammarName") + Suppress(';') + Optional(optionsSpec) + Optional(tokensSpec) + ZeroOrMore(attrScope) + ZeroOrMore(action)
+
+# Rule definitions: heading (modifier, name, arguments, options, scopes, actions),
+# then the '|'-separated alternatives, then optional exception handlers.
+modifier = Keyword('protected') | Keyword('public') | Keyword('private') | Keyword('fragment')
+ruleAction = Suppress('@') + id + ACTION
+throwsSpec = Suppress('throws') + delimitedList(id)
+# '|' builds a MatchFirst, which commits to the first alternative that matches, so the
+# long form 'scope ACTION scope ids ;' must be tried before its own prefix 'scope ACTION'.
+ruleScopeSpec = (Suppress('scope') + ACTION + Suppress('scope') + delimitedList(id) + Suppress(';')) | (Suppress('scope') + ACTION) | (Suppress('scope') + delimitedList(id) + Suppress(';'))
+unary_op = oneOf("^ !")
+notTerminal = CHAR_LITERAL | TOKEN_REF | STRING_LITERAL
+terminal = (CHAR_LITERAL | TOKEN_REF + Optional(ARG_ACTION) | STRING_LITERAL | '.') + Optional(unary_op)
+# block and element are Forwards because blocks contain elements and elements contain blocks.
+block = Forward()
+notSet = Suppress('~') + (notTerminal | block)
+# Character range 'a'..'z'; the bounds are exposed as c1 and c2.
+rangeNotPython = CHAR_LITERAL("c1") + RANGE + CHAR_LITERAL("c2")
+atom = Group(rangeNotPython + Optional(unary_op)("op")) | terminal | (notSet + Optional(unary_op)("op")) | (RULE_REF + Optional(ARG_ACTION("arg")) + Optional(unary_op)("op"))
+element = Forward()
+treeSpec = Suppress('^(') + element*(2,) + Suppress(')')
+ebnfSuffix = oneOf("? * +")
+ebnf = block + Optional(ebnfSuffix("op") | '=>')
+elementNoOptionSpec = (id("result_name") + oneOf('= +=')("labelOp") + atom("atom") + Optional(ebnfSuffix)) | (id("result_name") + oneOf('= +=')("labelOp") + block + Optional(ebnfSuffix)) | atom("atom") + Optional(ebnfSuffix) | ebnf | ACTION | (treeSpec + Optional(ebnfSuffix)) # | SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
+element << Group(elementNoOptionSpec)("element")
+alternative = Group(Group(OneOrMore(element))("elements")) # Do not ask me why group is needed twice... seems like the xml that you see is not always the real structure?
+# Rewrite rules are not supported yet; this placeholder only matches the literal marker text.
+rewrite = Optional(Literal('TODO REWRITE RULES TODO'))
+block << Suppress('(') + Optional(Optional(optionsSpec("opts")) + Suppress(':')) + Group(alternative('a1') + rewrite + Group(ZeroOrMore(Suppress('|') + alternative('a2') + rewrite))("alternatives"))("block") + Suppress(')')
+# First alternative is named a1; the ones following each '|' are grouped as "alternatives".
+altList = alternative('a1') + rewrite + Group(ZeroOrMore(Suppress('|') + alternative('a2') + rewrite))("alternatives")
+exceptionHandler = Suppress('catch') + ARG_ACTION + ACTION
+finallyClause = Suppress('finally') + ACTION
+exceptionGroup = (OneOrMore(exceptionHandler) + Optional(finallyClause)) | finallyClause
+
+ruleHeading = Optional(ML_COMMENT)("ruleComment") + Optional(modifier)("modifier") + id("ruleName") + Optional("!") + Optional(ARG_ACTION("arg")) + Optional(Suppress('returns') + ARG_ACTION("rt")) + Optional(throwsSpec) + Optional(optionsSpec) + Optional(ruleScopeSpec) + ZeroOrMore(ruleAction)
+rule = Group(ruleHeading + Suppress(':') + altList + Suppress(';') + Optional(exceptionGroup))("rule")
+
+# A complete grammar: the heading followed by one or more rules.
+grammarDef = grammarHeading + Group(OneOrMore(rule))("rules")
+
+def grammar():
+ '''Return the module-level grammarDef expression (grammar heading followed by one or more rules).'''
+ return grammarDef
+
+def __antlrAlternativesConverter(pyparsingRules, antlrBlock):
+    '''Convert the alternatives of a parsed ANTLR rule or block into one pyparsing expression.
+
+    pyparsingRules -- dict mapping token and rule names to pyparsing expressions
+    antlrBlock -- parse results exposing ``a1`` (the first alternative) and
+        ``alternatives`` (the alternatives that follow each '|')
+
+    Returns a MatchFirst named "anonymous_or" when there is more than one
+    alternative, otherwise the conversion of the single alternative ``a1``.
+    Raises NotImplementedError (an Exception subclass) when neither is present.
+    '''
+    rule = None
+    if hasattr(antlrBlock, 'alternatives') and antlrBlock.alternatives != '' and len(antlrBlock.alternatives) > 0:
+        # a1 always comes first, followed by the alternatives after each '|'.
+        alternatives = [__antlrAlternativeConverter(pyparsingRules, antlrBlock.a1)]
+        alternatives.extend(__antlrAlternativeConverter(pyparsingRules, alternative)
+                            for alternative in antlrBlock.alternatives)
+        rule = MatchFirst(alternatives)("anonymous_or")
+    elif hasattr(antlrBlock, 'a1') and antlrBlock.a1 != '':
+        rule = __antlrAlternativeConverter(pyparsingRules, antlrBlock.a1)
+    else:
+        raise NotImplementedError('Not yet implemented')
+    # Identity test: '!=' on a parser element goes through pyparsing's overloaded equality.
+    assert rule is not None
+    return rule
+
+def __antlrAlternativeConverter(pyparsingRules, antlrAlternative):
+    '''Convert one parsed ANTLR alternative (a sequence of elements) to a pyparsing expression.
+
+    pyparsingRules -- dict mapping token and rule names to pyparsing expressions
+    antlrAlternative -- parse results whose ``elements`` entry lists the elements
+
+    Each element becomes a Regex character class (character range), a nested
+    alternatives expression (block) or a named reference to an existing
+    token/rule, optionally wrapped according to its '+', '*' or '?' suffix.
+    Returns the single converted element, or a Group(And(...)) named
+    "anonymous_and" when the alternative has several elements.
+    Raises NotImplementedError (an Exception subclass) for any other suffix.
+    '''
+    import re  # local import: only needed to escape the range bounds below
+
+    elementList = []
+    for element in antlrAlternative.elements:
+        if hasattr(element.atom, 'c1') and element.atom.c1 != '':
+            # Character range 'a'..'z' -> regex character class [a-z].  The bounds are
+            # escaped so that characters such as ']', '^' or '\' stay literal.
+            lowerBound = re.escape(str(element.atom.c1[0]))
+            upperBound = re.escape(str(element.atom.c2[0]))
+            rule = Regex('[' + lowerBound + '-' + upperBound + ']')("anonymous_regex")
+        elif hasattr(element, 'block') and element.block != '':
+            rule = __antlrAlternativesConverter(pyparsingRules, element.block)
+        else:
+            # Reference to a token or rule; the result is named after the referenced rule.
+            ruleRef = element.atom
+            assert ruleRef in pyparsingRules, 'reference to unknown rule or token: %r' % (ruleRef,)
+            rule = pyparsingRules[ruleRef](ruleRef)
+        if hasattr(element, 'op') and element.op != '':
+            if element.op == '+':
+                rule = Group(OneOrMore(rule))("anonymous_one_or_more")
+            elif element.op == '*':
+                rule = Group(ZeroOrMore(rule))("anonymous_zero_or_more")
+            elif element.op == '?':
+                rule = Optional(rule)
+            else:
+                raise NotImplementedError('rule operator not yet implemented : ' + element.op)
+        elementList.append(rule)
+    if len(elementList) > 1:
+        rule = Group(And(elementList))("anonymous_and")
+    else:
+        rule = elementList[0]
+    # Identity test: '!=' on a parser element goes through pyparsing's overloaded equality.
+    assert rule is not None
+    return rule
+
+def __antlrRuleConverter(pyparsingRules, antlrRule):
+    '''Convert one parsed ANTLR rule into a pyparsing expression named after the rule.
+
+    pyparsingRules -- dict mapping token and rule names to pyparsing expressions
+    antlrRule -- parse results of a single rule; ``ruleName`` supplies the results name
+
+    Returns the converted expression carrying ``antlrRule.ruleName`` as results name.
+    '''
+    rule = __antlrAlternativesConverter(pyparsingRules, antlrRule)
+    assert rule is not None
+    # Calling an expression with a name is shorthand for setResultsName(), which
+    # returns a named *copy*; that copy has to be kept or the name is silently lost.
+    return rule(antlrRule.ruleName)
+
+def antlrConverter(antlrGrammarTree):
+    '''Build pyparsing expressions for every token and rule of a parsed ANTLR grammar.
+
+    antlrGrammarTree -- parse results of an ANTLR grammar; its ``tokens`` and
+        ``rules`` entries are read here
+
+    Returns a dict mapping each token name to a Literal and each rule name to
+    a Forward holding the converted rule.
+    '''
+    pyparsingRules = {}
+    # Every declared token becomes a plain literal.
+    for antlrToken in antlrGrammarTree.tokens:
+        pyparsingRules[antlrToken.token_ref] = Literal(antlrToken.lit)
+    # First pass: register a Forward placeholder per rule, so that rules can be
+    # referenced before their own conversion has happened.
+    antlrRules = {}
+    for antlrRule in antlrGrammarTree.rules:
+        antlrRules[antlrRule.ruleName] = antlrRule
+        pyparsingRules[antlrRule.ruleName] = Forward() # antlr is a top down grammar
+    # Second pass: convert each rule and attach the result to its placeholder.
+    for antlrRuleName, antlrRule in antlrRules.items():
+        pyparsingRule = __antlrRuleConverter(pyparsingRules, antlrRule)
+        # Identity test: '!=' on a parser element goes through pyparsing's overloaded equality.
+        assert pyparsingRule is not None
+        pyparsingRules[antlrRuleName] << pyparsingRule
+    return pyparsingRules
+
+if __name__ == "__main__":
+
+    # Demo: parse a small ANTLR grammar, convert it to pyparsing expressions, then
+    # use the generated "expr" rule to parse an arithmetic expression.
+    # Raw string, so the backslash escapes in the commented-out WHITESPACE rule stay
+    # verbatim ANTLR text instead of being interpreted by Python.
+    text = r"""grammar SimpleCalc;
+
+options {
+ language = Python;
+}
+
+tokens {
+ PLUS = '+' ;
+ MINUS = '-' ;
+ MULT = '*' ;
+ DIV = '/' ;
+}
+
+/*------------------------------------------------------------------
+ * PARSER RULES
+ *------------------------------------------------------------------*/
+
+expr : term ( ( PLUS | MINUS ) term )* ;
+
+term : factor ( ( MULT | DIV ) factor )* ;
+
+factor : NUMBER ;
+
+
+/*------------------------------------------------------------------
+ * LEXER RULES
+ *------------------------------------------------------------------*/
+
+NUMBER : (DIGIT)+ ;
+
+/* WHITESPACE : ( '\t' | ' ' | '\r' | '\n'| '\u000C' )+ { $channel = HIDDEN; } ; */
+
+fragment DIGIT : '0'..'9' ;
+
+"""
+
+    grammar().validate()
+    antlrGrammarTree = grammar().parseString(text)
+    # Single-argument print(...) behaves the same under Python 2 and Python 3.
+    print(antlrGrammarTree.asXML("antlrGrammarTree"))
+    pyparsingRules = antlrConverter(antlrGrammarTree)
+    pyparsingRule = pyparsingRules["expr"]
+    pyparsingTree = pyparsingRule.parseString("2 - 5 * 42 + 7 / 25")
+    print(pyparsingTree.asXML("pyparsingTree"))