# oc.py # # A subset-C parser, (BNF taken from 1996 International Obfuscated C Code Contest) # # Copyright, 2010, Paul McGuire # """ http://www.ioccc.org/1996/august.hint The following is a description of the OC grammar: OC grammar ========== Terminals are in quotes, () is used for bracketing. program: decl* decl: vardecl fundecl vardecl: type NAME ; type NAME "[" INT "]" ; fundecl: type NAME "(" args ")" "{" body "}" args: /*empty*/ ( arg "," )* arg arg: type NAME body: vardecl* stmt* stmt: ifstmt whilestmt dowhilestmt "return" expr ";" expr ";" "{" stmt* "}" ";" ifstmt: "if" "(" expr ")" stmt "if" "(" expr ")" stmt "else" stmt whilestmt: "while" "(" expr ")" stmt dowhilestmt: "do" stmt "while" "(" expr ")" ";" expr: expr binop expr unop expr expr "[" expr "]" "(" expr ")" expr "(" exprs ")" NAME INT CHAR STRING exprs: /*empty*/ (expr ",")* expr binop: "+" | "-" | "*" | "/" | "%" | "=" | "<" | "==" | "!=" unop: "!" | "-" | "*" type: "int" stars "char" stars stars: "*"* """ from pyparsing import * LPAR,RPAR,LBRACK,RBRACK,LBRACE,RBRACE,SEMI,COMMA = map(Suppress, "()[]{};,") INT = Keyword("int") CHAR = Keyword("char") WHILE = Keyword("while") DO = Keyword("do") IF = Keyword("if") ELSE = Keyword("else") RETURN = Keyword("return") NAME = Word(alphas+"_", alphanums+"_") integer = Regex(r"[+-]?\d+") char = Regex(r"'.'") string_ = dblQuotedString INT = Keyword("int") CHAR = Keyword("char") TYPE = Group((INT | CHAR) + ZeroOrMore("*")) expr = Forward() operand = NAME | integer | char | string_ expr << (infixNotation(operand, [ (oneOf('! - *'), 1, opAssoc.RIGHT), (oneOf('++ --'), 1, opAssoc.RIGHT), (oneOf('++ --'), 1, opAssoc.LEFT), (oneOf('* / %'), 2, opAssoc.LEFT), (oneOf('+ -'), 2, opAssoc.LEFT), (oneOf('< == > <= >= !='), 2, opAssoc.LEFT), (Regex(r'=[^=]'), 2, opAssoc.LEFT), ]) + Optional( LBRACK + expr + RBRACK | LPAR + Group(Optional(delimitedList(expr))) + RPAR ) ) stmt = Forward() ifstmt = IF - LPAR + expr + RPAR + stmt + Optional(ELSE + stmt) whilestmt = WHILE - LPAR + expr + RPAR + stmt dowhilestmt = DO - stmt + WHILE + LPAR + expr + RPAR + SEMI returnstmt = RETURN - expr + SEMI stmt << Group( ifstmt | whilestmt | dowhilestmt | returnstmt | expr + SEMI | LBRACE + ZeroOrMore(stmt) + RBRACE | SEMI) vardecl = Group(TYPE + NAME + Optional(LBRACK + integer + RBRACK)) + SEMI arg = Group(TYPE + NAME) body = ZeroOrMore(vardecl) + ZeroOrMore(stmt) fundecl = Group(TYPE + NAME + LPAR + Optional(Group(delimitedList(arg))) + RPAR + LBRACE + Group(body) + RBRACE) decl = fundecl | vardecl program = ZeroOrMore(decl) program.ignore(cStyleComment) # set parser element names for vname in ("ifstmt whilestmt dowhilestmt returnstmt TYPE " "NAME fundecl vardecl program arg body stmt".split()): v = vars()[vname] v.setName(vname) #~ for vname in "fundecl stmt".split(): #~ v = vars()[vname] #~ v.setDebug() test = r""" /* A factorial program */ int putstr(char *s) { while(*s) putchar(*s++); } int fac(int n) { if (n == 0) return 1; else return n*fac(n-1); } int putn(int n) { if (9 < n) putn(n / 10); putchar((n%10) + '0'); } int facpr(int n) { putstr("factorial "); putn(n); putstr(" = "); putn(fac(n)); putstr("\n"); } int main() { int i; i = 0; while(i < 10) facpr(i++); return 0; } """ ast = program.parseString(test,parseAll=True) ast.pprint()