#
# lua_parser.py
#
# A simple parser for the Lua language.
#
# Copyright 2020, Paul McGuire
#
"""
from https://www.lua.org/manual/5.1/manual.html#8

    chunk ::= {stat [`;´]} [laststat [`;´]]

    block ::= chunk

    stat ::= varlist `=´ explist |
         functioncall |
         do block end |
         while exp do block end |
         repeat block until exp |
         if exp then block {elseif exp then block} [else block] end |
         for Name `=´ exp `,´ exp [`,´ exp] do block end |
         for namelist in explist do block end |
         function funcname funcbody |
         local function Name funcbody |
         local namelist [`=´ explist]

    laststat ::= return [explist] | break

    funcname ::= Name {`.´ Name} [`:´ Name]

    varlist ::= var {`,´ var}

    var ::= Name | prefixexp `[´ exp `]´ | prefixexp `.´ Name

    namelist ::= Name {`,´ Name}

    explist ::= {exp `,´} exp

    exp ::= nil | false | true | Number | String | `...´ | function |
         prefixexp | tableconstructor | exp binop exp | unop exp

    prefixexp ::= var | functioncall | `(´ exp `)´

    functioncall ::= prefixexp args | prefixexp `:´ Name args

    args ::= `(´ [explist] `)´ | tableconstructor | String

    function ::= function funcbody

    funcbody ::= `(´ [parlist] `)´ block end

    parlist ::= namelist [`,´ `...´] | `...´

    tableconstructor ::= `{´ [fieldlist] `}´

    fieldlist ::= field {fieldsep field} [fieldsep]

    field ::= `[´ exp `]´ `=´ exp | Name `=´ exp | exp

    fieldsep ::= `,´ | `;´

    binop ::= `+´ | `-´ | `*´ | `/´ | `^´ | `%´ | `..´ |
         `<´ | `<=´ | `>´ | `>=´ | `==´ | `~=´ |
         and | or

    unop ::= `-´ | not | `#´
"""
import pyparsing as pp

ppc = pp.pyparsing_common

# The expression grammar is deeply recursive; packrat memoization keeps
# parse times reasonable.
pp.ParserElement.enablePackrat()

# Punctuation is matched but kept out of the parse results.
LBRACK, RBRACK, LBRACE, RBRACE, LPAR, RPAR, EQ, COMMA, SEMI, COLON = (
    pp.Suppress(punct) for punct in "[]{}()=,;:"
)
OPT_SEMI = pp.Optional(SEMI).suppress()
ELLIPSIS = pp.Literal("...")

# Map each reserved word to a Keyword parser, keyed by its upper-case name,
# then publish those parsers as module-level names (RETURN, BREAK, DO, ...).
keywords = {
    word.upper(): pp.Keyword(word)
    for word in """\
    return break do end while if then elseif else for in function local
    repeat until nil false true and or not
    """.split()
}
vars().update(keywords)

# A short comment runs from "--" to the end of the line.
comment_intro = pp.Literal("--")
short_comment = comment_intro + pp.restOfLine
# Long comments are "--[[ ... ]]", optionally with matching level markers
# such as "--[==[ ... ]==]".  A single regex is used so that a short comment
# which merely contains brackets (e.g. "-- see t[1]") is not mistaken for a
# long comment, and so that the closing "]]" is consumed in full.
long_comment = pp.Regex(r"--\[(=*)\[[\s\S]*?\]\1\]")
lua_comment = long_comment | short_comment

# Reserved words are not valid names.  Without this guard "return (x)" would
# parse as a call to a function named "return", and block terminators such as
# "end" could be consumed as identifiers.
any_keyword = pp.MatchFirst(list(keywords.values()))
ident = ~any_keyword + ppc.identifier
name = pp.delimitedList(ident, delim=".", combine=True)

namelist = pp.delimitedList(name)
number = ppc.number

# does not parse levels
multiline_string = pp.QuotedString("[[", endQuoteChar="]]", multiline=True)
string = pp.QuotedString("'") | pp.QuotedString('"') | multiline_string

exp = pp.Forward()

# explist1 ::= {exp ','} exp
explist1 = pp.delimitedList(exp)

stat = pp.Forward()

# laststat ::= return [explist1] | break
# The expression list after "return" is optional.
laststat = pp.Group(RETURN + pp.Optional(explist1)) | BREAK

# block ::= {stat [';']} [laststat[';']]
# A block may be empty (e.g. "function f() end") or consist of only a
# laststat, so zero statements must be accepted.
block = pp.Group(stat + OPT_SEMI)[...] + pp.Optional(laststat + OPT_SEMI)

# field ::= '[' exp ']' '=' exp | Name '=' exp | exp
field = pp.Group(
    LBRACK + exp + RBRACK + EQ + pp.Group(exp) | name + EQ + pp.Group(exp) | exp
)

# fieldsep ::= ',' | ';'
fieldsep = COMMA | SEMI

# fieldlist ::= field {fieldsep field} [fieldsep]
field_list = pp.delimitedList(field, delim=fieldsep) + pp.Optional(fieldsep)

# tableconstructor ::= '{' [fieldlist] '}'
tableconstructor = pp.Group(LBRACE + pp.Optional(field_list) + RBRACE)

# parlist1 ::= namelist [',' '...'] | '...'
parlist = namelist + pp.Optional(COMMA + ELLIPSIS) | ELLIPSIS

# funcname ::= Name {'.' Name} [':' Name]
funcname = pp.Group(name + COLON + name) | name

# function ::= function funcbody
# funcbody ::= '(' [parlist1] ')' block end
# The parameter list is optional; "()" yields an empty group.
funcbody = pp.Group(LPAR + pp.Optional(parlist) + RPAR) + block + END
function = FUNCTION + funcbody

# args ::= '(' [explist1] ')' | tableconstructor | String
args = LPAR + pp.Optional(explist1) + RPAR | tableconstructor | string

# this portion of the spec is left-recursive, must break LR loop
# varlist1 ::= var {',' var}
# var ::= Name | prefixexp '[' exp ']' | prefixexp '.' Name
# prefixexp ::= var | functioncall | '(' exp ')'
# functioncall ::= prefixexp args | prefixexp ':' Name args

prefixexp = name | LPAR + exp + RPAR
functioncall = prefixexp + args | prefixexp + COLON + name + args

var = pp.Forward()
var_atom = functioncall | name | LPAR + exp + RPAR
index_ref = pp.Group(LBRACK + exp + RBRACK)
var <<= pp.delimitedList(pp.Group(var_atom + index_ref) | var_atom, delim=".")

varlist1 = pp.delimitedList(var)

# exp ::= nil | false | true | Number | String | '...' |
#         function | prefixexp | tableconstructor
exp_atom = (
    NIL
    | FALSE
    | TRUE
    | number
    | string
    | ELLIPSIS
    | pp.Group(function)  # anonymous function constructor
    | functioncall
    | var  # prefixexp
    | tableconstructor
)

# Unary minus must not swallow the sign of a numeric literal: "-1" and "-.5"
# are still parsed by `number` (as before), while "-x" is a unary operation.
unary_minus = pp.Regex(r"-(?!\.?\d)")
unop = NOT | pp.Literal("#") | unary_minus

# Operators listed from highest to lowest precedence, per section 2.5.6 of
# the Lua 5.1 manual; '^' and '..' are right-associative.
# NOTE(review): because '^' binds tighter than the unary operators on both
# sides here, an expression like "2 ^ -x" is not accepted - confirm whether
# that matters for your inputs.
exp <<= pp.infixNotation(
    exp_atom,
    [
        (pp.Literal("^"), 2, pp.opAssoc.RIGHT),
        (unop, 1, pp.opAssoc.RIGHT),
        (pp.oneOf("* / %"), 2, pp.opAssoc.LEFT),
        (pp.oneOf("+ -"), 2, pp.opAssoc.LEFT),
        (pp.Literal(".."), 2, pp.opAssoc.RIGHT),
        (pp.oneOf("< <= > >= == ~="), 2, pp.opAssoc.LEFT),
        (AND, 2, pp.opAssoc.LEFT),
        (OR, 2, pp.opAssoc.LEFT),
    ],
)

assignment_stat = pp.Optional(LOCAL) + varlist1 + EQ + explist1
func_call_stat = pp.Optional(LOCAL) + functioncall
do_stat = DO + block + END
# while exp do block end  (the "do" keyword is mandatory)
while_stat = WHILE + exp + DO + block + END
repeat_stat = REPEAT + block + UNTIL + exp
for_loop_stat = (
    FOR + name + EQ + exp + COMMA + exp + pp.Optional(COMMA + exp) + DO + block + END
)
for_seq_stat = FOR + namelist + IN + explist1 + DO + block + END
if_stat = (
    IF
    + exp
    + THEN
    + block
    + pp.Group(ELSEIF + exp + THEN + block)[...]
    + pp.Optional(pp.Group(ELSE + block))
    + END
)
function_def = pp.Optional(LOCAL) + FUNCTION + funcname + funcbody
# local namelist  (declaration without an initializer; the form with
# '=' explist1 is handled by assignment_stat)
local_decl_stat = LOCAL + namelist

# Give each statement parser a readable name for error messages.
for var_name in """
    assignment_stat
    func_call_stat
    do_stat
    while_stat
    repeat_stat
    for_loop_stat
    for_seq_stat
    if_stat
    function_def
    local_decl_stat
    """.split():
    vars()[var_name].setName(var_name)

# stat ::= varlist1 '=' explist1 |
#          functioncall |
#          do block end |
#          while exp do block end |
#          repeat block until exp |
#          if exp then block {elseif exp then block} [else block] end |
#          for Name '=' exp ',' exp [',' exp] do block end |
#          for namelist in explist1 do block end |
#          function funcname funcbody |
#          local function Name funcbody |
#          local namelist ['=' explist1]
stat <<= pp.Group(
    assignment_stat
    | function_def
    | do_stat
    | while_stat
    | repeat_stat
    | for_loop_stat
    | for_seq_stat
    | func_call_stat
    | if_stat
    | local_decl_stat
)

# ignore comments
function_def.ignore(lua_comment)

if __name__ == "__main__":
    sample = r"""
    function test(x)
        local t = {foo=1, bar=2, arg=x}
        n = 0
        if t['foo'] then
            n = n + 1
        end
    end
    """

    try:
        result = function_def.parseString(sample)
        result.pprint()
    except pp.ParseException as pe:
        print(pe.explain())