From 5885454b0001e7758ce220c6b149b1bc37d5f678 Mon Sep 17 00:00:00 2001 From: lb_ii Date: Sun, 12 Jul 2020 15:26:54 +0300 Subject: GARDENSNAKE: Restore original GardenSnake example by Andrew Dalke --- example/GardenSnake/GardenSnake.py | 777 +++++++++++++++++++++++++++++++++++++ example/GardenSnake/README | 5 + 2 files changed, 782 insertions(+) create mode 100644 example/GardenSnake/GardenSnake.py create mode 100644 example/GardenSnake/README diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py new file mode 100644 index 0000000..8b493b4 --- /dev/null +++ b/example/GardenSnake/GardenSnake.py @@ -0,0 +1,777 @@ +# GardenSnake - a parser generator demonstration program +# +# This implements a modified version of a subset of Python: +# - only 'def', 'return' and 'if' statements +# - 'if' only has 'then' clause (no elif nor else) +# - single-quoted strings only, content in raw format +# - numbers are decimal.Decimal instances (not integers or floats) +# - no print statment; use the built-in 'print' function +# - only < > == + - / * implemented (and unary + -) +# - assignment and tuple assignment work +# - no generators of any sort +# - no ... well, no quite a lot + +# Why? I'm thinking about a new indentation-based configuration +# language for a project and wanted to figure out how to do it. Once +# I got that working I needed a way to test it out. My original AST +# was dumb so I decided to target Python's AST and compile it into +# Python code. Plus, it's pretty cool that it only took a day or so +# from sitting down with Ply to having working code. + +# This uses David Beazley's Ply from http://www.dabeaz.com/ply/ + +# This work is hereby released into the Public Domain. To view a copy of +# the public domain dedication, visit +# http://creativecommons.org/licenses/publicdomain/ or send a letter to +# Creative Commons, 543 Howard Street, 5th Floor, San Francisco, +# California, 94105, USA. +# +# Portions of this work are derived from Python's Grammar definition +# and may be covered under the Python copyright and license +# +# Andrew Dalke / Dalke Scientific Software, LLC +# 30 August 2006 / Cape Town, South Africa + +# Changelog: +# 30 August - added link to CC license; removed the "swapcase" encoding + +# Modifications for inclusion in PLY distribution +import sys +sys.path.insert(0, "../..") +from ply import * + +##### Lexer ###### +#import lex +import decimal + +tokens = ( + 'DEF', + 'IF', + 'NAME', + 'NUMBER', # Python decimals + 'STRING', # single quoted strings only; syntax of raw strings + 'LPAR', + 'RPAR', + 'COLON', + 'EQ', + 'ASSIGN', + 'LT', + 'GT', + 'PLUS', + 'MINUS', + 'MULT', + 'DIV', + 'RETURN', + 'WS', + 'NEWLINE', + 'COMMA', + 'SEMICOLON', + 'INDENT', + 'DEDENT', + 'ENDMARKER', +) + +#t_NUMBER = r'\d+' +# taken from decmial.py but without the leading sign + + +def t_NUMBER(t): + r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?""" + t.value = decimal.Decimal(t.value) + return t + + +def t_STRING(t): + r"'([^\\']+|\\'|\\\\)*'" # I think this is right ... + t.value = t.value[1:-1].decode("string-escape") # .swapcase() # for fun + return t + +t_COLON = r':' +t_EQ = r'==' +t_ASSIGN = r'=' +t_LT = r'<' +t_GT = r'>' +t_PLUS = r'\+' +t_MINUS = r'-' +t_MULT = r'\*' +t_DIV = r'/' +t_COMMA = r',' +t_SEMICOLON = r';' + +# Ply nicely documented how to do this. + +RESERVED = { + "def": "DEF", + "if": "IF", + "return": "RETURN", +} + + +def t_NAME(t): + r'[a-zA-Z_][a-zA-Z0-9_]*' + t.type = RESERVED.get(t.value, "NAME") + return t + +# Putting this before t_WS let it consume lines with only comments in +# them so the latter code never sees the WS part. Not consuming the +# newline. Needed for "if 1: #comment" + + +def t_comment(t): + r"[ ]*\043[^\n]*" # \043 is '#' + pass + + +# Whitespace +def t_WS(t): + r' [ ]+ ' + if t.lexer.at_line_start and t.lexer.paren_count == 0: + return t + +# Don't generate newline tokens when inside of parenthesis, eg +# a = (1, +# 2, 3) + + +def t_newline(t): + r'\n+' + t.lexer.lineno += len(t.value) + t.type = "NEWLINE" + if t.lexer.paren_count == 0: + return t + + +def t_LPAR(t): + r'\(' + t.lexer.paren_count += 1 + return t + + +def t_RPAR(t): + r'\)' + # check for underflow? should be the job of the parser + t.lexer.paren_count -= 1 + return t + + +def t_error(t): + raise SyntaxError("Unknown symbol %r" % (t.value[0],)) + print "Skipping", repr(t.value[0]) + t.lexer.skip(1) + +# I implemented INDENT / DEDENT generation as a post-processing filter + +# The original lex token stream contains WS and NEWLINE characters. +# WS will only occur before any other tokens on a line. + +# I have three filters. One tags tokens by adding two attributes. +# "must_indent" is True if the token must be indented from the +# previous code. The other is "at_line_start" which is True for WS +# and the first non-WS/non-NEWLINE on a line. It flags the check so +# see if the new line has changed indication level. + +# Python's syntax has three INDENT states +# 0) no colon hence no need to indent +# 1) "if 1: go()" - simple statements have a COLON but no need for an indent +# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent +NO_INDENT = 0 +MAY_INDENT = 1 +MUST_INDENT = 2 + +# only care about whitespace at the start of a line + + +def track_tokens_filter(lexer, tokens): + lexer.at_line_start = at_line_start = True + indent = NO_INDENT + saw_colon = False + for token in tokens: + token.at_line_start = at_line_start + + if token.type == "COLON": + at_line_start = False + indent = MAY_INDENT + token.must_indent = False + + elif token.type == "NEWLINE": + at_line_start = True + if indent == MAY_INDENT: + indent = MUST_INDENT + token.must_indent = False + + elif token.type == "WS": + assert token.at_line_start == True + at_line_start = True + token.must_indent = False + + else: + # A real token; only indent after COLON NEWLINE + if indent == MUST_INDENT: + token.must_indent = True + else: + token.must_indent = False + at_line_start = False + indent = NO_INDENT + + yield token + lexer.at_line_start = at_line_start + + +def _new_token(type, lineno): + tok = lex.LexToken() + tok.type = type + tok.value = None + tok.lineno = lineno + return tok + +# Synthesize a DEDENT tag + + +def DEDENT(lineno): + return _new_token("DEDENT", lineno) + +# Synthesize an INDENT tag + + +def INDENT(lineno): + return _new_token("INDENT", lineno) + + +# Track the indentation level and emit the right INDENT / DEDENT events. +def indentation_filter(tokens): + # A stack of indentation levels; will never pop item 0 + levels = [0] + token = None + depth = 0 + prev_was_ws = False + for token in tokens: + # if 1: + # print "Process", token, + # if token.at_line_start: + # print "at_line_start", + # if token.must_indent: + # print "must_indent", + # print + + # WS only occurs at the start of the line + # There may be WS followed by NEWLINE so + # only track the depth here. Don't indent/dedent + # until there's something real. + if token.type == "WS": + assert depth == 0 + depth = len(token.value) + prev_was_ws = True + # WS tokens are never passed to the parser + continue + + if token.type == "NEWLINE": + depth = 0 + if prev_was_ws or token.at_line_start: + # ignore blank lines + continue + # pass the other cases on through + yield token + continue + + # then it must be a real token (not WS, not NEWLINE) + # which can affect the indentation level + + prev_was_ws = False + if token.must_indent: + # The current depth must be larger than the previous level + if not (depth > levels[-1]): + raise IndentationError("expected an indented block") + + levels.append(depth) + yield INDENT(token.lineno) + + elif token.at_line_start: + # Must be on the same level or one of the previous levels + if depth == levels[-1]: + # At the same level + pass + elif depth > levels[-1]: + raise IndentationError( + "indentation increase but not in new block") + else: + # Back up; but only if it matches a previous level + try: + i = levels.index(depth) + except ValueError: + raise IndentationError("inconsistent indentation") + for _ in range(i + 1, len(levels)): + yield DEDENT(token.lineno) + levels.pop() + + yield token + + ### Finished processing ### + + # Must dedent any remaining levels + if len(levels) > 1: + assert token is not None + for _ in range(1, len(levels)): + yield DEDENT(token.lineno) + + +# The top-level filter adds an ENDMARKER, if requested. +# Python's grammar uses it. +def filter(lexer, add_endmarker=True): + token = None + tokens = iter(lexer.token, None) + tokens = track_tokens_filter(lexer, tokens) + for token in indentation_filter(tokens): + yield token + + if add_endmarker: + lineno = 1 + if token is not None: + lineno = token.lineno + yield _new_token("ENDMARKER", lineno) + +# Combine Ply and my filters into a new lexer + + +class IndentLexer(object): + + def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0): + self.lexer = lex.lex(debug=debug, optimize=optimize, + lextab=lextab, reflags=reflags) + self.token_stream = None + + def input(self, s, add_endmarker=True): + self.lexer.paren_count = 0 + self.lexer.input(s) + self.token_stream = filter(self.lexer, add_endmarker) + + def token(self): + try: + return self.token_stream.next() + except StopIteration: + return None + +########## Parser (tokens -> AST) ###### + +# also part of Ply +#import yacc + +# I use the Python AST +from compiler import ast + +# Helper function + + +def Assign(left, right): + names = [] + if isinstance(left, ast.Name): + # Single assignment on left + return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right) + elif isinstance(left, ast.Tuple): + # List of things - make sure they are Name nodes + names = [] + for child in left.getChildren(): + if not isinstance(child, ast.Name): + raise SyntaxError("that assignment not supported") + names.append(child.name) + ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names] + return ast.Assign([ast.AssTuple(ass_list)], right) + else: + raise SyntaxError("Can't do that yet") + + +# The grammar comments come from Python's Grammar/Grammar file + +# NB: compound_stmt in single_input is followed by extra NEWLINE! +# file_input: (NEWLINE | stmt)* ENDMARKER +def p_file_input_end(p): + """file_input_end : file_input ENDMARKER""" + p[0] = ast.Stmt(p[1]) + + +def p_file_input(p): + """file_input : file_input NEWLINE + | file_input stmt + | NEWLINE + | stmt""" + if isinstance(p[len(p) - 1], basestring): + if len(p) == 3: + p[0] = p[1] + else: + p[0] = [] # p == 2 --> only a blank line + else: + if len(p) == 3: + p[0] = p[1] + p[2] + else: + p[0] = p[1] + + +# funcdef: [decorators] 'def' NAME parameters ':' suite +# ignoring decorators +def p_funcdef(p): + "funcdef : DEF NAME parameters COLON suite" + p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5]) + +# parameters: '(' [varargslist] ')' + + +def p_parameters(p): + """parameters : LPAR RPAR + | LPAR varargslist RPAR""" + if len(p) == 3: + p[0] = [] + else: + p[0] = p[2] + + +# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | +# highly simplified +def p_varargslist(p): + """varargslist : varargslist COMMA NAME + | NAME""" + if len(p) == 4: + p[0] = p[1] + p[3] + else: + p[0] = [p[1]] + +# stmt: simple_stmt | compound_stmt + + +def p_stmt_simple(p): + """stmt : simple_stmt""" + # simple_stmt is a list + p[0] = p[1] + + +def p_stmt_compound(p): + """stmt : compound_stmt""" + p[0] = [p[1]] + +# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE + + +def p_simple_stmt(p): + """simple_stmt : small_stmts NEWLINE + | small_stmts SEMICOLON NEWLINE""" + p[0] = p[1] + + +def p_small_stmts(p): + """small_stmts : small_stmts SEMICOLON small_stmt + | small_stmt""" + if len(p) == 4: + p[0] = p[1] + [p[3]] + else: + p[0] = [p[1]] + +# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | +# import_stmt | global_stmt | exec_stmt | assert_stmt + + +def p_small_stmt(p): + """small_stmt : flow_stmt + | expr_stmt""" + p[0] = p[1] + +# expr_stmt: testlist (augassign (yield_expr|testlist) | +# ('=' (yield_expr|testlist))*) +# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | +# '<<=' | '>>=' | '**=' | '//=') + + +def p_expr_stmt(p): + """expr_stmt : testlist ASSIGN testlist + | testlist """ + if len(p) == 2: + # a list of expressions + p[0] = ast.Discard(p[1]) + else: + p[0] = Assign(p[1], p[3]) + + +def p_flow_stmt(p): + "flow_stmt : return_stmt" + p[0] = p[1] + +# return_stmt: 'return' [testlist] + + +def p_return_stmt(p): + "return_stmt : RETURN testlist" + p[0] = ast.Return(p[2]) + + +def p_compound_stmt(p): + """compound_stmt : if_stmt + | funcdef""" + p[0] = p[1] + + +def p_if_stmt(p): + 'if_stmt : IF test COLON suite' + p[0] = ast.If([(p[2], p[4])], None) + + +def p_suite(p): + """suite : simple_stmt + | NEWLINE INDENT stmts DEDENT""" + if len(p) == 2: + p[0] = ast.Stmt(p[1]) + else: + p[0] = ast.Stmt(p[3]) + + +def p_stmts(p): + """stmts : stmts stmt + | stmt""" + if len(p) == 3: + p[0] = p[1] + p[2] + else: + p[0] = p[1] + +# No using Python's approach because Ply supports precedence + +# comparison: expr (comp_op expr)* +# arith_expr: term (('+'|'-') term)* +# term: factor (('*'|'/'|'%'|'//') factor)* +# factor: ('+'|'-'|'~') factor | power +# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' + + +def make_lt_compare((left, right)): + return ast.Compare(left, [('<', right), ]) + + +def make_gt_compare((left, right)): + return ast.Compare(left, [('>', right), ]) + + +def make_eq_compare((left, right)): + return ast.Compare(left, [('==', right), ]) + + +binary_ops = { + "+": ast.Add, + "-": ast.Sub, + "*": ast.Mul, + "/": ast.Div, + "<": make_lt_compare, + ">": make_gt_compare, + "==": make_eq_compare, +} +unary_ops = { + "+": ast.UnaryAdd, + "-": ast.UnarySub, +} +precedence = ( + ("left", "EQ", "GT", "LT"), + ("left", "PLUS", "MINUS"), + ("left", "MULT", "DIV"), +) + + +def p_comparison(p): + """comparison : comparison PLUS comparison + | comparison MINUS comparison + | comparison MULT comparison + | comparison DIV comparison + | comparison LT comparison + | comparison EQ comparison + | comparison GT comparison + | PLUS comparison + | MINUS comparison + | power""" + if len(p) == 4: + p[0] = binary_ops[p[2]]((p[1], p[3])) + elif len(p) == 3: + p[0] = unary_ops[p[1]](p[2]) + else: + p[0] = p[1] + +# power: atom trailer* ['**' factor] +# trailers enables function calls. I only allow one level of calls +# so this is 'trailer' + + +def p_power(p): + """power : atom + | atom trailer""" + if len(p) == 2: + p[0] = p[1] + else: + if p[2][0] == "CALL": + p[0] = ast.CallFunc(p[1], p[2][1], None, None) + else: + raise AssertionError("not implemented") + + +def p_atom_name(p): + """atom : NAME""" + p[0] = ast.Name(p[1]) + + +def p_atom_number(p): + """atom : NUMBER + | STRING""" + p[0] = ast.Const(p[1]) + + +def p_atom_tuple(p): + """atom : LPAR testlist RPAR""" + p[0] = p[2] + +# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME + + +def p_trailer(p): + "trailer : LPAR arglist RPAR" + p[0] = ("CALL", p[2]) + +# testlist: test (',' test)* [','] +# Contains shift/reduce error + + +def p_testlist(p): + """testlist : testlist_multi COMMA + | testlist_multi """ + if len(p) == 2: + p[0] = p[1] + else: + # May need to promote singleton to tuple + if isinstance(p[1], list): + p[0] = p[1] + else: + p[0] = [p[1]] + # Convert into a tuple? + if isinstance(p[0], list): + p[0] = ast.Tuple(p[0]) + + +def p_testlist_multi(p): + """testlist_multi : testlist_multi COMMA test + | test""" + if len(p) == 2: + # singleton + p[0] = p[1] + else: + if isinstance(p[1], list): + p[0] = p[1] + [p[3]] + else: + # singleton -> tuple + p[0] = [p[1], p[3]] + + +# test: or_test ['if' or_test 'else' test] | lambdef +# as I don't support 'and', 'or', and 'not' this works down to 'comparison' +def p_test(p): + "test : comparison" + p[0] = p[1] + + +# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) +# XXX INCOMPLETE: this doesn't allow the trailing comma +def p_arglist(p): + """arglist : arglist COMMA argument + | argument""" + if len(p) == 4: + p[0] = p[1] + [p[3]] + else: + p[0] = [p[1]] + +# argument: test [gen_for] | test '=' test # Really [keyword '='] test + + +def p_argument(p): + "argument : test" + p[0] = p[1] + + +def p_error(p): + # print "Error!", repr(p) + raise SyntaxError(p) + + +class GardenSnakeParser(object): + + def __init__(self, lexer=None): + if lexer is None: + lexer = IndentLexer() + self.lexer = lexer + self.parser = yacc.yacc(start="file_input_end") + + def parse(self, code): + self.lexer.input(code) + result = self.parser.parse(lexer=self.lexer) + return ast.Module(None, result) + + +###### Code generation ###### + +from compiler import misc, syntax, pycodegen + + +class GardenSnakeCompiler(object): + + def __init__(self): + self.parser = GardenSnakeParser() + + def compile(self, code, filename=""): + tree = self.parser.parse(code) + # print tree + misc.set_filename(filename, tree) + syntax.check(tree) + gen = pycodegen.ModuleCodeGenerator(tree) + code = gen.getCode() + return code + +####### Test code ####### + +compile = GardenSnakeCompiler().compile + +code = r""" + +print('LET\'S TRY THIS \\OUT') + +#Comment here +def x(a): + print('called with',a) + if a == 1: + return 2 + if a*2 > 10: return 999 / 4 + # Another comment here + + return a+2*3 + +ints = (1, 2, + 3, 4, +5) +print('mutiline-expression', ints) + +t = 4+1/3*2+6*(9-5+1) +print('predence test; should be 34+2/3:', t, t==(34+2/3)) + +print('numbers', 1,2,3,4,5) +if 1: + 8 + a=9 + print(x(a)) + +print(x(1)) +print(x(2)) +print(x(8),'3') +print('this is decimal', 1/5) +print('BIG DECIMAL', 1.234567891234567e12345) + +""" + +# Set up the GardenSnake run-time environment + + +def print_(*args): + print "-->", " ".join(map(str, args)) + +globals()["print"] = print_ + +compiled_code = compile(code) + +exec compiled_code in globals() +print "Done" diff --git a/example/GardenSnake/README b/example/GardenSnake/README new file mode 100644 index 0000000..4d8be2d --- /dev/null +++ b/example/GardenSnake/README @@ -0,0 +1,5 @@ +This example is Andrew Dalke's GardenSnake language. It shows how to process an +indentation-like language like Python. Further details can be found here: + +http://dalkescientific.com/writings/diary/archive/2006/08/30/gardensnake_language.html + -- cgit v1.2.1 From f5342b9d4a89e8e4bb9cb43821370f6094a5778c Mon Sep 17 00:00:00 2001 From: lb_ii Date: Sun, 12 Jul 2020 15:55:49 +0300 Subject: GARDENSNAKE: Run 2to3 on GardenSnake.py --- example/GardenSnake/GardenSnake.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py index 8b493b4..af70b0b 100644 --- a/example/GardenSnake/GardenSnake.py +++ b/example/GardenSnake/GardenSnake.py @@ -156,7 +156,7 @@ def t_RPAR(t): def t_error(t): raise SyntaxError("Unknown symbol %r" % (t.value[0],)) - print "Skipping", repr(t.value[0]) + print("Skipping", repr(t.value[0])) t.lexer.skip(1) # I implemented INDENT / DEDENT generation as a post-processing filter @@ -342,11 +342,11 @@ class IndentLexer(object): def input(self, s, add_endmarker=True): self.lexer.paren_count = 0 self.lexer.input(s) - self.token_stream = filter(self.lexer, add_endmarker) + self.token_stream = list(filter(self.lexer, add_endmarker)) def token(self): try: - return self.token_stream.next() + return next(self.token_stream) except StopIteration: return None @@ -393,7 +393,7 @@ def p_file_input(p): | file_input stmt | NEWLINE | stmt""" - if isinstance(p[len(p) - 1], basestring): + if isinstance(p[len(p) - 1], str): if len(p) == 3: p[0] = p[1] else: @@ -537,15 +537,18 @@ def p_stmts(p): # comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -def make_lt_compare((left, right)): +def make_lt_compare(xxx_todo_changeme): + (left, right) = xxx_todo_changeme return ast.Compare(left, [('<', right), ]) -def make_gt_compare((left, right)): +def make_gt_compare(xxx_todo_changeme1): + (left, right) = xxx_todo_changeme1 return ast.Compare(left, [('>', right), ]) -def make_eq_compare((left, right)): +def make_eq_compare(xxx_todo_changeme2): + (left, right) = xxx_todo_changeme2 return ast.Compare(left, [('==', right), ]) @@ -767,11 +770,11 @@ print('BIG DECIMAL', 1.234567891234567e12345) def print_(*args): - print "-->", " ".join(map(str, args)) + print("-->", " ".join(map(str, args))) globals()["print"] = print_ compiled_code = compile(code) -exec compiled_code in globals() -print "Done" +exec(compiled_code, globals()) +print("Done") -- cgit v1.2.1 From 8b87d22ddc5bc8b73af7f2f3cef9feb433900d35 Mon Sep 17 00:00:00 2001 From: lb_ii Date: Sun, 12 Jul 2020 16:13:22 +0300 Subject: GARDENSNAKE: Fix various Python 3 issues --- example/GardenSnake/GardenSnake.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py index af70b0b..139e9eb 100644 --- a/example/GardenSnake/GardenSnake.py +++ b/example/GardenSnake/GardenSnake.py @@ -83,7 +83,7 @@ def t_NUMBER(t): def t_STRING(t): r"'([^\\']+|\\'|\\\\)*'" # I think this is right ... - t.value = t.value[1:-1].decode("string-escape") # .swapcase() # for fun + t.value = t.value[1:-1].encode().decode("unicode_escape") # .swapcase() # for fun return t t_COLON = r':' @@ -342,7 +342,7 @@ class IndentLexer(object): def input(self, s, add_endmarker=True): self.lexer.paren_count = 0 self.lexer.input(s) - self.token_stream = list(filter(self.lexer, add_endmarker)) + self.token_stream = filter(self.lexer, add_endmarker) def token(self): try: @@ -356,7 +356,7 @@ class IndentLexer(object): #import yacc # I use the Python AST -from compiler import ast +import ast # Helper function @@ -769,11 +769,6 @@ print('BIG DECIMAL', 1.234567891234567e12345) # Set up the GardenSnake run-time environment -def print_(*args): - print("-->", " ".join(map(str, args))) - -globals()["print"] = print_ - compiled_code = compile(code) exec(compiled_code, globals()) -- cgit v1.2.1 From 4ba077cc6498c644251da5eca075ec0f2e36ed95 Mon Sep 17 00:00:00 2001 From: lb_ii Date: Sun, 12 Jul 2020 16:00:36 +0300 Subject: GARDENSNAKE: Use modern lex Modern lex fixup --- example/GardenSnake/GardenSnake.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py index 139e9eb..ca10308 100644 --- a/example/GardenSnake/GardenSnake.py +++ b/example/GardenSnake/GardenSnake.py @@ -222,6 +222,7 @@ def _new_token(type, lineno): tok.type = type tok.value = None tok.lineno = lineno + tok.lexpos = 0 return tok # Synthesize a DEDENT tag @@ -334,9 +335,8 @@ def filter(lexer, add_endmarker=True): class IndentLexer(object): - def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0): - self.lexer = lex.lex(debug=debug, optimize=optimize, - lextab=lextab, reflags=reflags) + def __init__(self, debug=0, reflags=0): + self.lexer = lex.lex(debug=debug, reflags=reflags) self.token_stream = None def input(self, s, add_endmarker=True): -- cgit v1.2.1 From 8c01ee4fe3e3f705933bda4101176780771f1252 Mon Sep 17 00:00:00 2001 From: lb_ii Date: Sun, 12 Jul 2020 18:07:37 +0300 Subject: GARDENSNAKE: Fix ast - various statements --- example/GardenSnake/GardenSnake.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py index ca10308..64b4b27 100644 --- a/example/GardenSnake/GardenSnake.py +++ b/example/GardenSnake/GardenSnake.py @@ -385,7 +385,7 @@ def Assign(left, right): # file_input: (NEWLINE | stmt)* ENDMARKER def p_file_input_end(p): """file_input_end : file_input ENDMARKER""" - p[0] = ast.Stmt(p[1]) + p[0] = p[1] def p_file_input(p): @@ -483,7 +483,7 @@ def p_expr_stmt(p): | testlist """ if len(p) == 2: # a list of expressions - p[0] = ast.Discard(p[1]) + p[0] = ast.Expr(p[1]) else: p[0] = Assign(p[1], p[3]) @@ -508,16 +508,16 @@ def p_compound_stmt(p): def p_if_stmt(p): 'if_stmt : IF test COLON suite' - p[0] = ast.If([(p[2], p[4])], None) + p[0] = ast.If(p[2], p[4], []) def p_suite(p): """suite : simple_stmt | NEWLINE INDENT stmts DEDENT""" if len(p) == 2: - p[0] = ast.Stmt(p[1]) + p[0] = p[1] else: - p[0] = ast.Stmt(p[3]) + p[0] = p[3] def p_stmts(p): @@ -704,7 +704,7 @@ class GardenSnakeParser(object): def parse(self, code): self.lexer.input(code) result = self.parser.parse(lexer=self.lexer) - return ast.Module(None, result) + return ast.Module(result) ###### Code generation ###### -- cgit v1.2.1 From f892c324f90b805f62e056eb536d354519c7e51d Mon Sep 17 00:00:00 2001 From: lb_ii Date: Sun, 12 Jul 2020 18:07:16 +0300 Subject: GARDENSNAKE: Fix ast - string & decimal constants --- example/GardenSnake/GardenSnake.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py index 64b4b27..a89252c 100644 --- a/example/GardenSnake/GardenSnake.py +++ b/example/GardenSnake/GardenSnake.py @@ -78,6 +78,10 @@ tokens = ( def t_NUMBER(t): r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?""" t.value = decimal.Decimal(t.value) + if t.value == int(t.value): + t.value = int(t.value) + else: + t.value = float(t.value) return t @@ -613,10 +617,12 @@ def p_atom_name(p): def p_atom_number(p): - """atom : NUMBER - | STRING""" - p[0] = ast.Const(p[1]) + """atom : NUMBER""" + p[0] = ast.Num(p[1]) +def p_atom_string(p): + """atom : STRING""" + p[0] = ast.Str(p[1]) def p_atom_tuple(p): """atom : LPAR testlist RPAR""" @@ -763,6 +769,7 @@ print(x(2)) print(x(8),'3') print('this is decimal', 1/5) print('BIG DECIMAL', 1.234567891234567e12345) +print('LITTE DECIMAL', 1.234567891234567e-12345) """ -- cgit v1.2.1 From cd171da8b0a2b14209a71c5f44d457aa5ec6a864 Mon Sep 17 00:00:00 2001 From: lb_ii Date: Sun, 12 Jul 2020 16:15:03 +0300 Subject: GARDENSNAKE: Fix ast - binary & unary operators --- example/GardenSnake/GardenSnake.py | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py index a89252c..48ba539 100644 --- a/example/GardenSnake/GardenSnake.py +++ b/example/GardenSnake/GardenSnake.py @@ -541,33 +541,20 @@ def p_stmts(p): # comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -def make_lt_compare(xxx_todo_changeme): - (left, right) = xxx_todo_changeme - return ast.Compare(left, [('<', right), ]) - - -def make_gt_compare(xxx_todo_changeme1): - (left, right) = xxx_todo_changeme1 - return ast.Compare(left, [('>', right), ]) - - -def make_eq_compare(xxx_todo_changeme2): - (left, right) = xxx_todo_changeme2 - return ast.Compare(left, [('==', right), ]) - - binary_ops = { "+": ast.Add, "-": ast.Sub, - "*": ast.Mul, + "*": ast.Mult, "/": ast.Div, - "<": make_lt_compare, - ">": make_gt_compare, - "==": make_eq_compare, +} +compare_ops = { + "<": ast.Lt, + ">": ast.Gt, + "==": ast.Eq, } unary_ops = { - "+": ast.UnaryAdd, - "-": ast.UnarySub, + "+": ast.UAdd, + "-": ast.USub, } precedence = ( ("left", "EQ", "GT", "LT"), @@ -588,9 +575,12 @@ def p_comparison(p): | MINUS comparison | power""" if len(p) == 4: - p[0] = binary_ops[p[2]]((p[1], p[3])) + if p[2] in binary_ops: + p[0] = ast.BinOp(p[1], binary_ops[p[2]](), p[3]) + else: + p[0] = ast.Compare(p[1], [compare_ops[p[2]]()], [p[3]]) elif len(p) == 3: - p[0] = unary_ops[p[1]](p[2]) + p[0] = ast.UnaryOp(unary_ops[p[1]](), p[2]) else: p[0] = p[1] -- cgit v1.2.1 From c4a644cd87cf39054153810f1fe340091f12652d Mon Sep 17 00:00:00 2001 From: lb_ii Date: Sun, 12 Jul 2020 17:05:30 +0300 Subject: GARDENSNAKE: Fix ast - load & assignment of variables --- example/GardenSnake/GardenSnake.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py index 48ba539..6f9f6a6 100644 --- a/example/GardenSnake/GardenSnake.py +++ b/example/GardenSnake/GardenSnake.py @@ -369,16 +369,16 @@ def Assign(left, right): names = [] if isinstance(left, ast.Name): # Single assignment on left - return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right) + return ast.Assign([ast.Name(left.id, ctx=ast.Store())], right) elif isinstance(left, ast.Tuple): # List of things - make sure they are Name nodes names = [] - for child in left.getChildren(): + for child in left.elts: if not isinstance(child, ast.Name): raise SyntaxError("that assignment not supported") - names.append(child.name) - ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names] - return ast.Assign([ast.AssTuple(ass_list)], right) + names.append(child.id) + ass_list = [ast.Name(name, ctx=ast.Store()) for name in names] + return ast.Assign([ast.Tuple(ass_list, ctx=ast.Store())], right) else: raise SyntaxError("Can't do that yet") @@ -603,7 +603,7 @@ def p_power(p): def p_atom_name(p): """atom : NAME""" - p[0] = ast.Name(p[1]) + p[0] = ast.Name(p[1],ctx=ast.Load()) def p_atom_number(p): @@ -642,7 +642,7 @@ def p_testlist(p): p[0] = [p[1]] # Convert into a tuple? if isinstance(p[0], list): - p[0] = ast.Tuple(p[0]) + p[0] = ast.Tuple(p[0], ctx=ast.Load()) def p_testlist_multi(p): @@ -761,6 +761,9 @@ print('this is decimal', 1/5) print('BIG DECIMAL', 1.234567891234567e12345) print('LITTE DECIMAL', 1.234567891234567e-12345) +print(t,a) +a,t = t,a +print(t,a) """ # Set up the GardenSnake run-time environment -- cgit v1.2.1 From 22d0b03e7824a2ec8e675923aff2053b69e020ac Mon Sep 17 00:00:00 2001 From: lb_ii Date: Sun, 12 Jul 2020 16:15:55 +0300 Subject: GARDENSNAKE: Fix ast - defs & calls of functions --- example/GardenSnake/GardenSnake.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py index 6f9f6a6..6e6f42a 100644 --- a/example/GardenSnake/GardenSnake.py +++ b/example/GardenSnake/GardenSnake.py @@ -413,7 +413,7 @@ def p_file_input(p): # ignoring decorators def p_funcdef(p): "funcdef : DEF NAME parameters COLON suite" - p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5]) + p[0] = ast.FunctionDef(p[2], args=ast.arguments([ast.arg(x, None) for x in p[3]], None, [], [], None, []), body=p[5], decorator_list=[], returns=None) # parameters: '(' [varargslist] ')' @@ -596,7 +596,7 @@ def p_power(p): p[0] = p[1] else: if p[2][0] == "CALL": - p[0] = ast.CallFunc(p[1], p[2][1], None, None) + p[0] = ast.Call(p[1], p[2][1], []) else: raise AssertionError("not implemented") -- cgit v1.2.1 From 01f0dbde28663d3a08d7cead348b53c1910df066 Mon Sep 17 00:00:00 2001 From: lb_ii Date: Sun, 12 Jul 2020 16:10:05 +0300 Subject: GARDENSNAKE: Use Python 3 compile function --- example/GardenSnake/GardenSnake.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py index 6e6f42a..a5d0938 100644 --- a/example/GardenSnake/GardenSnake.py +++ b/example/GardenSnake/GardenSnake.py @@ -705,26 +705,20 @@ class GardenSnakeParser(object): ###### Code generation ###### -from compiler import misc, syntax, pycodegen - - class GardenSnakeCompiler(object): def __init__(self): self.parser = GardenSnakeParser() - def compile(self, code, filename=""): + def do_compile(self, code, filename=""): tree = self.parser.parse(code) - # print tree - misc.set_filename(filename, tree) - syntax.check(tree) - gen = pycodegen.ModuleCodeGenerator(tree) - code = gen.getCode() - return code + tree = ast.fix_missing_locations(tree) + # print(ast.dump(tree)) + return compile(tree, filename, "exec") ####### Test code ####### -compile = GardenSnakeCompiler().compile +do_compile = GardenSnakeCompiler().do_compile code = r""" @@ -769,7 +763,7 @@ print(t,a) # Set up the GardenSnake run-time environment -compiled_code = compile(code) +compiled_code = do_compile(code) exec(compiled_code, globals()) print("Done") -- cgit v1.2.1 From 5a72096c57f2a300e786824c0cb67cef61304f90 Mon Sep 17 00:00:00 2001 From: lb_ii Date: Sun, 12 Jul 2020 16:01:14 +0300 Subject: GARDENSNAKE: Fix whitespace errors --- example/GardenSnake/GardenSnake.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py index a5d0938..3520ee4 100644 --- a/example/GardenSnake/GardenSnake.py +++ b/example/GardenSnake/GardenSnake.py @@ -76,7 +76,7 @@ tokens = ( def t_NUMBER(t): - r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?""" + r"""(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?""" t.value = decimal.Decimal(t.value) if t.value == int(t.value): t.value = int(t.value) @@ -128,7 +128,7 @@ def t_comment(t): # Whitespace def t_WS(t): - r' [ ]+ ' + r'[ ]+' if t.lexer.at_line_start and t.lexer.paren_count == 0: return t -- cgit v1.2.1 From 69c3df713f01e1f43980eecf720dfc3e25e1a70e Mon Sep 17 00:00:00 2001 From: lb_ii Date: Sun, 12 Jul 2020 18:36:00 +0300 Subject: GARDENSNAKE: Update changelog header --- example/GardenSnake/GardenSnake.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/example/GardenSnake/GardenSnake.py b/example/GardenSnake/GardenSnake.py index 3520ee4..0633f87 100644 --- a/example/GardenSnake/GardenSnake.py +++ b/example/GardenSnake/GardenSnake.py @@ -31,9 +31,13 @@ # # Andrew Dalke / Dalke Scientific Software, LLC # 30 August 2006 / Cape Town, South Africa +# +# Lolbot Iichan / Moonworks +# 12 July 2020 / Moscow, Russia # Changelog: # 30 August - added link to CC license; removed the "swapcase" encoding +# 12 July - ported to Python 3; fixed whitespace in some regex # Modifications for inclusion in PLY distribution import sys -- cgit v1.2.1