From af80858e888c5f36979da88fcb1080de7b848967 Mon Sep 17 00:00:00 2001
From: David Beazley <dave@dabeaz.com>
Date: Thu, 27 Oct 2022 13:44:12 -0500
Subject: Reorganization. Added makefile

---
 CHANGES | 7 +
 MANIFEST.in | 4 +
 Makefile | 26 +
 README.md | 17 +-
 ply-2022_01_02.tar.gz | Bin 49761 -> 0 bytes
 ply-tests.tar.gz | Bin 14317 -> 0 bytes
 ply/__init__.py | 5 -
 ply/lex.py | 901 ---------
 ply/yacc.py | 2482 ----------------------------------------
 pyproject.toml | 3 +
 setup.cfg | 18 +
 src/ply/__init__.py | 5 +
 src/ply/lex.py | 901 +++++++++++++++
 src/ply/yacc.py | 2482 ++++++++++++++++++++++++++++++++++++++++
 test/README | 8 -
 test/calclex.py | 49 -
 test/cleanup.sh | 4 -
 test/lex_closure.py | 54 -
 test/lex_doc1.py | 26 -
 test/lex_dup1.py | 29 -
 test/lex_dup2.py | 33 -
 test/lex_dup3.py | 31 -
 test/lex_empty.py | 20 -
 test/lex_error1.py | 24 -
 test/lex_error2.py | 26 -
 test/lex_error3.py | 27 -
 test/lex_error4.py | 27 -
 test/lex_hedit.py | 47 -
 test/lex_ignore.py | 31 -
 test/lex_ignore2.py | 29 -
 test/lex_literal1.py | 25 -
 test/lex_literal2.py | 25 -
 test/lex_literal3.py | 26 -
 test/lex_many_tokens.py | 27 -
 test/lex_module.py | 10 -
 test/lex_module_import.py | 42 -
 test/lex_object.py | 55 -
 test/lex_re1.py | 27 -
 test/lex_re2.py | 27 -
 test/lex_re3.py | 29 -
 test/lex_rule1.py | 27 -
 test/lex_rule2.py | 29 -
 test/lex_rule3.py | 27 -
 test/lex_state1.py | 40 -
 test/lex_state2.py | 40 -
 test/lex_state3.py | 42 -
 test/lex_state4.py | 41 -
 test/lex_state5.py | 40 -
 test/lex_state_noerror.py | 39 -
 test/lex_state_norule.py | 40 -
 test/lex_state_try.py | 45 -
 test/lex_token1.py | 19 -
 test/lex_token2.py | 22 -
 test/lex_token3.py | 24 -
 test/lex_token4.py | 26 -
 test/lex_token_dup.py | 29 -
 test/test_cpp_nonascii.c | 2 -
 test/testcpp.py | 153 ---
 test/testlex.py | 402 -------
 test/testyacc.py | 404 -------
 test/yacc_badargs.py | 68 --
 test/yacc_badid.py | 77 --
 test/yacc_badprec.py | 64 --
 test/yacc_badprec2.py | 68 --
 test/yacc_badprec3.py | 68 --
 test/yacc_badrule.py | 68 --
 test/yacc_badtok.py | 68 --
 test/yacc_dup.py | 68 --
 test/yacc_error1.py | 68 --
 test/yacc_error2.py | 68 --
 test/yacc_error3.py | 67 --
 test/yacc_error4.py | 72 --
 test/yacc_error5.py | 94 --
 test/yacc_error6.py | 80 --
 test/yacc_error7.py | 80 --
 test/yacc_inf.py | 56 -
 test/yacc_literal.py | 69 --
 test/yacc_misplaced.py | 68 --
 test/yacc_missing1.py | 68 --
 test/yacc_nested.py | 33 -
 test/yacc_nodoc.py | 67 --
 test/yacc_noerror.py | 66 --
 test/yacc_nop.py | 68 --
 test/yacc_notfunc.py | 66 --
 test/yacc_notok.py | 67 --
 test/yacc_prec1.py | 68 --
 test/yacc_rr.py | 72 --
 test/yacc_rr_unused.py | 30 -
 test/yacc_simple.py | 68 --
 test/yacc_sr.py | 63 -
 test/yacc_term1.py | 68 --
 test/yacc_unicode_literals.py | 70 --
 test/yacc_unused.py | 77 --
 test/yacc_unused_rule.py | 72 --
 test/yacc_uprec.py | 63 -
 test/yacc_uprec2.py | 63 -
 tests/README | 8 +
 tests/calclex.py | 46 +
 tests/cleanup.sh | 4 +
 tests/lex_closure.py | 51 +
 tests/lex_doc1.py | 23 +
 tests/lex_dup1.py | 26 +
 tests/lex_dup2.py | 30 +
 tests/lex_dup3.py | 28 +
 tests/lex_empty.py | 17 +
 tests/lex_error1.py | 21 +
 tests/lex_error2.py | 23 +
 tests/lex_error3.py | 24 +
 tests/lex_error4.py | 24 +
 tests/lex_hedit.py | 45 +
 tests/lex_ignore.py | 28 +
 tests/lex_ignore2.py | 26 +
 tests/lex_literal1.py | 22 +
 tests/lex_literal2.py | 22 +
 tests/lex_literal3.py | 23 +
 tests/lex_many_tokens.py | 25 +
 tests/lex_module.py | 7 +
 tests/lex_module_import.py | 42 +
 tests/lex_object.py | 53 +
 tests/lex_re1.py | 24 +
 tests/lex_re2.py | 24 +
 tests/lex_re3.py | 26 +
 tests/lex_rule1.py | 24 +
 tests/lex_rule2.py | 26 +
 tests/lex_rule3.py | 24 +
 tests/lex_state1.py | 37 +
 tests/lex_state2.py | 37 +
 tests/lex_state3.py | 39 +
 tests/lex_state4.py | 38 +
 tests/lex_state5.py | 37 +
 tests/lex_state_noerror.py | 36 +
 tests/lex_state_norule.py | 37 +
 tests/lex_state_try.py | 42 +
 tests/lex_token1.py | 16 +
 tests/lex_token2.py | 19 +
 tests/lex_token3.py | 21 +
 tests/lex_token4.py | 23 +
 tests/lex_token_dup.py | 26 +
 tests/test_cpp_nonascii.c | 2 +
 tests/testlex.py | 401 +++++++
 tests/testyacc.py | 403 +++++++
 tests/yacc_badargs.py | 68 ++
 tests/yacc_badid.py | 74 ++
 tests/yacc_badprec.py | 61 +
 tests/yacc_badprec2.py | 65 ++
 tests/yacc_badprec3.py | 65 ++
 tests/yacc_badrule.py | 65 ++
 tests/yacc_badtok.py | 65 ++
 tests/yacc_dup.py | 65 ++
 tests/yacc_error1.py | 65 ++
 tests/yacc_error2.py | 65 ++
 tests/yacc_error3.py | 64 ++
 tests/yacc_error4.py | 69 ++
 tests/yacc_error5.py | 91 ++
 tests/yacc_error6.py | 77 ++
 tests/yacc_error7.py | 77 ++
 tests/yacc_inf.py | 53 +
 tests/yacc_literal.py | 66 ++
 tests/yacc_misplaced.py | 65 ++
 tests/yacc_missing1.py | 65 ++
 tests/yacc_nested.py | 30 +
 tests/yacc_nodoc.py | 64 ++
 tests/yacc_noerror.py | 63 +
 tests/yacc_nop.py | 65 ++
 tests/yacc_notfunc.py | 63 +
 tests/yacc_notok.py | 63 +
 tests/yacc_prec1.py | 65 ++
 tests/yacc_rr.py | 69 ++
 tests/yacc_rr_unused.py | 27 +
 tests/yacc_simple.py | 65 ++
 tests/yacc_sr.py | 60 +
 tests/yacc_term1.py | 65 ++
 tests/yacc_unicode_literals.py | 67 ++
 tests/yacc_unused.py | 74 ++
 tests/yacc_unused_rule.py | 69 ++
 tests/yacc_uprec.py | 60 +
 tests/yacc_uprec2.py | 60 +
 177 files changed, 7746 insertions(+), 8068 deletions(-)
 create mode 100644 MANIFEST.in
 create mode 100644 Makefile
 delete mode 100644 ply-2022_01_02.tar.gz
 delete mode 100644 ply-tests.tar.gz
 delete mode 100644 ply/__init__.py
 delete mode 100644 ply/lex.py
 delete mode 100644 ply/yacc.py
 create mode 100644 pyproject.toml
 create mode 100644 setup.cfg
 create mode 100644 src/ply/__init__.py
 create mode 100644 src/ply/lex.py
 create mode 100644 src/ply/yacc.py
 delete mode 100644 test/README
 delete mode 100644 test/calclex.py
 delete mode 100755 test/cleanup.sh
 delete mode 100644 test/lex_closure.py
 delete mode 100644 test/lex_doc1.py
 delete mode 100644 test/lex_dup1.py
 delete mode 100644 test/lex_dup2.py
 delete mode 100644 test/lex_dup3.py
 delete mode 100644 test/lex_empty.py
 delete mode 100644 test/lex_error1.py
 delete mode 100644 test/lex_error2.py
 delete mode 100644 test/lex_error3.py
 delete mode 100644 test/lex_error4.py
 delete mode 100644 test/lex_hedit.py
 delete mode 100644 test/lex_ignore.py
 delete mode 100644 test/lex_ignore2.py
 delete mode 100644 test/lex_literal1.py
 delete mode 100644 test/lex_literal2.py
 delete mode 100644 test/lex_literal3.py
 delete mode 100644 test/lex_many_tokens.py
 delete mode 100644 test/lex_module.py
 delete mode 100644 test/lex_module_import.py
 delete mode 100644 test/lex_object.py
 delete mode 100644 test/lex_re1.py
 delete mode 100644 test/lex_re2.py
 delete mode 100644 test/lex_re3.py
 delete mode 100644 test/lex_rule1.py
 delete mode 100644 test/lex_rule2.py
 delete mode 100644 test/lex_rule3.py
 delete mode 100644 test/lex_state1.py
 delete mode 100644 test/lex_state2.py
 delete mode 100644 test/lex_state3.py
 delete mode 100644 test/lex_state4.py
 delete mode 100644 test/lex_state5.py
 delete mode 100644 test/lex_state_noerror.py
 delete mode 100644 test/lex_state_norule.py
 delete mode 100644 test/lex_state_try.py
 delete mode 100644 test/lex_token1.py
 delete mode 100644 test/lex_token2.py
 delete mode 100644 test/lex_token3.py
 delete mode 100644 test/lex_token4.py
 delete mode 100644 test/lex_token_dup.py
 delete mode 100644 test/test_cpp_nonascii.c
 delete mode 100644 test/testcpp.py
 delete mode 100755 test/testlex.py
 delete mode 100644 test/testyacc.py
 delete mode 100644 test/yacc_badargs.py
 delete mode 100644 test/yacc_badid.py
 delete mode 100644 test/yacc_badprec.py
 delete mode 100644 test/yacc_badprec2.py
 delete mode 100644 test/yacc_badprec3.py
 delete mode 100644 test/yacc_badrule.py
 delete mode 100644 test/yacc_badtok.py
 delete mode 100644 test/yacc_dup.py
 delete mode 100644 test/yacc_error1.py
 delete mode 100644 test/yacc_error2.py
 delete mode 100644 test/yacc_error3.py
 delete mode 100644 test/yacc_error4.py
 delete mode 100644 test/yacc_error5.py
 delete mode 100644 test/yacc_error6.py
 delete mode 100644 test/yacc_error7.py
 delete mode 100644 test/yacc_inf.py
 delete mode 100644 test/yacc_literal.py
 delete mode 100644 test/yacc_misplaced.py
 delete mode 100644 test/yacc_missing1.py
 delete mode 100644 test/yacc_nested.py
 delete mode 100644 test/yacc_nodoc.py
 delete mode 100644 test/yacc_noerror.py
 delete mode 100644 test/yacc_nop.py
 delete mode 100644 test/yacc_notfunc.py
 delete mode 100644 test/yacc_notok.py
 delete mode 100644 test/yacc_prec1.py
 delete mode 100644 test/yacc_rr.py
 delete mode 100644 test/yacc_rr_unused.py
 delete mode 100644 test/yacc_simple.py
 delete mode 100644 test/yacc_sr.py
 delete mode 100644 test/yacc_term1.py
 delete mode 100644 test/yacc_unicode_literals.py
 delete mode 100644 test/yacc_unused.py
 delete mode 100644 test/yacc_unused_rule.py
 delete mode 100644 test/yacc_uprec.py
 delete mode 100644 test/yacc_uprec2.py
 create mode 100644 tests/README
 create mode 100644 tests/calclex.py
 create mode 100755 tests/cleanup.sh
 create mode 100644 tests/lex_closure.py
 create mode 100644 tests/lex_doc1.py
 create mode 100644 tests/lex_dup1.py
 create mode 100644 tests/lex_dup2.py
 create mode 100644 tests/lex_dup3.py
 create mode 100644 tests/lex_empty.py
 create mode 100644 tests/lex_error1.py
 create mode 100644 tests/lex_error2.py
 create mode 100644 tests/lex_error3.py
 create mode 100644 tests/lex_error4.py
 create mode 100644 tests/lex_hedit.py
 create mode 100644 tests/lex_ignore.py
 create mode 100644 tests/lex_ignore2.py
 create mode 100644 tests/lex_literal1.py
 create mode 100644 tests/lex_literal2.py
 create mode 100644 tests/lex_literal3.py
 create mode 100644 tests/lex_many_tokens.py
 create mode 100644 tests/lex_module.py
 create mode 100644 tests/lex_module_import.py
 create mode 100644 tests/lex_object.py
 create mode 100644 tests/lex_re1.py
 create mode 100644 tests/lex_re2.py
 create mode 100644 tests/lex_re3.py
 create mode 100644 tests/lex_rule1.py
 create mode 100644 tests/lex_rule2.py
 create mode 100644 tests/lex_rule3.py
 create mode 100644 tests/lex_state1.py
 create mode 100644 tests/lex_state2.py
 create mode 100644 tests/lex_state3.py
 create mode 100644 tests/lex_state4.py
 create mode 100644 tests/lex_state5.py
 create mode 100644 tests/lex_state_noerror.py
 create mode 100644 tests/lex_state_norule.py
 create mode 100644 tests/lex_state_try.py
 create mode 100644 tests/lex_token1.py
 create mode 100644 tests/lex_token2.py
 create mode 100644 tests/lex_token3.py
 create mode 100644 tests/lex_token4.py
 create mode 100644 tests/lex_token_dup.py
 create mode 100644 tests/test_cpp_nonascii.c
 create mode 100755 tests/testlex.py
 create mode 100644 tests/testyacc.py
 create mode 100644 tests/yacc_badargs.py
 create mode 100644 tests/yacc_badid.py
 create mode 100644 tests/yacc_badprec.py
 create mode 100644 tests/yacc_badprec2.py
 create mode 100644 tests/yacc_badprec3.py
 create mode 100644 tests/yacc_badrule.py
 create mode 100644 tests/yacc_badtok.py
 create mode 100644 tests/yacc_dup.py
 create mode 100644 tests/yacc_error1.py
 create mode 100644 tests/yacc_error2.py
 create mode 100644 tests/yacc_error3.py
 create mode 100644 tests/yacc_error4.py
 create mode 100644 tests/yacc_error5.py
 create mode 100644 tests/yacc_error6.py
 create mode 100644 tests/yacc_error7.py
 create mode 100644 tests/yacc_inf.py
 create mode 100644 tests/yacc_literal.py
 create mode 100644 tests/yacc_misplaced.py
 create mode 100644 tests/yacc_missing1.py
 create mode 100644 tests/yacc_nested.py
 create mode 100644 tests/yacc_nodoc.py
 create mode 100644 tests/yacc_noerror.py
 create mode 100644 tests/yacc_nop.py
 create mode 100644 tests/yacc_notfunc.py
 create mode 100644 tests/yacc_notok.py
 create mode 100644 tests/yacc_prec1.py
 create mode 100644 tests/yacc_rr.py
 create mode 100644 tests/yacc_rr_unused.py
 create mode 100644 tests/yacc_simple.py
 create mode 100644 tests/yacc_sr.py
 create mode 100644 tests/yacc_term1.py
 create mode 100644 tests/yacc_unicode_literals.py
 create mode 100644 tests/yacc_unused.py
 create mode 100644 tests/yacc_unused_rule.py
 create mode 100644 tests/yacc_uprec.py
 create mode 100644 tests/yacc_uprec2.py

diff --git a/CHANGES b/CHANGES
index aefb2f9..0a88915 100644
--- a/CHANGES
+++ b/CHANGES
@@ -6,6 +6,13 @@ maintained as a mature library. No new major features are planned, but
 issues reported for bugs are still welcome. Any changes to the
 software will be noted here.
 
+Version 2022.10.27
+------------------
+10/27/22 Reorganization/modernization of the build process. PLY continues
+         to make no package-installable releases. However, you can now
+         use the Makefile to build artifacts installable via pip.
+         Use `make test` to test and `make build` to build.
+
 Version 2022_01_02
 ------------------
 12/12/21 PLY is no longer being developed in public. Instead
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..cfcf771
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,4 @@
+include Makefile CONTRIBUTING.md
+recursive-include example *
+recursive-include tests *
+recursive-include docs *
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..683f97c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,26 @@
+PYTHON=python3
+VENV=.venv
+
+# Setup and install all of the required tools for building, testing,
+# and deploying
+setup::
+	rm -rf $(VENV)
+	$(PYTHON) -m venv $(VENV)
+	./$(VENV)/bin/python -m pip install pytest
+	./$(VENV)/bin/python -m pip install pytest-cov
+	./$(VENV)/bin/python -m pip install build
+	./$(VENV)/bin/python -m pip install twine
+
+# Run unit tests
+test::
+	./$(VENV)/bin/python -m pip install .
+	./$(VENV)/bin/python tests/testlex.py
+	./$(VENV)/bin/python tests/testyacc.py
+
+# Build an artifact suitable for installing with pip
+build::
+	./$(VENV)/bin/python -m build
+
+# Install into the default Python
+install::
+	$(PYTHON) -m pip install .
diff --git a/README.md b/README.md
index 473e6d3..9e4e3f9 100644
--- a/README.md
+++ b/README.md
@@ -18,22 +18,17 @@ flexibility in terms of how you decide to use it. You can use PLY to
 build Abstract Syntax Trees (ASTs), simple one-pass compilers,
 protocol decoders, or even a more advanced parsing framework.
 
-## Important Notice - October 11, 2022
+## Important Notice - October 27, 2022
 
-I'm officially retiring the PLY project. There will be no further
-releases. Should you choose to still use it, the GitHub repo will
-remain alive--feel free to vendor the code and report bugs as you see
-fit. I'd like to thank everyone who contributed to the
-project over the last twenty-one years. -- Dave
+The PLY project will make no further package-installable releases.
+If you want the latest version, you'll need to download it here
+or clone the repo.
 
-## Download
-
-* [Current Release (ply-2022_01_02)](https://github.com/dabeaz/ply/raw/master/ply-2022_01_02.tar.gz)
-* [Historial Releases](https://github.com/dabeaz/archive/tree/main/ply)
+## Requirements
 
 The current release of PLY requires the use of Python 3.6 or greater.
 If you need to support an older version, download one of the
-historical releases.
+historical releases at https://github.com/dabeaz/archive/tree/main/ply.
 
 ## How to Install and Use
 
diff --git a/ply-2022_01_02.tar.gz b/ply-2022_01_02.tar.gz
deleted file mode 100644
index b464a14..0000000
Binary files a/ply-2022_01_02.tar.gz and /dev/null differ
diff --git a/ply-tests.tar.gz b/ply-tests.tar.gz
deleted file mode 100644
index 8ffa1aa..0000000
Binary files a/ply-tests.tar.gz and /dev/null differ
diff --git a/ply/__init__.py b/ply/__init__.py
deleted file mode 100644
index 0ef83ef..0000000
--- a/ply/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# PLY package
-# Author: David Beazley (dave@dabeaz.com)
-# https://github.com/dabeaz/ply
-
-__version__ = '2022_01_02'
diff --git a/ply/lex.py b/ply/lex.py
deleted file mode 100644
index de011fe..0000000
--- a/ply/lex.py
+++ /dev/null
@@ -1,901 +0,0 @@
-# -----------------------------------------------------------------------------
-# ply: lex.py
-#
-# Copyright (C) 2001-2022
-# David M. Beazley (Dabeaz LLC)
-# All rights reserved.
-#
-# Latest version: https://github.com/dabeaz/ply
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-# * Neither the name of David Beazley or Dabeaz LLC may be used to
-#   endorse or promote products derived from this software without
-#   specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# ----------------------------------------------------------------------------- - -import re -import sys -import types -import copy -import os -import inspect - -# This tuple contains acceptable string types -StringTypes = (str, bytes) - -# This regular expression is used to match valid token names -_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') - -# Exception thrown when invalid token encountered and no default error -# handler is defined. -class LexError(Exception): - def __init__(self, message, s): - self.args = (message,) - self.text = s - -# Token class. This class is used to represent the tokens produced. -class LexToken(object): - def __repr__(self): - return f'LexToken({self.type},{self.value!r},{self.lineno},{self.lexpos})' - -# This object is a stand-in for a logging object created by the -# logging module. - -class PlyLogger(object): - def __init__(self, f): - self.f = f - - def critical(self, msg, *args, **kwargs): - self.f.write((msg % args) + '\n') - - def warning(self, msg, *args, **kwargs): - self.f.write('WARNING: ' + (msg % args) + '\n') - - def error(self, msg, *args, **kwargs): - self.f.write('ERROR: ' + (msg % args) + '\n') - - info = critical - debug = critical - -# ----------------------------------------------------------------------------- -# === Lexing Engine === -# -# The following Lexer class implements the lexer runtime. There are only -# a few public methods and attributes: -# -# input() - Store a new string in the lexer -# token() - Get the next token -# clone() - Clone the lexer -# -# lineno - Current line number -# lexpos - Current position in the input string -# ----------------------------------------------------------------------------- - -class Lexer: - def __init__(self): - self.lexre = None # Master regular expression. This is a list of - # tuples (re, findex) where re is a compiled - # regular expression and findex is a list - # mapping regex group numbers to rules - self.lexretext = None # Current regular expression strings - self.lexstatere = {} # Dictionary mapping lexer states to master regexs - self.lexstateretext = {} # Dictionary mapping lexer states to regex strings - self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names - self.lexstate = 'INITIAL' # Current lexer state - self.lexstatestack = [] # Stack of lexer states - self.lexstateinfo = None # State information - self.lexstateignore = {} # Dictionary of ignored characters for each state - self.lexstateerrorf = {} # Dictionary of error functions for each state - self.lexstateeoff = {} # Dictionary of eof functions for each state - self.lexreflags = 0 # Optional re compile flags - self.lexdata = None # Actual input data (as a string) - self.lexpos = 0 # Current position in input text - self.lexlen = 0 # Length of the input text - self.lexerrorf = None # Error rule (if any) - self.lexeoff = None # EOF rule (if any) - self.lextokens = None # List of valid tokens - self.lexignore = '' # Ignored characters - self.lexliterals = '' # Literal characters that can be passed through - self.lexmodule = None # Module - self.lineno = 1 # Current line number - - def clone(self, object=None): - c = copy.copy(self) - - # If the object parameter has been supplied, it means we are attaching the - # lexer to a new object. In this case, we have to rebind all methods in - # the lexstatere and lexstateerrorf tables. 
- - if object: - newtab = {} - for key, ritem in self.lexstatere.items(): - newre = [] - for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object, f[0].__name__), f[1])) - newre.append((cre, newfindex)) - newtab[key] = newre - c.lexstatere = newtab - c.lexstateerrorf = {} - for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object, ef.__name__) - c.lexmodule = object - return c - - # ------------------------------------------------------------ - # input() - Push a new string into the lexer - # ------------------------------------------------------------ - def input(self, s): - self.lexdata = s - self.lexpos = 0 - self.lexlen = len(s) - - # ------------------------------------------------------------ - # begin() - Changes the lexing state - # ------------------------------------------------------------ - def begin(self, state): - if state not in self.lexstatere: - raise ValueError(f'Undefined state {state!r}') - self.lexre = self.lexstatere[state] - self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state, '') - self.lexerrorf = self.lexstateerrorf.get(state, None) - self.lexeoff = self.lexstateeoff.get(state, None) - self.lexstate = state - - # ------------------------------------------------------------ - # push_state() - Changes the lexing state and saves old on stack - # ------------------------------------------------------------ - def push_state(self, state): - self.lexstatestack.append(self.lexstate) - self.begin(state) - - # ------------------------------------------------------------ - # pop_state() - Restores the previous state - # ------------------------------------------------------------ - def pop_state(self): - self.begin(self.lexstatestack.pop()) - - # ------------------------------------------------------------ - # current_state() - Returns the current lexing state - # ------------------------------------------------------------ - def current_state(self): - return self.lexstate - - # ------------------------------------------------------------ - # skip() - Skip ahead n characters - # ------------------------------------------------------------ - def skip(self, n): - self.lexpos += n - - # ------------------------------------------------------------ - # token() - Return the next token from the Lexer - # - # Note: This function has been carefully implemented to be as fast - # as possible. 
Don't make changes unless you really know what - # you are doing - # ------------------------------------------------------------ - def token(self): - # Make local copies of frequently referenced attributes - lexpos = self.lexpos - lexlen = self.lexlen - lexignore = self.lexignore - lexdata = self.lexdata - - while lexpos < lexlen: - # This code provides some short-circuit code for whitespace, tabs, and other ignored characters - if lexdata[lexpos] in lexignore: - lexpos += 1 - continue - - # Look for a regular expression match - for lexre, lexindexfunc in self.lexre: - m = lexre.match(lexdata, lexpos) - if not m: - continue - - # Create a token for return - tok = LexToken() - tok.value = m.group() - tok.lineno = self.lineno - tok.lexpos = lexpos - - i = m.lastindex - func, tok.type = lexindexfunc[i] - - if not func: - # If no token type was set, it's an ignored token - if tok.type: - self.lexpos = m.end() - return tok - else: - lexpos = m.end() - break - - lexpos = m.end() - - # If token is processed by a function, call it - - tok.lexer = self # Set additional attributes useful in token rules - self.lexmatch = m - self.lexpos = lexpos - newtok = func(tok) - del tok.lexer - del self.lexmatch - - # Every function must return a token, if nothing, we just move to next token - if not newtok: - lexpos = self.lexpos # This is here in case user has updated lexpos. - lexignore = self.lexignore # This is here in case there was a state change - break - return newtok - else: - # No match, see if in literals - if lexdata[lexpos] in self.lexliterals: - tok = LexToken() - tok.value = lexdata[lexpos] - tok.lineno = self.lineno - tok.type = tok.value - tok.lexpos = lexpos - self.lexpos = lexpos + 1 - return tok - - # No match. Call t_error() if defined. - if self.lexerrorf: - tok = LexToken() - tok.value = self.lexdata[lexpos:] - tok.lineno = self.lineno - tok.type = 'error' - tok.lexer = self - tok.lexpos = lexpos - self.lexpos = lexpos - newtok = self.lexerrorf(tok) - if lexpos == self.lexpos: - # Error method didn't change text position at all. This is an error. - raise LexError(f"Scanning error. Illegal character {lexdata[lexpos]!r}", - lexdata[lexpos:]) - lexpos = self.lexpos - if not newtok: - continue - return newtok - - self.lexpos = lexpos - raise LexError(f"Illegal character {lexdata[lexpos]!r} at index {lexpos}", - lexdata[lexpos:]) - - if self.lexeoff: - tok = LexToken() - tok.type = 'eof' - tok.value = '' - tok.lineno = self.lineno - tok.lexpos = lexpos - tok.lexer = self - self.lexpos = lexpos - newtok = self.lexeoff(tok) - return newtok - - self.lexpos = lexpos + 1 - if self.lexdata is None: - raise RuntimeError('No input string given with input()') - return None - - # Iterator interface - def __iter__(self): - return self - - def __next__(self): - t = self.token() - if t is None: - raise StopIteration - return t - -# ----------------------------------------------------------------------------- -# ==== Lex Builder === -# -# The functions and classes below are used to collect lexing information -# and build a Lexer object from it. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# _get_regex(func) -# -# Returns the regular expression assigned to a function either as a doc string -# or as a .regex attribute attached by the @TOKEN decorator. 
-# ----------------------------------------------------------------------------- -def _get_regex(func): - return getattr(func, 'regex', func.__doc__) - -# ----------------------------------------------------------------------------- -# get_caller_module_dict() -# -# This function returns a dictionary containing all of the symbols defined within -# a caller further down the call stack. This is used to get the environment -# associated with the yacc() call if none was provided. -# ----------------------------------------------------------------------------- -def get_caller_module_dict(levels): - f = sys._getframe(levels) - return { **f.f_globals, **f.f_locals } - -# ----------------------------------------------------------------------------- -# _form_master_re() -# -# This function takes a list of all of the regex components and attempts to -# form the master regular expression. Given limitations in the Python re -# module, it may be necessary to break the master regex into separate expressions. -# ----------------------------------------------------------------------------- -def _form_master_re(relist, reflags, ldict, toknames): - if not relist: - return [], [], [] - regex = '|'.join(relist) - try: - lexre = re.compile(regex, reflags) - - # Build the index to function map for the matching engine - lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) - lexindexnames = lexindexfunc[:] - - for f, i in lexre.groupindex.items(): - handle = ldict.get(f, None) - if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle, toknames[f]) - lexindexnames[i] = f - elif handle is not None: - lexindexnames[i] = f - if f.find('ignore_') > 0: - lexindexfunc[i] = (None, None) - else: - lexindexfunc[i] = (None, toknames[f]) - - return [(lexre, lexindexfunc)], [regex], [lexindexnames] - except Exception: - m = (len(relist) // 2) + 1 - llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) - rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) - return (llist+rlist), (lre+rre), (lnames+rnames) - -# ----------------------------------------------------------------------------- -# def _statetoken(s,names) -# -# Given a declaration name s of the form "t_" and a dictionary whose keys are -# state names, this function returns a tuple (states,tokenname) where states -# is a tuple of state names and tokenname is the name of the token. For example, -# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') -# ----------------------------------------------------------------------------- -def _statetoken(s, names): - parts = s.split('_') - for i, part in enumerate(parts[1:], 1): - if part not in names and part != 'ANY': - break - - if i > 1: - states = tuple(parts[1:i]) - else: - states = ('INITIAL',) - - if 'ANY' in states: - states = tuple(names) - - tokenname = '_'.join(parts[i:]) - return (states, tokenname) - - -# ----------------------------------------------------------------------------- -# LexerReflect() -# -# This class represents information needed to build a lexer as extracted from a -# user's input file. 
-# ----------------------------------------------------------------------------- -class LexerReflect(object): - def __init__(self, ldict, log=None, reflags=0): - self.ldict = ldict - self.error_func = None - self.tokens = [] - self.reflags = reflags - self.stateinfo = {'INITIAL': 'inclusive'} - self.modules = set() - self.error = False - self.log = PlyLogger(sys.stderr) if log is None else log - - # Get all of the basic information - def get_all(self): - self.get_tokens() - self.get_literals() - self.get_states() - self.get_rules() - - # Validate all of the information - def validate_all(self): - self.validate_tokens() - self.validate_literals() - self.validate_rules() - return self.error - - # Get the tokens map - def get_tokens(self): - tokens = self.ldict.get('tokens', None) - if not tokens: - self.log.error('No token list is defined') - self.error = True - return - - if not isinstance(tokens, (list, tuple)): - self.log.error('tokens must be a list or tuple') - self.error = True - return - - if not tokens: - self.log.error('tokens is empty') - self.error = True - return - - self.tokens = tokens - - # Validate the tokens - def validate_tokens(self): - terminals = {} - for n in self.tokens: - if not _is_identifier.match(n): - self.log.error(f"Bad token name {n!r}") - self.error = True - if n in terminals: - self.log.warning(f"Token {n!r} multiply defined") - terminals[n] = 1 - - # Get the literals specifier - def get_literals(self): - self.literals = self.ldict.get('literals', '') - if not self.literals: - self.literals = '' - - # Validate literals - def validate_literals(self): - try: - for c in self.literals: - if not isinstance(c, StringTypes) or len(c) > 1: - self.log.error(f'Invalid literal {c!r}. Must be a single character') - self.error = True - - except TypeError: - self.log.error('Invalid literals specification. literals must be a sequence of characters') - self.error = True - - def get_states(self): - self.states = self.ldict.get('states', None) - # Build statemap - if self.states: - if not isinstance(self.states, (tuple, list)): - self.log.error('states must be defined as a tuple or list') - self.error = True - else: - for s in self.states: - if not isinstance(s, tuple) or len(s) != 2: - self.log.error("Invalid state specifier %r. 
Must be a tuple (statename,'exclusive|inclusive')", s) - self.error = True - continue - name, statetype = s - if not isinstance(name, StringTypes): - self.log.error('State name %r must be a string', name) - self.error = True - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name) - self.error = True - continue - if name in self.stateinfo: - self.log.error("State %r already defined", name) - self.error = True - continue - self.stateinfo[name] = statetype - - # Get all of the symbols with a t_ prefix and sort them into various - # categories (functions, strings, error functions, and ignore characters) - - def get_rules(self): - tsymbols = [f for f in self.ldict if f[:2] == 't_'] - - # Now build up a list of functions and a list of strings - self.toknames = {} # Mapping of symbols to token names - self.funcsym = {} # Symbols defined as functions - self.strsym = {} # Symbols defined as strings - self.ignore = {} # Ignore strings by state - self.errorf = {} # Error functions by state - self.eoff = {} # EOF functions by state - - for s in self.stateinfo: - self.funcsym[s] = [] - self.strsym[s] = [] - - if len(tsymbols) == 0: - self.log.error('No rules of the form t_rulename are defined') - self.error = True - return - - for f in tsymbols: - t = self.ldict[f] - states, tokname = _statetoken(f, self.stateinfo) - self.toknames[f] = tokname - - if hasattr(t, '__call__'): - if tokname == 'error': - for s in states: - self.errorf[s] = t - elif tokname == 'eof': - for s in states: - self.eoff[s] = t - elif tokname == 'ignore': - line = t.__code__.co_firstlineno - file = t.__code__.co_filename - self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__) - self.error = True - else: - for s in states: - self.funcsym[s].append((f, t)) - elif isinstance(t, StringTypes): - if tokname == 'ignore': - for s in states: - self.ignore[s] = t - if '\\' in t: - self.log.warning("%s contains a literal backslash '\\'", f) - - elif tokname == 'error': - self.log.error("Rule %r must be defined as a function", f) - self.error = True - else: - for s in states: - self.strsym[s].append((f, t)) - else: - self.log.error('%s not defined as a function or string', f) - self.error = True - - # Sort the functions by line number - for f in self.funcsym.values(): - f.sort(key=lambda x: x[1].__code__.co_firstlineno) - - # Sort the strings by regular expression length - for s in self.strsym.values(): - s.sort(key=lambda x: len(x[1]), reverse=True) - - # Validate all of the t_rules collected - def validate_rules(self): - for state in self.stateinfo: - # Validate all rules defined by functions - - for fname, f in self.funcsym[state]: - line = f.__code__.co_firstlineno - file = f.__code__.co_filename - module = inspect.getmodule(f) - self.modules.add(module) - - tokname = self.toknames[fname] - if isinstance(f, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - nargs = f.__code__.co_argcount - if nargs > reqargs: - self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) - self.error = True - continue - - if nargs < reqargs: - self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) - self.error = True - continue - - if not _get_regex(f): - self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__) - self.error = True - continue - - try: - c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) - if c.match(''): - 
self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, f.__name__) - self.error = True - except re.error as e: - self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) - if '#' in _get_regex(f): - self.log.error("%s:%d. Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__) - self.error = True - - # Validate all rules defined by strings - for name, r in self.strsym[state]: - tokname = self.toknames[name] - if tokname == 'error': - self.log.error("Rule %r must be defined as a function", name) - self.error = True - continue - - if tokname not in self.tokens and tokname.find('ignore_') < 0: - self.log.error("Rule %r defined for an unspecified token %s", name, tokname) - self.error = True - continue - - try: - c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) - if (c.match('')): - self.log.error("Regular expression for rule %r matches empty string", name) - self.error = True - except re.error as e: - self.log.error("Invalid regular expression for rule %r. %s", name, e) - if '#' in r: - self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name) - self.error = True - - if not self.funcsym[state] and not self.strsym[state]: - self.log.error("No rules defined for state %r", state) - self.error = True - - # Validate the error function - efunc = self.errorf.get(state, None) - if efunc: - f = efunc - line = f.__code__.co_firstlineno - file = f.__code__.co_filename - module = inspect.getmodule(f) - self.modules.add(module) - - if isinstance(f, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - nargs = f.__code__.co_argcount - if nargs > reqargs: - self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) - self.error = True - - if nargs < reqargs: - self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) - self.error = True - - for module in self.modules: - self.validate_module(module) - - # ----------------------------------------------------------------------------- - # validate_module() - # - # This checks to see if there are duplicated t_rulename() functions or strings - # in the parser input file. This is done using a simple regular expression - # match on each line in the source code of the given module. - # ----------------------------------------------------------------------------- - - def validate_module(self, module): - try: - lines, linen = inspect.getsourcelines(module) - except IOError: - return - - fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') - sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - - counthash = {} - linen += 1 - for line in lines: - m = fre.match(line) - if not m: - m = sre.match(line) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - filename = inspect.getsourcefile(module) - self.log.error('%s:%d: Rule %s redefined. 
Previously defined on line %d', filename, linen, name, prev) - self.error = True - linen += 1 - -# ----------------------------------------------------------------------------- -# lex(module) -# -# Build all of the regular expression rules from definitions in the supplied module -# ----------------------------------------------------------------------------- -def lex(*, module=None, object=None, debug=False, - reflags=int(re.VERBOSE), debuglog=None, errorlog=None): - - global lexer - - ldict = None - stateinfo = {'INITIAL': 'inclusive'} - lexobj = Lexer() - global token, input - - if errorlog is None: - errorlog = PlyLogger(sys.stderr) - - if debug: - if debuglog is None: - debuglog = PlyLogger(sys.stderr) - - # Get the module dictionary used for the lexer - if object: - module = object - - # Get the module dictionary used for the parser - if module: - _items = [(k, getattr(module, k)) for k in dir(module)] - ldict = dict(_items) - # If no __file__ attribute is available, try to obtain it from the __module__ instead - if '__file__' not in ldict: - ldict['__file__'] = sys.modules[ldict['__module__']].__file__ - else: - ldict = get_caller_module_dict(2) - - # Collect parser information from the dictionary - linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) - linfo.get_all() - if linfo.validate_all(): - raise SyntaxError("Can't build lexer") - - # Dump some basic debugging information - if debug: - debuglog.info('lex: tokens = %r', linfo.tokens) - debuglog.info('lex: literals = %r', linfo.literals) - debuglog.info('lex: states = %r', linfo.stateinfo) - - # Build a dictionary of valid token names - lexobj.lextokens = set() - for n in linfo.tokens: - lexobj.lextokens.add(n) - - # Get literals specification - if isinstance(linfo.literals, (list, tuple)): - lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) - else: - lexobj.lexliterals = linfo.literals - - lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) - - # Get the stateinfo dictionary - stateinfo = linfo.stateinfo - - regexs = {} - # Build the master regular expressions - for state in stateinfo: - regex_list = [] - - # Add rules defined by functions first - for fname, f in linfo.funcsym[state]: - regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) - if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) - - # Now add all of the simple rules - for name, r in linfo.strsym[state]: - regex_list.append('(?P<%s>%s)' % (name, r)) - if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) - - regexs[state] = regex_list - - # Build the master regular expressions - - if debug: - debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') - - for state in regexs: - lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) - lexobj.lexstatere[state] = lexre - lexobj.lexstateretext[state] = re_text - lexobj.lexstaterenames[state] = re_names - if debug: - for i, text in enumerate(re_text): - debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) - - # For inclusive states, we need to add the regular expressions from the INITIAL state - for state, stype in stateinfo.items(): - if state != 'INITIAL' and stype == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) - - lexobj.lexstateinfo = stateinfo - lexobj.lexre = 
lexobj.lexstatere['INITIAL'] - lexobj.lexretext = lexobj.lexstateretext['INITIAL'] - lexobj.lexreflags = reflags - - # Set up ignore variables - lexobj.lexstateignore = linfo.ignore - lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') - - # Set up error functions - lexobj.lexstateerrorf = linfo.errorf - lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) - if not lexobj.lexerrorf: - errorlog.warning('No t_error rule is defined') - - # Set up eof functions - lexobj.lexstateeoff = linfo.eoff - lexobj.lexeoff = linfo.eoff.get('INITIAL', None) - - # Check state information for ignore and error rules - for s, stype in stateinfo.items(): - if stype == 'exclusive': - if s not in linfo.errorf: - errorlog.warning("No error rule is defined for exclusive state %r", s) - if s not in linfo.ignore and lexobj.lexignore: - errorlog.warning("No ignore rule is defined for exclusive state %r", s) - elif stype == 'inclusive': - if s not in linfo.errorf: - linfo.errorf[s] = linfo.errorf.get('INITIAL', None) - if s not in linfo.ignore: - linfo.ignore[s] = linfo.ignore.get('INITIAL', '') - - # Create global versions of the token() and input() functions - token = lexobj.token - input = lexobj.input - lexer = lexobj - - return lexobj - -# ----------------------------------------------------------------------------- -# runmain() -# -# This runs the lexer as a main program -# ----------------------------------------------------------------------------- - -def runmain(lexer=None, data=None): - if not data: - try: - filename = sys.argv[1] - with open(filename) as f: - data = f.read() - except IndexError: - sys.stdout.write('Reading from standard input (type EOF to end):\n') - data = sys.stdin.read() - - if lexer: - _input = lexer.input - else: - _input = input - _input(data) - if lexer: - _token = lexer.token - else: - _token = token - - while True: - tok = _token() - if not tok: - break - sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.lexpos})\n') - -# ----------------------------------------------------------------------------- -# @TOKEN(regex) -# -# This decorator function can be used to set the regex expression on a function -# when its docstring might need to be set in an alternative way -# ----------------------------------------------------------------------------- - -def TOKEN(r): - def set_regex(f): - if hasattr(r, '__call__'): - f.regex = _get_regex(r) - else: - f.regex = r - return f - return set_regex diff --git a/ply/yacc.py b/ply/yacc.py deleted file mode 100644 index 6528796..0000000 --- a/ply/yacc.py +++ /dev/null @@ -1,2482 +0,0 @@ -# ----------------------------------------------------------------------------- -# ply: yacc.py -# -# Copyright (C) 2001-2022 -# David M. Beazley (Dabeaz LLC) -# All rights reserved. -# -# Latest version: https://github.com/dabeaz/ply -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of David Beazley or Dabeaz LLC may be used to -# endorse or promote products derived from this software without -# specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ----------------------------------------------------------------------------- -# -# This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammar is specified by supplying the BNF inside -# Python documentation strings. The inspiration for this technique was borrowed -# from John Aycock's Spark parsing system. PLY might be viewed as cross between -# Spark and the GNU bison utility. -# -# The current implementation is only somewhat object-oriented. The -# LR parser itself is defined in terms of an object (which allows multiple -# parsers to co-exist). However, most of the variables used during table -# construction are defined in terms of global variables. Users shouldn't -# notice unless they are trying to define multiple parsers at the same -# time using threads (in which case they should have their head examined). -# -# This implementation supports both SLR and LALR(1) parsing. LALR(1) -# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), -# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, -# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced -# by the more efficient DeRemer and Pennello algorithm. -# -# :::::::: WARNING ::::::: -# -# Construction of LR parsing tables is fairly complicated and expensive. -# To make this module run fast, a *LOT* of work has been put into -# optimization---often at the expensive of readability and what might -# consider to be good Python "coding style." Modify the code at your -# own risk! -# ---------------------------------------------------------------------------- - -import re -import types -import sys -import inspect - -#----------------------------------------------------------------------------- -# === User configurable parameters === -# -# Change these to modify the default behavior of yacc (if you wish) -#----------------------------------------------------------------------------- - -yaccdebug = False # Debugging mode. If set, yacc generates a - # a 'parser.out' file in the current directory - -debug_file = 'parser.out' # Default name of the debugging file -error_count = 3 # Number of symbols that must be shifted to leave recovery mode -resultlimit = 40 # Size limit of results when running in debug mode. - -MAXINT = sys.maxsize - -# This object is a stand-in for a logging object created by the -# logging module. PLY will use this by default to create things -# such as the parser.out file. If a user wants more detailed -# information, they can create their own logging object and pass -# it into PLY. 
- -class PlyLogger(object): - def __init__(self, f): - self.f = f - - def debug(self, msg, *args, **kwargs): - self.f.write((msg % args) + '\n') - - info = debug - - def warning(self, msg, *args, **kwargs): - self.f.write('WARNING: ' + (msg % args) + '\n') - - def error(self, msg, *args, **kwargs): - self.f.write('ERROR: ' + (msg % args) + '\n') - - critical = debug - -# Null logger is used when no output is generated. Does nothing. -class NullLogger(object): - def __getattribute__(self, name): - return self - - def __call__(self, *args, **kwargs): - return self - -# Exception raised for yacc-related errors -class YaccError(Exception): - pass - -# Format the result message that the parser produces when running in debug mode. -def format_result(r): - repr_str = repr(r) - if '\n' in repr_str: - repr_str = repr(repr_str) - if len(repr_str) > resultlimit: - repr_str = repr_str[:resultlimit] + ' ...' - result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) - return result - -# Format stack entries when the parser is running in debug mode -def format_stack_entry(r): - repr_str = repr(r) - if '\n' in repr_str: - repr_str = repr(repr_str) - if len(repr_str) < 16: - return repr_str - else: - return '<%s @ 0x%x>' % (type(r).__name__, id(r)) - -#----------------------------------------------------------------------------- -# === LR Parsing Engine === -# -# The following classes are used for the LR parser itself. These are not -# used during table construction and are independent of the actual LR -# table generation algorithm -#----------------------------------------------------------------------------- - -# This class is used to hold non-terminal grammar symbols during parsing. -# It normally has the following attributes set: -# .type = Grammar symbol type -# .value = Symbol value -# .lineno = Starting line number -# .endlineno = Ending line number (optional, set automatically) -# .lexpos = Starting lex position -# .endlexpos = Ending lex position (optional, set automatically) - -class YaccSymbol: - def __str__(self): - return self.type - - def __repr__(self): - return str(self) - -# This class is a wrapper around the objects actually passed to each -# grammar rule. Index lookup and assignment actually assign the -# .value attribute of the underlying YaccSymbol object. -# The lineno() method returns the line number of a given -# item (or 0 if not defined). The linespan() method returns -# a tuple of (startline,endline) representing the range of lines -# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) -# representing the range of positional information for a symbol. 
- -class YaccProduction: - def __init__(self, s, stack=None): - self.slice = s - self.stack = stack - self.lexer = None - self.parser = None - - def __getitem__(self, n): - if isinstance(n, slice): - return [s.value for s in self.slice[n]] - elif n >= 0: - return self.slice[n].value - else: - return self.stack[n].value - - def __setitem__(self, n, v): - self.slice[n].value = v - - def __getslice__(self, i, j): - return [s.value for s in self.slice[i:j]] - - def __len__(self): - return len(self.slice) - - def lineno(self, n): - return getattr(self.slice[n], 'lineno', 0) - - def set_lineno(self, n, lineno): - self.slice[n].lineno = lineno - - def linespan(self, n): - startline = getattr(self.slice[n], 'lineno', 0) - endline = getattr(self.slice[n], 'endlineno', startline) - return startline, endline - - def lexpos(self, n): - return getattr(self.slice[n], 'lexpos', 0) - - def set_lexpos(self, n, lexpos): - self.slice[n].lexpos = lexpos - - def lexspan(self, n): - startpos = getattr(self.slice[n], 'lexpos', 0) - endpos = getattr(self.slice[n], 'endlexpos', startpos) - return startpos, endpos - - def error(self): - raise SyntaxError - -# ----------------------------------------------------------------------------- -# == LRParser == -# -# The LR Parsing engine. -# ----------------------------------------------------------------------------- - -class LRParser: - def __init__(self, lrtab, errorf): - self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf - self.set_defaulted_states() - self.errorok = True - - def errok(self): - self.errorok = True - - def restart(self): - del self.statestack[:] - del self.symstack[:] - sym = YaccSymbol() - sym.type = '$end' - self.symstack.append(sym) - self.statestack.append(0) - - # Defaulted state support. - # This method identifies parser states where there is only one possible reduction action. - # For such states, the parser can make a choose to make a rule reduction without consuming - # the next look-ahead token. This delayed invocation of the tokenizer can be useful in - # certain kinds of advanced parsing situations where the lexer and parser interact with - # each other or change states (i.e., manipulation of scope, lexer states, etc.). - # - # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions - def set_defaulted_states(self): - self.defaulted_states = {} - for state, actions in self.action.items(): - rules = list(actions.values()) - if len(rules) == 1 and rules[0] < 0: - self.defaulted_states[state] = rules[0] - - def disable_defaulted_states(self): - self.defaulted_states = {} - - # parse(). - # - # This is the core parsing engine. To operate, it requires a lexer object. - # Two options are provided. The debug flag turns on debugging so that you can - # see the various rule reductions and parsing steps. tracking turns on position - # tracking. In this mode, symbols will record the starting/ending line number and - # character index. - - def parse(self, input=None, lexer=None, debug=False, tracking=False): - # If debugging has been specified as a flag, turn it into a logging object - if isinstance(debug, int) and debug: - debug = PlyLogger(sys.stderr) - - lookahead = None # Current lookahead symbol - lookaheadstack = [] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) 
- prod = self.productions # Local reference to production list (to avoid lookup on self.) - defaulted_states = self.defaulted_states # Local reference to defaulted states - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - if debug: - debug.info('PLY: PARSE DEBUG START') - - # If no lexer was given, we will try to use the lex module - if not lexer: - from . import lex - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - # Set the token function - get_token = self.token = lexer.token - - # Set up the state and symbol stacks - statestack = self.statestack = [] # Stack of parsing states - symstack = self.symstack = [] # Stack of grammar symbols - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while True: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - if debug: - debug.debug('State : %s', state) - - if state not in defaulted_states: - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - else: - t = defaulted_states[state] - if debug: - debug.debug('Defaulted state %s: Reduce using %d', state, -t) - - if debug: - debug.debug('Stack : %s', - ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - if debug: - debug.debug('Action : Shift and goto state %s', t) - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: - errorcount -= 1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - if debug: - if plen: - debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, - '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', - goto[statestack[-1-plen]][pname]) - else: - debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], - goto[statestack[-1]][pname]) - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1, 'endlineno', t1.lineno) - sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. 
-
-                        pslice.slice = targ
-
-                        try:
-                            # Call the grammar rule with our special slice object
-                            del symstack[-plen:]
-                            self.state = state
-                            p.callable(pslice)
-                            del statestack[-plen:]
-                            if debug:
-                                debug.info('Result : %s', format_result(pslice[0]))
-                            symstack.append(sym)
-                            state = goto[statestack[-1]][pname]
-                            statestack.append(state)
-                        except SyntaxError:
-                            # If an error was set, enter error recovery state
-                            lookaheadstack.append(lookahead)    # Save the current lookahead token
-                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
-                            statestack.pop()                    # Pop back one state (before the reduce)
-                            state = statestack[-1]
-                            sym.type = 'error'
-                            sym.value = 'error'
-                            lookahead = sym
-                            errorcount = error_count
-                            self.errorok = False
-
-                        continue
-
-                    else:
-
-                        if tracking:
-                            sym.lineno = lexer.lineno
-                            sym.lexpos = lexer.lexpos
-
-                        targ = [sym]
-
-                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-                        # The code enclosed in this section is duplicated
-                        # above as a performance optimization.  Make sure
-                        # changes get made in both locations.
-
-                        pslice.slice = targ
-
-                        try:
-                            # Call the grammar rule with our special slice object
-                            self.state = state
-                            p.callable(pslice)
-                            if debug:
-                                debug.info('Result : %s', format_result(pslice[0]))
-                            symstack.append(sym)
-                            state = goto[statestack[-1]][pname]
-                            statestack.append(state)
-                        except SyntaxError:
-                            # If an error was set, enter error recovery state
-                            lookaheadstack.append(lookahead)    # Save the current lookahead token
-                            statestack.pop()                    # Pop back one state (before the reduce)
-                            state = statestack[-1]
-                            sym.type = 'error'
-                            sym.value = 'error'
-                            lookahead = sym
-                            errorcount = error_count
-                            self.errorok = False
-
-                        continue
-
-                if t == 0:
-                    n = symstack[-1]
-                    result = getattr(n, 'value', None)
-
-                    if debug:
-                        debug.info('Done   : Returning %s', format_result(result))
-                        debug.info('PLY: PARSE DEBUG END')
-
-                    return result
-
-            if t is None:
-
-                if debug:
-                    debug.error('Error  : %s',
-                                ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
-
-                # We have some kind of parsing error here.  To handle
-                # this, we are going to push the current token onto
-                # the tokenstack and replace it with an 'error' token.
-                # If there are any synchronization rules, they may
-                # catch it.
-                #
-                # In addition to pushing the error token, we call the
-                # user defined p_error() function if this is the
-                # first syntax error.  This function is only called if
-                # errorcount == 0.
-                if errorcount == 0 or self.errorok:
-                    errorcount = error_count
-                    self.errorok = False
-                    errtoken = lookahead
-                    if errtoken.type == '$end':
-                        errtoken = None               # End of file!
-                    if self.errorfunc:
-                        if errtoken and not hasattr(errtoken, 'lexer'):
-                            errtoken.lexer = lexer
-                        self.state = state
-                        tok = self.errorfunc(errtoken)
-                        if self.errorok:
-                            # User must have done some kind of panic
-                            # mode recovery on their own.  The
-                            # returned token is the next lookahead
-                            lookahead = tok
-                            errtoken = None
-                            continue
-                    else:
-                        if errtoken:
-                            if hasattr(errtoken, 'lineno'):
-                                lineno = errtoken.lineno
-                            else:
-                                lineno = 0
-                            if lineno:
-                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
-                            else:
-                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
-                        else:
-                            sys.stderr.write('yacc: Parse error in input. EOF\n')
-                        return
-
-                else:
-                    errorcount = error_count
-
-                # case 1: the statestack only has 1 entry on it. If we're in this state, the
-                # entire parse has been rolled back and we're completely hosed.
-                # The token is discarded and we just keep going.
-
-                if len(statestack) <= 1 and lookahead.type != '$end':
-                    lookahead = None
-                    errtoken = None
-                    state = 0
-                    # Nuke the pushback stack
-                    del lookaheadstack[:]
-                    continue
-
-                # case 2: the statestack has a couple of entries on it, but we're
-                # at the end of the file. nuke the top entry and generate an error token
-
-                # Start nuking entries on the stack
-                if lookahead.type == '$end':
-                    # Whoa. We're really hosed here. Bail out
-                    return
-
-                if lookahead.type != 'error':
-                    sym = symstack[-1]
-                    if sym.type == 'error':
-                        # Hmmm. Error is on top of stack, we'll just nuke input
-                        # symbol and continue
-                        if tracking:
-                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
-                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
-                        lookahead = None
-                        continue
-
-                    # Create the error symbol for the first time and make it the new lookahead symbol
-                    t = YaccSymbol()
-                    t.type = 'error'
-
-                    if hasattr(lookahead, 'lineno'):
-                        t.lineno = t.endlineno = lookahead.lineno
-                    if hasattr(lookahead, 'lexpos'):
-                        t.lexpos = t.endlexpos = lookahead.lexpos
-                    t.value = lookahead
-                    lookaheadstack.append(lookahead)
-                    lookahead = t
-                else:
-                    sym = symstack.pop()
-                    if tracking:
-                        lookahead.lineno = sym.lineno
-                        lookahead.lexpos = sym.lexpos
-                    statestack.pop()
-                    state = statestack[-1]
-
-                continue
-
-        # If we're here, something really bad happened
-        raise RuntimeError('yacc: internal parser error!!!\n')
-
-# -----------------------------------------------------------------------------
-#                       === Grammar Representation ===
-#
-# The following functions, classes, and variables are used to represent and
-# manipulate the rules that make up a grammar.
-# -----------------------------------------------------------------------------
-
-# regex matching identifiers
-_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
-
-# -----------------------------------------------------------------------------
-# class Production:
-#
-# This class stores the raw information about a single production or grammar rule.
-# A grammar rule refers to a specification such as this:
-#
-#       expr : expr PLUS term
-#
-# Here are the basic attributes defined on all productions
-#
-#       name     - Name of the production.  For example 'expr'
-#       prod     - A list of symbols on the right side ['expr','PLUS','term']
-#       prec     - Production precedence level
-#       number   - Production number.
-#       func     - Function that executes on reduce
-#       file     - File where production function is defined
-#       lineno   - Line number where production function is defined
-#
-# The following attributes are defined or optional.
-#
-#       len       - Length of the production (number of symbols on right hand side)
-#       usyms     - Set of unique symbols found in the production
-# -----------------------------------------------------------------------------
-
-class Production(object):
-    reduced = 0
-    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
-        self.name = name
-        self.prod = tuple(prod)
-        self.number = number
-        self.func = func
-        self.callable = None
-        self.file = file
-        self.line = line
-        self.prec = precedence
-
-        # Internal settings used during table construction
-
-        self.len = len(self.prod)   # Length of the production
-
-        # Create a list of unique production symbols used in the production
-        self.usyms = []
-        for s in self.prod:
-            if s not in self.usyms:
-                self.usyms.append(s)
-
-        # List of all LR items for the production
-        self.lr_items = []
-        self.lr_next = None
-
-        # Create a string representation
-        if self.prod:
-            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
-        else:
-            self.str = '%s -> <empty>' % self.name
-
-    def __str__(self):
-        return self.str
-
-    def __repr__(self):
-        return 'Production(' + str(self) + ')'
-
-    def __len__(self):
-        return len(self.prod)
-
-    def __nonzero__(self):
-        return 1
-
-    def __getitem__(self, index):
-        return self.prod[index]
-
-    # Return the nth lr_item from the production (or None if at the end)
-    def lr_item(self, n):
-        if n > len(self.prod):
-            return None
-        p = LRItem(self, n)
-        # Precompute the list of productions immediately following.
-        try:
-            p.lr_after = self.Prodnames[p.prod[n+1]]
-        except (IndexError, KeyError):
-            p.lr_after = []
-        try:
-            p.lr_before = p.prod[n-1]
-        except IndexError:
-            p.lr_before = None
-        return p
-
-    # Bind the production function name to a callable
-    def bind(self, pdict):
-        if self.func:
-            self.callable = pdict[self.func]
-
-# -----------------------------------------------------------------------------
-# class LRItem
-#
-# This class represents a specific stage of parsing a production rule.  For
-# example:
-#
-#       expr : expr . PLUS term
-#
-# In the above, the "." represents the current location of the parse.  Here are
-# the basic attributes:
-#
-#       name       - Name of the production.  For example 'expr'
-#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
-#       number     - Production number.
-#
-#       lr_next    - Next LR item. Example, if we are ' expr -> expr . PLUS term'
-#                    then lr_next refers to 'expr -> expr PLUS . term'
-#       lr_index   - LR item index (location of the ".") in the prod list.
-#       lookaheads - LALR lookahead symbols for this item
-#       len        - Length of the production (number of symbols on right hand side)
-#       lr_after   - List of all productions that immediately follow
-#       lr_before  - Grammar symbol immediately before
-# -----------------------------------------------------------------------------
-
-class LRItem(object):
-    def __init__(self, p, n):
-        self.name = p.name
-        self.prod = list(p.prod)
-        self.number = p.number
-        self.lr_index = n
-        self.lookaheads = {}
-        self.prod.insert(n, '.')
-        self.prod = tuple(self.prod)
-        self.len = len(self.prod)
-        self.usyms = p.usyms
-
-    def __str__(self):
-        if self.prod:
-            s = '%s -> %s' % (self.name, ' '.join(self.prod))
-        else:
-            s = '%s -> <empty>' % self.name
-        return s
-
-    def __repr__(self):
-        return 'LRItem(' + str(self) + ')'
-
-# -----------------------------------------------------------------------------
-# rightmost_terminal()
-#
-# Return the rightmost terminal from a list of symbols.  Used in add_production()
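-#
-# For example (an illustrative call, assuming 'PLUS' is the only terminal
-# among the symbols):
-#
-#     rightmost_terminal(['expr', 'PLUS', 'term'], terminals)   # -> 'PLUS'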
-# -----------------------------------------------------------------------------
-def rightmost_terminal(symbols, terminals):
-    i = len(symbols) - 1
-    while i >= 0:
-        if symbols[i] in terminals:
-            return symbols[i]
-        i -= 1
-    return None
-
-# -----------------------------------------------------------------------------
-#                           === GRAMMAR CLASS ===
-#
-# The following class represents the contents of the specified grammar along
-# with various computed properties such as first sets, follow sets, LR items, etc.
-# This data is used for critical parts of the table generation process later.
-# -----------------------------------------------------------------------------
-
-class GrammarError(YaccError):
-    pass
-
-class Grammar(object):
-    def __init__(self, terminals):
-        self.Productions = [None]  # A list of all of the productions.  The first
-                                   # entry is always reserved for the purpose of
-                                   # building an augmented grammar
-
-        self.Prodnames = {}        # A dictionary mapping the names of nonterminals to a list of all
-                                   # productions of that nonterminal.
-
-        self.Prodmap = {}          # A dictionary that is only used to detect duplicate
-                                   # productions.
-
-        self.Terminals = {}        # A dictionary mapping the names of terminal symbols to a
-                                   # list of the rules where they are used.
-
-        for term in terminals:
-            self.Terminals[term] = []
-
-        self.Terminals['error'] = []
-
-        self.Nonterminals = {}     # A dictionary mapping names of nonterminals to a list
-                                   # of rule numbers where they are used.
-
-        self.First = {}            # A dictionary of precomputed FIRST(x) symbols
-
-        self.Follow = {}           # A dictionary of precomputed FOLLOW(x) symbols
-
-        self.Precedence = {}       # Precedence rules for each terminal. Contains tuples of the
-                                   # form ('right',level) or ('nonassoc', level) or ('left',level)
-
-        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
-                                    # This is only used to provide error checking and to generate
-                                    # a warning about unused precedence rules.
-
-        self.Start = None          # Starting symbol for the grammar
-
-
-    def __len__(self):
-        return len(self.Productions)
-
-    def __getitem__(self, index):
-        return self.Productions[index]
-
-    # -----------------------------------------------------------------------------
-    # set_precedence()
-    #
-    # Sets the precedence for a given terminal. assoc is the associativity such as
-    # 'left','right', or 'nonassoc'.  level is a numeric level.
-    #
-    # -----------------------------------------------------------------------------
-
-    def set_precedence(self, term, assoc, level):
-        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
-        if term in self.Precedence:
-            raise GrammarError('Precedence already specified for terminal %r' % term)
-        if assoc not in ['left', 'right', 'nonassoc']:
-            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
-        self.Precedence[term] = (assoc, level)
-
-    # -----------------------------------------------------------------------------
-    # add_production()
-    #
-    # Given an action function, this function assembles a production rule and
-    # computes its precedence level.
-    #
-    # The production rule is supplied as a list of symbols.   For example,
-    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
-    # symbols ['expr','PLUS','term'].
-    #
-    # Precedence is determined by the precedence of the rightmost terminal symbol
-    # in the production or the precedence of a terminal specified by %prec.
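-    #
-    # For example (a sketch borrowing the usual unary-minus idiom; the UMINUS
-    # token and rule names are assumptions, not symbols defined in this module):
-    #
-    #     precedence = (
-    #         ('left', 'PLUS', 'MINUS'),
-    #         ('right', 'UMINUS'),
-    #     )
-    #
-    #     def p_expr_uminus(p):
-    #         'expr : MINUS expr %prec UMINUS'
-    #         p[0] = -p[2]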
-    #
-    # A variety of error checks are performed to make sure production symbols
-    # are valid and that %prec is used correctly.
-    # -----------------------------------------------------------------------------
-
-    def add_production(self, prodname, syms, func=None, file='', line=0):
-
-        if prodname in self.Terminals:
-            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
-        if prodname == 'error':
-            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
-        if not _is_identifier.match(prodname):
-            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))
-
-        # Look for literal tokens
-        for n, s in enumerate(syms):
-            if s[0] in "'\"":
-                try:
-                    c = eval(s)
-                    if (len(c) > 1):
-                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
-                                           (file, line, s, prodname))
-                    if c not in self.Terminals:
-                        self.Terminals[c] = []
-                    syms[n] = c
-                    continue
-                except SyntaxError:
-                    pass
-            if not _is_identifier.match(s) and s != '%prec':
-                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))
-
-        # Determine the precedence level
-        if '%prec' in syms:
-            if syms[-1] == '%prec':
-                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
-            if syms[-2] != '%prec':
-                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
-                                   (file, line))
-            precname = syms[-1]
-            prodprec = self.Precedence.get(precname)
-            if not prodprec:
-                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
-            else:
-                self.UsedPrecedence.add(precname)
-            del syms[-2:]     # Drop %prec from the rule
-        else:
-            # If no %prec, precedence is determined by the rightmost terminal symbol
-            precname = rightmost_terminal(syms, self.Terminals)
-            prodprec = self.Precedence.get(precname, ('right', 0))
-
-        # See if the rule is already in the rulemap
-        map = '%s -> %s' % (prodname, syms)
-        if map in self.Prodmap:
-            m = self.Prodmap[map]
-            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
-                               'Previous definition at %s:%d' % (m.file, m.line))
-
-        # From this point on, everything is valid.  Create a new Production instance
-        pnumber = len(self.Productions)
-        if prodname not in self.Nonterminals:
-            self.Nonterminals[prodname] = []
-
-        # Add the production number to Terminals and Nonterminals
-        for t in syms:
-            if t in self.Terminals:
-                self.Terminals[t].append(pnumber)
-            else:
-                if t not in self.Nonterminals:
-                    self.Nonterminals[t] = []
-                self.Nonterminals[t].append(pnumber)
-
-        # Create a production and add it to the list of productions
-        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
-        self.Productions.append(p)
-        self.Prodmap[map] = p
-
-        # Add to the global productions list
-        try:
-            self.Prodnames[prodname].append(p)
-        except KeyError:
-            self.Prodnames[prodname] = [p]
-
-    # -----------------------------------------------------------------------------
-    # set_start()
-    #
-    # Sets the starting symbol and creates the augmented grammar.  Production
-    # rule 0 is S' -> start where start is the start symbol.
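-    #
-    # For instance (illustrative): with start='statement', rule 0 of the
-    # augmented grammar becomes
-    #
-    #       S' -> statement
-    #
-    # and reducing by it corresponds to accepting the input.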
-    # -----------------------------------------------------------------------------
-
-    def set_start(self, start=None):
-        if not start:
-            start = self.Productions[1].name
-        if start not in self.Nonterminals:
-            raise GrammarError('start symbol %s undefined' % start)
-        self.Productions[0] = Production(0, "S'", [start])
-        self.Nonterminals[start].append(0)
-        self.Start = start
-
-    # -----------------------------------------------------------------------------
-    # find_unreachable()
-    #
-    # Find all of the nonterminal symbols that can't be reached from the starting
-    # symbol.  Returns a list of nonterminals that can't be reached.
-    # -----------------------------------------------------------------------------
-
-    def find_unreachable(self):
-
-        # Mark all symbols that are reachable from a symbol s
-        def mark_reachable_from(s):
-            if s in reachable:
-                return
-            reachable.add(s)
-            for p in self.Prodnames.get(s, []):
-                for r in p.prod:
-                    mark_reachable_from(r)
-
-        reachable = set()
-        mark_reachable_from(self.Productions[0].prod[0])
-        return [s for s in self.Nonterminals if s not in reachable]
-
-    # -----------------------------------------------------------------------------
-    # infinite_cycles()
-    #
-    # This function looks at the various parsing rules and tries to detect
-    # infinite recursion cycles (grammar rules where there is no possible way
-    # to derive a string of only terminals).
-    # -----------------------------------------------------------------------------
-
-    def infinite_cycles(self):
-        terminates = {}
-
-        # Terminals:
-        for t in self.Terminals:
-            terminates[t] = True
-
-        terminates['$end'] = True
-
-        # Nonterminals:
-
-        # Initialize to false:
-        for n in self.Nonterminals:
-            terminates[n] = False
-
-        # Then propagate termination until no change:
-        while True:
-            some_change = False
-            for (n, pl) in self.Prodnames.items():
-                # Nonterminal n terminates iff any of its productions terminates.
-                for p in pl:
-                    # Production p terminates iff all of its rhs symbols terminate.
-                    for s in p.prod:
-                        if not terminates[s]:
-                            # The symbol s does not terminate,
-                            # so production p does not terminate.
-                            p_terminates = False
-                            break
-                    else:
-                        # didn't break from the loop,
-                        # so every symbol s terminates
-                        # so production p terminates.
-                        p_terminates = True
-
-                    if p_terminates:
-                        # symbol n terminates!
-                        if not terminates[n]:
-                            terminates[n] = True
-                            some_change = True
-                        # Don't need to consider any more productions for this n.
-                        break
-
-            if not some_change:
-                break
-
-        infinite = []
-        for (s, term) in terminates.items():
-            if not term:
-                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
-                    # s is used-but-not-defined, and we've already warned of that,
-                    # so it would be overkill to say that it's also non-terminating.
-                    pass
-                else:
-                    infinite.append(s)
-
-        return infinite
-
-    # -----------------------------------------------------------------------------
-    # undefined_symbols()
-    #
-    # Find all symbols that were used in the grammar, but not defined as tokens or
-    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
-    # and prod is the production where the symbol was used.
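-    #
-    # For example (illustrative): if a rule reads 'expr : expr COMMA expr' but
-    # COMMA never appears in the token list, ('COMMA', <that production>) is
-    # reported.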
-    # -----------------------------------------------------------------------------
-    def undefined_symbols(self):
-        result = []
-        for p in self.Productions:
-            if not p:
-                continue
-
-            for s in p.prod:
-                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
-                    result.append((s, p))
-        return result
-
-    # -----------------------------------------------------------------------------
-    # unused_terminals()
-    #
-    # Find all terminals that were defined, but not used by the grammar.  Returns
-    # a list of all symbols.
-    # -----------------------------------------------------------------------------
-    def unused_terminals(self):
-        unused_tok = []
-        for s, v in self.Terminals.items():
-            if s != 'error' and not v:
-                unused_tok.append(s)
-
-        return unused_tok
-
-    # ------------------------------------------------------------------------------
-    # unused_rules()
-    #
-    # Find all grammar rules that were defined, but not used (maybe not reachable)
-    # Returns a list of productions.
-    # ------------------------------------------------------------------------------
-
-    def unused_rules(self):
-        unused_prod = []
-        for s, v in self.Nonterminals.items():
-            if not v:
-                p = self.Prodnames[s][0]
-                unused_prod.append(p)
-        return unused_prod
-
-    # -----------------------------------------------------------------------------
-    # unused_precedence()
-    #
-    # Returns a list of tuples (term, precedence) corresponding to precedence
-    # rules that were never used by the grammar.  term is the name of the terminal
-    # on which precedence was applied and precedence is a string such as 'left' or
-    # 'right' corresponding to the type of precedence.
-    # -----------------------------------------------------------------------------
-
-    def unused_precedence(self):
-        unused = []
-        for termname in self.Precedence:
-            if not (termname in self.Terminals or termname in self.UsedPrecedence):
-                unused.append((termname, self.Precedence[termname][0]))
-
-        return unused
-
-    # -------------------------------------------------------------------------
-    # _first()
-    #
-    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
-    #
-    # During execution of compute_first(), the result may be incomplete.
-    # Afterward (e.g., when called from compute_follow()), it will be complete.
-    # -------------------------------------------------------------------------
-    def _first(self, beta):
-
-        # We are computing First(x1,x2,x3,...,xn)
-        result = []
-        for x in beta:
-            x_produces_empty = False
-
-            # Add all the non-<empty> symbols of First[x] to the result.
-            for f in self.First[x]:
-                if f == '<empty>':
-                    x_produces_empty = True
-                else:
-                    if f not in result:
-                        result.append(f)
-
-            if x_produces_empty:
-                # We have to consider the next x in beta,
-                # i.e. stay in the loop.
-                pass
-            else:
-                # We don't have to consider any further symbols in beta.
-                break
-        else:
-            # There was no 'break' from the loop,
-            # so x_produces_empty was true for all x in beta,
-            # so beta produces empty as well.
-            result.append('<empty>')
-
-        return result
-
-    # -------------------------------------------------------------------------
-    # compute_first()
-    #
-    # Compute the value of FIRST1(X) for all symbols
-    # -------------------------------------------------------------------------
-    def compute_first(self):
-        if self.First:
-            return self.First
-
-        # Terminals:
-        for t in self.Terminals:
-            self.First[t] = [t]
-
-        self.First['$end'] = ['$end']
-
-        # Nonterminals:
-
-        # Initialize to the empty set:
-        for n in self.Nonterminals:
-            self.First[n] = []
-
-        # Then propagate symbols until no change:
-        while True:
-            some_change = False
-            for n in self.Nonterminals:
-                for p in self.Prodnames[n]:
-                    for f in self._first(p.prod):
-                        if f not in self.First[n]:
-                            self.First[n].append(f)
-                            some_change = True
-            if not some_change:
-                break
-
-        return self.First
-
-    # ---------------------------------------------------------------------
-    # compute_follow()
-    #
-    # Computes all of the follow sets for every non-terminal symbol.  The
-    # follow set is the set of all symbols that might follow a given
-    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
-    # ---------------------------------------------------------------------
-    def compute_follow(self, start=None):
-        # If already computed, return the result
-        if self.Follow:
-            return self.Follow
-
-        # If first sets not computed yet, do that first.
-        if not self.First:
-            self.compute_first()
-
-        # Add '$end' to the follow list of the start symbol
-        for k in self.Nonterminals:
-            self.Follow[k] = []
-
-        if not start:
-            start = self.Productions[1].name
-
-        self.Follow[start] = ['$end']
-
-        while True:
-            didadd = False
-            for p in self.Productions[1:]:
-                # Here is the production set
-                for i, B in enumerate(p.prod):
-                    if B in self.Nonterminals:
-                        # Okay. We got a non-terminal in a production
-                        fst = self._first(p.prod[i+1:])
-                        hasempty = False
-                        for f in fst:
-                            if f != '<empty>' and f not in self.Follow[B]:
-                                self.Follow[B].append(f)
-                                didadd = True
-                            if f == '<empty>':
-                                hasempty = True
-                        if hasempty or i == (len(p.prod)-1):
-                            # Add elements of Follow(p.name) to Follow(B)
-                            for f in self.Follow[p.name]:
-                                if f not in self.Follow[B]:
-                                    self.Follow[B].append(f)
-                                    didadd = True
-            if not didadd:
-                break
-        return self.Follow
-
-
-    # -----------------------------------------------------------------------------
-    # build_lritems()
-    #
-    # This function walks the list of productions and builds a complete set of the
-    # LR items.  The LR items are stored in two ways:  First, they are uniquely
-    # numbered and placed in the list _lritems.  Second, a linked list of LR items
-    # is built for each production.  For example:
-    #
-    #   E -> E PLUS E
-    #
-    # Creates the list
-    #
-    #    [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E .]
-    # -----------------------------------------------------------------------------
-
-    def build_lritems(self):
-        for p in self.Productions:
-            lastlri = p
-            i = 0
-            lr_items = []
-            while True:
-                if i > len(p):
-                    lri = None
-                else:
-                    lri = LRItem(p, i)
-                    # Precompute the list of productions immediately following
-                    try:
-                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
-                    except (IndexError, KeyError):
-                        lri.lr_after = []
-                    try:
-                        lri.lr_before = lri.prod[i-1]
-                    except IndexError:
-                        lri.lr_before = None
-
-                lastlri.lr_next = lri
-                if not lri:
-                    break
-                lr_items.append(lri)
-                lastlri = lri
-                i += 1
-            p.lr_items = lr_items
-
-# -----------------------------------------------------------------------------
-#                            === LR Generator ===
-#
-# The following classes and functions are used to generate LR parsing tables on
-# a grammar.
-# -----------------------------------------------------------------------------
-
-# -----------------------------------------------------------------------------
-# digraph()
-# traverse()
-#
-# The following two functions are used to compute set-valued functions
-# of the form:
-#
-#     F(x) = F'(x) U U{F(y) | x R y}
-#
-# This is used to compute the values of Read() sets as well as FOLLOW sets
-# in LALR(1) generation.
-#
-# Inputs:  X    - An input set
-#          R    - A relation
-#          FP   - Set-valued function
-# ------------------------------------------------------------------------------
-
-def digraph(X, R, FP):
-    N = {}
-    for x in X:
-        N[x] = 0
-    stack = []
-    F = {}
-    for x in X:
-        if N[x] == 0:
-            traverse(x, N, stack, F, X, R, FP)
-    return F
-
-def traverse(x, N, stack, F, X, R, FP):
-    stack.append(x)
-    d = len(stack)
-    N[x] = d
-    F[x] = FP(x)             # F(X) <- F'(x)
-
-    rel = R(x)               # Get y's related to x
-    for y in rel:
-        if N[y] == 0:
-            traverse(y, N, stack, F, X, R, FP)
-        N[x] = min(N[x], N[y])
-        for a in F.get(y, []):
-            if a not in F[x]:
-                F[x].append(a)
-    if N[x] == d:
-        N[stack[-1]] = MAXINT
-        F[stack[-1]] = F[x]
-        element = stack.pop()
-        while element != x:
-            N[stack[-1]] = MAXINT
-            F[stack[-1]] = F[x]
-            element = stack.pop()
-
-class LALRError(YaccError):
-    pass
-
-
-# -----------------------------------------------------------------------------
-#                             == LRTable ==
-#
-# This class implements the LR table generation algorithm.  There are no
-# public methods.
-# -----------------------------------------------------------------------------
-
-class LRTable:
-    def __init__(self, grammar, log=None):
-        self.grammar = grammar
-
-        # Set up the logger
-        if not log:
-            log = NullLogger()
-        self.log = log
-
-        # Internal attributes
-        self.lr_action = {}        # Action table
-        self.lr_goto = {}          # Goto table
-        self.lr_productions = grammar.Productions  # Copy of grammar Production array
-        self.lr_goto_cache = {}    # Cache of computed gotos
-        self.lr0_cidhash = {}      # Cache of closures
-
-        self._add_count = 0        # Internal counter used to detect cycles
-
-        # Diagnostic information filled in by the table generator
-        self.sr_conflict = 0
-        self.rr_conflict = 0
-        self.conflicts = []        # List of conflicts
-
-        self.sr_conflicts = []
-        self.rr_conflicts = []
-
-        # Build the tables
-        self.grammar.build_lritems()
-        self.grammar.compute_first()
-        self.grammar.compute_follow()
-        self.lr_parse_table()
-
-    # Bind all production function names to callable objects in pdict
-    def bind_callables(self, pdict):
-        for p in self.lr_productions:
-            p.bind(pdict)
-
-    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
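-    # As a rough sketch (using an assumed grammar fragment expr : expr PLUS term
-    # and expr : term, not rules defined here), closing over { S' -> . expr }
-    # repeatedly adds every item whose dot sits at the start of a rule for a
-    # nonterminal appearing just after a dot:
-    #
-    #     { S' -> . expr,
-    #       expr -> . expr PLUS term,
-    #       expr -> . term }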
-
-    def lr0_closure(self, I):
-        self._add_count += 1
-
-        # Add everything in I to J
-        J = I[:]
-        didadd = True
-        while didadd:
-            didadd = False
-            for j in J:
-                for x in j.lr_after:
-                    if getattr(x, 'lr0_added', 0) == self._add_count:
-                        continue
-                    # Add B --> .G to J
-                    J.append(x.lr_next)
-                    x.lr0_added = self._add_count
-                    didadd = True
-
-        return J
-
-    # Compute the LR(0) goto function goto(I,X) where I is a set
-    # of LR(0) items and X is a grammar symbol.   This function is written
-    # in a way that guarantees uniqueness of the generated goto sets
-    # (i.e. the same goto set will never be returned as two different Python
-    # objects).  With uniqueness, we can later do fast set comparisons using
-    # id(obj) instead of element-wise comparison.
-
-    def lr0_goto(self, I, x):
-        # First we look for a previously cached entry
-        g = self.lr_goto_cache.get((id(I), x))
-        if g:
-            return g
-
-        # Now we generate the goto set in a way that guarantees uniqueness
-        # of the result
-
-        s = self.lr_goto_cache.get(x)
-        if not s:
-            s = {}
-            self.lr_goto_cache[x] = s
-
-        gs = []
-        for p in I:
-            n = p.lr_next
-            if n and n.lr_before == x:
-                s1 = s.get(id(n))
-                if not s1:
-                    s1 = {}
-                    s[id(n)] = s1
-                gs.append(n)
-                s = s1
-        g = s.get('$end')
-        if not g:
-            if gs:
-                g = self.lr0_closure(gs)
-                s['$end'] = g
-            else:
-                s['$end'] = gs
-        self.lr_goto_cache[(id(I), x)] = g
-        return g
-
-    # Compute the LR(0) sets-of-items function
-    def lr0_items(self):
-        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
-        i = 0
-        for I in C:
-            self.lr0_cidhash[id(I)] = i
-            i += 1
-
-        # Loop over the items in C and each grammar symbol
-        i = 0
-        while i < len(C):
-            I = C[i]
-            i += 1
-
-            # Collect all of the symbols that could possibly be in the goto(I,X) sets
-            asyms = {}
-            for ii in I:
-                for s in ii.usyms:
-                    asyms[s] = None
-
-            for x in asyms:
-                g = self.lr0_goto(I, x)
-                if not g or id(g) in self.lr0_cidhash:
-                    continue
-                self.lr0_cidhash[id(g)] = len(C)
-                C.append(g)
-
-        return C
-
-    # -----------------------------------------------------------------------------
-    #                       ==== LALR(1) Parsing ====
-    #
-    # LALR(1) parsing is almost exactly the same as SLR except that instead of
-    # relying upon Follow() sets when performing reductions, a more selective
-    # lookahead set that incorporates the state of the LR(0) machine is utilized.
-    # Thus, we mainly just have to focus on calculating the lookahead sets.
-    #
-    # The method used here is due to DeRemer and Pennello (1982).
-    #
-    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
-    # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
-    # Vol. 4, No. 4, Oct. 1982, pp. 615-649
-    #
-    # Further details can also be found in:
-    #
-    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
-    #  McGraw-Hill Book Company, (1985).
-    #
-    # -----------------------------------------------------------------------------
-
-    # -----------------------------------------------------------------------------
-    # compute_nullable_nonterminals()
-    #
-    # Creates a set containing all of the non-terminals that might produce
-    # an empty production.
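-    #
-    # For example (illustrative): with the rules 'opt_semi : SEMI' and
-    # 'opt_semi :' (an empty right-hand side), opt_semi is nullable, as is
-    # any nonterminal whose entire right-hand side consists of nullable
-    # symbols.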
-    # -----------------------------------------------------------------------------
-
-    def compute_nullable_nonterminals(self):
-        nullable = set()
-        num_nullable = 0
-        while True:
-            for p in self.grammar.Productions[1:]:
-                if p.len == 0:
-                    nullable.add(p.name)
-                    continue
-                for t in p.prod:
-                    if t not in nullable:
-                        break
-                else:
-                    nullable.add(p.name)
-            if len(nullable) == num_nullable:
-                break
-            num_nullable = len(nullable)
-        return nullable
-
-    # -----------------------------------------------------------------------------
-    # find_nonterminal_transitions(C)
-    #
-    # Given a set of LR(0) items, this function finds all of the non-terminal
-    # transitions.  These are transitions in which a dot appears immediately before
-    # a non-terminal.  Returns a list of tuples of the form (state,N) where state
-    # is the state number and N is the nonterminal symbol.
-    #
-    # The input C is the set of LR(0) items.
-    # -----------------------------------------------------------------------------
-
-    def find_nonterminal_transitions(self, C):
-        trans = []
-        for stateno, state in enumerate(C):
-            for p in state:
-                if p.lr_index < p.len - 1:
-                    t = (stateno, p.prod[p.lr_index+1])
-                    if t[1] in self.grammar.Nonterminals:
-                        if t not in trans:
-                            trans.append(t)
-        return trans
-
-    # -----------------------------------------------------------------------------
-    # dr_relation()
-    #
-    # Computes the DR(p,A) relationships for non-terminal transitions.  The input
-    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
-    #
-    # Returns a list of terminals.
-    # -----------------------------------------------------------------------------
-
-    def dr_relation(self, C, trans, nullable):
-        state, N = trans
-        terms = []
-
-        g = self.lr0_goto(C[state], N)
-        for p in g:
-            if p.lr_index < p.len - 1:
-                a = p.prod[p.lr_index+1]
-                if a in self.grammar.Terminals:
-                    if a not in terms:
-                        terms.append(a)
-
-        # This extra bit is to handle the start state
-        if state == 0 and N == self.grammar.Productions[0].prod[0]:
-            terms.append('$end')
-
-        return terms
-
-    # -----------------------------------------------------------------------------
-    # reads_relation()
-    #
-    # Computes the READS() relation (p,A) READS (t,C).
-    # -----------------------------------------------------------------------------
-
-    def reads_relation(self, C, trans, empty):
-        # Look for empty transitions
-        rel = []
-        state, N = trans
-
-        g = self.lr0_goto(C[state], N)
-        j = self.lr0_cidhash.get(id(g), -1)
-        for p in g:
-            if p.lr_index < p.len - 1:
-                a = p.prod[p.lr_index + 1]
-                if a in empty:
-                    rel.append((j, a))
-
-        return rel
-
-    # -----------------------------------------------------------------------------
-    # compute_lookback_includes()
-    #
-    # Determines the lookback and includes relations
-    #
-    # LOOKBACK:
-    #
-    # This relation is determined by running the LR(0) state machine forward.
-    # For example, starting with a production "N : . A B C", we run it forward
-    # to obtain "N : A B C ."   We then build a relationship between this final
-    # state and the starting state.   These relationships are stored in a dictionary
-    # lookdict.
-    #
-    # INCLUDES:
-    #
-    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
-    #
-    # This relation is used to determine non-terminal transitions that occur
-    # inside of other non-terminal transition states.
-    # (p,A) INCLUDES (p',B) if the following holds:
-    #
-    #       B -> LAT, where T -> epsilon and p' -L-> p
-    #
-    # L is essentially a prefix (which may be empty), T is a suffix that must be
-    # able to derive an empty string.  State p' must lead to state p with the string L.
-    #
-    # -----------------------------------------------------------------------------
-
-    def compute_lookback_includes(self, C, trans, nullable):
-        lookdict = {}          # Dictionary of lookback relations
-        includedict = {}       # Dictionary of include relations
-
-        # Make a dictionary of non-terminal transitions
-        dtrans = {}
-        for t in trans:
-            dtrans[t] = 1
-
-        # Loop over all transitions and compute lookbacks and includes
-        for state, N in trans:
-            lookb = []
-            includes = []
-            for p in C[state]:
-                if p.name != N:
-                    continue
-
-                # Okay, we have a name match.  We now follow the production all the way
-                # through the state machine until we get the . on the right hand side
-
-                lr_index = p.lr_index
-                j = state
-                while lr_index < p.len - 1:
-                    lr_index = lr_index + 1
-                    t = p.prod[lr_index]
-
-                    # Check to see if this symbol and state are a non-terminal transition
-                    if (j, t) in dtrans:
-                        # Yes.  Okay, there is some chance that this is an includes relation
-                        # the only way to know for certain is whether the rest of the
-                        # production derives empty
-
-                        li = lr_index + 1
-                        while li < p.len:
-                            if p.prod[li] in self.grammar.Terminals:
-                                break      # No, forget it
-                            if p.prod[li] not in nullable:
-                                break
-                            li = li + 1
-                        else:
-                            # Appears to be a relation between (j,t) and (state,N)
-                            includes.append((j, t))
-
-                    g = self.lr0_goto(C[j], t)              # Go to next set
-                    j = self.lr0_cidhash.get(id(g), -1)     # Go to next state
-
-                # When we get here, j is the final state, now we have to locate the production
-                for r in C[j]:
-                    if r.name != p.name:
-                        continue
-                    if r.len != p.len:
-                        continue
-                    i = 0
-                    # This loop is comparing a production ". A B C" with "A B C ."
-                    while i < r.lr_index:
-                        if r.prod[i] != p.prod[i+1]:
-                            break
-                        i = i + 1
-                    else:
-                        lookb.append((j, r))
-
-            for i in includes:
-                if i not in includedict:
-                    includedict[i] = []
-                includedict[i].append((state, N))
-            lookdict[(state, N)] = lookb
-
-        return lookdict, includedict
-
-    # -----------------------------------------------------------------------------
-    # compute_read_sets()
-    #
-    # Given a set of LR(0) items, this function computes the read sets.
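-    #
-    # In the DeRemer and Pennello formulation, the value solved below through
-    # digraph() follows the standard equation (shown here as a summary of the
-    # dr_relation()/reads_relation() plumbing above):
-    #
-    #       Read(p,A) = DR(p,A) U U{Read(r,C) | (p,A) READS (r,C)}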
-    #
-    # Inputs:  C        = Set of LR(0) items
-    #          ntrans   = Set of nonterminal transitions
-    #          nullable = Set of empty transitions
-    #
-    # Returns a set containing the read sets
-    # -----------------------------------------------------------------------------
-
-    def compute_read_sets(self, C, ntrans, nullable):
-        FP = lambda x: self.dr_relation(C, x, nullable)
-        R = lambda x: self.reads_relation(C, x, nullable)
-        F = digraph(ntrans, R, FP)
-        return F
-
-    # -----------------------------------------------------------------------------
-    # compute_follow_sets()
-    #
-    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
-    # and an include set, this function computes the follow sets
-    #
-    #      Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
-    #
-    # Inputs:
-    #          ntrans   = Set of nonterminal transitions
-    #          readsets = Readset (previously computed)
-    #          inclsets = Include sets (previously computed)
-    #
-    # Returns a set containing the follow sets
-    # -----------------------------------------------------------------------------
-
-    def compute_follow_sets(self, ntrans, readsets, inclsets):
-        FP = lambda x: readsets[x]
-        R = lambda x: inclsets.get(x, [])
-        F = digraph(ntrans, R, FP)
-        return F
-
-    # -----------------------------------------------------------------------------
-    # add_lookaheads()
-    #
-    # Attaches the lookahead symbols to grammar rules.
-    #
-    # Inputs:    lookbacks         -  Set of lookback relations
-    #            followset         -  Computed follow set
-    #
-    # This function directly attaches the lookaheads to productions contained
-    # in the lookbacks set
-    # -----------------------------------------------------------------------------
-
-    def add_lookaheads(self, lookbacks, followset):
-        for trans, lb in lookbacks.items():
-            # Loop over productions in lookback
-            for state, p in lb:
-                if state not in p.lookaheads:
-                    p.lookaheads[state] = []
-                f = followset.get(trans, [])
-                for a in f:
-                    if a not in p.lookaheads[state]:
-                        p.lookaheads[state].append(a)
-
-    # -----------------------------------------------------------------------------
-    # add_lalr_lookaheads()
-    #
-    # This function does all of the work of adding lookahead information for use
-    # with LALR parsing
-    # -----------------------------------------------------------------------------
-
-    def add_lalr_lookaheads(self, C):
-        # Determine all of the nullable nonterminals
-        nullable = self.compute_nullable_nonterminals()
-
-        # Find all non-terminal transitions
-        trans = self.find_nonterminal_transitions(C)
-
-        # Compute read sets
-        readsets = self.compute_read_sets(C, trans, nullable)
-
-        # Compute lookback/includes relations
-        lookd, included = self.compute_lookback_includes(C, trans, nullable)
-
-        # Compute LALR FOLLOW sets
-        followsets = self.compute_follow_sets(trans, readsets, included)
-
-        # Add all of the lookaheads
-        self.add_lookaheads(lookd, followsets)
-
-    # -----------------------------------------------------------------------------
-    # lr_parse_table()
-    #
-    # This function constructs the parse tables for SLR or LALR
-    # -----------------------------------------------------------------------------
-    def lr_parse_table(self):
-        Productions = self.grammar.Productions
-        Precedence = self.grammar.Precedence
-        goto = self.lr_goto         # Goto array
-        action = self.lr_action     # Action array
-        log = self.log              # Logger for output
-
-        actionp = {}                # Action production array (temporary)
-
-        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
-        # This determines the number of states
-
-        C = self.lr0_items()
-        self.add_lalr_lookaheads(C)
-
-        # Build the parser table, state by state
-        st = 0
-        for I in C:
-            # Loop over each production in I
-            actlist = []              # List of actions
-            st_action = {}
-            st_actionp = {}
-            st_goto = {}
-            log.info('')
-            log.info('state %d', st)
-            log.info('')
-            for p in I:
-                log.info('    (%d) %s', p.number, p)
-            log.info('')
-
-            for p in I:
-                if p.len == p.lr_index + 1:
-                    if p.name == "S'":
-                        # Start symbol. Accept!
-                        st_action['$end'] = 0
-                        st_actionp['$end'] = p
-                    else:
-                        # We are at the end of a production.  Reduce!
-                        laheads = p.lookaheads[st]
-                        for a in laheads:
-                            actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
-                            r = st_action.get(a)
-                            if r is not None:
-                                # Whoa. Have a shift/reduce or reduce/reduce conflict
-                                if r > 0:
-                                    # Need to decide on shift or reduce here
-                                    # By default we favor shifting. Need to add
-                                    # some precedence rules here.
-
-                                    # Shift precedence comes from the token
-                                    sprec, slevel = Precedence.get(a, ('right', 0))
-
-                                    # Reduce precedence comes from rule being reduced (p)
-                                    rprec, rlevel = Productions[p.number].prec
-
-                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
-                                        # We really need to reduce here.
-                                        st_action[a] = -p.number
-                                        st_actionp[a] = p
-                                        if not slevel and not rlevel:
-                                            log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
-                                            self.sr_conflicts.append((st, a, 'reduce'))
-                                        Productions[p.number].reduced += 1
-                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
-                                        st_action[a] = None
-                                    else:
-                                        # Hmmm. Guess we'll keep the shift
-                                        if not rlevel:
-                                            log.info('  ! shift/reduce conflict for %s resolved as shift', a)
-                                            self.sr_conflicts.append((st, a, 'shift'))
-                                elif r < 0:
-                                    # Reduce/reduce conflict.   In this case, we favor the rule
-                                    # that was defined first in the grammar file
-                                    oldp = Productions[-r]
-                                    pp = Productions[p.number]
-                                    if oldp.line > pp.line:
-                                        st_action[a] = -p.number
-                                        st_actionp[a] = p
-                                        chosenp, rejectp = pp, oldp
-                                        Productions[p.number].reduced += 1
-                                        Productions[oldp.number].reduced -= 1
-                                    else:
-                                        chosenp, rejectp = oldp, pp
-                                    self.rr_conflicts.append((st, chosenp, rejectp))
-                                    log.info('  ! reduce/reduce conflict for %s resolved using rule %d (%s)',
-                                             a, st_actionp[a].number, st_actionp[a])
-                                else:
-                                    raise LALRError('Unknown conflict in state %d' % st)
-                            else:
-                                st_action[a] = -p.number
-                                st_actionp[a] = p
-                                Productions[p.number].reduced += 1
-                else:
-                    i = p.lr_index
-                    a = p.prod[i+1]       # Get symbol right after the "."
-                    if a in self.grammar.Terminals:
-                        g = self.lr0_goto(I, a)
-                        j = self.lr0_cidhash.get(id(g), -1)
-                        if j >= 0:
-                            # We are in a shift state
-                            actlist.append((a, p, 'shift and go to state %d' % j))
-                            r = st_action.get(a)
-                            if r is not None:
-                                # Whoa. Have a shift/reduce or shift/shift conflict
-                                if r > 0:
-                                    if r != j:
-                                        raise LALRError('Shift/shift conflict in state %d' % st)
-                                elif r < 0:
-                                    # Do a precedence check.
-                                    #   -  if precedence of reduce rule is higher, we reduce.
-                                    #   -  if precedence of reduce is same and left assoc, we reduce.
-                                    #   -  otherwise we shift
-
-                                    # Shift precedence comes from the token
-                                    sprec, slevel = Precedence.get(a, ('right', 0))
-
-                                    # Reduce precedence comes from the rule that could have been reduced
-                                    rprec, rlevel = Productions[st_actionp[a].number].prec
-
-                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
-                                        # We decide to shift here... highest precedence to shift
-                                        Productions[st_actionp[a].number].reduced -= 1
-                                        st_action[a] = j
-                                        st_actionp[a] = p
-                                        if not rlevel:
-                                            log.info('  ! shift/reduce conflict for %s resolved as shift', a)
-                                            self.sr_conflicts.append((st, a, 'shift'))
-                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
-                                        st_action[a] = None
-                                    else:
-                                        # Hmmm. Guess we'll keep the reduce
-                                        if not slevel and not rlevel:
-                                            log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
-                                            self.sr_conflicts.append((st, a, 'reduce'))
-
-                                else:
-                                    raise LALRError('Unknown conflict in state %d' % st)
-                            else:
-                                st_action[a] = j
-                                st_actionp[a] = p
-
-            # Print the actions associated with each terminal
-            _actprint = {}
-            for a, p, m in actlist:
-                if a in st_action:
-                    if p is st_actionp[a]:
-                        log.info('    %-15s %s', a, m)
-                        _actprint[(a, m)] = 1
-            log.info('')
-            # Print the actions that were not used.   (debugging)
-            not_used = 0
-            for a, p, m in actlist:
-                if a in st_action:
-                    if p is not st_actionp[a]:
-                        if not (a, m) in _actprint:
-                            log.debug('  ! %-15s [ %s ]', a, m)
-                            not_used = 1
-                            _actprint[(a, m)] = 1
-            if not_used:
-                log.debug('')
-
-            # Construct the goto table for this state
-
-            nkeys = {}
-            for ii in I:
-                for s in ii.usyms:
-                    if s in self.grammar.Nonterminals:
-                        nkeys[s] = None
-            for n in nkeys:
-                g = self.lr0_goto(I, n)
-                j = self.lr0_cidhash.get(id(g), -1)
-                if j >= 0:
-                    st_goto[n] = j
-                    log.info('    %-30s shift and go to state %d', n, j)
-
-            action[st] = st_action
-            actionp[st] = st_actionp
-            goto[st] = st_goto
-            st += 1
-
-# -----------------------------------------------------------------------------
-#                          === INTROSPECTION ===
-#
-# The following functions and classes are used to implement the PLY
-# introspection features followed by the yacc() function itself.
-# -----------------------------------------------------------------------------
-
-# -----------------------------------------------------------------------------
-# get_caller_module_dict()
-#
-# This function returns a dictionary containing all of the symbols defined within
-# a caller further down the call stack.  This is used to get the environment
-# associated with the yacc() call if none was provided.
-# -----------------------------------------------------------------------------
-
-def get_caller_module_dict(levels):
-    f = sys._getframe(levels)
-    ldict = f.f_globals.copy()
-    if f.f_globals != f.f_locals:
-        ldict.update(f.f_locals)
-    return ldict
-
-# -----------------------------------------------------------------------------
-# parse_grammar()
-#
-# This takes a raw grammar rule string and parses it into production data
-# -----------------------------------------------------------------------------
-def parse_grammar(doc, file, line):
-    grammar = []
-    # Split the doc string into lines
-    pstrings = doc.splitlines()
-    lastp = None
-    dline = line
-    for ps in pstrings:
-        dline += 1
-        p = ps.split()
-        if not p:
-            continue
-        try:
-            if p[0] == '|':
-                # This is a continuation of a previous rule
-                if not lastp:
-                    raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
-                prodname = lastp
-                syms = p[1:]
-            else:
-                prodname = p[0]
-                lastp = prodname
-                syms = p[2:]
-                assign = p[1]
-                if assign != ':' and assign != '::=':
-                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
-
-            grammar.append((file, dline, prodname, syms))
-        except SyntaxError:
-            raise
-        except Exception:
-            raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
-
-    return grammar
-
-# -----------------------------------------------------------------------------
-# ParserReflect()
-#
-# This class represents information extracted for building a parser including
-# start symbol, error function, tokens, precedence list, action functions,
-# etc.
-# -----------------------------------------------------------------------------
-class ParserReflect(object):
-    def __init__(self, pdict, log=None):
-        self.pdict = pdict
-        self.start = None
-        self.error_func = None
-        self.tokens = None
-        self.modules = set()
-        self.grammar = []
-        self.error = False
-
-        if log is None:
-            self.log = PlyLogger(sys.stderr)
-        else:
-            self.log = log
-
-    # Get all of the basic information
-    def get_all(self):
-        self.get_start()
-        self.get_error_func()
-        self.get_tokens()
-        self.get_precedence()
-        self.get_pfunctions()
-
-    # Validate all of the information
-    def validate_all(self):
-        self.validate_start()
-        self.validate_error_func()
-        self.validate_tokens()
-        self.validate_precedence()
-        self.validate_pfunctions()
-        self.validate_modules()
-        return self.error
-
-    # Compute a signature over the grammar
-    def signature(self):
-        parts = []
-        try:
-            if self.start:
-                parts.append(self.start)
-            if self.prec:
-                parts.append(''.join([''.join(p) for p in self.prec]))
-            if self.tokens:
-                parts.append(' '.join(self.tokens))
-            for f in self.pfuncs:
-                if f[3]:
-                    parts.append(f[3])
-        except (TypeError, ValueError):
-            pass
-        return ''.join(parts)
-
-    # -----------------------------------------------------------------------------
-    # validate_modules()
-    #
-    # This method checks to see if there are duplicated p_rulename() functions
-    # in the parser module file.  Without this function, it is really easy for
-    # users to make mistakes by cutting and pasting code fragments (and it's a real
-    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
-    # we just do a little regular expression pattern matching of def statements
-    # to try and detect duplicates.
-    # -----------------------------------------------------------------------------
-
-    def validate_modules(self):
-        # Match def p_funcname(
-        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
-
-        for module in self.modules:
-            try:
-                lines, linen = inspect.getsourcelines(module)
-            except IOError:
-                continue
-
-            counthash = {}
-            for linen, line in enumerate(lines):
-                linen += 1
-                m = fre.match(line)
-                if m:
-                    name = m.group(1)
-                    prev = counthash.get(name)
-                    if not prev:
-                        counthash[name] = linen
-                    else:
-                        filename = inspect.getsourcefile(module)
-                        self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
-                                         filename, linen, name, prev)
-
-    # Get the start symbol
-    def get_start(self):
-        self.start = self.pdict.get('start')
-
-    # Validate the start symbol
-    def validate_start(self):
-        if self.start is not None:
-            if not isinstance(self.start, str):
-                self.log.error("'start' must be a string")
-
-    # Look for error handler
-    def get_error_func(self):
-        self.error_func = self.pdict.get('p_error')
-
-    # Validate the error function
-    def validate_error_func(self):
-        if self.error_func:
-            if isinstance(self.error_func, types.FunctionType):
-                ismethod = 0
-            elif isinstance(self.error_func, types.MethodType):
-                ismethod = 1
-            else:
-                self.log.error("'p_error' defined, but is not a function or method")
-                self.error = True
-                return
-
-            eline = self.error_func.__code__.co_firstlineno
-            efile = self.error_func.__code__.co_filename
-            module = inspect.getmodule(self.error_func)
-            self.modules.add(module)
-
-            argcount = self.error_func.__code__.co_argcount - ismethod
-            if argcount != 1:
-                self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
-                self.error = True
-
-    # Get the tokens map
-    def get_tokens(self):
-        tokens = self.pdict.get('tokens')
-        if not tokens:
-            self.log.error('No token list is defined')
-            self.error = True
-            return
-
-        if not isinstance(tokens, (list, tuple)):
-            self.log.error('tokens must be a list or tuple')
-            self.error = True
-            return
-
-        if not tokens:
-            self.log.error('tokens is empty')
-            self.error = True
-            return
-
-        self.tokens = sorted(tokens)
-
-    # Validate the tokens
-    def validate_tokens(self):
-        # Validate the tokens.
-        if 'error' in self.tokens:
-            self.log.error("Illegal token name 'error'. Is a reserved word")
-            self.error = True
-            return
-
-        terminals = set()
-        for n in self.tokens:
-            if n in terminals:
-                self.log.warning('Token %r multiply defined', n)
-            terminals.add(n)
-
-    # Get the precedence map (if any)
-    def get_precedence(self):
-        self.prec = self.pdict.get('precedence')
-
-    # Validate and parse the precedence map
-    def validate_precedence(self):
-        preclist = []
-        if self.prec:
-            if not isinstance(self.prec, (list, tuple)):
-                self.log.error('precedence must be a list or tuple')
-                self.error = True
-                return
-            for level, p in enumerate(self.prec):
-                if not isinstance(p, (list, tuple)):
-                    self.log.error('Bad precedence table')
-                    self.error = True
-                    return
-
-                if len(p) < 2:
-                    self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
-                    self.error = True
-                    return
-                assoc = p[0]
-                if not isinstance(assoc, str):
-                    self.log.error('precedence associativity must be a string')
-                    self.error = True
-                    return
-                for term in p[1:]:
-                    if not isinstance(term, str):
-                        self.log.error('precedence items must be strings')
-                        self.error = True
-                        return
-                    preclist.append((term, assoc, level+1))
-        self.preclist = preclist
-
-    # Get all p_functions from the grammar
-    def get_pfunctions(self):
-        p_functions = []
-        for name, item in self.pdict.items():
-            if not name.startswith('p_') or name == 'p_error':
-                continue
-            if isinstance(item, (types.FunctionType, types.MethodType)):
-                line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
-                module = inspect.getmodule(item)
-                p_functions.append((line, module, name, item.__doc__))
-
-        # Sort all of the actions by line number; make sure to stringify
-        # modules to make them sortable, since `line` may not uniquely sort all
-        # p functions
-        p_functions.sort(key=lambda p_function: (
-            p_function[0],
-            str(p_function[1]),
-            p_function[2],
-            p_function[3]))
-        self.pfuncs = p_functions
-
-    # Validate all of the p_functions
-    def validate_pfunctions(self):
-        grammar = []
-        # Check for non-empty symbols
-        if len(self.pfuncs) == 0:
-            self.log.error('no rules of the form p_rulename are defined')
-            self.error = True
-            return
-
-        for line, module, name, doc in self.pfuncs:
-            file = inspect.getsourcefile(module)
-            func = self.pdict[name]
-            if isinstance(func, types.MethodType):
-                reqargs = 2
-            else:
-                reqargs = 1
-            if func.__code__.co_argcount > reqargs:
-                self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
-                self.error = True
-            elif func.__code__.co_argcount < reqargs:
-                self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
-                self.error = True
-            elif not func.__doc__:
-                self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
-                                 file, line, func.__name__)
-            else:
-                try:
-                    parsed_g = parse_grammar(doc, file, line)
-                    for g in parsed_g:
-                        grammar.append((name, g))
-                except SyntaxError as e:
-                    self.log.error(str(e))
-                    self.error = True
-
-                # Looks like a valid grammar rule
-                # Mark the file in which defined.
-                self.modules.add(module)
-
-        # Secondary validation step that looks for p_ definitions that are not functions
-        # or functions that look like they might be grammar rules.
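-        # For instance (an illustrative sketch, not code defined in this
-        # module), a definition like
-        #
-        #     def expr_rule(p):
-        #         'expr : expr PLUS term'
-        #
-        # has a rule-shaped docstring and would trigger the warning below about
-        # a possible grammar rule defined without the p_ prefix.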
-
-        for n, v in self.pdict.items():
-            if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
-                continue
-            if n.startswith('t_'):
-                continue
-            if n.startswith('p_') and n != 'p_error':
-                self.log.warning('%r not defined as a function', n)
-            if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
-                (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
-                if v.__doc__:
-                    try:
-                        doc = v.__doc__.split(' ')
-                        if doc[1] == ':':
-                            self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
-                                             v.__code__.co_filename, v.__code__.co_firstlineno, n)
-                    except IndexError:
-                        pass
-
-        self.grammar = grammar
-
-# -----------------------------------------------------------------------------
-# yacc(module)
-#
-# Build a parser
-# -----------------------------------------------------------------------------
-
-def yacc(*, debug=yaccdebug, module=None, start=None,
-         check_recursion=True, optimize=False, debugfile=debug_file,
-         debuglog=None, errorlog=None):
-
-    # Reference to the parsing method of the last built parser
-    global parse
-
-    if errorlog is None:
-        errorlog = PlyLogger(sys.stderr)
-
-    # Get the module dictionary used for the parser
-    if module:
-        _items = [(k, getattr(module, k)) for k in dir(module)]
-        pdict = dict(_items)
-        # If no __file__ or __package__ attributes are available, try to obtain them
-        # from the __module__ instead
-        if '__file__' not in pdict:
-            pdict['__file__'] = sys.modules[pdict['__module__']].__file__
-        if '__package__' not in pdict and '__module__' in pdict:
-            if hasattr(sys.modules[pdict['__module__']], '__package__'):
-                pdict['__package__'] = sys.modules[pdict['__module__']].__package__
-    else:
-        pdict = get_caller_module_dict(2)
-
-    # Set start symbol if it's specified directly using an argument
-    if start is not None:
-        pdict['start'] = start
-
-    # Collect parser information from the dictionary
-    pinfo = ParserReflect(pdict, log=errorlog)
-    pinfo.get_all()
-
-    if pinfo.error:
-        raise YaccError('Unable to build parser')
-
-    if debuglog is None:
-        if debug:
-            try:
-                debuglog = PlyLogger(open(debugfile, 'w'))
-            except IOError as e:
-                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
%s" % (debugfile, e)) - debuglog = NullLogger() - else: - debuglog = NullLogger() - - debuglog.info('Created by PLY (http://www.dabeaz.com/ply)') - - errors = False - - # Validate the parser information - if pinfo.validate_all(): - raise YaccError('Unable to build parser') - - if not pinfo.error_func: - errorlog.warning('no p_error() function is defined') - - # Create a grammar object - grammar = Grammar(pinfo.tokens) - - # Set precedence level for terminals - for term, assoc, level in pinfo.preclist: - try: - grammar.set_precedence(term, assoc, level) - except GrammarError as e: - errorlog.warning('%s', e) - - # Add productions to the grammar - for funcname, gram in pinfo.grammar: - file, line, prodname, syms = gram - try: - grammar.add_production(prodname, syms, funcname, file, line) - except GrammarError as e: - errorlog.error('%s', e) - errors = True - - # Set the grammar start symbols - try: - if start is None: - grammar.set_start(pinfo.start) - else: - grammar.set_start(start) - except GrammarError as e: - errorlog.error(str(e)) - errors = True - - if errors: - raise YaccError('Unable to build parser') - - # Verify the grammar structure - undefined_symbols = grammar.undefined_symbols() - for sym, prod in undefined_symbols: - errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) - errors = True - - unused_terminals = grammar.unused_terminals() - if unused_terminals: - debuglog.info('') - debuglog.info('Unused terminals:') - debuglog.info('') - for term in unused_terminals: - errorlog.warning('Token %r defined, but not used', term) - debuglog.info(' %s', term) - - # Print out all productions to the debug log - if debug: - debuglog.info('') - debuglog.info('Grammar') - debuglog.info('') - for n, p in enumerate(grammar.Productions): - debuglog.info('Rule %-5d %s', n, p) - - # Find unused non-terminals - unused_rules = grammar.unused_rules() - for prod in unused_rules: - errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) - - if len(unused_terminals) == 1: - errorlog.warning('There is 1 unused token') - if len(unused_terminals) > 1: - errorlog.warning('There are %d unused tokens', len(unused_terminals)) - - if len(unused_rules) == 1: - errorlog.warning('There is 1 unused rule') - if len(unused_rules) > 1: - errorlog.warning('There are %d unused rules', len(unused_rules)) - - if debug: - debuglog.info('') - debuglog.info('Terminals, with rules where they appear') - debuglog.info('') - terms = list(grammar.Terminals) - terms.sort() - for term in terms: - debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) - - debuglog.info('') - debuglog.info('Nonterminals, with rules where they appear') - debuglog.info('') - nonterms = list(grammar.Nonterminals) - nonterms.sort() - for nonterm in nonterms: - debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) - debuglog.info('') - - if check_recursion: - unreachable = grammar.find_unreachable() - for u in unreachable: - errorlog.warning('Symbol %r is unreachable', u) - - infinite = grammar.infinite_cycles() - for inf in infinite: - errorlog.error('Infinite recursion detected for symbol %r', inf) - errors = True - - unused_prec = grammar.unused_precedence() - for term, assoc in unused_prec: - errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) - errors = True - - if errors: - raise YaccError('Unable to build parser') - - # Run the LRTable on the grammar - lr = 
LRTable(grammar, debuglog) - - if debug: - num_sr = len(lr.sr_conflicts) - - # Report shift/reduce and reduce/reduce conflicts - if num_sr == 1: - errorlog.warning('1 shift/reduce conflict') - elif num_sr > 1: - errorlog.warning('%d shift/reduce conflicts', num_sr) - - num_rr = len(lr.rr_conflicts) - if num_rr == 1: - errorlog.warning('1 reduce/reduce conflict') - elif num_rr > 1: - errorlog.warning('%d reduce/reduce conflicts', num_rr) - - # Write out conflicts to the output file - if debug and (lr.sr_conflicts or lr.rr_conflicts): - debuglog.warning('') - debuglog.warning('Conflicts:') - debuglog.warning('') - - for state, tok, resolution in lr.sr_conflicts: - debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) - - already_reported = set() - for state, rule, rejected in lr.rr_conflicts: - if (state, id(rule), id(rejected)) in already_reported: - continue - debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) - debuglog.warning('rejected rule (%s) in state %d', rejected, state) - errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) - errorlog.warning('rejected rule (%s) in state %d', rejected, state) - already_reported.add((state, id(rule), id(rejected))) - - warned_never = [] - for state, rule, rejected in lr.rr_conflicts: - if not rejected.reduced and (rejected not in warned_never): - debuglog.warning('Rule (%s) is never reduced', rejected) - errorlog.warning('Rule (%s) is never reduced', rejected) - warned_never.append(rejected) - - # Build the parser - lr.bind_callables(pinfo.pdict) - parser = LRParser(lr, pinfo.error_func) - - parse = parser.parse - return parser diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9787c3b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..b4ba4ed --- /dev/null +++ b/setup.cfg @@ -0,0 +1,18 @@ +[metadata] +name = ply +version = 2022.10.27 +url = https://github.com/dabeaz/ply +author = David Beazley +author_email = "David Beazley" +description = "PLY - Sly Lex Yacc" +long_description = "PLY is an implementation of lex and yacc. No longer maintained on PyPI. Latest version on GitHub." +license = MIT +license_files = LICENSE +classifiers = + License :: OSI Approved :: MIT License + +[options] +package_dir = + =src + +packages = ply diff --git a/src/ply/__init__.py b/src/ply/__init__.py new file mode 100644 index 0000000..45f28c5 --- /dev/null +++ b/src/ply/__init__.py @@ -0,0 +1,5 @@ +# PLY package +# Author: David Beazley (dave@dabeaz.com) +# https://github.com/dabeaz/ply + +__version__ = '2022.10.27' diff --git a/src/ply/lex.py b/src/ply/lex.py new file mode 100644 index 0000000..de011fe --- /dev/null +++ b/src/ply/lex.py @@ -0,0 +1,901 @@ +# ----------------------------------------------------------------------------- +# ply: lex.py +# +# Copyright (C) 2001-2022 +# David M. Beazley (Dabeaz LLC) +# All rights reserved. +# +# Latest version: https://github.com/dabeaz/ply +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of David Beazley or Dabeaz LLC may be used to +# endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- + +import re +import sys +import types +import copy +import os +import inspect + +# This tuple contains acceptable string types +StringTypes = (str, bytes) + +# This regular expression is used to match valid token names +_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') + +# Exception thrown when invalid token encountered and no default error +# handler is defined. +class LexError(Exception): + def __init__(self, message, s): + self.args = (message,) + self.text = s + +# Token class. This class is used to represent the tokens produced. +class LexToken(object): + def __repr__(self): + return f'LexToken({self.type},{self.value!r},{self.lineno},{self.lexpos})' + +# This object is a stand-in for a logging object created by the +# logging module. + +class PlyLogger(object): + def __init__(self, f): + self.f = f + + def critical(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') + + info = critical + debug = critical + +# ----------------------------------------------------------------------------- +# === Lexing Engine === +# +# The following Lexer class implements the lexer runtime. There are only +# a few public methods and attributes: +# +# input() - Store a new string in the lexer +# token() - Get the next token +# clone() - Clone the lexer +# +# lineno - Current line number +# lexpos - Current position in the input string +# ----------------------------------------------------------------------------- + +class Lexer: + def __init__(self): + self.lexre = None # Master regular expression. 
This is a list of + # tuples (re, findex) where re is a compiled + # regular expression and findex is a list + # mapping regex group numbers to rules + self.lexretext = None # Current regular expression strings + self.lexstatere = {} # Dictionary mapping lexer states to master regexs + self.lexstateretext = {} # Dictionary mapping lexer states to regex strings + self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names + self.lexstate = 'INITIAL' # Current lexer state + self.lexstatestack = [] # Stack of lexer states + self.lexstateinfo = None # State information + self.lexstateignore = {} # Dictionary of ignored characters for each state + self.lexstateerrorf = {} # Dictionary of error functions for each state + self.lexstateeoff = {} # Dictionary of eof functions for each state + self.lexreflags = 0 # Optional re compile flags + self.lexdata = None # Actual input data (as a string) + self.lexpos = 0 # Current position in input text + self.lexlen = 0 # Length of the input text + self.lexerrorf = None # Error rule (if any) + self.lexeoff = None # EOF rule (if any) + self.lextokens = None # List of valid tokens + self.lexignore = '' # Ignored characters + self.lexliterals = '' # Literal characters that can be passed through + self.lexmodule = None # Module + self.lineno = 1 # Current line number + + def clone(self, object=None): + c = copy.copy(self) + + # If the object parameter has been supplied, it means we are attaching the + # lexer to a new object. In this case, we have to rebind all methods in + # the lexstatere and lexstateerrorf tables. + + if object: + newtab = {} + for key, ritem in self.lexstatere.items(): + newre = [] + for cre, findex in ritem: + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(object, f[0].__name__), f[1])) + newre.append((cre, newfindex)) + newtab[key] = newre + c.lexstatere = newtab + c.lexstateerrorf = {} + for key, ef in self.lexstateerrorf.items(): + c.lexstateerrorf[key] = getattr(object, ef.__name__) + c.lexmodule = object + return c + + # ------------------------------------------------------------ + # input() - Push a new string into the lexer + # ------------------------------------------------------------ + def input(self, s): + self.lexdata = s + self.lexpos = 0 + self.lexlen = len(s) + + # ------------------------------------------------------------ + # begin() - Changes the lexing state + # ------------------------------------------------------------ + def begin(self, state): + if state not in self.lexstatere: + raise ValueError(f'Undefined state {state!r}') + self.lexre = self.lexstatere[state] + self.lexretext = self.lexstateretext[state] + self.lexignore = self.lexstateignore.get(state, '') + self.lexerrorf = self.lexstateerrorf.get(state, None) + self.lexeoff = self.lexstateeoff.get(state, None) + self.lexstate = state + + # ------------------------------------------------------------ + # push_state() - Changes the lexing state and saves old on stack + # ------------------------------------------------------------ + def push_state(self, state): + self.lexstatestack.append(self.lexstate) + self.begin(state) + + # ------------------------------------------------------------ + # pop_state() - Restores the previous state + # ------------------------------------------------------------ + def pop_state(self): + self.begin(self.lexstatestack.pop()) + + # ------------------------------------------------------------ + # current_state() - Returns the current lexing 
state + # ------------------------------------------------------------ + def current_state(self): + return self.lexstate + + # ------------------------------------------------------------ + # skip() - Skip ahead n characters + # ------------------------------------------------------------ + def skip(self, n): + self.lexpos += n + + # ------------------------------------------------------------ + # token() - Return the next token from the Lexer + # + # Note: This function has been carefully implemented to be as fast + # as possible. Don't make changes unless you really know what + # you are doing + # ------------------------------------------------------------ + def token(self): + # Make local copies of frequently referenced attributes + lexpos = self.lexpos + lexlen = self.lexlen + lexignore = self.lexignore + lexdata = self.lexdata + + while lexpos < lexlen: + # This provides a short-circuit for whitespace, tabs, and other ignored characters + if lexdata[lexpos] in lexignore: + lexpos += 1 + continue + + # Look for a regular expression match + for lexre, lexindexfunc in self.lexre: + m = lexre.match(lexdata, lexpos) + if not m: + continue + + # Create a token for return + tok = LexToken() + tok.value = m.group() + tok.lineno = self.lineno + tok.lexpos = lexpos + + i = m.lastindex + func, tok.type = lexindexfunc[i] + + if not func: + # If no token type was set, it's an ignored token + if tok.type: + self.lexpos = m.end() + return tok + else: + lexpos = m.end() + break + + lexpos = m.end() + + # If the token is processed by a function, call it + + tok.lexer = self # Set additional attributes useful in token rules + self.lexmatch = m + self.lexpos = lexpos + newtok = func(tok) + del tok.lexer + del self.lexmatch + + # Every function must return a token; if it returns nothing, we just move on to the next token + if not newtok: + lexpos = self.lexpos # This is here in case the user has updated lexpos. + lexignore = self.lexignore # This is here in case there was a state change + break + return newtok + else: + # No match, see if in literals + if lexdata[lexpos] in self.lexliterals: + tok = LexToken() + tok.value = lexdata[lexpos] + tok.lineno = self.lineno + tok.type = tok.value + tok.lexpos = lexpos + self.lexpos = lexpos + 1 + return tok + + # No match. Call t_error() if defined. + if self.lexerrorf: + tok = LexToken() + tok.value = self.lexdata[lexpos:] + tok.lineno = self.lineno + tok.type = 'error' + tok.lexer = self + tok.lexpos = lexpos + self.lexpos = lexpos + newtok = self.lexerrorf(tok) + if lexpos == self.lexpos: + # Error method didn't change text position at all. This is an error. + raise LexError(f"Scanning error. 
Illegal character {lexdata[lexpos]!r}", + lexdata[lexpos:]) + lexpos = self.lexpos + if not newtok: + continue + return newtok + + self.lexpos = lexpos + raise LexError(f"Illegal character {lexdata[lexpos]!r} at index {lexpos}", + lexdata[lexpos:]) + + if self.lexeoff: + tok = LexToken() + tok.type = 'eof' + tok.value = '' + tok.lineno = self.lineno + tok.lexpos = lexpos + tok.lexer = self + self.lexpos = lexpos + newtok = self.lexeoff(tok) + return newtok + + self.lexpos = lexpos + 1 + if self.lexdata is None: + raise RuntimeError('No input string given with input()') + return None + + # Iterator interface + def __iter__(self): + return self + + def __next__(self): + t = self.token() + if t is None: + raise StopIteration + return t + +# ----------------------------------------------------------------------------- +# === Lex Builder === +# +# The functions and classes below are used to collect lexing information +# and build a Lexer object from it. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# _get_regex(func) +# +# Returns the regular expression assigned to a function either as a doc string +# or as a .regex attribute attached by the @TOKEN decorator. +# ----------------------------------------------------------------------------- +def _get_regex(func): + return getattr(func, 'regex', func.__doc__) + +# ----------------------------------------------------------------------------- +# get_caller_module_dict() +# +# This function returns a dictionary containing all of the symbols defined within +# a caller further down the call stack. This is used to get the environment +# associated with the lex() call if none was provided. +# ----------------------------------------------------------------------------- +def get_caller_module_dict(levels): + f = sys._getframe(levels) + return { **f.f_globals, **f.f_locals } + +# ----------------------------------------------------------------------------- +# _form_master_re() +# +# This function takes a list of all of the regex components and attempts to +# form the master regular expression. Given limitations in the Python re +# module, it may be necessary to break the master regex into separate expressions. 
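As an illustration of the named-group technique described here (a minimal sketch, not part of the patch; the token names NUMBER, ID, and PLUS are made up), each rule contributes one alternative to a single compiled pattern, and m.lastindex identifies which alternative fired:

```python
import re

# Each (name, pattern) pair plays the role of one lexer rule.
rules = [('NUMBER', r'\d+'),
         ('ID',     r'[a-zA-Z_][a-zA-Z0-9_]*'),
         ('PLUS',   r'\+')]

# Every rule becomes one named alternative in a single master pattern.
master = re.compile('|'.join('(?P<%s>%s)' % (name, pat) for name, pat in rules))

# groupindex maps group names to group numbers; inverting it mirrors the
# lexindexfunc table that token() consults via m.lastindex.
index_to_name = {master.groupindex[name]: name for name, _ in rules}

m = master.match('foo42 + 1')
print(index_to_name[m.lastindex], m.group())   # prints: ID foo42
```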
+# ----------------------------------------------------------------------------- +def _form_master_re(relist, reflags, ldict, toknames): + if not relist: + return [], [], [] + regex = '|'.join(relist) + try: + lexre = re.compile(regex, reflags) + + # Build the index to function map for the matching engine + lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) + lexindexnames = lexindexfunc[:] + + for f, i in lexre.groupindex.items(): + handle = ldict.get(f, None) + if type(handle) in (types.FunctionType, types.MethodType): + lexindexfunc[i] = (handle, toknames[f]) + lexindexnames[i] = f + elif handle is not None: + lexindexnames[i] = f + if f.find('ignore_') > 0: + lexindexfunc[i] = (None, None) + else: + lexindexfunc[i] = (None, toknames[f]) + + return [(lexre, lexindexfunc)], [regex], [lexindexnames] + except Exception: + m = (len(relist) // 2) + 1 + llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) + rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) + return (llist+rlist), (lre+rre), (lnames+rnames) + +# ----------------------------------------------------------------------------- +# def _statetoken(s,names) +# +# Given a declaration name s of the form "t_" and a dictionary whose keys are +# state names, this function returns a tuple (states,tokenname) where states +# is a tuple of state names and tokenname is the name of the token. For example, +# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') +# ----------------------------------------------------------------------------- +def _statetoken(s, names): + parts = s.split('_') + for i, part in enumerate(parts[1:], 1): + if part not in names and part != 'ANY': + break + + if i > 1: + states = tuple(parts[1:i]) + else: + states = ('INITIAL',) + + if 'ANY' in states: + states = tuple(names) + + tokenname = '_'.join(parts[i:]) + return (states, tokenname) + + +# ----------------------------------------------------------------------------- +# LexerReflect() +# +# This class represents information needed to build a lexer as extracted from a +# user's input file. 
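To make the naming convention concrete, here is a small, hypothetical lexer module (an editorial sketch, not part of the patch) in the style that LexerReflect collects: state names, the ANY prefix, and per-state ignore and error rules all come from the t_ declaration names:

```python
import ply.lex as lex   # assumes the src/ply layout added by this patch is installed

tokens = ['WORD', 'BODY']

# ('statename', 'exclusive'|'inclusive'), exactly the form get_states() expects
states = (('comment', 'exclusive'),)

t_WORD = r'[^ \n]+'              # no state prefix: implicitly ('INITIAL',)

def t_begincomment(t):           # runs in INITIAL; switches the lexer state
    r'/\*'
    t.lexer.begin('comment')

def t_comment_end(t):            # _statetoken maps this to (('comment',), 'end')
    r'\*/'
    t.lexer.begin('INITIAL')

t_comment_BODY = r'[^*\n]+'      # ordinary string rule, but only in state 'comment'
t_ANY_ignore = ' \t'             # 'ANY' expands to every declared state

def t_comment_error(t):          # per-state error rule for the exclusive state
    t.lexer.skip(1)

def t_error(t):
    t.lexer.skip(1)

lexer = lex.lex()
```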
+# ----------------------------------------------------------------------------- +class LexerReflect(object): + def __init__(self, ldict, log=None, reflags=0): + self.ldict = ldict + self.error_func = None + self.tokens = [] + self.reflags = reflags + self.stateinfo = {'INITIAL': 'inclusive'} + self.modules = set() + self.error = False + self.log = PlyLogger(sys.stderr) if log is None else log + + # Get all of the basic information + def get_all(self): + self.get_tokens() + self.get_literals() + self.get_states() + self.get_rules() + + # Validate all of the information + def validate_all(self): + self.validate_tokens() + self.validate_literals() + self.validate_rules() + return self.error + + # Get the tokens map + def get_tokens(self): + tokens = self.ldict.get('tokens', None) + if not tokens: + self.log.error('No token list is defined') + self.error = True + return + + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True + return + + if not tokens: + self.log.error('tokens is empty') + self.error = True + return + + self.tokens = tokens + + # Validate the tokens + def validate_tokens(self): + terminals = {} + for n in self.tokens: + if not _is_identifier.match(n): + self.log.error(f"Bad token name {n!r}") + self.error = True + if n in terminals: + self.log.warning(f"Token {n!r} multiply defined") + terminals[n] = 1 + + # Get the literals specifier + def get_literals(self): + self.literals = self.ldict.get('literals', '') + if not self.literals: + self.literals = '' + + # Validate literals + def validate_literals(self): + try: + for c in self.literals: + if not isinstance(c, StringTypes) or len(c) > 1: + self.log.error(f'Invalid literal {c!r}. Must be a single character') + self.error = True + + except TypeError: + self.log.error('Invalid literals specification. literals must be a sequence of characters') + self.error = True + + def get_states(self): + self.states = self.ldict.get('states', None) + # Build statemap + if self.states: + if not isinstance(self.states, (tuple, list)): + self.log.error('states must be defined as a tuple or list') + self.error = True + else: + for s in self.states: + if not isinstance(s, tuple) or len(s) != 2: + self.log.error("Invalid state specifier %r. 
Must be a tuple (statename,'exclusive|inclusive')", s) + self.error = True + continue + name, statetype = s + if not isinstance(name, StringTypes): + self.log.error('State name %r must be a string', name) + self.error = True + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name) + self.error = True + continue + if name in self.stateinfo: + self.log.error("State %r already defined", name) + self.error = True + continue + self.stateinfo[name] = statetype + + # Get all of the symbols with a t_ prefix and sort them into various + # categories (functions, strings, error functions, and ignore characters) + + def get_rules(self): + tsymbols = [f for f in self.ldict if f[:2] == 't_'] + + # Now build up a list of functions and a list of strings + self.toknames = {} # Mapping of symbols to token names + self.funcsym = {} # Symbols defined as functions + self.strsym = {} # Symbols defined as strings + self.ignore = {} # Ignore strings by state + self.errorf = {} # Error functions by state + self.eoff = {} # EOF functions by state + + for s in self.stateinfo: + self.funcsym[s] = [] + self.strsym[s] = [] + + if len(tsymbols) == 0: + self.log.error('No rules of the form t_rulename are defined') + self.error = True + return + + for f in tsymbols: + t = self.ldict[f] + states, tokname = _statetoken(f, self.stateinfo) + self.toknames[f] = tokname + + if hasattr(t, '__call__'): + if tokname == 'error': + for s in states: + self.errorf[s] = t + elif tokname == 'eof': + for s in states: + self.eoff[s] = t + elif tokname == 'ignore': + line = t.__code__.co_firstlineno + file = t.__code__.co_filename + self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__) + self.error = True + else: + for s in states: + self.funcsym[s].append((f, t)) + elif isinstance(t, StringTypes): + if tokname == 'ignore': + for s in states: + self.ignore[s] = t + if '\\' in t: + self.log.warning("%s contains a literal backslash '\\'", f) + + elif tokname == 'error': + self.log.error("Rule %r must be defined as a function", f) + self.error = True + else: + for s in states: + self.strsym[s].append((f, t)) + else: + self.log.error('%s not defined as a function or string', f) + self.error = True + + # Sort the functions by line number + for f in self.funcsym.values(): + f.sort(key=lambda x: x[1].__code__.co_firstlineno) + + # Sort the strings by regular expression length + for s in self.strsym.values(): + s.sort(key=lambda x: len(x[1]), reverse=True) + + # Validate all of the t_rules collected + def validate_rules(self): + for state in self.stateinfo: + # Validate all rules defined by functions + + for fname, f in self.funcsym[state]: + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) + + tokname = self.toknames[fname] + if isinstance(f, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + nargs = f.__code__.co_argcount + if nargs > reqargs: + self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) + self.error = True + continue + + if nargs < reqargs: + self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) + self.error = True + continue + + if not _get_regex(f): + self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__) + self.error = True + continue + + try: + c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) + if c.match(''): + 
self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, f.__name__) + self.error = True + except re.error as e: + self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) + if '#' in _get_regex(f): + self.log.error("%s:%d. Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__) + self.error = True + + # Validate all rules defined by strings + for name, r in self.strsym[state]: + tokname = self.toknames[name] + if tokname == 'error': + self.log.error("Rule %r must be defined as a function", name) + self.error = True + continue + + if tokname not in self.tokens and tokname.find('ignore_') < 0: + self.log.error("Rule %r defined for an unspecified token %s", name, tokname) + self.error = True + continue + + try: + c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) + if (c.match('')): + self.log.error("Regular expression for rule %r matches empty string", name) + self.error = True + except re.error as e: + self.log.error("Invalid regular expression for rule %r. %s", name, e) + if '#' in r: + self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name) + self.error = True + + if not self.funcsym[state] and not self.strsym[state]: + self.log.error("No rules defined for state %r", state) + self.error = True + + # Validate the error function + efunc = self.errorf.get(state, None) + if efunc: + f = efunc + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) + + if isinstance(f, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + nargs = f.__code__.co_argcount + if nargs > reqargs: + self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) + self.error = True + + if nargs < reqargs: + self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) + self.error = True + + for module in self.modules: + self.validate_module(module) + + # ----------------------------------------------------------------------------- + # validate_module() + # + # This checks to see if there are duplicated t_rulename() functions or strings + # in the parser input file. This is done using a simple regular expression + # match on each line in the source code of the given module. + # ----------------------------------------------------------------------------- + + def validate_module(self, module): + try: + lines, linen = inspect.getsourcelines(module) + except IOError: + return + + fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') + sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') + + counthash = {} + linen += 1 + for line in lines: + m = fre.match(line) + if not m: + m = sre.match(line) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + filename = inspect.getsourcefile(module) + self.log.error('%s:%d: Rule %s redefined. 
Previously defined on line %d', filename, linen, name, prev) + self.error = True + linen += 1 + +# ----------------------------------------------------------------------------- +# lex(module) +# +# Build all of the regular expression rules from definitions in the supplied module +# ----------------------------------------------------------------------------- +def lex(*, module=None, object=None, debug=False, + reflags=int(re.VERBOSE), debuglog=None, errorlog=None): + + global lexer + + ldict = None + stateinfo = {'INITIAL': 'inclusive'} + lexobj = Lexer() + global token, input + + if errorlog is None: + errorlog = PlyLogger(sys.stderr) + + if debug: + if debuglog is None: + debuglog = PlyLogger(sys.stderr) + + # Get the module dictionary used for the lexer + if object: + module = object + + # Get the module dictionary used for the parser + if module: + _items = [(k, getattr(module, k)) for k in dir(module)] + ldict = dict(_items) + # If no __file__ attribute is available, try to obtain it from the __module__ instead + if '__file__' not in ldict: + ldict['__file__'] = sys.modules[ldict['__module__']].__file__ + else: + ldict = get_caller_module_dict(2) + + # Collect parser information from the dictionary + linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) + linfo.get_all() + if linfo.validate_all(): + raise SyntaxError("Can't build lexer") + + # Dump some basic debugging information + if debug: + debuglog.info('lex: tokens = %r', linfo.tokens) + debuglog.info('lex: literals = %r', linfo.literals) + debuglog.info('lex: states = %r', linfo.stateinfo) + + # Build a dictionary of valid token names + lexobj.lextokens = set() + for n in linfo.tokens: + lexobj.lextokens.add(n) + + # Get literals specification + if isinstance(linfo.literals, (list, tuple)): + lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) + else: + lexobj.lexliterals = linfo.literals + + lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) + + # Get the stateinfo dictionary + stateinfo = linfo.stateinfo + + regexs = {} + # Build the master regular expressions + for state in stateinfo: + regex_list = [] + + # Add rules defined by functions first + for fname, f in linfo.funcsym[state]: + regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) + + # Now add all of the simple rules + for name, r in linfo.strsym[state]: + regex_list.append('(?P<%s>%s)' % (name, r)) + if debug: + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) + + regexs[state] = regex_list + + # Build the master regular expressions + + if debug: + debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') + + for state in regexs: + lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) + lexobj.lexstatere[state] = lexre + lexobj.lexstateretext[state] = re_text + lexobj.lexstaterenames[state] = re_names + if debug: + for i, text in enumerate(re_text): + debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) + + # For inclusive states, we need to add the regular expressions from the INITIAL state + for state, stype in stateinfo.items(): + if state != 'INITIAL' and stype == 'inclusive': + lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) + lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) + lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) + + lexobj.lexstateinfo = stateinfo + lexobj.lexre = 
lexobj.lexstatere['INITIAL'] + lexobj.lexretext = lexobj.lexstateretext['INITIAL'] + lexobj.lexreflags = reflags + + # Set up ignore variables + lexobj.lexstateignore = linfo.ignore + lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') + + # Set up error functions + lexobj.lexstateerrorf = linfo.errorf + lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) + if not lexobj.lexerrorf: + errorlog.warning('No t_error rule is defined') + + # Set up eof functions + lexobj.lexstateeoff = linfo.eoff + lexobj.lexeoff = linfo.eoff.get('INITIAL', None) + + # Check state information for ignore and error rules + for s, stype in stateinfo.items(): + if stype == 'exclusive': + if s not in linfo.errorf: + errorlog.warning("No error rule is defined for exclusive state %r", s) + if s not in linfo.ignore and lexobj.lexignore: + errorlog.warning("No ignore rule is defined for exclusive state %r", s) + elif stype == 'inclusive': + if s not in linfo.errorf: + linfo.errorf[s] = linfo.errorf.get('INITIAL', None) + if s not in linfo.ignore: + linfo.ignore[s] = linfo.ignore.get('INITIAL', '') + + # Create global versions of the token() and input() functions + token = lexobj.token + input = lexobj.input + lexer = lexobj + + return lexobj + +# ----------------------------------------------------------------------------- +# runmain() +# +# This runs the lexer as a main program +# ----------------------------------------------------------------------------- + +def runmain(lexer=None, data=None): + if not data: + try: + filename = sys.argv[1] + with open(filename) as f: + data = f.read() + except IndexError: + sys.stdout.write('Reading from standard input (type EOF to end):\n') + data = sys.stdin.read() + + if lexer: + _input = lexer.input + else: + _input = input + _input(data) + if lexer: + _token = lexer.token + else: + _token = token + + while True: + tok = _token() + if not tok: + break + sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.lexpos})\n') + +# ----------------------------------------------------------------------------- +# @TOKEN(regex) +# +# This decorator function can be used to set the regex expression on a function +# when its docstring might need to be set in an alternative way +# ----------------------------------------------------------------------------- + +def TOKEN(r): + def set_regex(f): + if hasattr(r, '__call__'): + f.regex = _get_regex(r) + else: + f.regex = r + return f + return set_regex diff --git a/src/ply/yacc.py b/src/ply/yacc.py new file mode 100644 index 0000000..6528796 --- /dev/null +++ b/src/ply/yacc.py @@ -0,0 +1,2482 @@ +# ----------------------------------------------------------------------------- +# ply: yacc.py +# +# Copyright (C) 2001-2022 +# David M. Beazley (Dabeaz LLC) +# All rights reserved. +# +# Latest version: https://github.com/dabeaz/ply +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the name of David Beazley or Dabeaz LLC may be used to +# endorse or promote products derived from this software without +# specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- +# +# This implements an LR parser that is constructed from grammar rules defined +# as Python functions. The grammar is specified by supplying the BNF inside +# Python documentation strings. The inspiration for this technique was borrowed +# from John Aycock's Spark parsing system. PLY might be viewed as a cross between +# Spark and the GNU bison utility. +# +# The current implementation is only somewhat object-oriented. The +# LR parser itself is defined in terms of an object (which allows multiple +# parsers to co-exist). However, most of the variables used during table +# construction are defined in terms of global variables. Users shouldn't +# notice unless they are trying to define multiple parsers at the same +# time using threads (in which case they should have their head examined). +# +# This implementation supports both SLR and LALR(1) parsing. LALR(1) +# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), +# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, +# Techniques, and Tools" (The Dragon Book). That implementation has since been replaced +# by the more efficient DeRemer and Pennello algorithm. +# +# :::::::: WARNING ::::::: +# +# Construction of LR parsing tables is fairly complicated and expensive. +# To make this module run fast, a *LOT* of work has been put into +# optimization---often at the expense of readability and what one might +# consider to be good Python "coding style." Modify the code at your +# own risk! +# ---------------------------------------------------------------------------- + +import re +import types +import sys +import inspect + +#----------------------------------------------------------------------------- +# === User configurable parameters === +# +# Change these to modify the default behavior of yacc (if you wish) +#----------------------------------------------------------------------------- + +yaccdebug = False # Debugging mode. If set, yacc generates + # a 'parser.out' file in the current directory + +debug_file = 'parser.out' # Default name of the debugging file +error_count = 3 # Number of symbols that must be shifted to leave recovery mode +resultlimit = 40 # Size limit of results when running in debug mode. + +MAXINT = sys.maxsize + +# This object is a stand-in for a logging object created by the +# logging module. PLY will use this by default to create things +# such as the parser.out file. If a user wants more detailed +# information, they can create their own logging object and pass +# it into PLY. 
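For example (an editorial sketch, not part of the patch), a logger from the standard logging module satisfies the same interface; calcgrammar is a hypothetical module of token and p_ rule definitions:

```python
import logging
import ply.yacc as yacc
import calcgrammar          # hypothetical module containing tokens and p_ rules

logging.basicConfig(filename='parser.log', level=logging.DEBUG)
log = logging.getLogger('ply')

# debug, debuglog, and errorlog are the yacc() keyword arguments shown in this
# patch; any object with debug/info/warning/error methods can stand in for
# the PlyLogger class below.
parser = yacc.yacc(module=calcgrammar, debug=True, debuglog=log, errorlog=log)
```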
+ +class PlyLogger(object): + def __init__(self, f): + self.f = f + + def debug(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + info = debug + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') + + critical = debug + +# Null logger is used when no output is generated. Does nothing. +class NullLogger(object): + def __getattribute__(self, name): + return self + + def __call__(self, *args, **kwargs): + return self + +# Exception raised for yacc-related errors +class YaccError(Exception): + pass + +# Format the result message that the parser produces when running in debug mode. +def format_result(r): + repr_str = repr(r) + if '\n' in repr_str: + repr_str = repr(repr_str) + if len(repr_str) > resultlimit: + repr_str = repr_str[:resultlimit] + ' ...' + result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) + return result + +# Format stack entries when the parser is running in debug mode +def format_stack_entry(r): + repr_str = repr(r) + if '\n' in repr_str: + repr_str = repr(repr_str) + if len(repr_str) < 16: + return repr_str + else: + return '<%s @ 0x%x>' % (type(r).__name__, id(r)) + +#----------------------------------------------------------------------------- +# === LR Parsing Engine === +# +# The following classes are used for the LR parser itself. These are not +# used during table construction and are independent of the actual LR +# table generation algorithm +#----------------------------------------------------------------------------- + +# This class is used to hold non-terminal grammar symbols during parsing. +# It normally has the following attributes set: +# .type = Grammar symbol type +# .value = Symbol value +# .lineno = Starting line number +# .endlineno = Ending line number (optional, set automatically) +# .lexpos = Starting lex position +# .endlexpos = Ending lex position (optional, set automatically) + +class YaccSymbol: + def __str__(self): + return self.type + + def __repr__(self): + return str(self) + +# This class is a wrapper around the objects actually passed to each +# grammar rule. Index lookup and assignment actually assign the +# .value attribute of the underlying YaccSymbol object. +# The lineno() method returns the line number of a given +# item (or 0 if not defined). The linespan() method returns +# a tuple of (startline,endline) representing the range of lines +# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) +# representing the range of positional information for a symbol. 
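From inside a rule, that wrapper looks like the following sketch (not part of the patch; the rule and token names are hypothetical). Note that endlineno/endlexpos, and hence full linespan()/lexspan() ranges, are only recorded when parse() is called with tracking=True, and that negative indices such as p[-1] read from the parser's symbol stack:

```python
# A minimal sketch of how a grammar rule sees the YaccProduction wrapper.
def p_expression_plus(p):
    'expression : expression PLUS term'
    p[0] = p[1] + p[3]            # indexing reads and writes the .value slots
    line = p.lineno(2)            # line number of the PLUS token (or 0)
    first, last = p.linespan(1)   # line range; endlineno requires tracking=True
    start, end = p.lexspan(3)     # (lexpos, endlexpos) range of the 'term' symbol
```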
+ +class YaccProduction: + def __init__(self, s, stack=None): + self.slice = s + self.stack = stack + self.lexer = None + self.parser = None + + def __getitem__(self, n): + if isinstance(n, slice): + return [s.value for s in self.slice[n]] + elif n >= 0: + return self.slice[n].value + else: + return self.stack[n].value + + def __setitem__(self, n, v): + self.slice[n].value = v + + def __getslice__(self, i, j): + return [s.value for s in self.slice[i:j]] + + def __len__(self): + return len(self.slice) + + def lineno(self, n): + return getattr(self.slice[n], 'lineno', 0) + + def set_lineno(self, n, lineno): + self.slice[n].lineno = lineno + + def linespan(self, n): + startline = getattr(self.slice[n], 'lineno', 0) + endline = getattr(self.slice[n], 'endlineno', startline) + return startline, endline + + def lexpos(self, n): + return getattr(self.slice[n], 'lexpos', 0) + + def set_lexpos(self, n, lexpos): + self.slice[n].lexpos = lexpos + + def lexspan(self, n): + startpos = getattr(self.slice[n], 'lexpos', 0) + endpos = getattr(self.slice[n], 'endlexpos', startpos) + return startpos, endpos + + def error(self): + raise SyntaxError + +# ----------------------------------------------------------------------------- +# == LRParser == +# +# The LR Parsing engine. +# ----------------------------------------------------------------------------- + +class LRParser: + def __init__(self, lrtab, errorf): + self.productions = lrtab.lr_productions + self.action = lrtab.lr_action + self.goto = lrtab.lr_goto + self.errorfunc = errorf + self.set_defaulted_states() + self.errorok = True + + def errok(self): + self.errorok = True + + def restart(self): + del self.statestack[:] + del self.symstack[:] + sym = YaccSymbol() + sym.type = '$end' + self.symstack.append(sym) + self.statestack.append(0) + + # Defaulted state support. + # This method identifies parser states where there is only one possible reduction action. + # For such states, the parser can choose to make a rule reduction without consuming + # the next look-ahead token. This delayed invocation of the tokenizer can be useful in + # certain kinds of advanced parsing situations where the lexer and parser interact with + # each other or change states (i.e., manipulation of scope, lexer states, etc.). + # + # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions + def set_defaulted_states(self): + self.defaulted_states = {} + for state, actions in self.action.items(): + rules = list(actions.values()) + if len(rules) == 1 and rules[0] < 0: + self.defaulted_states[state] = rules[0] + + def disable_defaulted_states(self): + self.defaulted_states = {} + + # parse(). + # + # This is the core parsing engine. To operate, it requires a lexer object. + # Two options are provided. The debug flag turns on debugging so that you can + # see the various rule reductions and parsing steps. The tracking flag turns on position + # tracking. In this mode, symbols will record the starting/ending line number and + # character index. + + def parse(self, input=None, lexer=None, debug=False, tracking=False): + # If debugging has been specified as a flag, turn it into a logging object + if isinstance(debug, int) and debug: + debug = PlyLogger(sys.stderr) + + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.)
+ prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + if debug: + debug.info('PLY: PARSE DEBUG START') + + # If no lexer was given, we will try to use the lex module + if not lexer: + from . import lex + lexer = lex.lexer + + # Set up the lexer and parser objects on pslice + pslice.lexer = lexer + pslice.parser = self + + # If input was supplied, pass to lexer + if input is not None: + lexer.input(input) + + # Set the token function + get_token = self.token = lexer.token + + # Set up the state and symbol stacks + statestack = self.statestack = [] # Stack of parsing states + symstack = self.symstack = [] # Stack of grammar symbols + pslice.stack = symstack # Put in the production + errtoken = None # Err token + + # The start state is assumed to be (0,$end) + + statestack.append(0) + sym = YaccSymbol() + sym.type = '$end' + symstack.append(sym) + state = 0 + while True: + # Get the next symbol on the input. If a lookahead symbol + # is already set, we just use that. Otherwise, we'll pull + # the next token off of the lookaheadstack or from the lexer + + if debug: + debug.debug('State : %s', state) + + if state not in defaulted_states: + if not lookahead: + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + if debug: + debug.debug('Defaulted state %s: Reduce using %d', state, -t) + + if debug: + debug.debug('Stack : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + + if t is not None: + if t > 0: + # shift a symbol on the stack + statestack.append(t) + state = t + + if debug: + debug.debug('Action : Shift and goto state %s', t) + + symstack.append(lookahead) + lookahead = None + + # Decrease error count on successful shift + if errorcount: + errorcount -= 1 + continue + + if t < 0: + # reduce a symbol on the stack, emit a production + p = prod[-t] + pname = p.name + plen = p.len + + # Get production function + sym = YaccSymbol() + sym.type = pname # Production name + sym.value = None + + if debug: + if plen: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, + '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', + goto[statestack[-1-plen]][pname]) + else: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], + goto[statestack[-1]][pname]) + + if plen: + targ = symstack[-plen-1:] + targ[0] = sym + + if tracking: + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # below as a performance optimization. Make sure + # changes get made in both locations. 
+ + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + del symstack[-plen:] + self.state = state + p.callable(pslice) + del statestack[-plen:] + if debug: + debug.info('Result : %s', format_result(pslice[0])) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set, enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + + else: + + if tracking: + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + + targ = [sym] + + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # The code enclosed in this section is duplicated + # above as a performance optimization. Make sure + # changes get made in both locations. + + pslice.slice = targ + + try: + # Call the grammar rule with our special slice object + self.state = state + p.callable(pslice) + if debug: + debug.info('Result : %s', format_result(pslice[0])) + symstack.append(sym) + state = goto[statestack[-1]][pname] + statestack.append(state) + except SyntaxError: + # If an error was set, enter error recovery state + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) + state = statestack[-1] + sym.type = 'error' + sym.value = 'error' + lookahead = sym + errorcount = error_count + self.errorok = False + + continue + + if t == 0: + n = symstack[-1] + result = getattr(n, 'value', None) + + if debug: + debug.info('Done : Returning %s', format_result(result)) + debug.info('PLY: PARSE DEBUG END') + + return result + + if t is None: + + if debug: + debug.error('Error : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + + # We have some kind of parsing error here. To handle + # this, we are going to push the current token onto + # the tokenstack and replace it with an 'error' token. + # If there are any synchronization rules, they may + # catch it. + # + # In addition to pushing the error token, we also call + # the user defined p_error() function if this is the + # first syntax error. This function is only called if + # errorcount == 0. + if errorcount == 0 or self.errorok: + errorcount = error_count + self.errorok = False + errtoken = lookahead + if errtoken.type == '$end': + errtoken = None # End of file! + if self.errorfunc: + if errtoken and not hasattr(errtoken, 'lexer'): + errtoken.lexer = lexer + self.state = state + tok = self.errorfunc(errtoken) + if self.errorok: + # User must have done some kind of panic + # mode recovery on their own. The + # returned token is the next lookahead + lookahead = tok + errtoken = None + continue + else: + if errtoken: + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 + if lineno: + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) + else: + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) + else: + sys.stderr.write('yacc: Parse error in input. EOF\n') + return + + else: + errorcount = error_count + + # case 1: the statestack only has 1 entry on it. If we're in this state, the + # entire parse has been rolled back and we're completely hosed. 
The token is + # discarded and we just keep going. + + if len(statestack) <= 1 and lookahead.type != '$end': + lookahead = None + errtoken = None + state = 0 + # Nuke the pushback stack + del lookaheadstack[:] + continue + + # case 2: the statestack has a couple of entries on it, but we're + # at the end of the file. Nuke the top entry and generate an error token + + # Start nuking entries on the stack + if lookahead.type == '$end': + # Whoa. We're really hosed here. Bail out + return + + if lookahead.type != 'error': + sym = symstack[-1] + if sym.type == 'error': + # Hmmm. Error is on top of stack, we'll just nuke input + # symbol and continue + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + lookahead = None + continue + + # Create the error symbol for the first time and make it the new lookahead symbol + t = YaccSymbol() + t.type = 'error' + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos + t.value = lookahead + lookaheadstack.append(lookahead) + lookahead = t + else: + sym = symstack.pop() + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + statestack.pop() + state = statestack[-1] + + continue + + # If we're here, something really bad happened + raise RuntimeError('yacc: internal parser error!!!\n') + +# ----------------------------------------------------------------------------- +# === Grammar Representation === +# +# The following functions, classes, and variables are used to represent and +# manipulate the rules that make up a grammar. +# ----------------------------------------------------------------------------- + +# regex matching identifiers +_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') + +# ----------------------------------------------------------------------------- +# class Production: +# +# This class stores the raw information about a single production or grammar rule. +# A grammar rule refers to a specification such as this: +# +# expr : expr PLUS term +# +# Here are the basic attributes defined on all productions +# +# name - Name of the production. For example 'expr' +# prod - A list of symbols on the right side ['expr','PLUS','term'] +# prec - Production precedence level +# number - Production number. +# func - Function that executes on reduce +# file - File where production function is defined +# lineno - Line number where production function is defined +# +# The following attributes are also defined: 
+# +# len - Length of the production (number of symbols on right hand side) +# usyms - Set of unique symbols found in the production +# ----------------------------------------------------------------------------- + +class Production(object): + reduced = 0 + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): + self.name = name + self.prod = tuple(prod) + self.number = number + self.func = func + self.callable = None + self.file = file + self.line = line + self.prec = precedence + + # Internal settings used during table construction + + self.len = len(self.prod) # Length of the production + + # Create a list of unique production symbols used in the production + self.usyms = [] + for s in self.prod: + if s not in self.usyms: + self.usyms.append(s) + + # List of all LR items for the production + self.lr_items = [] + self.lr_next = None + + # Create a string representation + if self.prod: + self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) + else: + self.str = '%s -> ' % self.name + + def __str__(self): + return self.str + + def __repr__(self): + return 'Production(' + str(self) + ')' + + def __len__(self): + return len(self.prod) + + def __nonzero__(self): + return 1 + + def __getitem__(self, index): + return self.prod[index] + + # Return the nth lr_item from the production (or None if at the end) + def lr_item(self, n): + if n > len(self.prod): + return None + p = LRItem(self, n) + # Precompute the list of productions immediately following. + try: + p.lr_after = self.Prodnames[p.prod[n+1]] + except (IndexError, KeyError): + p.lr_after = [] + try: + p.lr_before = p.prod[n-1] + except IndexError: + p.lr_before = None + return p + + # Bind the production function name to a callable + def bind(self, pdict): + if self.func: + self.callable = pdict[self.func] + +# ----------------------------------------------------------------------------- +# class LRItem +# +# This class represents a specific stage of parsing a production rule. For +# example: +# +# expr : expr . PLUS term +# +# In the above, the "." represents the current location of the parse. Here are the +# basic attributes: +# +# name - Name of the production. For example 'expr' +# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] +# number - Production number. +# +# lr_next - Next LR item. For example, if we are 'expr -> expr . PLUS term', +# then lr_next refers to 'expr -> expr PLUS . term' +# lr_index - LR item index (location of the ".") in the prod list. +# lookaheads - LALR lookahead symbols for this item +# len - Length of the production (number of symbols on right hand side) +# lr_after - List of all productions that immediately follow +# lr_before - Grammar symbol immediately before +# ----------------------------------------------------------------------------- + +class LRItem(object): + def __init__(self, p, n): + self.name = p.name + self.prod = list(p.prod) + self.number = p.number + self.lr_index = n + self.lookaheads = {} + self.prod.insert(n, '.') + self.prod = tuple(self.prod) + self.len = len(self.prod) + self.usyms = p.usyms + + def __str__(self): + if self.prod: + s = '%s -> %s' % (self.name, ' '.join(self.prod)) + else: + s = '%s -> ' % self.name + return s + + def __repr__(self): + return 'LRItem(' + str(self) + ')' + +# ----------------------------------------------------------------------------- +# rightmost_terminal() +# +# Return the rightmost terminal from a list of symbols. 
Used in add_production() +# ----------------------------------------------------------------------------- +def rightmost_terminal(symbols, terminals): + i = len(symbols) - 1 + while i >= 0: + if symbols[i] in terminals: + return symbols[i] + i -= 1 + return None + +# ----------------------------------------------------------------------------- +# === GRAMMAR CLASS === +# +# The following class represents the contents of the specified grammar along +# with various computed properties such as first sets, follow sets, LR items, etc. +# This data is used for critical parts of the table generation process later. +# ----------------------------------------------------------------------------- + +class GrammarError(YaccError): + pass + +class Grammar(object): + def __init__(self, terminals): + self.Productions = [None] # A list of all of the productions. The first + # entry is always reserved for the purpose of + # building an augmented grammar + + self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. + + self.Prodmap = {} # A dictionary that is only used to detect duplicate + # productions. + + self.Terminals = {} # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. + + for term in terminals: + self.Terminals[term] = [] + + self.Terminals['error'] = [] + + self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. + + self.First = {} # A dictionary of precomputed FIRST(x) symbols + + self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols + + self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) + + self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar. + # This is only used to provide error checking and to generate + # a warning about unused precedence rules. + + self.Start = None # Starting symbol for the grammar + + + def __len__(self): + return len(self.Productions) + + def __getitem__(self, index): + return self.Productions[index] + + # ----------------------------------------------------------------------------- + # set_precedence() + # + # Sets the precedence for a given terminal. assoc is the associativity such as + # 'left','right', or 'nonassoc'. level is a numeric level. + # + # ----------------------------------------------------------------------------- + + def set_precedence(self, term, assoc, level): + assert self.Productions == [None], 'Must call set_precedence() before add_production()' + if term in self.Precedence: + raise GrammarError('Precedence already specified for terminal %r' % term) + if assoc not in ['left', 'right', 'nonassoc']: + raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") + self.Precedence[term] = (assoc, level) + + # ----------------------------------------------------------------------------- + # add_production() + # + # Given an action function, this function assembles a production rule and + # computes its precedence level. + # + # The production rule is supplied as a list of symbols. For example, + # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and + # symbols ['expr','PLUS','term']. + # + # Precedence is determined by the precedence of the right-most terminal + # or the precedence of a terminal specified by %prec.
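As a standalone paraphrase of that precedence rule, the following sketch captures the same decision logic; rule_precedence and its arguments are hypothetical names for illustration only, not part of PLY's API (the real work happens inside add_production() below):

    # Hypothetical sketch of how a production's precedence is chosen.
    def rule_precedence(syms, precedence, terminals):
        # An explicit '%prec NAME' at the end of the rule wins.
        if len(syms) >= 2 and syms[-2] == '%prec':
            return precedence[syms[-1]]          # unknown %prec names are an error
        # Otherwise use the rightmost terminal, defaulting to ('right', 0).
        for s in reversed(syms):
            if s in terminals:
                return precedence.get(s, ('right', 0))
        return ('right', 0)

    # rule_precedence(['expr', 'MINUS', 'expr'],
    #                 {'MINUS': ('left', 1)}, {'MINUS'}) == ('left', 1)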
+ # + # A variety of error checks are performed to make sure production symbols + # are valid and that %prec is used correctly. + # ----------------------------------------------------------------------------- + + def add_production(self, prodname, syms, func=None, file='', line=0): + + if prodname in self.Terminals: + raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) + if prodname == 'error': + raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname)) + if not _is_identifier.match(prodname): + raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) + + # Look for literal tokens + for n, s in enumerate(syms): + if s[0] in "'\"": + try: + c = eval(s) + if (len(c) > 1): + raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % + (file, line, s, prodname)) + if c not in self.Terminals: + self.Terminals[c] = [] + syms[n] = c + continue + except SyntaxError: + pass + if not _is_identifier.match(s) and s != '%prec': + raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) + + # Determine the precedence level + if '%prec' in syms: + if syms[-1] == '%prec': + raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) + if syms[-2] != '%prec': + raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' % + (file, line)) + precname = syms[-1] + prodprec = self.Precedence.get(precname) + if not prodprec: + raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) + else: + self.UsedPrecedence.add(precname) + del syms[-2:] # Drop %prec from the rule + else: + # If no %prec, precedence is determined by the rightmost terminal symbol + precname = rightmost_terminal(syms, self.Terminals) + prodprec = self.Precedence.get(precname, ('right', 0)) + + # See if the rule is already in the rulemap + map = '%s -> %s' % (prodname, syms) + if map in self.Prodmap: + m = self.Prodmap[map] + raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + + 'Previous definition at %s:%d' % (m.file, m.line)) + + # From this point on, everything is valid. Create a new Production instance + pnumber = len(self.Productions) + if prodname not in self.Nonterminals: + self.Nonterminals[prodname] = [] + + # Add the production number to Terminals and Nonterminals + for t in syms: + if t in self.Terminals: + self.Terminals[t].append(pnumber) + else: + if t not in self.Nonterminals: + self.Nonterminals[t] = [] + self.Nonterminals[t].append(pnumber) + + # Create a production and add it to the list of productions + p = Production(pnumber, prodname, syms, prodprec, func, file, line) + self.Productions.append(p) + self.Prodmap[map] = p + + # Add to the global productions list + try: + self.Prodnames[prodname].append(p) + except KeyError: + self.Prodnames[prodname] = [p] + + # ----------------------------------------------------------------------------- + # set_start() + # + # Sets the starting symbol and creates the augmented grammar. Production + # rule 0 is S' -> start where start is the start symbol. 
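To see add_production() and the augmentation performed by set_start() (defined next) in action, here is a toy example; it uses the internal Grammar class from this file purely for illustration, not a documented public API:

    from ply.yacc import Grammar   # internal class, shown for illustration

    g = Grammar(['PLUS', 'NUMBER'])
    g.add_production('expr', ['expr', 'PLUS', 'term'])
    g.add_production('expr', ['term'])
    g.add_production('term', ['NUMBER'])
    g.set_start('expr')            # installs rule 0: S' -> expr
    print(g.Productions[0])        # prints: S' -> expr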
+ # ----------------------------------------------------------------------------- + + def set_start(self, start=None): + if not start: + start = self.Productions[1].name + if start not in self.Nonterminals: + raise GrammarError('start symbol %s undefined' % start) + self.Productions[0] = Production(0, "S'", [start]) + self.Nonterminals[start].append(0) + self.Start = start + + # ----------------------------------------------------------------------------- + # find_unreachable() + # + # Find all of the nonterminal symbols that can't be reached from the starting + # symbol. Returns a list of nonterminals that can't be reached. + # ----------------------------------------------------------------------------- + + def find_unreachable(self): + + # Mark all symbols that are reachable from a symbol s + def mark_reachable_from(s): + if s in reachable: + return + reachable.add(s) + for p in self.Prodnames.get(s, []): + for r in p.prod: + mark_reachable_from(r) + + reachable = set() + mark_reachable_from(self.Productions[0].prod[0]) + return [s for s in self.Nonterminals if s not in reachable] + + # ----------------------------------------------------------------------------- + # infinite_cycles() + # + # This function looks at the various parsing rules and tries to detect + # infinite recursion cycles (grammar rules where there is no possible way + # to derive a string of only terminals). + # ----------------------------------------------------------------------------- + + def infinite_cycles(self): + terminates = {} + + # Terminals: + for t in self.Terminals: + terminates[t] = True + + terminates['$end'] = True + + # Nonterminals: + + # Initialize to false: + for n in self.Nonterminals: + terminates[n] = False + + # Then propagate termination until no change: + while True: + some_change = False + for (n, pl) in self.Prodnames.items(): + # Nonterminal n terminates iff any of its productions terminates. + for p in pl: + # Production p terminates iff all of its rhs symbols terminate. + for s in p.prod: + if not terminates[s]: + # The symbol s does not terminate, + # so production p does not terminate. + p_terminates = False + break + else: + # didn't break from the loop, + # so every symbol s terminates + # so production p terminates. + p_terminates = True + + if p_terminates: + # symbol n terminates! + if not terminates[n]: + terminates[n] = True + some_change = True + # Don't need to consider any more productions for this n. + break + + if not some_change: + break + + infinite = [] + for (s, term) in terminates.items(): + if not term: + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + # s is used-but-not-defined, and we've already warned of that, + # so it would be overkill to say that it's also non-terminating. + pass + else: + infinite.append(s) + + return infinite + + # ----------------------------------------------------------------------------- + # undefined_symbols() + # + # Find all symbols that were used in the grammar, but not defined as tokens or + # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol + # and prod is the production where the symbol was used.
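Continuing the toy grammar pattern from above, the two diagnostics just defined can be exercised directly; again this uses the internal classes purely for illustration:

    g = Grammar(['NUMBER'])
    g.add_production('expr', ['NUMBER'])
    g.add_production('loop', ['loop', 'NUMBER'])  # can only derive itself
    g.set_start('expr')
    print(g.find_unreachable())   # ['loop'] -- never reached from 'expr'
    print(g.infinite_cycles())    # ['loop'] -- never derives only terminals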
+ # ----------------------------------------------------------------------------- + def undefined_symbols(self): + result = [] + for p in self.Productions: + if not p: + continue + + for s in p.prod: + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + result.append((s, p)) + return result + + # ----------------------------------------------------------------------------- + # unused_terminals() + # + # Find all terminals that were defined, but not used by the grammar. Returns + # a list of all symbols. + # ----------------------------------------------------------------------------- + def unused_terminals(self): + unused_tok = [] + for s, v in self.Terminals.items(): + if s != 'error' and not v: + unused_tok.append(s) + + return unused_tok + + # ------------------------------------------------------------------------------ + # unused_rules() + # + # Find all grammar rules that were defined, but not used (maybe not reachable) + # Returns a list of productions. + # ------------------------------------------------------------------------------ + + def unused_rules(self): + unused_prod = [] + for s, v in self.Nonterminals.items(): + if not v: + p = self.Prodnames[s][0] + unused_prod.append(p) + return unused_prod + + # ----------------------------------------------------------------------------- + # unused_precedence() + # + # Returns a list of tuples (term,precedence) corresponding to precedence + # rules that were never used by the grammar. term is the name of the terminal + # on which precedence was applied and precedence is a string such as 'left' or + # 'right' corresponding to the type of precedence. + # ----------------------------------------------------------------------------- + + def unused_precedence(self): + unused = [] + for termname in self.Precedence: + if not (termname in self.Terminals or termname in self.UsedPrecedence): + unused.append((termname, self.Precedence[termname][0])) + + return unused + + # ------------------------------------------------------------------------- + # _first() + # + # Compute the value of FIRST1(beta) where beta is a tuple of symbols. + # + # During execution of compute_first(), the result may be incomplete. + # Afterward (e.g., when called from compute_follow()), it will be complete. + # ------------------------------------------------------------------------- + def _first(self, beta): + + # We are computing First(x1,x2,x3,...,xn) + result = [] + for x in beta: + x_produces_empty = False + + # Add all the non-<empty> symbols of First[x] to the result. + for f in self.First[x]: + if f == '<empty>': + x_produces_empty = True + else: + if f not in result: + result.append(f) + + if x_produces_empty: + # We have to consider the next x in beta, + # i.e. stay in the loop. + pass + else: + # We don't have to consider any further symbols in beta. + break + else: + # There was no 'break' from the loop, + # so x_produces_empty was true for all x in beta, + # so beta produces empty as well. + result.append('<empty>') + + return result + + # ------------------------------------------------------------------------- + # compute_first() + # + # Compute the value of FIRST1(X) for all symbols + # ------------------------------------------------------------------------- + def compute_first(self): + if self.First: + return self.First + + # Terminals: + for t in self.Terminals: + self.First[t] = [t] + + self.First['$end'] = ['$end'] + + # Nonterminals: + + # Initialize to the empty set: + for n in self.Nonterminals: + self.First[n] = [] + + # Then propagate symbols until no change: + while True: + some_change = False + for n in self.Nonterminals: + for p in self.Prodnames[n]: + for f in self._first(p.prod): + if f not in self.First[n]: + self.First[n].append(f) + some_change = True + if not some_change: + break + + return self.First + + # --------------------------------------------------------------------- + # compute_follow() + # + # Computes all of the follow sets for every non-terminal symbol. The + # follow set is the set of all symbols that might follow a given + # non-terminal. See the Dragon book, 2nd Ed. p. 189. + # --------------------------------------------------------------------- + def compute_follow(self, start=None): + # If already computed, return the result + if self.Follow: + return self.Follow + + # If first sets not computed yet, do that first. + if not self.First: + self.compute_first() + + # Add '$end' to the follow list of the start symbol + for k in self.Nonterminals: + self.Follow[k] = [] + + if not start: + start = self.Productions[1].name + + self.Follow[start] = ['$end'] + + while True: + didadd = False + for p in self.Productions[1:]: + # Here is the production set + for i, B in enumerate(p.prod): + if B in self.Nonterminals: + # Okay. We got a non-terminal in a production + fst = self._first(p.prod[i+1:]) + hasempty = False + for f in fst: + if f != '<empty>' and f not in self.Follow[B]: + self.Follow[B].append(f) + didadd = True + if f == '<empty>': + hasempty = True + if hasempty or i == (len(p.prod)-1): + # Add elements of follow(a) to follow(b) + for f in self.Follow[p.name]: + if f not in self.Follow[B]: + self.Follow[B].append(f) + didadd = True + if not didadd: + break + return self.Follow + + + # ----------------------------------------------------------------------------- + # build_lritems() + # + # This function walks the list of productions and builds a complete set of the + # LR items. The LR items are stored in two ways: First, they are uniquely + # numbered and placed in the list _lritems. Second, a linked list of LR items + # is built for each production. For example: + # + # E -> E PLUS E + # + # Creates the list + # + # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E .
] + # ----------------------------------------------------------------------------- + + def build_lritems(self): + for p in self.Productions: + lastlri = p + i = 0 + lr_items = [] + while True: + if i > len(p): + lri = None + else: + lri = LRItem(p, i) + # Precompute the list of productions immediately following + try: + lri.lr_after = self.Prodnames[lri.prod[i+1]] + except (IndexError, KeyError): + lri.lr_after = [] + try: + lri.lr_before = lri.prod[i-1] + except IndexError: + lri.lr_before = None + + lastlri.lr_next = lri + if not lri: + break + lr_items.append(lri) + lastlri = lri + i += 1 + p.lr_items = lr_items + +# ----------------------------------------------------------------------------- +# === LR Generator === +# +# The following classes and functions are used to generate LR parsing tables on +# a grammar. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# digraph() +# traverse() +# +# The following two functions are used to compute set valued functions +# of the form: +# +# F(x) = F'(x) U U{F(y) | x R y} +# +# This is used to compute the values of Read() sets as well as FOLLOW sets +# in LALR(1) generation. +# +# Inputs: X - An input set +# R - A relation +# FP - Set-valued function +# ------------------------------------------------------------------------------ + +def digraph(X, R, FP): + N = {} + for x in X: + N[x] = 0 + stack = [] + F = {} + for x in X: + if N[x] == 0: + traverse(x, N, stack, F, X, R, FP) + return F + +def traverse(x, N, stack, F, X, R, FP): + stack.append(x) + d = len(stack) + N[x] = d + F[x] = FP(x) # F(X) <- F'(x) + + rel = R(x) # Get y's related to x + for y in rel: + if N[y] == 0: + traverse(y, N, stack, F, X, R, FP) + N[x] = min(N[x], N[y]) + for a in F.get(y, []): + if a not in F[x]: + F[x].append(a) + if N[x] == d: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + while element != x: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + +class LALRError(YaccError): + pass + + +# ----------------------------------------------------------------------------- +# == LRTable == +# +# This class implements the LR table generation algorithm. There are no +# public methods. +# ----------------------------------------------------------------------------- + +class LRTable: + def __init__(self, grammar, log=None): + self.grammar = grammar + + # Set up the logger + if not log: + log = NullLogger() + self.log = log + + # Internal attributes + self.lr_action = {} # Action table + self.lr_goto = {} # Goto table + self.lr_productions = grammar.Productions # Copy of grammar Production array + self.lr_goto_cache = {} # Cache of computed gotos + self.lr0_cidhash = {} # Cache of closures + + self._add_count = 0 # Internal counter used to detect cycles + + # Diagnostic information filled in by the table generator + self.sr_conflict = 0 + self.rr_conflict = 0 + self.conflicts = [] # List of conflicts + + self.sr_conflicts = [] + self.rr_conflicts = [] + + # Build the tables + self.grammar.build_lritems() + self.grammar.compute_first() + self.grammar.compute_follow() + self.lr_parse_table() + + # Bind all production function names to callable objects in pdict + def bind_callables(self, pdict): + for p in self.lr_productions: + p.bind(pdict) + + # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 
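Before the closure code that follows, a tiny usage example of the digraph()/traverse() machinery defined above may help; the values are invented for illustration. For a chain relation a R b, b R c, each F(x) accumulates the base values of everything reachable from x:

    X = ['a', 'b', 'c']
    R = lambda x: {'a': ['b'], 'b': ['c'], 'c': []}[x]   # the relation
    FP = lambda x: {'a': [1], 'b': [2], 'c': [3]}[x]     # base function F'
    print(digraph(X, R, FP))  # {'a': [1, 2, 3], 'b': [2, 3], 'c': [3]}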
+ + def lr0_closure(self, I): + self._add_count += 1 + + # Add everything in I to J + J = I[:] + didadd = True + while didadd: + didadd = False + for j in J: + for x in j.lr_after: + if getattr(x, 'lr0_added', 0) == self._add_count: + continue + # Add B --> .G to J + J.append(x.lr_next) + x.lr0_added = self._add_count + didadd = True + + return J + + # Compute the LR(0) goto function goto(I,X) where I is a set + # of LR(0) items and X is a grammar symbol. This function is written + # in a way that guarantees uniqueness of the generated goto sets + # (i.e. the same goto set will never be returned as two different Python + # objects). With uniqueness, we can later do fast set comparisons using + # id(obj) instead of element-wise comparison. + + def lr0_goto(self, I, x): + # First we look for a previously cached entry + g = self.lr_goto_cache.get((id(I), x)) + if g: + return g + + # Now we generate the goto set in a way that guarantees uniqueness + # of the result + + s = self.lr_goto_cache.get(x) + if not s: + s = {} + self.lr_goto_cache[x] = s + + gs = [] + for p in I: + n = p.lr_next + if n and n.lr_before == x: + s1 = s.get(id(n)) + if not s1: + s1 = {} + s[id(n)] = s1 + gs.append(n) + s = s1 + g = s.get('$end') + if not g: + if gs: + g = self.lr0_closure(gs) + s['$end'] = g + else: + s['$end'] = gs + self.lr_goto_cache[(id(I), x)] = g + return g + + # Compute the LR(0) sets-of-items function + def lr0_items(self): + C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] + i = 0 + for I in C: + self.lr0_cidhash[id(I)] = i + i += 1 + + # Loop over the items in C and each grammar symbol + i = 0 + while i < len(C): + I = C[i] + i += 1 + + # Collect all of the symbols that could possibly be in the goto(I,X) sets + asyms = {} + for ii in I: + for s in ii.usyms: + asyms[s] = None + + for x in asyms: + g = self.lr0_goto(I, x) + if not g or id(g) in self.lr0_cidhash: + continue + self.lr0_cidhash[id(g)] = len(C) + C.append(g) + + return C + + # ----------------------------------------------------------------------------- + # ==== LALR(1) Parsing ==== + # + # LALR(1) parsing is almost exactly the same as SLR except that instead of + # relying upon Follow() sets when performing reductions, a more selective + # lookahead set that incorporates the state of the LR(0) machine is utilized. + # Thus, we mainly just have to focus on calculating the lookahead sets. + # + # The method used here is due to DeRemer and Pennello (1982). + # + # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1) + # Lookahead Sets", ACM Transactions on Programming Languages and Systems, + # Vol. 4, No. 4, Oct. 1982, pp. 615-649 + # + # Further details can also be found in: + # + # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", + # McGraw-Hill Book Company, (1985). + # + # ----------------------------------------------------------------------------- + + # ----------------------------------------------------------------------------- + # compute_nullable_nonterminals() + # + # Creates a set containing all of the non-terminals that might produce + # an empty production.
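The fixpoint that compute_nullable_nonterminals() runs (shown next) can be paraphrased standalone; 'prods' here is hypothetical test data, not PLY's internal representation:

    prods = [('opt', []), ('pair', ['opt', 'opt']), ('expr', ['NUMBER', 'opt'])]
    nullable, changed = set(), True
    while changed:
        changed = False
        for name, rhs in prods:
            # A rule is nullable if its entire right-hand side is nullable
            # (trivially true for an empty right-hand side).
            if name not in nullable and all(s in nullable for s in rhs):
                nullable.add(name)
                changed = True
    print(sorted(nullable))  # ['opt', 'pair'] -- 'expr' still needs a NUMBER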
+ # ----------------------------------------------------------------------------- + + def compute_nullable_nonterminals(self): + nullable = set() + num_nullable = 0 + while True: + for p in self.grammar.Productions[1:]: + if p.len == 0: + nullable.add(p.name) + continue + for t in p.prod: + if t not in nullable: + break + else: + nullable.add(p.name) + if len(nullable) == num_nullable: + break + num_nullable = len(nullable) + return nullable + + # ----------------------------------------------------------------------------- + # find_nonterminal_transitions(C) + # + # Given a set of LR(0) items, this function finds all of the non-terminal + # transitions. These are transitions in which a dot appears immediately before + # a non-terminal. Returns a list of tuples of the form (state,N) where state + # is the state number and N is the nonterminal symbol. + # + # The input C is the set of LR(0) items. + # ----------------------------------------------------------------------------- + + def find_nonterminal_transitions(self, C): + trans = [] + for stateno, state in enumerate(C): + for p in state: + if p.lr_index < p.len - 1: + t = (stateno, p.prod[p.lr_index+1]) + if t[1] in self.grammar.Nonterminals: + if t not in trans: + trans.append(t) + return trans + + # ----------------------------------------------------------------------------- + # dr_relation() + # + # Computes the DR(p,A) relationships for non-terminal transitions. The input + # is a tuple (state,N) where state is a number and N is a nonterminal symbol. + # + # Returns a list of terminals. + # ----------------------------------------------------------------------------- + + def dr_relation(self, C, trans, nullable): + state, N = trans + terms = [] + + g = self.lr0_goto(C[state], N) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in self.grammar.Terminals: + if a not in terms: + terms.append(a) + + # This extra bit is to handle the start state + if state == 0 and N == self.grammar.Productions[0].prod[0]: + terms.append('$end') + + return terms + + # ----------------------------------------------------------------------------- + # reads_relation() + # + # Computes the READS() relation (p,A) READS (t,C). + # ----------------------------------------------------------------------------- + + def reads_relation(self, C, trans, empty): + # Look for empty transitions + rel = [] + state, N = trans + + g = self.lr0_goto(C[state], N) + j = self.lr0_cidhash.get(id(g), -1) + for p in g: + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j, a)) + + return rel + + # ----------------------------------------------------------------------------- + # compute_lookback_includes() + # + # Determines the lookback and includes relations + # + # LOOKBACK: + # + # This relation is determined by running the LR(0) state machine forward. + # For example, starting with a production "N : . A B C", we run it forward + # to obtain "N : A B C ." We then build a relationship between this final + # state and the starting state. These relationships are stored in a dictionary + # lookdict. + # + # INCLUDES: + # + # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). + # + # This relation is used to determine non-terminal transitions that occur + # inside of other non-terminal transition states.
(p,A) INCLUDES (p', B) + # if the following holds: + # + # B -> LAT, where T -> epsilon and p' -L-> p + # + # L is essentially a prefix (which may be empty), T is a suffix that must be + # able to derive an empty string. State p' must lead to state p with the string L. + # + # ----------------------------------------------------------------------------- + + def compute_lookback_includes(self, C, trans, nullable): + lookdict = {} # Dictionary of lookback relations + includedict = {} # Dictionary of include relations + + # Make a dictionary of non-terminal transitions + dtrans = {} + for t in trans: + dtrans[t] = 1 + + # Loop over all transitions and compute lookbacks and includes + for state, N in trans: + lookb = [] + includes = [] + for p in C[state]: + if p.name != N: + continue + + # Okay, we have a name match. We now follow the production all the way + # through the state machine until we get the . on the right hand side + + lr_index = p.lr_index + j = state + while lr_index < p.len - 1: + lr_index = lr_index + 1 + t = p.prod[lr_index] + + # Check to see if this symbol and state are a non-terminal transition + if (j, t) in dtrans: + # Yes. Okay, there is some chance that this is an includes relation; + # the only way to know for certain is whether the rest of the + # production derives empty + + li = lr_index + 1 + while li < p.len: + if p.prod[li] in self.grammar.Terminals: + break # No, forget it + if p.prod[li] not in nullable: + break + li = li + 1 + else: + # Appears to be a relation between (j,t) and (state,N) + includes.append((j, t)) + + g = self.lr0_goto(C[j], t) # Go to next set + j = self.lr0_cidhash.get(id(g), -1) # Go to next state + + # When we get here, j is the final state; now we have to locate the production + for r in C[j]: + if r.name != p.name: + continue + if r.len != p.len: + continue + i = 0 + # This loop is comparing a production ". A B C" with "A B C ." + while i < r.lr_index: + if r.prod[i] != p.prod[i+1]: + break + i = i + 1 + else: + lookb.append((j, r)) + for i in includes: + if i not in includedict: + includedict[i] = [] + includedict[i].append((state, N)) + lookdict[(state, N)] = lookb + + return lookdict, includedict + + # ----------------------------------------------------------------------------- + # compute_read_sets() + # + # Given a set of LR(0) items, this function computes the read sets.
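For reference, the set-valued equation being solved here (via digraph() above) is the standard one from DeRemer and Pennello, written in the same notation as the digraph() comment:

    Read(p,A) = DR(p,A) U U{ Read(r,C) | (p,A) READS (r,C) }

where dr_relation() supplies the DR() base sets and reads_relation() supplies the READS relation in the digraph call that follows.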
+ # + # Inputs: C = Set of LR(0) items + # ntrans = Set of nonterminal transitions + # nullable = Set of empty transitions + # + # Returns a set containing the read sets + # ----------------------------------------------------------------------------- + + def compute_read_sets(self, C, ntrans, nullable): + FP = lambda x: self.dr_relation(C, x, nullable) + R = lambda x: self.reads_relation(C, x, nullable) + F = digraph(ntrans, R, FP) + return F + + # ----------------------------------------------------------------------------- + # compute_follow_sets() + # + # Given a set of LR(0) items, a set of non-terminal transitions, a readset, + # and an include set, this function computes the follow sets + # + # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} + # + # Inputs: + # ntrans = Set of nonterminal transitions + # readsets = Readset (previously computed) + # inclsets = Include sets (previously computed) + # + # Returns a set containing the follow sets + # ----------------------------------------------------------------------------- + + def compute_follow_sets(self, ntrans, readsets, inclsets): + FP = lambda x: readsets[x] + R = lambda x: inclsets.get(x, []) + F = digraph(ntrans, R, FP) + return F + + # ----------------------------------------------------------------------------- + # add_lookaheads() + # + # Attaches the lookahead symbols to grammar rules. + # + # Inputs: lookbacks - Set of lookback relations + # followset - Computed follow set + # + # This function directly attaches the lookaheads to productions contained + # in the lookbacks set + # ----------------------------------------------------------------------------- + + def add_lookaheads(self, lookbacks, followset): + for trans, lb in lookbacks.items(): + # Loop over productions in lookback + for state, p in lb: + if state not in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans, []) + for a in f: + if a not in p.lookaheads[state]: + p.lookaheads[state].append(a) + + # ----------------------------------------------------------------------------- + # add_lalr_lookaheads() + # + # This function does all of the work of adding lookahead information for use + # with LALR parsing + # ----------------------------------------------------------------------------- + + def add_lalr_lookaheads(self, C): + # Determine all of the nullable nonterminals + nullable = self.compute_nullable_nonterminals() + + # Find all non-terminal transitions + trans = self.find_nonterminal_transitions(C) + + # Compute read sets + readsets = self.compute_read_sets(C, trans, nullable) + + # Compute lookback/includes relations + lookd, included = self.compute_lookback_includes(C, trans, nullable) + + # Compute LALR FOLLOW sets + followsets = self.compute_follow_sets(trans, readsets, included) + + # Add all of the lookaheads + self.add_lookaheads(lookd, followsets) + + # ----------------------------------------------------------------------------- + # lr_parse_table() + # + # This function constructs the parse tables for SLR or LALR + # ----------------------------------------------------------------------------- + def lr_parse_table(self): + Productions = self.grammar.Productions + Precedence = self.grammar.Precedence + goto = self.lr_goto # Goto array + action = self.lr_action # Action array + log = self.log # Logger for output + + actionp = {} # Action production array (temporary) + + # Step 1: Construct C = { I0, I1, ... 
IN}, collection of LR(0) items + # This determines the number of states + + C = self.lr0_items() + self.add_lalr_lookaheads(C) + + # Build the parser table, state by state + st = 0 + for I in C: + # Loop over each production in I + actlist = [] # List of actions + st_action = {} + st_actionp = {} + st_goto = {} + log.info('') + log.info('state %d', st) + log.info('') + for p in I: + log.info(' (%d) %s', p.number, p) + log.info('') + + for p in I: + if p.len == p.lr_index + 1: + if p.name == "S'": + # Start symbol. Accept! + st_action['$end'] = 0 + st_actionp['$end'] = p + else: + # We are at the end of a production. Reduce! + laheads = p.lookaheads[st] + for a in laheads: + actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) + r = st_action.get(a) + if r is not None: + # Whoa. Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. Need to add + # some precedence rules here. + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from rule being reduced (p) + rprec, rlevel = Productions[p.number].prec + + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + st_action[a] = -p.number + st_actionp[a] = p + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + Productions[p.number].reduced += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the shift + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif r < 0: + # Reduce/reduce conflict. In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + st_action[a] = -p.number + st_actionp[a] = p + chosenp, rejectp = pp, oldp + Productions[p.number].reduced += 1 + Productions[oldp.number].reduced -= 1 + else: + chosenp, rejectp = oldp, pp + self.rr_conflicts.append((st, chosenp, rejectp)) + log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', + a, st_actionp[a].number, st_actionp[a]) + else: + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = -p.number + st_actionp[a] = p + Productions[p.number].reduced += 1 + else: + i = p.lr_index + a = p.prod[i+1] # Get symbol right after the "." + if a in self.grammar.Terminals: + g = self.lr0_goto(I, a) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + # We are in a shift state + actlist.append((a, p, 'shift and go to state %d' % j)) + r = st_action.get(a) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + raise LALRError('Shift/shift conflict in state %d' % st) + elif r < 0: + # Do a precedence check. + # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from the rule that could have been reduced + rprec, rlevel = Productions[st_actionp[a].number].prec + + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): + # We decide to shift here... 
highest precedence to shift + Productions[st_actionp[a].number].reduced -= 1 + st_action[a] = j + st_actionp[a] = p + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + + else: + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = j + st_actionp[a] = p + + # Print the actions associated with each terminal + _actprint = {} + for a, p, m in actlist: + if a in st_action: + if p is st_actionp[a]: + log.info(' %-15s %s', a, m) + _actprint[(a, m)] = 1 + log.info('') + # Print the actions that were not used. (debugging) + not_used = 0 + for a, p, m in actlist: + if a in st_action: + if p is not st_actionp[a]: + if not (a, m) in _actprint: + log.debug(' ! %-15s [ %s ]', a, m) + not_used = 1 + _actprint[(a, m)] = 1 + if not_used: + log.debug('') + + # Construct the goto table for this state + + nkeys = {} + for ii in I: + for s in ii.usyms: + if s in self.grammar.Nonterminals: + nkeys[s] = None + for n in nkeys: + g = self.lr0_goto(I, n) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + st_goto[n] = j + log.info(' %-30s shift and go to state %d', n, j) + + action[st] = st_action + actionp[st] = st_actionp + goto[st] = st_goto + st += 1 + +# ----------------------------------------------------------------------------- +# === INTROSPECTION === +# +# The following functions and classes are used to implement the PLY +# introspection features followed by the yacc() function itself. +# ----------------------------------------------------------------------------- + +# ----------------------------------------------------------------------------- +# get_caller_module_dict() +# +# This function returns a dictionary containing all of the symbols defined within +# a caller further down the call stack. This is used to get the environment +# associated with the yacc() call if none was provided. +# ----------------------------------------------------------------------------- + +def get_caller_module_dict(levels): + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict + +# ----------------------------------------------------------------------------- +# parse_grammar() +# +# This takes a raw grammar rule string and parses it into production data +# ----------------------------------------------------------------------------- +def parse_grammar(doc, file, line): + grammar = [] + # Split the doc string into lines + pstrings = doc.splitlines() + lastp = None + dline = line + for ps in pstrings: + dline += 1 + p = ps.split() + if not p: + continue + try: + if p[0] == '|': + # This is a continuation of a previous rule + if not lastp: + raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) + prodname = lastp + syms = p[1:] + else: + prodname = p[0] + lastp = prodname + syms = p[2:] + assign = p[1] + if assign != ':' and assign != '::=': + raise SyntaxError("%s:%d: Syntax error. 
Expected ':'" % (file, dline)) + + grammar.append((file, dline, prodname, syms)) + except SyntaxError: + raise + except Exception: + raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) + + return grammar + +# ----------------------------------------------------------------------------- +# ParserReflect() +# +# This class represents information extracted for building a parser including +# start symbol, error function, tokens, precedence list, action functions, +# etc. +# ----------------------------------------------------------------------------- +class ParserReflect(object): + def __init__(self, pdict, log=None): + self.pdict = pdict + self.start = None + self.error_func = None + self.tokens = None + self.modules = set() + self.grammar = [] + self.error = False + + if log is None: + self.log = PlyLogger(sys.stderr) + else: + self.log = log + + # Get all of the basic information + def get_all(self): + self.get_start() + self.get_error_func() + self.get_tokens() + self.get_precedence() + self.get_pfunctions() + + # Validate all of the information + def validate_all(self): + self.validate_start() + self.validate_error_func() + self.validate_tokens() + self.validate_precedence() + self.validate_pfunctions() + self.validate_modules() + return self.error + + # Compute a signature over the grammar + def signature(self): + parts = [] + try: + if self.start: + parts.append(self.start) + if self.prec: + parts.append(''.join([''.join(p) for p in self.prec])) + if self.tokens: + parts.append(' '.join(self.tokens)) + for f in self.pfuncs: + if f[3]: + parts.append(f[3]) + except (TypeError, ValueError): + pass + return ''.join(parts) + + # ----------------------------------------------------------------------------- + # validate_modules() + # + # This method checks to see if there are duplicated p_rulename() functions + # in the parser module file. Without this function, it is really easy for + # users to make mistakes by cutting and pasting code fragments (and it's a real + # bugger to try and figure out why the resulting parser doesn't work). Therefore, + # we just do a little regular expression pattern matching of def statements + # to try and detect duplicates. + # ----------------------------------------------------------------------------- + + def validate_modules(self): + # Match def p_funcname( + fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') + + for module in self.modules: + try: + lines, linen = inspect.getsourcelines(module) + except IOError: + continue + + counthash = {} + for linen, line in enumerate(lines): + linen += 1 + m = fre.match(line) + if m: + name = m.group(1) + prev = counthash.get(name) + if not prev: + counthash[name] = linen + else: + filename = inspect.getsourcefile(module) + self.log.warning('%s:%d: Function %s redefined. 
Previously defined on line %d', + filename, linen, name, prev) + + # Get the start symbol + def get_start(self): + self.start = self.pdict.get('start') + + # Validate the start symbol + def validate_start(self): + if self.start is not None: + if not isinstance(self.start, str): + self.log.error("'start' must be a string") + + # Look for error handler + def get_error_func(self): + self.error_func = self.pdict.get('p_error') + + # Validate the error function + def validate_error_func(self): + if self.error_func: + if isinstance(self.error_func, types.FunctionType): + ismethod = 0 + elif isinstance(self.error_func, types.MethodType): + ismethod = 1 + else: + self.log.error("'p_error' defined, but is not a function or method") + self.error = True + return + + eline = self.error_func.__code__.co_firstlineno + efile = self.error_func.__code__.co_filename + module = inspect.getmodule(self.error_func) + self.modules.add(module) + + argcount = self.error_func.__code__.co_argcount - ismethod + if argcount != 1: + self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) + self.error = True + + # Get the tokens map + def get_tokens(self): + tokens = self.pdict.get('tokens') + if not tokens: + self.log.error('No token list is defined') + self.error = True + return + + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True + return + + if not tokens: + self.log.error('tokens is empty') + self.error = True + return + + self.tokens = sorted(tokens) + + # Validate the tokens + def validate_tokens(self): + # Validate the tokens. + if 'error' in self.tokens: + self.log.error("Illegal token name 'error'. Is a reserved word") + self.error = True + return + + terminals = set() + for n in self.tokens: + if n in terminals: + self.log.warning('Token %r multiply defined', n) + terminals.add(n) + + # Get the precedence map (if any) + def get_precedence(self): + self.prec = self.pdict.get('precedence') + + # Validate and parse the precedence map + def validate_precedence(self): + preclist = [] + if self.prec: + if not isinstance(self.prec, (list, tuple)): + self.log.error('precedence must be a list or tuple') + self.error = True + return + for level, p in enumerate(self.prec): + if not isinstance(p, (list, tuple)): + self.log.error('Bad precedence table') + self.error = True + return + + if len(p) < 2: + self.log.error('Malformed precedence entry %s. 
Must be (assoc, term, ..., term)', p) + self.error = True + return + assoc = p[0] + if not isinstance(assoc, str): + self.log.error('precedence associativity must be a string') + self.error = True + return + for term in p[1:]: + if not isinstance(term, str): + self.log.error('precedence items must be strings') + self.error = True + return + preclist.append((term, assoc, level+1)) + self.preclist = preclist + + # Get all p_functions from the grammar + def get_pfunctions(self): + p_functions = [] + for name, item in self.pdict.items(): + if not name.startswith('p_') or name == 'p_error': + continue + if isinstance(item, (types.FunctionType, types.MethodType)): + line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) + module = inspect.getmodule(item) + p_functions.append((line, module, name, item.__doc__)) + + # Sort all of the actions by line number; make sure to stringify + # modules to make them sortable, since `line` may not uniquely sort all + # p functions + p_functions.sort(key=lambda p_function: ( + p_function[0], + str(p_function[1]), + p_function[2], + p_function[3])) + self.pfuncs = p_functions + + # Validate all of the p_functions + def validate_pfunctions(self): + grammar = [] + # Check for non-empty symbols + if len(self.pfuncs) == 0: + self.log.error('no rules of the form p_rulename are defined') + self.error = True + return + + for line, module, name, doc in self.pfuncs: + file = inspect.getsourcefile(module) + func = self.pdict[name] + if isinstance(func, types.MethodType): + reqargs = 2 + else: + reqargs = 1 + if func.__code__.co_argcount > reqargs: + self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) + self.error = True + elif func.__code__.co_argcount < reqargs: + self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) + self.error = True + elif not func.__doc__: + self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', + file, line, func.__name__) + else: + try: + parsed_g = parse_grammar(doc, file, line) + for g in parsed_g: + grammar.append((name, g)) + except SyntaxError as e: + self.log.error(str(e)) + self.error = True + + # Looks like a valid grammar rule + # Mark the file in which defined. + self.modules.add(module) + + # Secondary validation step that looks for p_ definitions that are not functions + # or functions that look like they might be grammar rules. 
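The checks below guard the conventions that user code must follow. For contrast, a well-formed grammar rule in PLY's documented style is a p_-prefixed function whose docstring carries the productions:

    def p_expr(p):
        '''expr : expr PLUS term
                | term'''
        # p[0] receives the rule's value; p[1..n] are the RHS symbol values.
        if len(p) == 4:
            p[0] = p[1] + p[3]
        else:
            p[0] = p[1]

A function like this passes validate_pfunctions() (one argument and a docstring parseable by parse_grammar()), while a p_ name bound to a non-function, or a rule-like docstring without the p_ prefix, triggers the warnings issued below.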
+ + for n, v in self.pdict.items(): + if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): + continue + if n.startswith('t_'): + continue + if n.startswith('p_') and n != 'p_error': + self.log.warning('%r not defined as a function', n) + if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or + (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): + if v.__doc__: + try: + doc = v.__doc__.split(' ') + if doc[1] == ':': + self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', + v.__code__.co_filename, v.__code__.co_firstlineno, n) + except IndexError: + pass + + self.grammar = grammar + +# ----------------------------------------------------------------------------- +# yacc(module) +# +# Build a parser +# ----------------------------------------------------------------------------- + +def yacc(*, debug=yaccdebug, module=None, start=None, + check_recursion=True, optimize=False, debugfile=debug_file, + debuglog=None, errorlog=None): + + # Reference to the parsing method of the last built parser + global parse + + if errorlog is None: + errorlog = PlyLogger(sys.stderr) + + # Get the module dictionary used for the parser + if module: + _items = [(k, getattr(module, k)) for k in dir(module)] + pdict = dict(_items) + # If no __file__ or __package__ attributes are available, try to obtain them + # from the __module__ instead + if '__file__' not in pdict: + pdict['__file__'] = sys.modules[pdict['__module__']].__file__ + if '__package__' not in pdict and '__module__' in pdict: + if hasattr(sys.modules[pdict['__module__']], '__package__'): + pdict['__package__'] = sys.modules[pdict['__module__']].__package__ + else: + pdict = get_caller_module_dict(2) + + # Set start symbol if it's specified directly using an argument + if start is not None: + pdict['start'] = start + + # Collect parser information from the dictionary + pinfo = ParserReflect(pdict, log=errorlog) + pinfo.get_all() + + if pinfo.error: + raise YaccError('Unable to build parser') + + if debuglog is None: + if debug: + try: + debuglog = PlyLogger(open(debugfile, 'w')) + except IOError as e: + errorlog.warning("Couldn't open %r. 
%s" % (debugfile, e)) + debuglog = NullLogger() + else: + debuglog = NullLogger() + + debuglog.info('Created by PLY (http://www.dabeaz.com/ply)') + + errors = False + + # Validate the parser information + if pinfo.validate_all(): + raise YaccError('Unable to build parser') + + if not pinfo.error_func: + errorlog.warning('no p_error() function is defined') + + # Create a grammar object + grammar = Grammar(pinfo.tokens) + + # Set precedence level for terminals + for term, assoc, level in pinfo.preclist: + try: + grammar.set_precedence(term, assoc, level) + except GrammarError as e: + errorlog.warning('%s', e) + + # Add productions to the grammar + for funcname, gram in pinfo.grammar: + file, line, prodname, syms = gram + try: + grammar.add_production(prodname, syms, funcname, file, line) + except GrammarError as e: + errorlog.error('%s', e) + errors = True + + # Set the grammar start symbols + try: + if start is None: + grammar.set_start(pinfo.start) + else: + grammar.set_start(start) + except GrammarError as e: + errorlog.error(str(e)) + errors = True + + if errors: + raise YaccError('Unable to build parser') + + # Verify the grammar structure + undefined_symbols = grammar.undefined_symbols() + for sym, prod in undefined_symbols: + errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) + errors = True + + unused_terminals = grammar.unused_terminals() + if unused_terminals: + debuglog.info('') + debuglog.info('Unused terminals:') + debuglog.info('') + for term in unused_terminals: + errorlog.warning('Token %r defined, but not used', term) + debuglog.info(' %s', term) + + # Print out all productions to the debug log + if debug: + debuglog.info('') + debuglog.info('Grammar') + debuglog.info('') + for n, p in enumerate(grammar.Productions): + debuglog.info('Rule %-5d %s', n, p) + + # Find unused non-terminals + unused_rules = grammar.unused_rules() + for prod in unused_rules: + errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) + + if len(unused_terminals) == 1: + errorlog.warning('There is 1 unused token') + if len(unused_terminals) > 1: + errorlog.warning('There are %d unused tokens', len(unused_terminals)) + + if len(unused_rules) == 1: + errorlog.warning('There is 1 unused rule') + if len(unused_rules) > 1: + errorlog.warning('There are %d unused rules', len(unused_rules)) + + if debug: + debuglog.info('') + debuglog.info('Terminals, with rules where they appear') + debuglog.info('') + terms = list(grammar.Terminals) + terms.sort() + for term in terms: + debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) + + debuglog.info('') + debuglog.info('Nonterminals, with rules where they appear') + debuglog.info('') + nonterms = list(grammar.Nonterminals) + nonterms.sort() + for nonterm in nonterms: + debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) + debuglog.info('') + + if check_recursion: + unreachable = grammar.find_unreachable() + for u in unreachable: + errorlog.warning('Symbol %r is unreachable', u) + + infinite = grammar.infinite_cycles() + for inf in infinite: + errorlog.error('Infinite recursion detected for symbol %r', inf) + errors = True + + unused_prec = grammar.unused_precedence() + for term, assoc in unused_prec: + errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) + errors = True + + if errors: + raise YaccError('Unable to build parser') + + # Run the LRTable on the grammar + lr = 
LRTable(grammar, debuglog) + + if debug: + num_sr = len(lr.sr_conflicts) + + # Report shift/reduce and reduce/reduce conflicts + if num_sr == 1: + errorlog.warning('1 shift/reduce conflict') + elif num_sr > 1: + errorlog.warning('%d shift/reduce conflicts', num_sr) + + num_rr = len(lr.rr_conflicts) + if num_rr == 1: + errorlog.warning('1 reduce/reduce conflict') + elif num_rr > 1: + errorlog.warning('%d reduce/reduce conflicts', num_rr) + + # Write out conflicts to the output file + if debug and (lr.sr_conflicts or lr.rr_conflicts): + debuglog.warning('') + debuglog.warning('Conflicts:') + debuglog.warning('') + + for state, tok, resolution in lr.sr_conflicts: + debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) + + already_reported = set() + for state, rule, rejected in lr.rr_conflicts: + if (state, id(rule), id(rejected)) in already_reported: + continue + debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + debuglog.warning('rejected rule (%s) in state %d', rejected, state) + errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + errorlog.warning('rejected rule (%s) in state %d', rejected, state) + already_reported.add((state, id(rule), id(rejected))) + + warned_never = [] + for state, rule, rejected in lr.rr_conflicts: + if not rejected.reduced and (rejected not in warned_never): + debuglog.warning('Rule (%s) is never reduced', rejected) + errorlog.warning('Rule (%s) is never reduced', rejected) + warned_never.append(rejected) + + # Build the parser + lr.bind_callables(pinfo.pdict) + parser = LRParser(lr, pinfo.error_func) + + parse = parser.parse + return parser diff --git a/test/README b/test/README deleted file mode 100644 index 03b167c..0000000 --- a/test/README +++ /dev/null @@ -1,8 +0,0 @@ -This directory mostly contains tests for various types of error -conditions. To run: - - $ python testlex.py - $ python testyacc.py - $ python testcpp.py - -The script 'cleanup.sh' cleans up this directory to its original state. diff --git a/test/calclex.py b/test/calclex.py deleted file mode 100644 index 030a986..0000000 --- a/test/calclex.py +++ /dev/null @@ -1,49 +0,0 @@ -# ----------------------------------------------------------------------------- -# calclex.py -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -lexer = lex.lex() - - - diff --git a/test/cleanup.sh b/test/cleanup.sh deleted file mode 100755 index 9374f2c..0000000 --- a/test/cleanup.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -rm -rf *~ *.pyc *.pyo *.dif *.out __pycache__ - diff --git a/test/lex_closure.py b/test/lex_closure.py deleted file mode 100644 index 30ee679..0000000 --- a/test/lex_closure.py +++ /dev/null @@ -1,54 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_closure.py -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -def make_calc(): - - # Tokens - - t_PLUS = r'\+' - t_MINUS = r'-' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - - t_ignore = " \t" - - def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - - def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - # Build the lexer - return lex.lex() - -make_calc() -lex.runmain(data="3+4") - - - diff --git a/test/lex_doc1.py b/test/lex_doc1.py deleted file mode 100644 index 8a2bfcc..0000000 --- a/test/lex_doc1.py +++ /dev/null @@ -1,26 +0,0 @@ -# lex_doc1.py -# -# Missing documentation string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(t): - pass - -def t_error(t): - pass - -lex.lex() - - diff --git a/test/lex_dup1.py b/test/lex_dup1.py deleted file mode 100644 index fd04cdb..0000000 --- a/test/lex_dup1.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_dup1.py -# -# Duplicated rule specifiers - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_NUMBER = r'\d+' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_dup2.py b/test/lex_dup2.py deleted file mode 100644 index 870e5e7..0000000 --- a/test/lex_dup2.py +++ /dev/null @@ -1,33 +0,0 @@ -# lex_dup2.py -# -# Duplicated rule specifiers - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(t): - r'\d+' - pass - -def t_NUMBER(t): - r'\d+' - pass - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_dup3.py b/test/lex_dup3.py deleted file mode 100644 index 94b5592..0000000 --- a/test/lex_dup3.py +++ /dev/null @@ -1,31 +0,0 @@ -# lex_dup3.py -# -# Duplicated rule specifiers - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_NUMBER(t): - r'\d+' - pass - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_empty.py b/test/lex_empty.py deleted file mode 100644 index e0368bf..0000000 --- a/test/lex_empty.py +++ /dev/null @@ -1,20 +0,0 @@ -# lex_empty.py -# -# No rules defined - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - - - -lex.lex() - - diff --git a/test/lex_error1.py b/test/lex_error1.py deleted file mode 100644 index 4508a80..0000000 --- a/test/lex_error1.py +++ /dev/null @@ -1,24 +0,0 @@ -# lex_error1.py -# -# Missing t_error() rule - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - - - -lex.lex() - - diff --git a/test/lex_error2.py b/test/lex_error2.py deleted file mode 100644 index 8040d39..0000000 --- a/test/lex_error2.py +++ /dev/null @@ -1,26 +0,0 @@ -# lex_error2.py -# -# t_error defined, but not a function - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_error = "foo" - - - -lex.lex() - - diff --git a/test/lex_error3.py b/test/lex_error3.py deleted file mode 100644 index 1feefb6..0000000 --- a/test/lex_error3.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_error3.py -# -# t_error defined as function, but with wrong # args - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(): - pass - - - -lex.lex() - - diff --git a/test/lex_error4.py b/test/lex_error4.py deleted file mode 100644 index f4f48db..0000000 --- a/test/lex_error4.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_error4.py -# -# t_error defined as function, but too many args - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t,s): - pass - - - -lex.lex() - - diff --git a/test/lex_hedit.py b/test/lex_hedit.py deleted file mode 100644 index 34f15a1..0000000 --- a/test/lex_hedit.py +++ /dev/null @@ -1,47 +0,0 @@ -# ----------------------------------------------------------------------------- -# hedit.py -# -# Parsing of Fortran H Edit descriptions (Contributed by Pearu Peterson) -# -# These tokens can't be easily tokenized because they are of the following -# form: -# -# nHc1...cn -# -# where n is a positive integer and c1 ... cn are characters.
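-# -# For example, the descriptor "3Habc" (one of the inputs fed to -# runmain() at the bottom of this file) has n = 3, so its payload is the -# three characters "abc" immediately following the H.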
-# -# This example shows how to modify the state of the lexer to parse -# such tokens -# ----------------------------------------------------------------------------- -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = ( - 'H_EDIT_DESCRIPTOR', - ) - -# Tokens -t_ignore = " \t\n" - -def t_H_EDIT_DESCRIPTOR(t): - r"\d+H.*" # This grabs all of the remaining text - i = t.value.index('H') - n = eval(t.value[:i]) - - # Adjust the tokenizing position - t.lexer.lexpos -= len(t.value) - (i+1+n) - t.value = t.value[i+1:i+1+n] - return t - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -lex.lex() -lex.runmain(data="3Habc 10Habcdefghij 2Hxy") - - - diff --git a/test/lex_ignore.py b/test/lex_ignore.py deleted file mode 100644 index 6c43b4c..0000000 --- a/test/lex_ignore.py +++ /dev/null @@ -1,31 +0,0 @@ -# lex_ignore.py -# -# Improperly specified ignore declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_ignore(t): - ' \t' - pass - -def t_error(t): - pass - -import sys - -lex.lex() - - diff --git a/test/lex_ignore2.py b/test/lex_ignore2.py deleted file mode 100644 index f60987a..0000000 --- a/test/lex_ignore2.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_ignore2.py -# -# ignore declaration as a raw string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_ignore = r' \t' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_literal1.py b/test/lex_literal1.py deleted file mode 100644 index db389c3..0000000 --- a/test/lex_literal1.py +++ /dev/null @@ -1,25 +0,0 @@ -# lex_literal1.py -# -# Bad literal specification - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "NUMBER", - ] - -literals = ["+","-","**"] - -def t_NUMBER(t): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/test/lex_literal2.py b/test/lex_literal2.py deleted file mode 100644 index b50b92c..0000000 --- a/test/lex_literal2.py +++ /dev/null @@ -1,25 +0,0 @@ -# lex_literal2.py -# -# Bad literal specification - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "NUMBER", - ] - -literals = 23 - -def t_NUMBER(t): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/test/lex_literal3.py b/test/lex_literal3.py deleted file mode 100644 index 91ab980..0000000 --- a/test/lex_literal3.py +++ /dev/null @@ -1,26 +0,0 @@ -# lex_literal3.py -# -# An empty literal specification given as a list -# Issue 8 : Literals empty list causes IndexError - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "NUMBER", - ] - -literals = [] - -def t_NUMBER(t): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/test/lex_many_tokens.py b/test/lex_many_tokens.py deleted file mode 100644 index 81ae57a..0000000 --- a/test/lex_many_tokens.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_many_tokens.py -# -# Test lex's ability to handle a large number of tokens (beyond the -# 100-group limit of the re module) - -import sys -if ".."
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = ["TOK%d" % i for i in range(1000)] - -for tok in tokens: - if sys.version_info[0] < 3: - exec("t_%s = '%s:'" % (tok,tok)) - else: - exec("t_%s = '%s:'" % (tok,tok), globals()) - -t_ignore = " \t" - -def t_error(t): - pass - -lex.lex() -lex.runmain(data="TOK34: TOK143: TOK269: TOK372: TOK452: TOK561: TOK999:") - - diff --git a/test/lex_module.py b/test/lex_module.py deleted file mode 100644 index 8bdd3ed..0000000 --- a/test/lex_module.py +++ /dev/null @@ -1,10 +0,0 @@ -# lex_module.py -# - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex -import lex_module_import -lex.lex(module=lex_module_import) -lex.runmain(data="3+4") diff --git a/test/lex_module_import.py b/test/lex_module_import.py deleted file mode 100644 index df42082..0000000 --- a/test/lex_module_import.py +++ /dev/null @@ -1,42 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_module_import.py -# -# A lexer defined in a module, but built in lex_module.py -# ----------------------------------------------------------------------------- - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - diff --git a/test/lex_object.py b/test/lex_object.py deleted file mode 100644 index 7e9f389..0000000 --- a/test/lex_object.py +++ /dev/null @@ -1,55 +0,0 @@ -# ----------------------------------------------------------------------------- -# lex_object.py -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.lex as lex - -class CalcLexer: - tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - - # Tokens - - t_PLUS = r'\+' - t_MINUS = r'-' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(self,t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print("Integer value too large %s" % t.value) - t.value = 0 - return t - - t_ignore = " \t" - - def t_newline(self,t): - r'\n+' - t.lineno += t.value.count("\n") - - def t_error(self,t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - - -calc = CalcLexer() - -# Build the lexer -lex.lex(object=calc) -lex.runmain(data="3+4") - - - - diff --git a/test/lex_re1.py b/test/lex_re1.py deleted file mode 100644 index 5be7aef..0000000 --- a/test/lex_re1.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_re1.py -# -# Bad regular expression in a string - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'(\d+' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_re2.py b/test/lex_re2.py deleted file mode 100644 index 8dfb8e3..0000000 --- a/test/lex_re2.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_re2.py -# -# Regular expression rule matches empty string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+?' -t_MINUS = r'-' -t_NUMBER = r'(\d+)' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_re3.py b/test/lex_re3.py deleted file mode 100644 index e179925..0000000 --- a/test/lex_re3.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_re3.py -# -# Regular expression rule matches empty string - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - "POUND", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'(\d+)' -t_POUND = r'#' - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_rule1.py b/test/lex_rule1.py deleted file mode 100644 index 0406c6f..0000000 --- a/test/lex_rule1.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_rule1.py -# -# Rule function with incorrect number of arguments - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = 1 - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_rule2.py b/test/lex_rule2.py deleted file mode 100644 index 1c29d87..0000000 --- a/test/lex_rule2.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_rule2.py -# -# Rule function with incorrect number of arguments - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(): - r'\d+' - return t - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_rule3.py b/test/lex_rule3.py deleted file mode 100644 index 9ea94da..0000000 --- a/test/lex_rule3.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_rule3.py -# -# Rule function with incorrect number of arguments - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(t,s): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/test/lex_state1.py b/test/lex_state1.py deleted file mode 100644 index 7528c91..0000000 --- a/test/lex_state1.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state1.py -# -# Bad state declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = 'comment' - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_state2.py b/test/lex_state2.py deleted file mode 100644 index 3aef69e..0000000 --- a/test/lex_state2.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state2.py -# -# Bad state declaration - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = ('comment','example') - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_state3.py b/test/lex_state3.py deleted file mode 100644 index 616e484..0000000 --- a/test/lex_state3.py +++ /dev/null @@ -1,42 +0,0 @@ -# lex_state3.py -# -# Bad state declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -comment = 1 -states = ((comment, 'inclusive'), - ('example', 'exclusive')) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_state4.py b/test/lex_state4.py deleted file mode 100644 index 1825016..0000000 --- a/test/lex_state4.py +++ /dev/null @@ -1,41 +0,0 @@ -# lex_state4.py -# -# Bad state declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - - -states = (('comment', 'exclsive'),) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - - -lex.lex() - - diff --git a/test/lex_state5.py b/test/lex_state5.py deleted file mode 100644 index 4ce828e..0000000 --- a/test/lex_state5.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state5.py -# -# Bad state declaration - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = (('comment', 'exclusive'), - ('comment', 'exclusive')) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - -lex.lex() - - diff --git a/test/lex_state_noerror.py b/test/lex_state_noerror.py deleted file mode 100644 index 90bbea8..0000000 --- a/test/lex_state_noerror.py +++ /dev/null @@ -1,39 +0,0 @@ -# lex_state_noerror.py -# -# Declaration of a state for which no rules are defined - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = (('comment', 'exclusive'),) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - -lex.lex() - - diff --git a/test/lex_state_norule.py b/test/lex_state_norule.py deleted file mode 100644 index 64ec6d3..0000000 --- a/test/lex_state_norule.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state_norule.py -# -# Declaration of a state for which no rules are defined - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = (('comment', 'exclusive'), - ('example', 'exclusive')) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - - -lex.lex() - - diff --git a/test/lex_state_try.py b/test/lex_state_try.py deleted file mode 100644 index fd5ba22..0000000 --- a/test/lex_state_try.py +++ /dev/null @@ -1,45 +0,0 @@ -# lex_state_try.py -# -# Declaration of a state for which no rules are defined - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = (('comment', 'exclusive'),) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_ignore = " \t" - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print("Entering comment state") - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print("comment body %s" % t) - t.lexer.begin('INITIAL') - -def t_error(t): - pass - -t_comment_error = t_error -t_comment_ignore = t_ignore - -lex.lex() - -data = "3 + 4 /* This is a comment */ + 10" - -lex.runmain(data=data) diff --git a/test/lex_token1.py b/test/lex_token1.py deleted file mode 100644 index 6fca300..0000000 --- a/test/lex_token1.py +++ /dev/null @@ -1,19 +0,0 @@ -# lex_token1.py -# -# Tests for absence of tokens variable - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - -lex.lex() - - diff --git a/test/lex_token2.py b/test/lex_token2.py deleted file mode 100644 index 6e65ab0..0000000 --- a/test/lex_token2.py +++ /dev/null @@ -1,22 +0,0 @@ -# lex_token2.py -# -# Tests for tokens of wrong type - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = "PLUS MINUS NUMBER" - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - - -lex.lex() - - diff --git a/test/lex_token3.py b/test/lex_token3.py deleted file mode 100644 index 636452e..0000000 --- a/test/lex_token3.py +++ /dev/null @@ -1,24 +0,0 @@ -# lex_token3.py -# -# tokens is right type, but is missing a token for one rule - -import sys -if ".." 
not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - -lex.lex() - - diff --git a/test/lex_token4.py b/test/lex_token4.py deleted file mode 100644 index 52947e9..0000000 --- a/test/lex_token4.py +++ /dev/null @@ -1,26 +0,0 @@ -# lex_token4.py -# -# Bad token name - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "-", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - -lex.lex() - - diff --git a/test/lex_token_dup.py b/test/lex_token_dup.py deleted file mode 100644 index 384f4e9..0000000 --- a/test/lex_token_dup.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_token_dup.py -# -# Duplicate token name in tokens - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - "MINUS" - ] - -t_PLUS = r'\+' -t_MINUS = r'-' - -def t_NUMBER(t): - r'\d+' - return t - -def t_error(t): - pass - -lex.lex() - - diff --git a/test/test_cpp_nonascii.c b/test/test_cpp_nonascii.c deleted file mode 100644 index 3e97d81..0000000 --- a/test/test_cpp_nonascii.c +++ /dev/null @@ -1,2 +0,0 @@ -/* ë */ -#define x 1 \ No newline at end of file diff --git a/test/testcpp.py b/test/testcpp.py deleted file mode 100644 index dbfb3e4..0000000 --- a/test/testcpp.py +++ /dev/null @@ -1,153 +0,0 @@ -from unittest import TestCase, main - -from multiprocessing import Process, Queue -from six.moves.queue import Empty - -import sys -import locale - -if ".." not in sys.path: - sys.path.insert(0, "..") - -from ply.lex import lex -from ply.cpp import * - - -def preprocessing(in_, out_queue): - out = None - - try: - p = Preprocessor(lex()) - p.parse(in_) - tokens = [t.value for t in p.parser] - out = "".join(tokens) - finally: - out_queue.put(out) - -class CPPTests(TestCase): - "Tests related to ANSI-C style lexical preprocessor." - - def __test_preprocessing(self, in_, expected, time_limit = 1.0): - out_queue = Queue() - - preprocessor = Process( - name = "PLY's C preprocessor", - target = preprocessing, - args = (in_, out_queue) - ) - - preprocessor.start() - - try: - out = out_queue.get(timeout = time_limit) - except Empty: - preprocessor.terminate() - raise RuntimeError("Time limit exceeded!") - else: - self.assertMultiLineEqual(out, expected) - - def test_infinite_argument_expansion(self): - # CPP does not drag the set of currently expanded macros through - # macro argument expansion. If an argument value matches the name of - # an already expanded macro, CPP falls into infinite recursion. - self.__test_preprocessing("""\ -#define a(x) x -#define b a(b) -b -""" , """\ - - -b""" - ) - - - def test_concatenation(self): - self.__test_preprocessing("""\ -#define a(x) x##_ -#define b(x) _##x -#define c(x) _##x##_ -#define d(x,y) _##x##y##_ - -a(i) -b(j) -c(k) -d(q,s)""" - , """\ - - - - - -i_ -_j -_k_ -_qs_""" - ) - - def test_deadloop_macro(self): - # If a word matches the name of a parameterized macro, then - # attempting to expand that word as a macro can send the parser - # into an infinite loop. - - self.__test_preprocessing("""\ -#define a(x) x - -a;""" - , """\ - - -a;""" - ) - - def test_index_error(self): - # If there are no tokens after a word ("a") that matches the name of - # a parameterized macro, then attempting to expand this word leads to - # an IndexError.
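- # In the input below, the trailing bare "a" names the macro but is - # not followed by an argument list, so the preprocessor must leave - # it unexpanded instead of crashing.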
- - self.__test_preprocessing("""\ -#define a(x) x - -a""" - , """\ - - -a""" - ) - - def test_evalexpr(self): - # #if 1 != 2 is not processed correctly; undefined values are converted - # to 0L instead of 0 (issue #195) - # - self.__test_preprocessing("""\ -#if (1!=0) && (!x || (!(1==2))) -a; -#else -b; -#endif -""" - , """\ - -a; - -""" - ) - - def test_include_nonascii(self): - # Issue #196: #included files are read using the current locale's - # getdefaultencoding. if a #included file contains non-ascii characters, - # while default encoding is e.g. US_ASCII, this causes an error - locale.setlocale(locale.LC_ALL, 'C') - self.__test_preprocessing("""\ -#include "test_cpp_nonascii.c" -x; - -""" - , """\ - - -1; -""" - ) - -main() diff --git a/test/testlex.py b/test/testlex.py deleted file mode 100755 index a41fda8..0000000 --- a/test/testlex.py +++ /dev/null @@ -1,402 +0,0 @@ -# testlex.py - -import unittest -try: - import StringIO -except ImportError: - import io as StringIO - -import sys -import os -import warnings -import platform - -sys.path.insert(0,"..") -sys.tracebacklimit = 0 - -import ply.lex - -try: - from importlib.util import cache_from_source -except ImportError: - # Python 2.7, but we don't care. - cache_from_source = None - - -def make_pymodule_path(filename, optimization=None): - path = os.path.dirname(filename) - file = os.path.basename(filename) - mod, ext = os.path.splitext(file) - - if sys.hexversion >= 0x3050000: - fullpath = cache_from_source(filename, optimization=optimization) - elif sys.hexversion >= 0x3040000: - fullpath = cache_from_source(filename, ext=='.pyc') - elif sys.hexversion >= 0x3020000: - import imp - modname = mod+"."+imp.get_tag()+ext - fullpath = os.path.join(path,'__pycache__',modname) - else: - fullpath = filename - return fullpath - -def pymodule_out_exists(filename, optimization=None): - return os.path.exists(make_pymodule_path(filename, - optimization=optimization)) - -def pymodule_out_remove(filename, optimization=None): - os.remove(make_pymodule_path(filename, optimization=optimization)) - -def implementation(): - if platform.system().startswith("Java"): - return "Jython" - elif hasattr(sys, "pypy_version_info"): - return "PyPy" - else: - return "CPython" - -test_pyo = (implementation() == 'CPython') - -def check_expected(result, expected, contains=False): - if sys.version_info[0] >= 3: - if isinstance(result,str): - result = result.encode('ascii') - if isinstance(expected,str): - expected = expected.encode('ascii') - resultlines = result.splitlines() - expectedlines = expected.splitlines() - - if len(resultlines) != len(expectedlines): - return False - - for rline,eline in zip(resultlines,expectedlines): - if contains: - if eline not in rline: - return False - else: - if not rline.endswith(eline): - return False - return True - -def run_import(module): - code = "import "+module - exec(code) - del sys.modules[module] - -# Tests related to errors and warnings when building lexers -class LexErrorWarningTests(unittest.TestCase): - def setUp(self): - sys.stderr = StringIO.StringIO() - sys.stdout = StringIO.StringIO() - if sys.hexversion >= 0x3020000: - warnings.filterwarnings('ignore',category=ResourceWarning) - - def tearDown(self): - sys.stderr = sys.__stderr__ - sys.stdout = sys.__stdout__ - def test_lex_doc1(self): - self.assertRaises(SyntaxError,run_import,"lex_doc1") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "lex_doc1.py:18: No regular expression defined for rule 't_NUMBER'\n")) - def test_lex_dup1(self): 
- self.assertRaises(SyntaxError,run_import,"lex_dup1") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "lex_dup1.py:20: Rule t_NUMBER redefined. Previously defined on line 18\n" )) - - def test_lex_dup2(self): - self.assertRaises(SyntaxError,run_import,"lex_dup2") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "lex_dup2.py:22: Rule t_NUMBER redefined. Previously defined on line 18\n" )) - - def test_lex_dup3(self): - self.assertRaises(SyntaxError,run_import,"lex_dup3") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "lex_dup3.py:20: Rule t_NUMBER redefined. Previously defined on line 18\n" )) - - def test_lex_empty(self): - self.assertRaises(SyntaxError,run_import,"lex_empty") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "No rules of the form t_rulename are defined\n" - "No rules defined for state 'INITIAL'\n")) - - def test_lex_error1(self): - run_import("lex_error1") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "No t_error rule is defined\n")) - - def test_lex_error2(self): - self.assertRaises(SyntaxError,run_import,"lex_error2") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Rule 't_error' must be defined as a function\n") - ) - - def test_lex_error3(self): - self.assertRaises(SyntaxError,run_import,"lex_error3") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "lex_error3.py:20: Rule 't_error' requires an argument\n")) - - def test_lex_error4(self): - self.assertRaises(SyntaxError,run_import,"lex_error4") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "lex_error4.py:20: Rule 't_error' has too many arguments\n")) - - def test_lex_ignore(self): - self.assertRaises(SyntaxError,run_import,"lex_ignore") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "lex_ignore.py:20: Rule 't_ignore' must be defined as a string\n")) - - def test_lex_ignore2(self): - run_import("lex_ignore2") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "t_ignore contains a literal backslash '\\'\n")) - - - def test_lex_re1(self): - self.assertRaises(SyntaxError,run_import,"lex_re1") - result = sys.stderr.getvalue() - if sys.hexversion < 0x3050000: - msg = "Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis\n" - else: - msg = "Invalid regular expression for rule 't_NUMBER'. missing ), unterminated subpattern at position 0" - self.assertTrue(check_expected(result, - msg, - contains=True)) - - def test_lex_re2(self): - self.assertRaises(SyntaxError,run_import,"lex_re2") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Regular expression for rule 't_PLUS' matches empty string\n")) - - def test_lex_re3(self): - self.assertRaises(SyntaxError,run_import,"lex_re3") - result = sys.stderr.getvalue() -# self.assertTrue(check_expected(result, -# "Invalid regular expression for rule 't_POUND'. unbalanced parenthesis\n" -# "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n")) - - if sys.hexversion < 0x3050000: - msg = ("Invalid regular expression for rule 't_POUND'. unbalanced parenthesis\n" - "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n") - else: - msg = ("Invalid regular expression for rule 't_POUND'. 
missing ), unterminated subpattern at position 0\n" - "ERROR: Make sure '#' in rule 't_POUND' is escaped with '\#'") - self.assertTrue(check_expected(result, - msg, - contains=True), result) - - def test_lex_rule1(self): - self.assertRaises(SyntaxError,run_import,"lex_rule1") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "t_NUMBER not defined as a function or string\n")) - - def test_lex_rule2(self): - self.assertRaises(SyntaxError,run_import,"lex_rule2") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "lex_rule2.py:18: Rule 't_NUMBER' requires an argument\n")) - - def test_lex_rule3(self): - self.assertRaises(SyntaxError,run_import,"lex_rule3") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "lex_rule3.py:18: Rule 't_NUMBER' has too many arguments\n")) - - - def test_lex_state1(self): - self.assertRaises(SyntaxError,run_import,"lex_state1") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "states must be defined as a tuple or list\n")) - - def test_lex_state2(self): - self.assertRaises(SyntaxError,run_import,"lex_state2") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Invalid state specifier 'comment'. Must be a tuple (statename,'exclusive|inclusive')\n" - "Invalid state specifier 'example'. Must be a tuple (statename,'exclusive|inclusive')\n")) - - def test_lex_state3(self): - self.assertRaises(SyntaxError,run_import,"lex_state3") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "State name 1 must be a string\n" - "No rules defined for state 'example'\n")) - - def test_lex_state4(self): - self.assertRaises(SyntaxError,run_import,"lex_state4") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "State type for state 'comment' must be 'inclusive' or 'exclusive'\n")) - - - def test_lex_state5(self): - self.assertRaises(SyntaxError,run_import,"lex_state5") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "State 'comment' already defined\n")) - - def test_lex_state_noerror(self): - run_import("lex_state_noerror") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "No error rule is defined for exclusive state 'comment'\n")) - - def test_lex_state_norule(self): - self.assertRaises(SyntaxError,run_import,"lex_state_norule") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "No rules defined for state 'example'\n")) - - def test_lex_token1(self): - self.assertRaises(SyntaxError,run_import,"lex_token1") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "No token list is defined\n" - "Rule 't_NUMBER' defined for an unspecified token NUMBER\n" - "Rule 't_PLUS' defined for an unspecified token PLUS\n" - "Rule 't_MINUS' defined for an unspecified token MINUS\n" -)) - - def test_lex_token2(self): - self.assertRaises(SyntaxError,run_import,"lex_token2") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "tokens must be a list or tuple\n" - "Rule 't_NUMBER' defined for an unspecified token NUMBER\n" - "Rule 't_PLUS' defined for an unspecified token PLUS\n" - "Rule 't_MINUS' defined for an unspecified token MINUS\n" -)) - - def test_lex_token3(self): - self.assertRaises(SyntaxError,run_import,"lex_token3") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Rule 't_MINUS' defined for an unspecified token MINUS\n")) - - - def test_lex_token4(self): - 
self.assertRaises(SyntaxError,run_import,"lex_token4") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Bad token name '-'\n")) - - - def test_lex_token_dup(self): - run_import("lex_token_dup") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Token 'MINUS' multiply defined\n")) - - - def test_lex_literal1(self): - self.assertRaises(SyntaxError,run_import,"lex_literal1") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Invalid literal '**'. Must be a single character\n")) - - def test_lex_literal2(self): - self.assertRaises(SyntaxError,run_import,"lex_literal2") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Invalid literals specification. literals must be a sequence of characters\n")) - -import os -import subprocess -import shutil - -# Tests related to various build options associated with lexers -class LexBuildOptionTests(unittest.TestCase): - def setUp(self): - sys.stderr = StringIO.StringIO() - sys.stdout = StringIO.StringIO() - def tearDown(self): - sys.stderr = sys.__stderr__ - sys.stdout = sys.__stdout__ - try: - shutil.rmtree("lexdir") - except OSError: - pass - - def test_lex_module(self): - run_import("lex_module") - result = sys.stdout.getvalue() - self.assertTrue(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - - def test_lex_object(self): - run_import("lex_object") - result = sys.stdout.getvalue() - self.assertTrue(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - - def test_lex_closure(self): - run_import("lex_closure") - result = sys.stdout.getvalue() - self.assertTrue(check_expected(result, - "(NUMBER,3,1,0)\n" - "(PLUS,'+',1,1)\n" - "(NUMBER,4,1,2)\n")) - - def test_lex_many_tokens(self): - run_import("lex_many_tokens") - result = sys.stdout.getvalue() - self.assertTrue(check_expected(result, - "(TOK34,'TOK34:',1,0)\n" - "(TOK143,'TOK143:',1,7)\n" - "(TOK269,'TOK269:',1,15)\n" - "(TOK372,'TOK372:',1,23)\n" - "(TOK452,'TOK452:',1,31)\n" - "(TOK561,'TOK561:',1,39)\n" - "(TOK999,'TOK999:',1,47)\n" - )) - -# Tests related to run-time behavior of lexers -class LexRunTests(unittest.TestCase): - def setUp(self): - sys.stderr = StringIO.StringIO() - sys.stdout = StringIO.StringIO() - def tearDown(self): - sys.stderr = sys.__stderr__ - sys.stdout = sys.__stdout__ - - def test_lex_hedit(self): - run_import("lex_hedit") - result = sys.stdout.getvalue() - self.assertTrue(check_expected(result, - "(H_EDIT_DESCRIPTOR,'abc',1,0)\n" - "(H_EDIT_DESCRIPTOR,'abcdefghij',1,6)\n" - "(H_EDIT_DESCRIPTOR,'xy',1,20)\n")) - - def test_lex_state_try(self): - run_import("lex_state_try") - result = sys.stdout.getvalue() - self.assertTrue(check_expected(result, - "(NUMBER,'3',1,0)\n" - "(PLUS,'+',1,2)\n" - "(NUMBER,'4',1,4)\n" - "Entering comment state\n" - "comment body LexToken(body_part,'This is a comment */',1,9)\n" - "(PLUS,'+',1,30)\n" - "(NUMBER,'10',1,32)\n" - )) - - - -unittest.main() diff --git a/test/testyacc.py b/test/testyacc.py deleted file mode 100644 index c52d0ac..0000000 --- a/test/testyacc.py +++ /dev/null @@ -1,404 +0,0 @@ -# testyacc.py - -import unittest -try: - import StringIO -except ImportError: - import io as StringIO - -import sys -import os -import warnings -import re -import platform - -sys.path.insert(0,"..") -sys.tracebacklimit = 0 - -import ply.yacc - -def make_pymodule_path(filename): - path = os.path.dirname(filename) - file = os.path.basename(filename) - mod, ext = 
os.path.splitext(file) - - if sys.hexversion >= 0x3040000: - import importlib.util - fullpath = importlib.util.cache_from_source(filename, ext=='.pyc') - elif sys.hexversion >= 0x3020000: - import imp - modname = mod+"."+imp.get_tag()+ext - fullpath = os.path.join(path,'__pycache__',modname) - else: - fullpath = filename - return fullpath - -def pymodule_out_exists(filename): - return os.path.exists(make_pymodule_path(filename)) - -def pymodule_out_remove(filename): - os.remove(make_pymodule_path(filename)) - -def implementation(): - if platform.system().startswith("Java"): - return "Jython" - elif hasattr(sys, "pypy_version_info"): - return "PyPy" - else: - return "CPython" - -# Check the output to see if it contains all of a set of expected output lines. -# This alternate implementation looks weird, but is needed to properly handle -# some variations in error message order that occurs due to dict hash table -# randomization that was introduced in Python 3.3 -def check_expected(result, expected): - # Normalize 'state n' text to account for randomization effects in Python 3.3 - expected = re.sub(r' state \d+', 'state ', expected) - result = re.sub(r' state \d+', 'state ', result) - - resultlines = set() - for line in result.splitlines(): - if line.startswith("WARNING: "): - line = line[9:] - elif line.startswith("ERROR: "): - line = line[7:] - resultlines.add(line) - - # Selectively remove expected lines from the output - for eline in expected.splitlines(): - resultlines = set(line for line in resultlines if not line.endswith(eline)) - - # Return True if no result lines remain - return not bool(resultlines) - -def run_import(module): - code = "import "+module - exec(code) - del sys.modules[module] - -# Tests related to errors and warnings when building parsers -class YaccErrorWarningTests(unittest.TestCase): - def setUp(self): - sys.stderr = StringIO.StringIO() - sys.stdout = StringIO.StringIO() - try: - os.remove("parsetab.py") - pymodule_out_remove("parsetab.pyc") - except OSError: - pass - - if sys.hexversion >= 0x3020000: - warnings.filterwarnings('ignore', category=ResourceWarning) - warnings.filterwarnings('ignore', category=DeprecationWarning) - - def tearDown(self): - sys.stderr = sys.__stderr__ - sys.stdout = sys.__stdout__ - def test_yacc_badargs(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badargs") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_badargs.py:23: Rule 'p_statement_assign' has too many arguments\n" - "yacc_badargs.py:27: Rule 'p_statement_expr' requires an argument\n" - )) - def test_yacc_badid(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badid") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_badid.py:32: Illegal name 'bad&rule' in rule 'statement'\n" - "yacc_badid.py:36: Illegal rule name 'bad&rule'\n" - )) - - def test_yacc_badprec(self): - try: - run_import("yacc_badprec") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "precedence must be a list or tuple\n" - )) - def test_yacc_badprec2(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badprec2") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Bad precedence table\n" - )) - - def test_yacc_badprec3(self): - run_import("yacc_badprec3") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Precedence already specified for terminal 'MINUS'\n" - "Generating LALR tables\n" - - )) - - def 
test_yacc_badrule(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badrule") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_badrule.py:24: Syntax error. Expected ':'\n" - "yacc_badrule.py:28: Syntax error in rule 'statement'\n" - "yacc_badrule.py:33: Syntax error. Expected ':'\n" - "yacc_badrule.py:42: Syntax error. Expected ':'\n" - )) - - def test_yacc_badtok(self): - try: - run_import("yacc_badtok") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "tokens must be a list or tuple\n")) - - def test_yacc_dup(self): - run_import("yacc_dup") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_dup.py:27: Function p_statement redefined. Previously defined on line 23\n" - "Token 'EQUALS' defined, but not used\n" - "There is 1 unused token\n" - "Generating LALR tables\n" - - )) - def test_yacc_error1(self): - try: - run_import("yacc_error1") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_error1.py:61: p_error() requires 1 argument\n")) - - def test_yacc_error2(self): - try: - run_import("yacc_error2") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_error2.py:61: p_error() requires 1 argument\n")) - - def test_yacc_error3(self): - try: - run_import("yacc_error3") - except ply.yacc.YaccError: - e = sys.exc_info()[1] - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "'p_error' defined, but is not a function or method\n")) - - def test_yacc_error4(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_error4") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_error4.py:62: Illegal rule name 'error'. 
Already defined as a token\n" - )) - - - def test_yacc_error5(self): - run_import("yacc_error5") - result = sys.stdout.getvalue() - self.assertTrue(check_expected(result, - "Group at 3:10 to 3:12\n" - "Undefined name 'a'\n" - "Syntax error at 'b'\n" - "Syntax error at 4:18 to 4:22\n" - "Assignment Error at 2:5 to 5:27\n" - "13\n" - )) - - def test_yacc_error6(self): - run_import("yacc_error6") - result = sys.stdout.getvalue() - self.assertTrue(check_expected(result, - "a=7\n" - "Line 3: Syntax error at '*'\n" - "c=21\n" - )) - - def test_yacc_error7(self): - run_import("yacc_error7") - result = sys.stdout.getvalue() - self.assertTrue(check_expected(result, - "a=7\n" - "Line 3: Syntax error at '*'\n" - "c=21\n" - )) - - def test_yacc_inf(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_inf") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Token 'NUMBER' defined, but not used\n" - "There is 1 unused token\n" - "Infinite recursion detected for symbol 'statement'\n" - "Infinite recursion detected for symbol 'expression'\n" - )) - def test_yacc_literal(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_literal") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_literal.py:36: Literal token '**' in rule 'expression' may only be a single character\n" - )) - def test_yacc_misplaced(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_misplaced") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_misplaced.py:32: Misplaced '|'\n" - )) - - def test_yacc_missing1(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_missing1") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_missing1.py:24: Symbol 'location' used, but not defined as a token or a rule\n" - )) - - def test_yacc_nested(self): - run_import("yacc_nested") - result = sys.stdout.getvalue() - self.assertTrue(check_expected(result, - "A\n" - "A\n" - "A\n", - )) - - def test_yacc_nodoc(self): - run_import("yacc_nodoc") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_nodoc.py:27: No documentation string specified in function 'p_statement_expr' (ignored)\n" - "Generating LALR tables\n" - )) - - def test_yacc_noerror(self): - run_import("yacc_noerror") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "no p_error() function is defined\n" - "Generating LALR tables\n" - )) - - def test_yacc_nop(self): - run_import("yacc_nop") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_nop.py:27: Possible grammar rule 'statement_expr' defined without p_ prefix\n" - "Generating LALR tables\n" - )) - - def test_yacc_notfunc(self): - run_import("yacc_notfunc") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "'p_statement_assign' not defined as a function\n" - "Token 'EQUALS' defined, but not used\n" - "There is 1 unused token\n" - "Generating LALR tables\n" - )) - def test_yacc_notok(self): - try: - run_import("yacc_notok") - except ply.yacc.YaccError: - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "No token list is defined\n")) - - def test_yacc_rr(self): - run_import("yacc_rr") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Generating LALR tables\n" - "1 reduce/reduce conflict\n" - "reduce/reduce conflict in state 15 resolved using rule (statement -> NAME EQUALS NUMBER)\n" - "rejected rule (expression -> NUMBER) in 
state 15\n" - - )) - - def test_yacc_rr_unused(self): - run_import("yacc_rr_unused") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "no p_error() function is defined\n" - "Generating LALR tables\n" - "3 reduce/reduce conflicts\n" - "reduce/reduce conflict in state 1 resolved using rule (rule3 -> A)\n" - "rejected rule (rule4 -> A) in state 1\n" - "reduce/reduce conflict in state 1 resolved using rule (rule3 -> A)\n" - "rejected rule (rule5 -> A) in state 1\n" - "reduce/reduce conflict in state 1 resolved using rule (rule4 -> A)\n" - "rejected rule (rule5 -> A) in state 1\n" - "Rule (rule5 -> A) is never reduced\n" - )) - - def test_yacc_simple(self): - run_import("yacc_simple") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Generating LALR tables\n" - )) - - def test_yacc_sr(self): - run_import("yacc_sr") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Generating LALR tables\n" - "20 shift/reduce conflicts\n" - )) - - def test_yacc_term1(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_term1") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_term1.py:24: Illegal rule name 'NUMBER'. Already defined as a token\n" - )) - - def test_yacc_unicode_literals(self): - run_import("yacc_unicode_literals") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Generating LALR tables\n" - )) - - def test_yacc_unused(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_unused") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_unused.py:62: Symbol 'COMMA' used, but not defined as a token or a rule\n" - "Symbol 'COMMA' is unreachable\n" - "Symbol 'exprlist' is unreachable\n" - )) - def test_yacc_unused_rule(self): - run_import("yacc_unused_rule") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_unused_rule.py:62: Rule 'integer' defined, but not used\n" - "There is 1 unused rule\n" - "Symbol 'integer' is unreachable\n" - "Generating LALR tables\n" - )) - - def test_yacc_uprec(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_uprec") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_uprec.py:37: Nothing known about the precedence of 'UMINUS'\n" - )) - - def test_yacc_uprec2(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_uprec2") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "yacc_uprec2.py:37: Syntax error. 
Nothing follows %prec\n" - )) - - def test_yacc_prec1(self): - self.assertRaises(ply.yacc.YaccError,run_import,"yacc_prec1") - result = sys.stderr.getvalue() - self.assertTrue(check_expected(result, - "Precedence rule 'left' defined for unknown symbol '+'\n" - "Precedence rule 'left' defined for unknown symbol '*'\n" - "Precedence rule 'left' defined for unknown symbol '-'\n" - "Precedence rule 'left' defined for unknown symbol '/'\n" - )) - -unittest.main() diff --git a/test/yacc_badargs.py b/test/yacc_badargs.py deleted file mode 100644 index 9a1d03f..0000000 --- a/test/yacc_badargs.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badargs.py -# -# Rules with wrong # args -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t,s): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_badid.py b/test/yacc_badid.py deleted file mode 100644 index e4b9f5e..0000000 --- a/test/yacc_badid.py +++ /dev/null @@ -1,77 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badid.py -# -# Attempt to define a rule with a bad-identifier name -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_statement_expr2(t): - 'statement : bad&rule' - pass - -def p_badrule(t): - 'bad&rule : expression' - pass - - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - pass - -yacc.yacc() - - - - diff --git a/test/yacc_badprec.py b/test/yacc_badprec.py deleted file mode 100644 index 3013bb6..0000000 --- a/test/yacc_badprec.py +++ /dev/null @@ -1,64 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badprec.py -# -# Bad precedence specifier -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = "blah" - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_badprec2.py b/test/yacc_badprec2.py deleted file mode 100644 index 83093b4..0000000 --- a/test/yacc_badprec2.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badprec2.py -# -# Bad precedence -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - 42, - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_badprec3.py b/test/yacc_badprec3.py deleted file mode 100644 index d925ecd..0000000 --- a/test/yacc_badprec3.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badprec3.py -# -# Bad precedence -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE','MINUS'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_badrule.py b/test/yacc_badrule.py deleted file mode 100644 index 92af646..0000000 --- a/test/yacc_badrule.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badrule.py -# -# Syntax problems in the rule strings -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression: MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_badtok.py b/test/yacc_badtok.py deleted file mode 100644 index fc4afe1..0000000 --- a/test/yacc_badtok.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badtok.py -# -# A grammar, but tokens is a bad datatype -# ----------------------------------------------------------------------------- - -import sys -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -tokens = "Hello" - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_dup.py b/test/yacc_dup.py deleted file mode 100644 index 309ba32..0000000 --- a/test/yacc_dup.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_dup.py -# -# Duplicated rule name -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_error1.py b/test/yacc_error1.py deleted file mode 100644 index 10ac6a9..0000000 --- a/test/yacc_error1.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error1.py -# -# Bad p_error() function -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t,s): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_error2.py b/test/yacc_error2.py deleted file mode 100644 index 7591418..0000000 --- a/test/yacc_error2.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error2.py -# -# Bad p_error() function -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_error3.py b/test/yacc_error3.py deleted file mode 100644 index 4604a48..0000000 --- a/test/yacc_error3.py +++ /dev/null @@ -1,67 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error3.py -# -# Bad p_error() function -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -p_error = "blah" - -yacc.yacc() - - - - diff --git a/test/yacc_error4.py b/test/yacc_error4.py deleted file mode 100644 index 9c550cd..0000000 --- a/test/yacc_error4.py +++ /dev/null @@ -1,72 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error4.py -# -# Attempt to define a rule named 'error' -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error_handler(t): - 'error : NAME' - pass - -def p_error(t): - pass - -yacc.yacc() - - - - diff --git a/test/yacc_error5.py b/test/yacc_error5.py deleted file mode 100644 index 9eb0f85..0000000 --- a/test/yacc_error5.py +++ /dev/null @@ -1,94 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error5.py -# -# Lineno and position tracking with error tokens -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_assign_error(t): - 'statement : NAME EQUALS error' - line_start, line_end = t.linespan(3) - pos_start, pos_end = t.lexspan(3) - print("Assignment Error at %d:%d to %d:%d" % (line_start,pos_start,line_end,pos_end)) - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - line_start, line_end = t.linespan(2) - pos_start, pos_end = t.lexspan(2) - print("Group at %d:%d to %d:%d" % (line_start,pos_start, line_end, pos_end)) - t[0] = t[2] - -def p_expression_group_error(t): - 'expression : LPAREN error RPAREN' - line_start, line_end = t.linespan(2) - pos_start, pos_end = t.lexspan(2) - print("Syntax error at %d:%d to %d:%d" % (line_start,pos_start, line_end, pos_end)) - t[0] = 0 - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -parser = yacc.yacc() -import calclex -calclex.lexer.lineno=1 
-parser.parse(""" -a = 3 + -(4*5) + -(a b c) + -+ 6 + 7 -""", tracking=True) - - - - - - diff --git a/test/yacc_error6.py b/test/yacc_error6.py deleted file mode 100644 index 8d0ec85..0000000 --- a/test/yacc_error6.py +++ /dev/null @@ -1,80 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error6.py -# -# Panic mode recovery test -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -def p_statements(t): - 'statements : statements statement' - pass - -def p_statements_1(t): - 'statements : statement' - pass - -def p_statement_assign(p): - 'statement : LPAREN NAME EQUALS expression RPAREN' - print("%s=%s" % (p[2],p[4])) - -def p_statement_expr(t): - 'statement : LPAREN expression RPAREN' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_error(p): - if p: - print("Line %d: Syntax error at '%s'" % (p.lineno, p.value)) - # Scan ahead looking for a name token - while True: - tok = parser.token() - if not tok or tok.type == 'RPAREN': - break - if tok: - parser.restart() - return None - -parser = yacc.yacc() -import calclex -calclex.lexer.lineno=1 - -parser.parse(""" -(a = 3 + 4) -(b = 4 + * 5 - 6 + *) -(c = 10 + 11) -""") - - - - - - diff --git a/test/yacc_error7.py b/test/yacc_error7.py deleted file mode 100644 index abdc834..0000000 --- a/test/yacc_error7.py +++ /dev/null @@ -1,80 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error7.py -# -# Panic mode recovery test using deprecated functionality -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -def p_statements(t): - 'statements : statements statement' - pass - -def p_statements_1(t): - 'statements : statement' - pass - -def p_statement_assign(p): - 'statement : LPAREN NAME EQUALS expression RPAREN' - print("%s=%s" % (p[2],p[4])) - -def p_statement_expr(t): - 'statement : LPAREN expression RPAREN' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_error(p): - if p: - print("Line %d: Syntax error at '%s'" % (p.lineno, p.value)) - # Scan ahead looking for a name token - while True: - tok = parser.token() - if not tok or tok.type == 'RPAREN': - break - if tok: - parser.restart() - return None - -parser = yacc.yacc() -import calclex -calclex.lexer.lineno=1 - -parser.parse(""" -(a = 3 + 4) -(b = 4 + * 5 - 6 + *) -(c = 10 + 11) -""") - - - - - - diff --git a/test/yacc_inf.py b/test/yacc_inf.py deleted file mode 100644 index efd3612..0000000 --- a/test/yacc_inf.py +++ /dev/null @@ -1,56 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_inf.py -# -# Infinite recursion -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_literal.py b/test/yacc_literal.py deleted file mode 100644 index 0d62803..0000000 --- a/test/yacc_literal.py +++ /dev/null @@ -1,69 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_literal.py -# -# Grammar with bad literal characters -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression '+' expression - | expression '-' expression - | expression '*' expression - | expression '/' expression - | expression '**' expression ''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_misplaced.py b/test/yacc_misplaced.py deleted file mode 100644 index 9159b01..0000000 --- a/test/yacc_misplaced.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_misplaced.py -# -# A misplaced | in grammar rules -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - ''' | expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_missing1.py b/test/yacc_missing1.py deleted file mode 100644 index d1b5105..0000000 --- a/test/yacc_missing1.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_missing1.py -# -# Grammar with a missing rule -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : location EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_nested.py b/test/yacc_nested.py deleted file mode 100644 index a3543a9..0000000 --- a/test/yacc_nested.py +++ /dev/null @@ -1,33 +0,0 @@ -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") - -from ply import lex, yacc - -t_A = 'A' -t_B = 'B' -t_C = 'C' - -tokens = ('A', 'B', 'C') - -the_lexer = lex.lex() - -def t_error(t): - pass - -def p_error(p): - pass - -def p_start(t): - '''start : A nest C''' - pass - -def p_nest(t): - '''nest : B''' - print(t[-1]) - -the_parser = yacc.yacc(debug = False) - -the_parser.parse('ABC', the_lexer) -the_parser.parse('ABC', the_lexer, tracking=True) -the_parser.parse('ABC', the_lexer, tracking=True, debug=1) diff --git a/test/yacc_nodoc.py b/test/yacc_nodoc.py deleted file mode 100644 index 0f61920..0000000 --- a/test/yacc_nodoc.py +++ /dev/null @@ -1,67 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_nodoc.py -# -# Rule with a missing doc-string -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_noerror.py b/test/yacc_noerror.py deleted file mode 100644 index b38c758..0000000 --- a/test/yacc_noerror.py +++ /dev/null @@ -1,66 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_noerror.py -# -# No p_error() rule defined. -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - - -yacc.yacc() - - - - diff --git a/test/yacc_nop.py b/test/yacc_nop.py deleted file mode 100644 index 789a9cf..0000000 --- a/test/yacc_nop.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_nop.py -# -# Possible grammar rule defined without p_ prefix -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_notfunc.py b/test/yacc_notfunc.py deleted file mode 100644 index 5093a74..0000000 --- a/test/yacc_notfunc.py +++ /dev/null @@ -1,66 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_notfunc.py -# -# p_rule not defined as a function -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -p_statement_assign = "Blah" - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_notok.py b/test/yacc_notok.py deleted file mode 100644 index cff55a8..0000000 --- a/test/yacc_notok.py +++ /dev/null @@ -1,67 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_notok.py -# -# A grammar, but we forgot to import the tokens list -# ----------------------------------------------------------------------------- - -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_prec1.py b/test/yacc_prec1.py deleted file mode 100644 index 99fcd90..0000000 --- a/test/yacc_prec1.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_prec1.py -# -# Tests case where precedence specifier doesn't match up to terminals -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left', '+', '-'), - ('left', '*', '/'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_rr.py b/test/yacc_rr.py deleted file mode 100644 index e7336c2..0000000 --- a/test/yacc_rr.py +++ /dev/null @@ -1,72 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_rr.py -# -# A grammar with a reduce/reduce conflict -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_assign_2(t): - 'statement : NAME EQUALS NUMBER' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_rr_unused.py b/test/yacc_rr_unused.py deleted file mode 100644 index 1ca5f7e..0000000 --- a/test/yacc_rr_unused.py +++ /dev/null @@ -1,30 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_rr_unused.py -# -# A grammar with reduce/reduce conflicts and a rule that never -# gets reduced. -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -tokens = ('A', 'B', 'C') - -def p_grammar(p): - ''' - rule1 : rule2 B - | rule2 C - - rule2 : rule3 B - | rule4 - | rule5 - - rule3 : A - - rule4 : A - - rule5 : A - ''' - -yacc.yacc() diff --git a/test/yacc_simple.py b/test/yacc_simple.py deleted file mode 100644 index bd989f4..0000000 --- a/test/yacc_simple.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_simple.py -# -# A simple, properly specifier grammar -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_sr.py b/test/yacc_sr.py deleted file mode 100644 index 69a1e9c..0000000 --- a/test/yacc_sr.py +++ /dev/null @@ -1,63 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_sr.py -# -# A grammar with shift-reduce conflicts -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_term1.py b/test/yacc_term1.py deleted file mode 100644 index eaa36e9..0000000 --- a/test/yacc_term1.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_term1.py -# -# Terminal used on the left-hand-side -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'NUMBER : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_unicode_literals.py b/test/yacc_unicode_literals.py deleted file mode 100644 index 5ae4f5b..0000000 --- a/test/yacc_unicode_literals.py +++ /dev/null @@ -1,70 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_unicode_literals -# -# Test for unicode literals on Python 2.x -# ----------------------------------------------------------------------------- -from __future__ import unicode_literals - -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_unused.py b/test/yacc_unused.py deleted file mode 100644 index 55b677b..0000000 --- a/test/yacc_unused.py +++ /dev/null @@ -1,77 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_unused.py -# -# A grammar with an unused rule -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_expr_list(t): - 'exprlist : exprlist COMMA expression' - pass - -def p_expr_list_2(t): - 'exprlist : expression' - pass - - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_unused_rule.py b/test/yacc_unused_rule.py deleted file mode 100644 index 4868ef8..0000000 --- a/test/yacc_unused_rule.py +++ /dev/null @@ -1,72 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_unused_rule.py -# -# Grammar with an unused rule -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_integer(t): - 'integer : NUMBER' - t[0] = t[1] - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_uprec.py b/test/yacc_uprec.py deleted file mode 100644 index 569adb8..0000000 --- a/test/yacc_uprec.py +++ /dev/null @@ -1,63 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_uprec.py -# -# A grammar with a bad %prec specifier -# ----------------------------------------------------------------------------- -import sys - -if ".." 
not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/test/yacc_uprec2.py b/test/yacc_uprec2.py deleted file mode 100644 index 73274bf..0000000 --- a/test/yacc_uprec2.py +++ /dev/null @@ -1,63 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_uprec2.py -# -# A grammar with a bad %prec specifier -# ----------------------------------------------------------------------------- -import sys - -if ".." not in sys.path: sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print(t[1]) - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print("Undefined name '%s'" % t[1]) - t[0] = 0 - -def p_error(t): - print("Syntax error at '%s'" % t.value) - -yacc.yacc() - - - - diff --git a/tests/README b/tests/README new file mode 100644 index 0000000..03b167c --- /dev/null +++ b/tests/README @@ -0,0 +1,7 @@ +This directory mostly contains tests for various types of error +conditions. To run: + + $ python testlex.py + $ python testyacc.py + +The script 'cleanup.sh' cleans up this directory to its original state.
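A note on the block of yacc_*.py tests above: each file perturbs one element of the same baseline calculator grammar. For reference, here is a minimal runnable sketch of that well-formed baseline, assuming ply is installed and the working directory is tests/ so that the calclex module added below is importable; calclex tokens that go unused here (NAME, EQUALS) only draw "defined, but not used" warnings from yacc:

    import ply.yacc as yacc
    from calclex import tokens   # importing calclex also builds its lexer

    # One tuple per precedence level, lowest first.  UMINUS is not a real
    # token; it exists only so %prec can give unary minus higher precedence.
    precedence = (
        ('left', 'PLUS', 'MINUS'),
        ('left', 'TIMES', 'DIVIDE'),
        ('right', 'UMINUS'),
    )

    def p_expression_binop(p):
        '''expression : expression PLUS expression
                      | expression MINUS expression
                      | expression TIMES expression
                      | expression DIVIDE expression'''
        if p[2] == '+':   p[0] = p[1] + p[3]
        elif p[2] == '-': p[0] = p[1] - p[3]
        elif p[2] == '*': p[0] = p[1] * p[3]
        else:             p[0] = p[1] / p[3]

    def p_expression_uminus(p):
        'expression : MINUS expression %prec UMINUS'
        p[0] = -p[2]

    def p_expression_group(p):
        'expression : LPAREN expression RPAREN'
        p[0] = p[2]

    def p_expression_number(p):
        'expression : NUMBER'
        p[0] = p[1]

    def p_error(p):
        print("Syntax error at '%s'" % (p.value if p else 'EOF'))

    parser = yacc.yacc()
    print(parser.parse("2 + 3 * -(4 - 1)"))   # prints -7

The error tests break exactly these pieces one at a time: the precedence tuple, the rule docstrings, the p_error function, and the tokens list.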
diff --git a/tests/calclex.py b/tests/calclex.py new file mode 100644 index 0000000..4862753 --- /dev/null +++ b/tests/calclex.py @@ -0,0 +1,46 @@ +# ----------------------------------------------------------------------------- +# calclex.py +# ----------------------------------------------------------------------------- +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +lexer = lex.lex() + + + diff --git a/tests/cleanup.sh b/tests/cleanup.sh new file mode 100755 index 0000000..9374f2c --- /dev/null +++ b/tests/cleanup.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +rm -rf *~ *.pyc *.pyo *.dif *.out __pycache__ + diff --git a/tests/lex_closure.py b/tests/lex_closure.py new file mode 100644 index 0000000..0b34cff --- /dev/null +++ b/tests/lex_closure.py @@ -0,0 +1,51 @@ +# ----------------------------------------------------------------------------- +# lex_closure.py +# ----------------------------------------------------------------------------- +import ply.lex as lex + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +def make_calc(): + + # Tokens + + t_PLUS = r'\+' + t_MINUS = r'-' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_EQUALS = r'=' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + + t_ignore = " \t" + + def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + + def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + + # Build the lexer + return lex.lex() + +make_calc() +lex.runmain(data="3+4") + + + diff --git a/tests/lex_doc1.py b/tests/lex_doc1.py new file mode 100644 index 0000000..4b62609 --- /dev/null +++ b/tests/lex_doc1.py @@ -0,0 +1,23 @@ +# lex_doc1.py +# +# Missing documentation string + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +def t_NUMBER(t): + pass + +def t_error(t): + pass + +lex.lex() + + diff --git a/tests/lex_dup1.py b/tests/lex_dup1.py new file mode 100644 index 0000000..b9dc3c5 --- /dev/null +++ b/tests/lex_dup1.py @@ -0,0 +1,26 @@ +# lex_dup1.py +# +# Duplicated rule specifiers + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +t_NUMBER = r'\d+' + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_dup2.py b/tests/lex_dup2.py new file mode 100644 index 0000000..47bf5b5 --- /dev/null +++ b/tests/lex_dup2.py @@ -0,0 +1,30 @@ +# lex_dup2.py +# +# Duplicated rule specifiers + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +def t_NUMBER(t): + r'\d+' + pass + +def t_NUMBER(t): + r'\d+' + pass + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_dup3.py b/tests/lex_dup3.py new file mode 100644 index 
0000000..5398f48 --- /dev/null +++ b/tests/lex_dup3.py @@ -0,0 +1,28 @@ +# lex_dup3.py +# +# Duplicated rule specifiers + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_NUMBER(t): + r'\d+' + pass + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_empty.py b/tests/lex_empty.py new file mode 100644 index 0000000..d0baf31 --- /dev/null +++ b/tests/lex_empty.py @@ -0,0 +1,17 @@ +# lex_empty.py +# +# No rules defined + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + + + +lex.lex() + + diff --git a/tests/lex_error1.py b/tests/lex_error1.py new file mode 100644 index 0000000..fec0282 --- /dev/null +++ b/tests/lex_error1.py @@ -0,0 +1,21 @@ +# lex_error1.py +# +# Missing t_error() rule + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + + + +lex.lex() + + diff --git a/tests/lex_error2.py b/tests/lex_error2.py new file mode 100644 index 0000000..8914ab6 --- /dev/null +++ b/tests/lex_error2.py @@ -0,0 +1,23 @@ +# lex_error2.py +# +# t_error defined, but not function + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +t_error = "foo" + + + +lex.lex() + + diff --git a/tests/lex_error3.py b/tests/lex_error3.py new file mode 100644 index 0000000..71ca903 --- /dev/null +++ b/tests/lex_error3.py @@ -0,0 +1,24 @@ +# lex_error3.py +# +# t_error defined as function, but with wrong # args + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(): + pass + + + +lex.lex() + + diff --git a/tests/lex_error4.py b/tests/lex_error4.py new file mode 100644 index 0000000..5387808 --- /dev/null +++ b/tests/lex_error4.py @@ -0,0 +1,24 @@ +# lex_error4.py +# +# t_error defined as function, but too many args + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t,s): + pass + + + +lex.lex() + + diff --git a/tests/lex_hedit.py b/tests/lex_hedit.py new file mode 100644 index 0000000..9f724d0 --- /dev/null +++ b/tests/lex_hedit.py @@ -0,0 +1,45 @@ +# ----------------------------------------------------------------------------- +# lex_hedit.py +# +# Parsing of Fortran H Edit descriptions (Contributed by Pearu Peterson) +# +# These tokens can't be easily tokenized because they are of the following +# form: +# +# nHc1...cn +# +# where n is a positive integer and c1 ... cn are characters.
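+# For example, 3Habc encodes n=3 followed by the literal text 'abc'; the +# lex.runmain() call at the bottom of this file feeds the lexer +# "3Habc 10Habcdefghij 2Hxy".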
+# +# This example shows how to modify the state of the lexer to parse +# such tokens +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +tokens = ( + 'H_EDIT_DESCRIPTOR', + ) + +# Tokens +t_ignore = " \t\n" + +def t_H_EDIT_DESCRIPTOR(t): + r"\d+H.*" # This grabs all of the remaining text + i = t.value.index('H') + n = int(t.value[:i]) + + # Adjust the tokenizing position + t.lexer.lexpos -= len(t.value) - (i+1+n) + t.value = t.value[i+1:i+1+n] + return t + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + +# Build the lexer +lex.lex() +lex.runmain(data="3Habc 10Habcdefghij 2Hxy") + + + diff --git a/tests/lex_ignore.py b/tests/lex_ignore.py new file mode 100644 index 0000000..b31fb39 --- /dev/null +++ b/tests/lex_ignore.py @@ -0,0 +1,28 @@ +# lex_ignore.py +# +# Improperly specified ignore declaration + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_ignore(t): + ' \t' + pass + +def t_error(t): + pass + +import sys + +lex.lex() + + diff --git a/tests/lex_ignore2.py b/tests/lex_ignore2.py new file mode 100644 index 0000000..de0958a --- /dev/null +++ b/tests/lex_ignore2.py @@ -0,0 +1,26 @@ +# lex_ignore2.py +# +# ignore declaration as a raw string + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +t_ignore = r' \t' + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_literal1.py b/tests/lex_literal1.py new file mode 100644 index 0000000..510b379 --- /dev/null +++ b/tests/lex_literal1.py @@ -0,0 +1,22 @@ +# lex_literal1.py +# +# Bad literal specification + +import ply.lex as lex + +tokens = [ + "NUMBER", + ] + +literals = ["+","-","**"] + +def t_NUMBER(t): + r'\d+' + return t + +def t_error(t): + pass + +lex.lex() + + diff --git a/tests/lex_literal2.py b/tests/lex_literal2.py new file mode 100644 index 0000000..a7a2c56 --- /dev/null +++ b/tests/lex_literal2.py @@ -0,0 +1,22 @@ +# lex_literal2.py +# +# Bad literal specification + +import ply.lex as lex + +tokens = [ + "NUMBER", + ] + +literals = 23 + +def t_NUMBER(t): + r'\d+' + return t + +def t_error(t): + pass + +lex.lex() + + diff --git a/tests/lex_literal3.py b/tests/lex_literal3.py new file mode 100644 index 0000000..9d697c9 --- /dev/null +++ b/tests/lex_literal3.py @@ -0,0 +1,23 @@ +# lex_literal3.py +# +# An empty literal specification given as a list +# Issue 8 : Literals empty list causes IndexError + +import ply.lex as lex + +tokens = [ + "NUMBER", + ] + +literals = [] + +def t_NUMBER(t): + r'\d+' + return t + +def t_error(t): + pass + +lex.lex() + + diff --git a/tests/lex_many_tokens.py b/tests/lex_many_tokens.py new file mode 100644 index 0000000..02e3a11 --- /dev/null +++ b/tests/lex_many_tokens.py @@ -0,0 +1,25 @@ +# lex_many_tokens.py +# +# Test lex's ability to handle a large number of tokens (beyond the +# 100-group limit of the re module) + +import sys +import ply.lex as lex + +tokens = ["TOK%d" % i for i in range(1000)] + +for tok in tokens: + if sys.version_info[0] < 3: + exec("t_%s = '%s:'" % (tok,tok)) + else: + exec("t_%s = '%s:'" % (tok,tok), globals()) + +t_ignore = " \t" + +def t_error(t): + pass + +lex.lex() +lex.runmain(data="TOK34: TOK143: TOK269: TOK372: TOK452: TOK561: TOK999:") + + diff --git a/tests/lex_module.py b/tests/lex_module.py new file mode 100644 index 0000000..0fa8544 --- /dev/null +++ b/tests/lex_module.py @@ -0,0 +1,7 @@ +# 
lex_module.py +# + +import ply.lex as lex +import lex_module_import +lex.lex(module=lex_module_import) +lex.runmain(data="3+4") diff --git a/tests/lex_module_import.py b/tests/lex_module_import.py new file mode 100644 index 0000000..df42082 --- /dev/null +++ b/tests/lex_module_import.py @@ -0,0 +1,42 @@ +# ----------------------------------------------------------------------------- +# lex_module_import.py +# +# A lexer defined in a module, but built in lex_module.py +# ----------------------------------------------------------------------------- + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_DIVIDE = r'/' +t_EQUALS = r'=' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + +t_ignore = " \t" + +def t_newline(t): + r'\n+' + t.lineno += t.value.count("\n") + +def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + diff --git a/tests/lex_object.py b/tests/lex_object.py new file mode 100644 index 0000000..2fa7537 --- /dev/null +++ b/tests/lex_object.py @@ -0,0 +1,53 @@ +# ----------------------------------------------------------------------------- +# lex_object.py +# ----------------------------------------------------------------------------- + +import ply.lex as lex + +class CalcLexer: + tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + + # Tokens + + t_PLUS = r'\+' + t_MINUS = r'-' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_EQUALS = r'=' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + def t_NUMBER(self,t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print("Integer value too large %s" % t.value) + t.value = 0 + return t + + t_ignore = " \t" + + def t_newline(self,t): + r'\n+' + t.lineno += t.value.count("\n") + + def t_error(self,t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + + +calc = CalcLexer() + +# Build the lexer +lex.lex(object=calc) +lex.runmain(data="3+4") + + + + diff --git a/tests/lex_re1.py b/tests/lex_re1.py new file mode 100644 index 0000000..d6b7c38 --- /dev/null +++ b/tests/lex_re1.py @@ -0,0 +1,24 @@ +# lex_re1.py +# +# Bad regular expression in a string + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'(\d+' + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_re2.py b/tests/lex_re2.py new file mode 100644 index 0000000..f5ce695 --- /dev/null +++ b/tests/lex_re2.py @@ -0,0 +1,24 @@ +# lex_re2.py +# +# Regular expression rule matches empty string + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+?' 
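+# Note: the trailing '?' lets this pattern match the empty string, which lex reports as an error.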
+t_MINUS = r'-' +t_NUMBER = r'(\d+)' + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_re3.py b/tests/lex_re3.py new file mode 100644 index 0000000..8f594f3 --- /dev/null +++ b/tests/lex_re3.py @@ -0,0 +1,26 @@ +# lex_re3.py +# +# Unescaped '#' character in a regular expression string + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + "POUND", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'(\d+)' +t_POUND = r'#' + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_rule1.py b/tests/lex_rule1.py new file mode 100644 index 0000000..ae279e9 --- /dev/null +++ b/tests/lex_rule1.py @@ -0,0 +1,24 @@ +# lex_rule1.py +# +# Rule defined as a value that is neither a string nor a function + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = 1 + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_rule2.py b/tests/lex_rule2.py new file mode 100644 index 0000000..adefee2 --- /dev/null +++ b/tests/lex_rule2.py @@ -0,0 +1,26 @@ +# lex_rule2.py +# +# Rule function with incorrect number of arguments + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +def t_NUMBER(): + r'\d+' + return t + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_rule3.py b/tests/lex_rule3.py new file mode 100644 index 0000000..bd61040 --- /dev/null +++ b/tests/lex_rule3.py @@ -0,0 +1,24 @@ +# lex_rule3.py +# +# Rule function with incorrect number of arguments + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +def t_NUMBER(t,s): + r'\d+' + return t + +def t_error(t): + pass + +lex.lex() + + diff --git a/tests/lex_state1.py b/tests/lex_state1.py new file mode 100644 index 0000000..6fa5e9f --- /dev/null +++ b/tests/lex_state1.py @@ -0,0 +1,37 @@ +# lex_state1.py +# +# Bad state declaration + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +states = 'comment' + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +# Comments +def t_comment(t): + r'/\*' + t.lexer.begin('comment') + print("Entering comment state") + +def t_comment_body_part(t): + r'(.|\n)*\*/' + print("comment body %s" % t) + t.lexer.begin('INITIAL') + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_state2.py b/tests/lex_state2.py new file mode 100644 index 0000000..9300ed7 --- /dev/null +++ b/tests/lex_state2.py @@ -0,0 +1,37 @@ +# lex_state2.py +# +# Bad state declaration + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +states = ('comment','example') + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +# Comments +def t_comment(t): + r'/\*' + t.lexer.begin('comment') + print("Entering comment state") + +def t_comment_body_part(t): + r'(.|\n)*\*/' + print("comment body %s" % t) + t.lexer.begin('INITIAL') + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_state3.py b/tests/lex_state3.py new file mode 100644 index 0000000..2e5b7cf --- /dev/null +++ b/tests/lex_state3.py @@ -0,0 +1,39 @@ +# lex_state3.py +# +# Bad state declaration + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +comment = 1 +states = ((comment, 'inclusive'), + ('example', 'exclusive')) + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +# Comments +def t_comment(t): + r'/\*' + t.lexer.begin('comment') + print("Entering comment state") + +def t_comment_body_part(t): + r'(.|\n)*\*/' + print("comment body %s" % t) + 
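# Return to the INITIAL state at the end of the comment body +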
t.lexer.begin('INITIAL') + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_state4.py b/tests/lex_state4.py new file mode 100644 index 0000000..fb147c8 --- /dev/null +++ b/tests/lex_state4.py @@ -0,0 +1,38 @@ +# lex_state4.py +# +# Bad state declaration + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + + +states = (('comment', 'exclsive'),) + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +# Comments +def t_comment(t): + r'/\*' + t.lexer.begin('comment') + print("Entering comment state") + +def t_comment_body_part(t): + r'(.|\n)*\*/' + print("comment body %s" % t) + t.lexer.begin('INITIAL') + +def t_error(t): + pass + + + +lex.lex() + + diff --git a/tests/lex_state5.py b/tests/lex_state5.py new file mode 100644 index 0000000..fb2f3a7 --- /dev/null +++ b/tests/lex_state5.py @@ -0,0 +1,37 @@ +# lex_state5.py +# +# Bad state declaration + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +states = (('comment', 'exclusive'), + ('comment', 'exclusive')) + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +# Comments +def t_comment(t): + r'/\*' + t.lexer.begin('comment') + print("Entering comment state") + +def t_comment_body_part(t): + r'(.|\n)*\*/' + print("comment body %s" % t) + t.lexer.begin('INITIAL') + +def t_error(t): + pass + + +lex.lex() + + diff --git a/tests/lex_state_noerror.py b/tests/lex_state_noerror.py new file mode 100644 index 0000000..54b892e --- /dev/null +++ b/tests/lex_state_noerror.py @@ -0,0 +1,36 @@ +# lex_state_noerror.py +# +# Declaration of an exclusive state with no error rule defined + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +states = (('comment', 'exclusive'),) + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +# Comments +def t_comment(t): + r'/\*' + t.lexer.begin('comment') + print("Entering comment state") + +def t_comment_body_part(t): + r'(.|\n)*\*/' + print("comment body %s" % t) + t.lexer.begin('INITIAL') + +def t_error(t): + pass + + +lex.lex() + + diff --git a/tests/lex_state_norule.py b/tests/lex_state_norule.py new file mode 100644 index 0000000..4b979b2 --- /dev/null +++ b/tests/lex_state_norule.py @@ -0,0 +1,37 @@ +# lex_state_norule.py +# +# Declaration of a state for which no rules are defined + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +states = (('comment', 'exclusive'), + ('example', 'exclusive')) + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +# Comments +def t_comment(t): + r'/\*' + t.lexer.begin('comment') + print("Entering comment state") + +def t_comment_body_part(t): + r'(.|\n)*\*/' + print("comment body %s" % t) + t.lexer.begin('INITIAL') + +def t_error(t): + pass + + +lex.lex() + + diff --git a/tests/lex_state_try.py b/tests/lex_state_try.py new file mode 100644 index 0000000..c5f3448 --- /dev/null +++ b/tests/lex_state_try.py @@ -0,0 +1,42 @@ +# lex_state_try.py +# +# Declaration and normal operation of a lexer state + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + ] + +states = (('comment', 'exclusive'),) + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +t_ignore = " \t" + +# Comments +def t_comment(t): + r'/\*' + t.lexer.begin('comment') + print("Entering comment state") + +def t_comment_body_part(t): + r'(.|\n)*\*/' + print("comment body %s" % t) + t.lexer.begin('INITIAL') + +def t_error(t): + pass + +t_comment_error = t_error +t_comment_ignore = t_ignore + +lex.lex() + +data = "3 + 4 /* This is a comment */ + 10" + +lex.runmain(data=data) diff 
--git a/tests/lex_token1.py b/tests/lex_token1.py new file mode 100644 index 0000000..1001a99 --- /dev/null +++ b/tests/lex_token1.py @@ -0,0 +1,16 @@ +# lex_token1.py +# +# Tests for absence of tokens variable + +import ply.lex as lex + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +lex.lex() + + diff --git a/tests/lex_token2.py b/tests/lex_token2.py new file mode 100644 index 0000000..00e758b --- /dev/null +++ b/tests/lex_token2.py @@ -0,0 +1,19 @@ +# lex_token2.py +# +# Tests for tokens of wrong type + +import ply.lex as lex + +tokens = "PLUS MINUS NUMBER" + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + + +lex.lex() + + diff --git a/tests/lex_token3.py b/tests/lex_token3.py new file mode 100644 index 0000000..3b71b3a --- /dev/null +++ b/tests/lex_token3.py @@ -0,0 +1,21 @@ +# lex_token3.py +# +# tokens is right type, but is missing a token for one rule + +import ply.lex as lex + +tokens = [ + "PLUS", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +lex.lex() + + diff --git a/tests/lex_token4.py b/tests/lex_token4.py new file mode 100644 index 0000000..89bc2e2 --- /dev/null +++ b/tests/lex_token4.py @@ -0,0 +1,23 @@ +# lex_token4.py +# +# Bad token name + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "-", + "NUMBER", + ] + +t_PLUS = r'\+' +t_MINUS = r'-' +t_NUMBER = r'\d+' + +def t_error(t): + pass + +lex.lex() + + diff --git a/tests/lex_token_dup.py b/tests/lex_token_dup.py new file mode 100644 index 0000000..631535f --- /dev/null +++ b/tests/lex_token_dup.py @@ -0,0 +1,26 @@ +# lex_token_dup.py +# +# Duplicate token name in tokens + +import ply.lex as lex + +tokens = [ + "PLUS", + "MINUS", + "NUMBER", + "MINUS" + ] + +t_PLUS = r'\+' +t_MINUS = r'-' + +def t_NUMBER(t): + r'\d+' + return t + +def t_error(t): + pass + +lex.lex() + + diff --git a/tests/test_cpp_nonascii.c b/tests/test_cpp_nonascii.c new file mode 100644 index 0000000..3e97d81 --- /dev/null +++ b/tests/test_cpp_nonascii.c @@ -0,0 +1,2 @@ +/* ë */ +#define x 1 \ No newline at end of file diff --git a/tests/testlex.py b/tests/testlex.py new file mode 100755 index 0000000..7f2adad --- /dev/null +++ b/tests/testlex.py @@ -0,0 +1,401 @@ +# testlex.py + +import unittest +try: + import StringIO +except ImportError: + import io as StringIO + +import sys +import os +import warnings +import platform + +sys.tracebacklimit = 0 + +import ply.lex + +try: + from importlib.util import cache_from_source +except ImportError: + # Python 2.7, but we don't care. 
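+    # cache_from_source is only used on Python 3.5+; make_pymodule_path() below falls back to imp or the plain source filename.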
+ cache_from_source = None + + +def make_pymodule_path(filename, optimization=None): + path = os.path.dirname(filename) + file = os.path.basename(filename) + mod, ext = os.path.splitext(file) + + if sys.hexversion >= 0x3050000: + fullpath = cache_from_source(filename, optimization=optimization) + elif sys.hexversion >= 0x3040000: + fullpath = cache_from_source(filename, ext=='.pyc') + elif sys.hexversion >= 0x3020000: + import imp + modname = mod+"."+imp.get_tag()+ext + fullpath = os.path.join(path,'__pycache__',modname) + else: + fullpath = filename + return fullpath + +def pymodule_out_exists(filename, optimization=None): + return os.path.exists(make_pymodule_path(filename, + optimization=optimization)) + +def pymodule_out_remove(filename, optimization=None): + os.remove(make_pymodule_path(filename, optimization=optimization)) + +def implementation(): + if platform.system().startswith("Java"): + return "Jython" + elif hasattr(sys, "pypy_version_info"): + return "PyPy" + else: + return "CPython" + +test_pyo = (implementation() == 'CPython') + +def check_expected(result, expected, contains=False): + if sys.version_info[0] >= 3: + if isinstance(result,str): + result = result.encode('ascii') + if isinstance(expected,str): + expected = expected.encode('ascii') + resultlines = result.splitlines() + expectedlines = expected.splitlines() + + if len(resultlines) != len(expectedlines): + return False + + for rline,eline in zip(resultlines,expectedlines): + if contains: + if eline not in rline: + return False + else: + if not rline.endswith(eline): + return False + return True + +def run_import(module): + code = "import "+module + exec(code) + del sys.modules[module] + +# Tests related to errors and warnings when building lexers +class LexErrorWarningTests(unittest.TestCase): + def setUp(self): + sys.stderr = StringIO.StringIO() + sys.stdout = StringIO.StringIO() + if sys.hexversion >= 0x3020000: + warnings.filterwarnings('ignore',category=ResourceWarning) + + def tearDown(self): + sys.stderr = sys.__stderr__ + sys.stdout = sys.__stdout__ + def test_lex_doc1(self): + self.assertRaises(SyntaxError,run_import,"lex_doc1") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "lex_doc1.py:15: No regular expression defined for rule 't_NUMBER'\n")) + def test_lex_dup1(self): + self.assertRaises(SyntaxError,run_import,"lex_dup1") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "lex_dup1.py:17: Rule t_NUMBER redefined. Previously defined on line 15\n" )) + + def test_lex_dup2(self): + self.assertRaises(SyntaxError,run_import,"lex_dup2") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "lex_dup2.py:19: Rule t_NUMBER redefined. Previously defined on line 15\n" )) + + def test_lex_dup3(self): + self.assertRaises(SyntaxError,run_import,"lex_dup3") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "lex_dup3.py:17: Rule t_NUMBER redefined. 
Previously defined on line 15\n" )) + + def test_lex_empty(self): + self.assertRaises(SyntaxError,run_import,"lex_empty") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "No rules of the form t_rulename are defined\n" + "No rules defined for state 'INITIAL'\n")) + + def test_lex_error1(self): + run_import("lex_error1") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "No t_error rule is defined\n")) + + def test_lex_error2(self): + self.assertRaises(SyntaxError,run_import,"lex_error2") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Rule 't_error' must be defined as a function\n") + ) + + def test_lex_error3(self): + self.assertRaises(SyntaxError,run_import,"lex_error3") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "lex_error3.py:17: Rule 't_error' requires an argument\n")) + + def test_lex_error4(self): + self.assertRaises(SyntaxError,run_import,"lex_error4") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "lex_error4.py:17: Rule 't_error' has too many arguments\n")) + + def test_lex_ignore(self): + self.assertRaises(SyntaxError,run_import,"lex_ignore") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "lex_ignore.py:17: Rule 't_ignore' must be defined as a string\n")) + + def test_lex_ignore2(self): + run_import("lex_ignore2") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "t_ignore contains a literal backslash '\\'\n")) + + + def test_lex_re1(self): + self.assertRaises(SyntaxError,run_import,"lex_re1") + result = sys.stderr.getvalue() + if sys.hexversion < 0x3050000: + msg = "Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis\n" + else: + msg = "Invalid regular expression for rule 't_NUMBER'. missing ), unterminated subpattern at position 0" + self.assertTrue(check_expected(result, + msg, + contains=True)) + + def test_lex_re2(self): + self.assertRaises(SyntaxError,run_import,"lex_re2") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Regular expression for rule 't_PLUS' matches empty string\n")) + + def test_lex_re3(self): + self.assertRaises(SyntaxError,run_import,"lex_re3") + result = sys.stderr.getvalue() +# self.assertTrue(check_expected(result, +# "Invalid regular expression for rule 't_POUND'. unbalanced parenthesis\n" +# "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n")) + + if sys.hexversion < 0x3050000: + msg = ("Invalid regular expression for rule 't_POUND'. unbalanced parenthesis\n" + "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n") + else: + msg = ("Invalid regular expression for rule 't_POUND'. 
missing ), unterminated subpattern at position 0\n" + "ERROR: Make sure '#' in rule 't_POUND' is escaped with '\#'") + self.assertTrue(check_expected(result, + msg, + contains=True), result) + + def test_lex_rule1(self): + self.assertRaises(SyntaxError,run_import,"lex_rule1") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "t_NUMBER not defined as a function or string\n")) + + def test_lex_rule2(self): + self.assertRaises(SyntaxError,run_import,"lex_rule2") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "lex_rule2.py:15: Rule 't_NUMBER' requires an argument\n")) + + def test_lex_rule3(self): + self.assertRaises(SyntaxError,run_import,"lex_rule3") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "lex_rule3.py:15: Rule 't_NUMBER' has too many arguments\n")) + + + def test_lex_state1(self): + self.assertRaises(SyntaxError,run_import,"lex_state1") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "states must be defined as a tuple or list\n")) + + def test_lex_state2(self): + self.assertRaises(SyntaxError,run_import,"lex_state2") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Invalid state specifier 'comment'. Must be a tuple (statename,'exclusive|inclusive')\n" + "Invalid state specifier 'example'. Must be a tuple (statename,'exclusive|inclusive')\n")) + + def test_lex_state3(self): + self.assertRaises(SyntaxError,run_import,"lex_state3") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "State name 1 must be a string\n" + "No rules defined for state 'example'\n")) + + def test_lex_state4(self): + self.assertRaises(SyntaxError,run_import,"lex_state4") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "State type for state 'comment' must be 'inclusive' or 'exclusive'\n")) + + + def test_lex_state5(self): + self.assertRaises(SyntaxError,run_import,"lex_state5") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "State 'comment' already defined\n")) + + def test_lex_state_noerror(self): + run_import("lex_state_noerror") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "No error rule is defined for exclusive state 'comment'\n")) + + def test_lex_state_norule(self): + self.assertRaises(SyntaxError,run_import,"lex_state_norule") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "No rules defined for state 'example'\n")) + + def test_lex_token1(self): + self.assertRaises(SyntaxError,run_import,"lex_token1") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "No token list is defined\n" + "Rule 't_NUMBER' defined for an unspecified token NUMBER\n" + "Rule 't_PLUS' defined for an unspecified token PLUS\n" + "Rule 't_MINUS' defined for an unspecified token MINUS\n" +)) + + def test_lex_token2(self): + self.assertRaises(SyntaxError,run_import,"lex_token2") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "tokens must be a list or tuple\n" + "Rule 't_NUMBER' defined for an unspecified token NUMBER\n" + "Rule 't_PLUS' defined for an unspecified token PLUS\n" + "Rule 't_MINUS' defined for an unspecified token MINUS\n" +)) + + def test_lex_token3(self): + self.assertRaises(SyntaxError,run_import,"lex_token3") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Rule 't_MINUS' defined for an unspecified token MINUS\n")) + + + def test_lex_token4(self): + 
self.assertRaises(SyntaxError,run_import,"lex_token4") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Bad token name '-'\n")) + + + def test_lex_token_dup(self): + run_import("lex_token_dup") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Token 'MINUS' multiply defined\n")) + + + def test_lex_literal1(self): + self.assertRaises(SyntaxError,run_import,"lex_literal1") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Invalid literal '**'. Must be a single character\n")) + + def test_lex_literal2(self): + self.assertRaises(SyntaxError,run_import,"lex_literal2") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Invalid literals specification. literals must be a sequence of characters\n")) + +import os +import subprocess +import shutil + +# Tests related to various build options associated with lexers +class LexBuildOptionTests(unittest.TestCase): + def setUp(self): + sys.stderr = StringIO.StringIO() + sys.stdout = StringIO.StringIO() + def tearDown(self): + sys.stderr = sys.__stderr__ + sys.stdout = sys.__stdout__ + try: + shutil.rmtree("lexdir") + except OSError: + pass + + def test_lex_module(self): + run_import("lex_module") + result = sys.stdout.getvalue() + self.assertTrue(check_expected(result, + "(NUMBER,3,1,0)\n" + "(PLUS,'+',1,1)\n" + "(NUMBER,4,1,2)\n")) + + def test_lex_object(self): + run_import("lex_object") + result = sys.stdout.getvalue() + self.assertTrue(check_expected(result, + "(NUMBER,3,1,0)\n" + "(PLUS,'+',1,1)\n" + "(NUMBER,4,1,2)\n")) + + def test_lex_closure(self): + run_import("lex_closure") + result = sys.stdout.getvalue() + self.assertTrue(check_expected(result, + "(NUMBER,3,1,0)\n" + "(PLUS,'+',1,1)\n" + "(NUMBER,4,1,2)\n")) + + def test_lex_many_tokens(self): + run_import("lex_many_tokens") + result = sys.stdout.getvalue() + self.assertTrue(check_expected(result, + "(TOK34,'TOK34:',1,0)\n" + "(TOK143,'TOK143:',1,7)\n" + "(TOK269,'TOK269:',1,15)\n" + "(TOK372,'TOK372:',1,23)\n" + "(TOK452,'TOK452:',1,31)\n" + "(TOK561,'TOK561:',1,39)\n" + "(TOK999,'TOK999:',1,47)\n" + )) + +# Tests related to run-time behavior of lexers +class LexRunTests(unittest.TestCase): + def setUp(self): + sys.stderr = StringIO.StringIO() + sys.stdout = StringIO.StringIO() + def tearDown(self): + sys.stderr = sys.__stderr__ + sys.stdout = sys.__stdout__ + + def test_lex_hedit(self): + run_import("lex_hedit") + result = sys.stdout.getvalue() + self.assertTrue(check_expected(result, + "(H_EDIT_DESCRIPTOR,'abc',1,0)\n" + "(H_EDIT_DESCRIPTOR,'abcdefghij',1,6)\n" + "(H_EDIT_DESCRIPTOR,'xy',1,20)\n")) + + def test_lex_state_try(self): + run_import("lex_state_try") + result = sys.stdout.getvalue() + self.assertTrue(check_expected(result, + "(NUMBER,'3',1,0)\n" + "(PLUS,'+',1,2)\n" + "(NUMBER,'4',1,4)\n" + "Entering comment state\n" + "comment body LexToken(body_part,'This is a comment */',1,9)\n" + "(PLUS,'+',1,30)\n" + "(NUMBER,'10',1,32)\n" + )) + + + +unittest.main() diff --git a/tests/testyacc.py b/tests/testyacc.py new file mode 100644 index 0000000..b488bf7 --- /dev/null +++ b/tests/testyacc.py @@ -0,0 +1,403 @@ +# testyacc.py + +import unittest +try: + import StringIO +except ImportError: + import io as StringIO + +import sys +import os +import warnings +import re +import platform + +sys.tracebacklimit = 0 + +import ply.yacc + +def make_pymodule_path(filename): + path = os.path.dirname(filename) + file = os.path.basename(filename) + mod, ext = os.path.splitext(file) + + if 
sys.hexversion >= 0x3040000: + import importlib.util + fullpath = importlib.util.cache_from_source(filename, ext=='.pyc') + elif sys.hexversion >= 0x3020000: + import imp + modname = mod+"."+imp.get_tag()+ext + fullpath = os.path.join(path,'__pycache__',modname) + else: + fullpath = filename + return fullpath + +def pymodule_out_exists(filename): + return os.path.exists(make_pymodule_path(filename)) + +def pymodule_out_remove(filename): + os.remove(make_pymodule_path(filename)) + +def implementation(): + if platform.system().startswith("Java"): + return "Jython" + elif hasattr(sys, "pypy_version_info"): + return "PyPy" + else: + return "CPython" + +# Check the output to see if it contains all of a set of expected output lines. +# This alternate implementation looks weird, but is needed to properly handle +# some variations in error message order that occur due to dict hash table +# randomization that was introduced in Python 3.3 +def check_expected(result, expected): + # Normalize 'state n' text to account for randomization effects in Python 3.3 + expected = re.sub(r' state \d+', 'state ', expected) + result = re.sub(r' state \d+', 'state ', result) + + resultlines = set() + for line in result.splitlines(): + if line.startswith("WARNING: "): + line = line[9:] + elif line.startswith("ERROR: "): + line = line[7:] + resultlines.add(line) + + # Selectively remove expected lines from the output + for eline in expected.splitlines(): + resultlines = set(line for line in resultlines if not line.endswith(eline)) + + # Return True if no result lines remain + return not bool(resultlines) + +def run_import(module): + code = "import "+module + exec(code) + del sys.modules[module] + +# Tests related to errors and warnings when building parsers +class YaccErrorWarningTests(unittest.TestCase): + def setUp(self): + sys.stderr = StringIO.StringIO() + sys.stdout = StringIO.StringIO() + try: + os.remove("parsetab.py") + pymodule_out_remove("parsetab.pyc") + except OSError: + pass + + if sys.hexversion >= 0x3020000: + warnings.filterwarnings('ignore', category=ResourceWarning) + warnings.filterwarnings('ignore', category=DeprecationWarning) + + def tearDown(self): + sys.stderr = sys.__stderr__ + sys.stdout = sys.__stdout__ + def test_yacc_badargs(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badargs") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_badargs.py:23: Rule 'p_statement_assign' has too many arguments\n" + "yacc_badargs.py:27: Rule 'p_statement_expr' requires an argument\n" + )) + def test_yacc_badid(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badid") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_badid.py:29: Illegal name 'bad&rule' in rule 'statement'\n" + "yacc_badid.py:33: Illegal rule name 'bad&rule'\n" + )) + + def test_yacc_badprec(self): + try: + run_import("yacc_badprec") + except ply.yacc.YaccError: + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "precedence must be a list or tuple\n" + )) + def test_yacc_badprec2(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badprec2") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Bad precedence table\n" + )) + + def test_yacc_badprec3(self): + run_import("yacc_badprec3") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Precedence already specified for terminal 'MINUS'\n" + "Generating LALR tables\n" + + )) + + def test_yacc_badrule(self): + 
self.assertRaises(ply.yacc.YaccError,run_import,"yacc_badrule") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_badrule.py:21: Syntax error. Expected ':'\n" + "yacc_badrule.py:25: Syntax error in rule 'statement'\n" + "yacc_badrule.py:30: Syntax error. Expected ':'\n" + "yacc_badrule.py:39: Syntax error. Expected ':'\n" + )) + + def test_yacc_badtok(self): + try: + run_import("yacc_badtok") + except ply.yacc.YaccError: + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "tokens must be a list or tuple\n")) + + def test_yacc_dup(self): + run_import("yacc_dup") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_dup.py:24: Function p_statement redefined. Previously defined on line 20\n" + "Token 'EQUALS' defined, but not used\n" + "There is 1 unused token\n" + "Generating LALR tables\n" + + )) + def test_yacc_error1(self): + try: + run_import("yacc_error1") + except ply.yacc.YaccError: + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_error1.py:58: p_error() requires 1 argument\n")) + + def test_yacc_error2(self): + try: + run_import("yacc_error2") + except ply.yacc.YaccError: + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_error2.py:58: p_error() requires 1 argument\n")) + + def test_yacc_error3(self): + try: + run_import("yacc_error3") + except ply.yacc.YaccError: + e = sys.exc_info()[1] + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "'p_error' defined, but is not a function or method\n")) + + def test_yacc_error4(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_error4") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_error4.py:59: Illegal rule name 'error'. 
Already defined as a token\n" + )) + + + def test_yacc_error5(self): + run_import("yacc_error5") + result = sys.stdout.getvalue() + self.assertTrue(check_expected(result, + "Group at 3:10 to 3:12\n" + "Undefined name 'a'\n" + "Syntax error at 'b'\n" + "Syntax error at 4:18 to 4:22\n" + "Assignment Error at 2:5 to 5:27\n" + "13\n" + )) + + def test_yacc_error6(self): + run_import("yacc_error6") + result = sys.stdout.getvalue() + self.assertTrue(check_expected(result, + "a=7\n" + "Line 3: Syntax error at '*'\n" + "c=21\n" + )) + + def test_yacc_error7(self): + run_import("yacc_error7") + result = sys.stdout.getvalue() + self.assertTrue(check_expected(result, + "a=7\n" + "Line 3: Syntax error at '*'\n" + "c=21\n" + )) + + def test_yacc_inf(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_inf") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Token 'NUMBER' defined, but not used\n" + "There is 1 unused token\n" + "Infinite recursion detected for symbol 'statement'\n" + "Infinite recursion detected for symbol 'expression'\n" + )) + def test_yacc_literal(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_literal") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_literal.py:33: Literal token '**' in rule 'expression' may only be a single character\n" + )) + def test_yacc_misplaced(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_misplaced") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_misplaced.py:29: Misplaced '|'\n" + )) + + def test_yacc_missing1(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_missing1") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_missing1.py:21: Symbol 'location' used, but not defined as a token or a rule\n" + )) + + def test_yacc_nested(self): + run_import("yacc_nested") + result = sys.stdout.getvalue() + self.assertTrue(check_expected(result, + "A\n" + "A\n" + "A\n", + )) + + def test_yacc_nodoc(self): + run_import("yacc_nodoc") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_nodoc.py:24: No documentation string specified in function 'p_statement_expr' (ignored)\n" + "Generating LALR tables\n" + )) + + def test_yacc_noerror(self): + run_import("yacc_noerror") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "no p_error() function is defined\n" + "Generating LALR tables\n" + )) + + def test_yacc_nop(self): + run_import("yacc_nop") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_nop.py:24: Possible grammar rule 'statement_expr' defined without p_ prefix\n" + "Generating LALR tables\n" + )) + + def test_yacc_notfunc(self): + run_import("yacc_notfunc") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "'p_statement_assign' not defined as a function\n" + "Token 'EQUALS' defined, but not used\n" + "There is 1 unused token\n" + "Generating LALR tables\n" + )) + def test_yacc_notok(self): + try: + run_import("yacc_notok") + except ply.yacc.YaccError: + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "No token list is defined\n")) + + def test_yacc_rr(self): + run_import("yacc_rr") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Generating LALR tables\n" + "1 reduce/reduce conflict\n" + "reduce/reduce conflict in state 15 resolved using rule (statement -> NAME EQUALS NUMBER)\n" + "rejected rule (expression -> NUMBER) in 
state 15\n" + + )) + + def test_yacc_rr_unused(self): + run_import("yacc_rr_unused") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "no p_error() function is defined\n" + "Generating LALR tables\n" + "3 reduce/reduce conflicts\n" + "reduce/reduce conflict in state 1 resolved using rule (rule3 -> A)\n" + "rejected rule (rule4 -> A) in state 1\n" + "reduce/reduce conflict in state 1 resolved using rule (rule3 -> A)\n" + "rejected rule (rule5 -> A) in state 1\n" + "reduce/reduce conflict in state 1 resolved using rule (rule4 -> A)\n" + "rejected rule (rule5 -> A) in state 1\n" + "Rule (rule5 -> A) is never reduced\n" + )) + + def test_yacc_simple(self): + run_import("yacc_simple") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Generating LALR tables\n" + )) + + def test_yacc_sr(self): + run_import("yacc_sr") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Generating LALR tables\n" + "20 shift/reduce conflicts\n" + )) + + def test_yacc_term1(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_term1") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_term1.py:21: Illegal rule name 'NUMBER'. Already defined as a token\n" + )) + + def test_yacc_unicode_literals(self): + run_import("yacc_unicode_literals") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Generating LALR tables\n" + )) + + def test_yacc_unused(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_unused") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_unused.py:59: Symbol 'COMMA' used, but not defined as a token or a rule\n" + "Symbol 'COMMA' is unreachable\n" + "Symbol 'exprlist' is unreachable\n" + )) + def test_yacc_unused_rule(self): + run_import("yacc_unused_rule") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_unused_rule.py:59: Rule 'integer' defined, but not used\n" + "There is 1 unused rule\n" + "Symbol 'integer' is unreachable\n" + "Generating LALR tables\n" + )) + + def test_yacc_uprec(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_uprec") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_uprec.py:34: Nothing known about the precedence of 'UMINUS'\n" + )) + + def test_yacc_uprec2(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_uprec2") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "yacc_uprec2.py:34: Syntax error. 
Nothing follows %prec\n" + )) + + def test_yacc_prec1(self): + self.assertRaises(ply.yacc.YaccError,run_import,"yacc_prec1") + result = sys.stderr.getvalue() + self.assertTrue(check_expected(result, + "Precedence rule 'left' defined for unknown symbol '+'\n" + "Precedence rule 'left' defined for unknown symbol '*'\n" + "Precedence rule 'left' defined for unknown symbol '-'\n" + "Precedence rule 'left' defined for unknown symbol '/'\n" + )) + +unittest.main() diff --git a/tests/yacc_badargs.py b/tests/yacc_badargs.py new file mode 100644 index 0000000..cd4b1e7 --- /dev/null +++ b/tests/yacc_badargs.py @@ -0,0 +1,68 @@ +# ----------------------------------------------------------------------------- +# yacc_badargs.py +# +# Rules with wrong # args +# ----------------------------------------------------------------------------- +import sys +sys.tracebacklimit = 0 + +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t,s): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_badid.py b/tests/yacc_badid.py new file mode 100644 index 0000000..f624aca --- /dev/null +++ b/tests/yacc_badid.py @@ -0,0 +1,74 @@ +# ----------------------------------------------------------------------------- +# yacc_badid.py +# +# Attempt to define a rule with a bad-identifier name +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_statement_expr2(t): + 'statement : bad&rule' + pass + +def p_badrule(t): + 'bad&rule : expression' + pass + + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 
'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + pass + +yacc.yacc() + + + + diff --git a/tests/yacc_badprec.py b/tests/yacc_badprec.py new file mode 100644 index 0000000..95e9db2 --- /dev/null +++ b/tests/yacc_badprec.py @@ -0,0 +1,61 @@ +# ----------------------------------------------------------------------------- +# yacc_badprec.py +# +# Bad precedence specifier +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = "blah" + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_badprec2.py b/tests/yacc_badprec2.py new file mode 100644 index 0000000..c33303d --- /dev/null +++ b/tests/yacc_badprec2.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_badprec2.py +# +# Bad precedence +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + 42, + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_badprec3.py b/tests/yacc_badprec3.py new file mode 100644 index 0000000..e1ce3a6 --- /dev/null +++ b/tests/yacc_badprec3.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_badprec3.py +# +# Bad precedence +# ----------------------------------------------------------------------------- +import 
ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE','MINUS'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_badrule.py b/tests/yacc_badrule.py new file mode 100644 index 0000000..898a8c7 --- /dev/null +++ b/tests/yacc_badrule.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_badrule.py +# +# Syntax problems in the rule strings +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression: MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_badtok.py b/tests/yacc_badtok.py new file mode 100644 index 0000000..567aadf --- /dev/null +++ b/tests/yacc_badtok.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_badtok.py +# +# A grammar, but tokens is a bad datatype +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +tokens = "Hello" + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | 
expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_dup.py b/tests/yacc_dup.py new file mode 100644 index 0000000..94238a4 --- /dev/null +++ b/tests/yacc_dup.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_dup.py +# +# Duplicated rule name +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_error1.py b/tests/yacc_error1.py new file mode 100644 index 0000000..39b9124 --- /dev/null +++ b/tests/yacc_error1.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_error1.py +# +# Bad p_error() function +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : 
NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t,s): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_error2.py b/tests/yacc_error2.py new file mode 100644 index 0000000..2463ee9 --- /dev/null +++ b/tests/yacc_error2.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_error2.py +# +# Bad p_error() function +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_error3.py b/tests/yacc_error3.py new file mode 100644 index 0000000..f181253 --- /dev/null +++ b/tests/yacc_error3.py @@ -0,0 +1,64 @@ +# ----------------------------------------------------------------------------- +# yacc_error3.py +# +# Bad p_error() function +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +p_error = "blah" + +yacc.yacc() + + + + diff --git a/tests/yacc_error4.py b/tests/yacc_error4.py new file mode 100644 index 0000000..f6bef7e --- /dev/null +++ b/tests/yacc_error4.py @@ -0,0 +1,69 @@ +# ----------------------------------------------------------------------------- 
+# yacc_error4.py +# +# Attempt to define a rule named 'error' +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error_handler(t): + 'error : NAME' + pass + +def p_error(t): + pass + +yacc.yacc() + + + + diff --git a/tests/yacc_error5.py b/tests/yacc_error5.py new file mode 100644 index 0000000..5d4b683 --- /dev/null +++ b/tests/yacc_error5.py @@ -0,0 +1,91 @@ +# ----------------------------------------------------------------------------- +# yacc_error5.py +# +# Lineno and position tracking with error tokens +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_assign_error(t): + 'statement : NAME EQUALS error' + line_start, line_end = t.linespan(3) + pos_start, pos_end = t.lexspan(3) + print("Assignment Error at %d:%d to %d:%d" % (line_start,pos_start,line_end,pos_end)) + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + line_start, line_end = t.linespan(2) + pos_start, pos_end = t.lexspan(2) + print("Group at %d:%d to %d:%d" % (line_start,pos_start, line_end, pos_end)) + t[0] = t[2] + +def p_expression_group_error(t): + 'expression : LPAREN error RPAREN' + line_start, line_end = t.linespan(2) + pos_start, pos_end = t.lexspan(2) + print("Syntax error at %d:%d to %d:%d" % (line_start,pos_start, line_end, pos_end)) + t[0] = 0 + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +parser = yacc.yacc() +import 
calclex +calclex.lexer.lineno=1 +parser.parse(""" +a = 3 + +(4*5) + +(a b c) + ++ 6 + 7 +""", tracking=True) + + + + + + diff --git a/tests/yacc_error6.py b/tests/yacc_error6.py new file mode 100644 index 0000000..fde3c4d --- /dev/null +++ b/tests/yacc_error6.py @@ -0,0 +1,77 @@ +# ----------------------------------------------------------------------------- +# yacc_error6.py +# +# Panic mode recovery test +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +def p_statements(t): + 'statements : statements statement' + pass + +def p_statements_1(t): + 'statements : statement' + pass + +def p_statement_assign(p): + 'statement : LPAREN NAME EQUALS expression RPAREN' + print("%s=%s" % (p[2],p[4])) + +def p_statement_expr(t): + 'statement : LPAREN expression RPAREN' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_error(p): + if p: + print("Line %d: Syntax error at '%s'" % (p.lineno, p.value)) + # Scan ahead looking for a name token + while True: + tok = parser.token() + if not tok or tok.type == 'RPAREN': + break + if tok: + parser.restart() + return None + +parser = yacc.yacc() +import calclex +calclex.lexer.lineno=1 + +parser.parse(""" +(a = 3 + 4) +(b = 4 + * 5 - 6 + *) +(c = 10 + 11) +""") + + + + + + diff --git a/tests/yacc_error7.py b/tests/yacc_error7.py new file mode 100644 index 0000000..17cc4f1 --- /dev/null +++ b/tests/yacc_error7.py @@ -0,0 +1,77 @@ +# ----------------------------------------------------------------------------- +# yacc_error7.py +# +# Panic mode recovery test using deprecated functionality +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +def p_statements(t): + 'statements : statements statement' + pass + +def p_statements_1(t): + 'statements : statement' + pass + +def p_statement_assign(p): + 'statement : LPAREN NAME EQUALS expression RPAREN' + print("%s=%s" % (p[2],p[4])) + +def p_statement_expr(t): + 'statement : LPAREN expression RPAREN' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_error(p): + if p: + print("Line %d: Syntax error at '%s'" % (p.lineno, p.value)) + # Scan ahead looking for a name token + while True: + tok = parser.token() + if not tok or tok.type == 'RPAREN': + break + if tok: + parser.restart() + return None + +parser = 
yacc.yacc() +import calclex +calclex.lexer.lineno=1 + +parser.parse(""" +(a = 3 + 4) +(b = 4 + * 5 - 6 + *) +(c = 10 + 11) +""") + + + + + + diff --git a/tests/yacc_inf.py b/tests/yacc_inf.py new file mode 100644 index 0000000..f8b4c30 --- /dev/null +++ b/tests/yacc_inf.py @@ -0,0 +1,53 @@ +# ----------------------------------------------------------------------------- +# yacc_inf.py +# +# Infinite recursion +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_literal.py b/tests/yacc_literal.py new file mode 100644 index 0000000..687d916 --- /dev/null +++ b/tests/yacc_literal.py @@ -0,0 +1,66 @@ +# ----------------------------------------------------------------------------- +# yacc_literal.py +# +# Grammar with bad literal characters +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','+','-'), + ('left','*','/'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression '+' expression + | expression '-' expression + | expression '*' expression + | expression '/' expression + | expression '**' expression ''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_misplaced.py b/tests/yacc_misplaced.py new file mode 100644 index 0000000..7cc75be --- /dev/null +++ b/tests/yacc_misplaced.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_misplaced.py +# +# A misplaced | in grammar rules +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + 
('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + ''' | expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_missing1.py b/tests/yacc_missing1.py new file mode 100644 index 0000000..81955be --- /dev/null +++ b/tests/yacc_missing1.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_missing1.py +# +# Grammar with a missing rule +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : location EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_nested.py b/tests/yacc_nested.py new file mode 100644 index 0000000..93ff400 --- /dev/null +++ b/tests/yacc_nested.py @@ -0,0 +1,30 @@ + +from ply import lex, yacc + +t_A = 'A' +t_B = 'B' +t_C = 'C' + +tokens = ('A', 'B', 'C') + +the_lexer = lex.lex() + +def t_error(t): + pass + +def p_error(p): + pass + +def p_start(t): + '''start : A nest C''' + pass + +def p_nest(t): + '''nest : B''' + print(t[-1]) + +the_parser = yacc.yacc(debug = False) + +the_parser.parse('ABC', the_lexer) +the_parser.parse('ABC', the_lexer, tracking=True) +the_parser.parse('ABC', the_lexer, tracking=True, debug=1) diff --git a/tests/yacc_nodoc.py b/tests/yacc_nodoc.py new file mode 100644 index 0000000..a2f34dd --- /dev/null +++ b/tests/yacc_nodoc.py @@ -0,0 +1,64 @@ +# ----------------------------------------------------------------------------- +# yacc_nodoc.py +# +# Rule with a missing doc-string +# 
----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_noerror.py b/tests/yacc_noerror.py new file mode 100644 index 0000000..2262745 --- /dev/null +++ b/tests/yacc_noerror.py @@ -0,0 +1,63 @@ +# ----------------------------------------------------------------------------- +# yacc_noerror.py +# +# No p_error() rule defined. +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + + +yacc.yacc() + + + + diff --git a/tests/yacc_nop.py b/tests/yacc_nop.py new file mode 100644 index 0000000..3ff17ac --- /dev/null +++ b/tests/yacc_nop.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_nop.py +# +# Possible grammar rule defined without p_ prefix +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + 
| expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_notfunc.py b/tests/yacc_notfunc.py new file mode 100644 index 0000000..c8bd123 --- /dev/null +++ b/tests/yacc_notfunc.py @@ -0,0 +1,63 @@ +# ----------------------------------------------------------------------------- +# yacc_notfunc.py +# +# p_rule not defined as a function +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +p_statement_assign = "Blah" + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_notok.py b/tests/yacc_notok.py new file mode 100644 index 0000000..6e271a0 --- /dev/null +++ b/tests/yacc_notok.py @@ -0,0 +1,63 @@ +# ----------------------------------------------------------------------------- +# yacc_notok.py +# +# A grammar, but we forgot to import the tokens list +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def 
p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_prec1.py b/tests/yacc_prec1.py new file mode 100644 index 0000000..d4fad6e --- /dev/null +++ b/tests/yacc_prec1.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_prec1.py +# +# Tests case where precedence specifier doesn't match up to terminals +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left', '+', '-'), + ('left', '*', '/'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_rr.py b/tests/yacc_rr.py new file mode 100644 index 0000000..de78a58 --- /dev/null +++ b/tests/yacc_rr.py @@ -0,0 +1,69 @@ +# ----------------------------------------------------------------------------- +# yacc_rr.py +# +# A grammar with a reduce/reduce conflict +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_assign_2(t): + 'statement : NAME EQUALS NUMBER' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_rr_unused.py b/tests/yacc_rr_unused.py new file mode 100644 index 0000000..70a8c78 --- 
/dev/null +++ b/tests/yacc_rr_unused.py @@ -0,0 +1,27 @@ +# ----------------------------------------------------------------------------- +# yacc_rr_unused.py +# +# A grammar with reduce/reduce conflicts and a rule that never +# gets reduced. +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +tokens = ('A', 'B', 'C') + +def p_grammar(p): + ''' + rule1 : rule2 B + | rule2 C + + rule2 : rule3 B + | rule4 + | rule5 + + rule3 : A + + rule4 : A + + rule5 : A + ''' + +yacc.yacc() diff --git a/tests/yacc_simple.py b/tests/yacc_simple.py new file mode 100644 index 0000000..5261838 --- /dev/null +++ b/tests/yacc_simple.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_simple.py +# +# A simple, properly specified grammar +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_sr.py b/tests/yacc_sr.py new file mode 100644 index 0000000..0f15a50 --- /dev/null +++ b/tests/yacc_sr.py @@ -0,0 +1,60 @@ +# ----------------------------------------------------------------------------- +# yacc_sr.py +# +# A grammar with shift-reduce conflicts +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git 
a/tests/yacc_term1.py b/tests/yacc_term1.py new file mode 100644 index 0000000..1c1d8c2 --- /dev/null +++ b/tests/yacc_term1.py @@ -0,0 +1,65 @@ +# ----------------------------------------------------------------------------- +# yacc_term1.py +# +# Terminal used on the left-hand-side +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'NUMBER : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_unicode_literals.py b/tests/yacc_unicode_literals.py new file mode 100644 index 0000000..04f6cfd --- /dev/null +++ b/tests/yacc_unicode_literals.py @@ -0,0 +1,67 @@ +# ----------------------------------------------------------------------------- +# yacc_unicode_literals +# +# Test for unicode literals on Python 2.x +# ----------------------------------------------------------------------------- +from __future__ import unicode_literals + +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_unused.py b/tests/yacc_unused.py new file mode 100644 index 0000000..7bc6749 --- /dev/null +++ b/tests/yacc_unused.py @@ -0,0 +1,74 @@ +# ----------------------------------------------------------------------------- +# yacc_unused.py +# +# A grammar with an unused rule +# 
----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_expr_list(t): + 'exprlist : exprlist COMMA expression' + pass + +def p_expr_list_2(t): + 'exprlist : expression' + pass + + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_unused_rule.py b/tests/yacc_unused_rule.py new file mode 100644 index 0000000..2e08d6d --- /dev/null +++ b/tests/yacc_unused_rule.py @@ -0,0 +1,69 @@ +# ----------------------------------------------------------------------------- +# yacc_unused_rule.py +# +# Grammar with an unused rule +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_integer(t): + 'integer : NUMBER' + t[0] = t[1] + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_uprec.py b/tests/yacc_uprec.py new file mode 100644 index 0000000..f173509 --- /dev/null +++ b/tests/yacc_uprec.py @@ -0,0 +1,60 @@ +# ----------------------------------------------------------------------------- +# yacc_uprec.py +# +# A grammar with a bad %prec specifier +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : 
NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec UMINUS' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + diff --git a/tests/yacc_uprec2.py b/tests/yacc_uprec2.py new file mode 100644 index 0000000..85c0f2f --- /dev/null +++ b/tests/yacc_uprec2.py @@ -0,0 +1,60 @@ +# ----------------------------------------------------------------------------- +# yacc_uprec2.py +# +# A grammar with a bad %prec specifier +# ----------------------------------------------------------------------------- +import ply.yacc as yacc + +from calclex import tokens + +# Parsing rules + +# dictionary of names +names = { } + +def p_statement_assign(t): + 'statement : NAME EQUALS expression' + names[t[1]] = t[3] + +def p_statement_expr(t): + 'statement : expression' + print(t[1]) + +def p_expression_binop(t): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if t[2] == '+' : t[0] = t[1] + t[3] + elif t[2] == '-': t[0] = t[1] - t[3] + elif t[2] == '*': t[0] = t[1] * t[3] + elif t[2] == '/': t[0] = t[1] / t[3] + +def p_expression_uminus(t): + 'expression : MINUS expression %prec' + t[0] = -t[2] + +def p_expression_group(t): + 'expression : LPAREN expression RPAREN' + t[0] = t[2] + +def p_expression_number(t): + 'expression : NUMBER' + t[0] = t[1] + +def p_expression_name(t): + 'expression : NAME' + try: + t[0] = names[t[1]] + except LookupError: + print("Undefined name '%s'" % t[1]) + t[0] = 0 + +def p_error(t): + print("Syntax error at '%s'" % t.value) + +yacc.yacc() + + + + -- cgit v1.2.1
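For reference, a minimal stand-alone sketch of the line and position tracking that tests/yacc_error5.py above exercises: calling parse() with tracking=True lets a rule query p.linespan(n) and p.lexspan(n) for the source extent of any right-hand-side symbol. The one-token grammar and the input string below are illustrative only and are not part of the test suite.

import ply.lex as lex
import ply.yacc as yacc

tokens = ('NAME',)

t_NAME   = r'[a-zA-Z_][a-zA-Z0-9_]*'
t_ignore = ' \t'

def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)

def t_error(t):
    t.lexer.skip(1)

def p_names(p):
    '''names : names NAME
             | NAME'''
    if len(p) == 3:
        # Only meaningful when parse() is called with tracking=True:
        # linespan()/lexspan() return (start, end) line numbers and
        # lexer positions for the symbol at the given grammar position.
        line_start, line_end = p.linespan(2)
        pos_start, pos_end = p.lexspan(2)
        print("NAME at lines %d-%d, positions %d-%d"
              % (line_start, line_end, pos_start, pos_end))

def p_error(p):
    print("Syntax error")

lexer = lex.lex()
parser = yacc.yacc(debug=False)
parser.parse("alpha\nbeta gamma\n", lexer=lexer, tracking=True)

With tracking enabled, the parse should report a span for each NAME after the first; without it, the span queries fall back to zeroed positions.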
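Likewise, a hedged sketch of the panic-mode recovery pattern that tests/yacc_error6.py and tests/yacc_error7.py drive: on a syntax error, p_error() drains lookahead tokens via parser.token() until a synchronizing RPAREN, then calls parser.restart() to reset the parser to its start state. The token set, grammar, and input here are illustrative, not the fixtures' own.

import ply.lex as lex
import ply.yacc as yacc

tokens = ('NAME', 'NUMBER', 'LPAREN', 'RPAREN')

t_NAME   = r'[a-zA-Z_][a-zA-Z0-9_]*'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_ignore = ' \t\n'

def t_NUMBER(t):
    r'\d+'
    t.value = int(t.value)
    return t

def t_error(t):
    t.lexer.skip(1)

def p_items(p):
    '''items : items item
             | item'''

def p_item(p):
    '''item : LPAREN NAME RPAREN
            | LPAREN NUMBER RPAREN'''
    print("item:", p[2])

def p_error(p):
    if p:
        print("Syntax error at %r; skipping to next ')'" % p.value)
        # Panic mode: discard tokens up to the synchronizing RPAREN,
        # then put the parser back in its initial state.
        while True:
            tok = parser.token()
            if not tok or tok.type == 'RPAREN':
                break
        if tok:
            parser.restart()

lexer = lex.lex()
parser = yacc.yacc(debug=False)
parser.parse("(a) (1 2 3) (b)", lexer=lexer)

Run as written, this should print the first and last items and report one error for the malformed middle group, which is the same observable behavior the two panic-mode fixtures are checked against.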