5 files changed, 10 insertions, 1122 deletions
diff --git a/ply/__init__.py b/ply/__init__.py
index 23707c6..6f768b7 100644
--- a/ply/__init__.py
+++ b/ply/__init__.py
@@ -1,5 +1,5 @@
 # PLY package
 # Author: David Beazley (dave@dabeaz.com)
 
-__version__ = '3.11'
+__version__ = '4.0'
 __all__ = ['lex','yacc']
diff --git a/ply/cpp.py b/ply/cpp.py
deleted file mode 100644
index 50a44a1..0000000
--- a/ply/cpp.py
+++ /dev/null
@@ -1,974 +0,0 @@
-# -----------------------------------------------------------------------------
-# ply: cpp.py
-#
-# Copyright (C) 2001-2019
-# David M. Beazley (Dabeaz LLC)
-# All rights reserved.
-#
-# Latest version: https://github.com/dabeaz/ply
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-#   this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-#   this list of conditions and the following disclaimer in the documentation
-#   and/or other materials provided with the distribution.
-# * Neither the name of David Beazley or Dabeaz LLC may be used to
-#   endorse or promote products derived from this software without
-#   specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# -----------------------------------------------------------------------------
-
-# This module implements an ANSI-C style lexical preprocessor for PLY.
-# -----------------------------------------------------------------------------
-from __future__ import generators
-
-import sys
-
-# Some Python 3 compatibility shims
-if sys.version_info.major < 3:
-    STRING_TYPES = (str, unicode)
-else:
-    STRING_TYPES = str
-    xrange = range
-
-# -----------------------------------------------------------------------------
-# Default preprocessor lexer definitions.   These tokens are enough to get
-# a basic preprocessor working.   Other modules may import these if they want
-# -----------------------------------------------------------------------------
-
-tokens = (
-   'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND'
-)
-
-literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
-
-# Whitespace
-def t_CPP_WS(t):
-    r'\s+'
-    t.lexer.lineno += t.value.count("\n")
-    return t
-
-t_CPP_POUND = r'\#'
-t_CPP_DPOUND = r'\#\#'
-
-# Identifier
-t_CPP_ID = r'[A-Za-z_][\w_]*'
-
-# Integer literal
-def CPP_INTEGER(t):
-    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
-    return t
-
-t_CPP_INTEGER = CPP_INTEGER
-
-# Floating literal
-t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
-
-# String literal
-def t_CPP_STRING(t):
-    r'\"([^\\\n]|(\\(.|\n)))*?\"'
-    t.lexer.lineno += t.value.count("\n")
-    return t
-
-# Character constant 'c' or L'c'
-def t_CPP_CHAR(t):
-    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
-    t.lexer.lineno += t.value.count("\n")
-    return t
-
-# Comment
-def t_CPP_COMMENT1(t):
-    r'(/\*(.|\n)*?\*/)'
-    ncr = t.value.count("\n")
-    t.lexer.lineno += ncr
-    # replace with one space or a number of '\n'
-    t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' '
-    return t
-
-# Line comment
-def t_CPP_COMMENT2(t):
-    r'(//.*?(\n|$))'
-    # replace with '/n'
-    t.type = 'CPP_WS'; t.value = '\n'
-    return t
-
-def t_error(t):
-    t.type = t.value[0]
-    t.value = t.value[0]
-    t.lexer.skip(1)
-    return t
-
-import re
-import copy
-import time
-import os.path
-
-# -----------------------------------------------------------------------------
-# trigraph()
-#
-# Given an input string, this function replaces all trigraph sequences.
-# The following mapping is used:
-#
-#     ??=    #
-#     ??/    \
-#     ??'    ^
-#     ??(    [
-#     ??)    ]
-#     ??!    |
-#     ??<    {
-#     ??>    }
-#     ??-    ~
-# -----------------------------------------------------------------------------
-
-_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
-_trigraph_rep = {
-    '=':'#',
-    '/':'\\',
-    "'":'^',
-    '(':'[',
-    ')':']',
-    '!':'|',
-    '<':'{',
-    '>':'}',
-    '-':'~'
-}
-
-def trigraph(input):
-    return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input)
-
-# ------------------------------------------------------------------
-# Macro object
-#
-# This object holds information about preprocessor macros
-#
-#    .name      - Macro name (string)
-#    .value     - Macro value (a list of tokens)
-#    .arglist   - List of argument names
-#    .variadic  - Boolean indicating whether or not variadic macro
-#    .vararg    - Name of the variadic parameter
-#
-# When a macro is created, the macro replacement token sequence is
-# pre-scanned and used to create patch lists that are later used
-# during macro expansion
-# ------------------------------------------------------------------
-
-class Macro(object):
-    def __init__(self,name,value,arglist=None,variadic=False):
-        self.name = name
-        self.value = value
-        self.arglist = arglist
-        self.variadic = variadic
-        if variadic:
-            self.vararg = arglist[-1]
-        self.source = None
-
-# ------------------------------------------------------------------
-# Preprocessor object
-#
-# Object representing a preprocessor.  Contains macro definitions,
-# include directories, and other information
-# ------------------------------------------------------------------
-
-class Preprocessor(object):
-    def __init__(self,lexer=None):
-        if lexer is None:
-            lexer = lex.lexer
-        self.lexer = lexer
-        self.macros = { }
-        self.path = []
-        self.temp_path = []
-
-        # Probe the lexer for selected tokens
-        self.lexprobe()
-
-        tm = time.localtime()
-        self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
-        self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
-        self.parser = None
-
-    # -----------------------------------------------------------------------------
-    # tokenize()
-    #
-    # Utility function. Given a string of text, tokenize into a list of tokens
-    # -----------------------------------------------------------------------------
-
-    def tokenize(self,text):
-        tokens = []
-        self.lexer.input(text)
-        while True:
-            tok = self.lexer.token()
-            if not tok: break
-            tokens.append(tok)
-        return tokens
-
-    # ---------------------------------------------------------------------
-    # error()
-    #
-    # Report a preprocessor error/warning of some kind
-    # ----------------------------------------------------------------------
-
-    def error(self,file,line,msg):
-        print("%s:%d %s" % (file,line,msg))
-
-    # ----------------------------------------------------------------------
-    # lexprobe()
-    #
-    # This method probes the preprocessor lexer object to discover
-    # the token types of symbols that are important to the preprocessor.
-    # If this works right, the preprocessor will simply "work"
-    # with any suitable lexer regardless of how tokens have been named.
-    # ----------------------------------------------------------------------
-
-    def lexprobe(self):
-
-        # Determine the token type for identifiers
-        self.lexer.input("identifier")
-        tok = self.lexer.token()
-        if not tok or tok.value != "identifier":
-            print("Couldn't determine identifier type")
-        else:
-            self.t_ID = tok.type
-
-        # Determine the token type for integers
-        self.lexer.input("12345")
-        tok = self.lexer.token()
-        if not tok or int(tok.value) != 12345:
-            print("Couldn't determine integer type")
-        else:
-            self.t_INTEGER = tok.type
-            self.t_INTEGER_TYPE = type(tok.value)
-
-        # Determine the token type for strings enclosed in double quotes
-        self.lexer.input("\"filename\"")
-        tok = self.lexer.token()
-        if not tok or tok.value != "\"filename\"":
-            print("Couldn't determine string type")
-        else:
-            self.t_STRING = tok.type
-
-        # Determine the token type for whitespace--if any
-        self.lexer.input("  ")
-        tok = self.lexer.token()
-        if not tok or tok.value != "  ":
-            self.t_SPACE = None
-        else:
-            self.t_SPACE = tok.type
-
-        # Determine the token type for newlines
-        self.lexer.input("\n")
-        tok = self.lexer.token()
-        if not tok or tok.value != "\n":
-            self.t_NEWLINE = None
-            print("Couldn't determine token for newlines")
-        else:
-            self.t_NEWLINE = tok.type
-
-        self.t_WS = (self.t_SPACE, self.t_NEWLINE)
-
-        # Check for other characters used by the preprocessor
-        chars = [ '<','>','#','##','\\','(',')',',','.']
-        for c in chars:
-            self.lexer.input(c)
-            tok = self.lexer.token()
-            if not tok or tok.value != c:
-                print("Unable to lex '%s' required for preprocessor" % c)
-
-    # ----------------------------------------------------------------------
-    # add_path()
-    #
-    # Adds a search path to the preprocessor.
-    # ----------------------------------------------------------------------
-
-    def add_path(self,path):
-        self.path.append(path)
-
-    # ----------------------------------------------------------------------
-    # group_lines()
-    #
-    # Given an input string, this function splits it into lines.  Trailing whitespace
-    # is removed.   Any line ending with \ is grouped with the next line.  This
-    # function forms the lowest level of the preprocessor---grouping into text into
-    # a line-by-line format.
-    # ----------------------------------------------------------------------
-
-    def group_lines(self,input):
-        lex = self.lexer.clone()
-        lines = [x.rstrip() for x in input.splitlines()]
-        for i in xrange(len(lines)):
-            j = i+1
-            while lines[i].endswith('\\') and (j < len(lines)):
-                lines[i] = lines[i][:-1]+lines[j]
-                lines[j] = ""
-                j += 1
-
-        input = "\n".join(lines)
-        lex.input(input)
-        lex.lineno = 1
-
-        current_line = []
-        while True:
-            tok = lex.token()
-            if not tok:
-                break
-            current_line.append(tok)
-            if tok.type in self.t_WS and '\n' in tok.value:
-                yield current_line
-                current_line = []
-
-        if current_line:
-            yield current_line
-
-    # ----------------------------------------------------------------------
-    # tokenstrip()
-    #
-    # Remove leading/trailing whitespace tokens from a token list
-    # ----------------------------------------------------------------------
-
-    def tokenstrip(self,tokens):
-        i = 0
-        while i < len(tokens) and tokens[i].type in self.t_WS:
-            i += 1
-        del tokens[:i]
-        i = len(tokens)-1
-        while i >= 0 and tokens[i].type in self.t_WS:
-            i -= 1
-        del tokens[i+1:]
-        return tokens
-
-
-    # ----------------------------------------------------------------------
-    # collect_args()
-    #
-    # Collects comma separated arguments from a list of tokens.   The arguments
-    # must be enclosed in parenthesis.  Returns a tuple (tokencount,args,positions)
-    # where tokencount is the number of tokens consumed, args is a list of arguments,
-    # and positions is a list of integers containing the starting index of each
-    # argument.  Each argument is represented by a list of tokens.
-    #
-    # When collecting arguments, leading and trailing whitespace is removed
-    # from each argument.
-    #
-    # This function properly handles nested parenthesis and commas---these do not
-    # define new arguments.
-    # ----------------------------------------------------------------------
-
-    def collect_args(self,tokenlist):
-        args = []
-        positions = []
-        current_arg = []
-        nesting = 1
-        tokenlen = len(tokenlist)
-
-        # Search for the opening '('.
-        i = 0
-        while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
-            i += 1
-
-        if (i < tokenlen) and (tokenlist[i].value == '('):
-            positions.append(i+1)
-        else:
-            self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
-            return 0, [], []
-
-        i += 1
-
-        while i < tokenlen:
-            t = tokenlist[i]
-            if t.value == '(':
-                current_arg.append(t)
-                nesting += 1
-            elif t.value == ')':
-                nesting -= 1
-                if nesting == 0:
-                    if current_arg:
-                        args.append(self.tokenstrip(current_arg))
-                        positions.append(i)
-                    return i+1,args,positions
-                current_arg.append(t)
-            elif t.value == ',' and nesting == 1:
-                args.append(self.tokenstrip(current_arg))
-                positions.append(i+1)
-                current_arg = []
-            else:
-                current_arg.append(t)
-            i += 1
-
-        # Missing end argument
-        self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
-        return 0, [],[]
-
-    # ----------------------------------------------------------------------
-    # macro_prescan()
-    #
-    # Examine the macro value (token sequence) and identify patch points
-    # This is used to speed up macro expansion later on---we'll know
-    # right away where to apply patches to the value to form the expansion
-    # ----------------------------------------------------------------------
-
-    def macro_prescan(self,macro):
-        macro.patch     = []             # Standard macro arguments
-        macro.str_patch = []             # String conversion expansion
-        macro.var_comma_patch = []       # Variadic macro comma patch
-        i = 0
-        while i < len(macro.value):
-            if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
-                argnum = macro.arglist.index(macro.value[i].value)
-                # Conversion of argument to a string
-                if i > 0 and macro.value[i-1].value == '#':
-                    macro.value[i] = copy.copy(macro.value[i])
-                    macro.value[i].type = self.t_STRING
-                    del macro.value[i-1]
-                    macro.str_patch.append((argnum,i-1))
-                    continue
-                # Concatenation
-                elif (i > 0 and macro.value[i-1].value == '##'):
-                    macro.patch.append(('c',argnum,i-1))
-                    del macro.value[i-1]
-                    i -= 1
-                    continue
-                elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
-                    macro.patch.append(('c',argnum,i))
-                    del macro.value[i + 1]
-                    continue
-                # Standard expansion
-                else:
-                    macro.patch.append(('e',argnum,i))
-            elif macro.value[i].value == '##':
-                if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
-                        ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
-                        (macro.value[i+1].value == macro.vararg):
-                    macro.var_comma_patch.append(i-1)
-            i += 1
-        macro.patch.sort(key=lambda x: x[2],reverse=True)
-
-    # ----------------------------------------------------------------------
-    # macro_expand_args()
-    #
-    # Given a Macro and list of arguments (each a token list), this method
-    # returns an expanded version of a macro.  The return value is a token sequence
-    # representing the replacement macro tokens
-    # ----------------------------------------------------------------------
-
-    def macro_expand_args(self,macro,args,expanded):
-        # Make a copy of the macro token sequence
-        rep = [copy.copy(_x) for _x in macro.value]
-
-        # Make string expansion patches.  These do not alter the length of the replacement sequence
-
-        str_expansion = {}
-        for argnum, i in macro.str_patch:
-            if argnum not in str_expansion:
-                str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
-            rep[i] = copy.copy(rep[i])
-            rep[i].value = str_expansion[argnum]
-
-        # Make the variadic macro comma patch.  If the variadic macro argument is empty, we get rid
-        comma_patch = False
-        if macro.variadic and not args[-1]:
-            for i in macro.var_comma_patch:
-                rep[i] = None
-                comma_patch = True
-
-        # Make all other patches.   The order of these matters.  It is assumed that the patch list
-        # has been sorted in reverse order of patch location since replacements will cause the
-        # size of the replacement sequence to expand from the patch point.
-
-        expanded_args = { }
-        for ptype, argnum, i in macro.patch:
-            # Concatenation.   Argument is left unexpanded
-            if ptype == 'c':
-                rep[i:i+1] = args[argnum]
-            # Normal expansion.  Argument is macro expanded first
-            elif ptype == 'e':
-                if argnum not in expanded_args:
-                    expanded_args[argnum] = self.expand_macros(args[argnum],expanded)
-                rep[i:i+1] = expanded_args[argnum]
-
-        # Get rid of removed comma if necessary
-        if comma_patch:
-            rep = [_i for _i in rep if _i]
-
-        return rep
-
-
-    # ----------------------------------------------------------------------
-    # expand_macros()
-    #
-    # Given a list of tokens, this function performs macro expansion.
-    # The expanded argument is a dictionary that contains macros already
-    # expanded.  This is used to prevent infinite recursion.
-    # ----------------------------------------------------------------------
-
-    def expand_macros(self,tokens,expanded=None):
-        if expanded is None:
-            expanded = {}
-        i = 0
-        while i < len(tokens):
-            t = tokens[i]
-            if t.type == self.t_ID:
-                if t.value in self.macros and t.value not in expanded:
-                    # Yes, we found a macro match
-                    expanded[t.value] = True
-
-                    m = self.macros[t.value]
-                    if not m.arglist:
-                        # A simple macro
-                        ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
-                        for e in ex:
-                            e.lineno = t.lineno
-                        tokens[i:i+1] = ex
-                        i += len(ex)
-                    else:
-                        # A macro with arguments
-                        j = i + 1
-                        while j < len(tokens) and tokens[j].type in self.t_WS:
-                            j += 1
-                        if j < len(tokens) and tokens[j].value == '(':
-                            tokcount,args,positions = self.collect_args(tokens[j:])
-                            if not m.variadic and len(args) !=  len(m.arglist):
-                                self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
-                                i = j + tokcount
-                            elif m.variadic and len(args) < len(m.arglist)-1:
-                                if len(m.arglist) > 2:
-                                    self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
-                                else:
-                                    self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
-                                i = j + tokcount
-                            else:
-                                if m.variadic:
-                                    if len(args) == len(m.arglist)-1:
-                                        args.append([])
-                                    else:
-                                        args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
-                                        del args[len(m.arglist):]
-
-                                # Get macro replacement text
-                                rep = self.macro_expand_args(m,args,expanded)
-                                rep = self.expand_macros(rep,expanded)
-                                for r in rep:
-                                    r.lineno = t.lineno
-                                tokens[i:j+tokcount] = rep
-                                i += len(rep)
-                        else:
-                            # This is not a macro. It is just a word which
-                            # equals to name of the macro. Hence, go to the
-                            # next token.
-                            i += 1
-
-                    del expanded[t.value]
-                    continue
-                elif t.value == '__LINE__':
-                    t.type = self.t_INTEGER
-                    t.value = self.t_INTEGER_TYPE(t.lineno)
-
-            i += 1
-        return tokens
-
-    # ----------------------------------------------------------------------
-    # evalexpr()
-    #
-    # Evaluate an expression token sequence for the purposes of evaluating
-    # integral expressions.
-    # ----------------------------------------------------------------------
-
-    def evalexpr(self,tokens):
-        # tokens = tokenize(line)
-        # Search for defined macros
-        i = 0
-        while i < len(tokens):
-            if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
-                j = i + 1
-                needparen = False
-                result = "0L"
-                while j < len(tokens):
-                    if tokens[j].type in self.t_WS:
-                        j += 1
-                        continue
-                    elif tokens[j].type == self.t_ID:
-                        if tokens[j].value in self.macros:
-                            result = "1L"
-                        else:
-                            result = "0L"
-                        if not needparen: break
-                    elif tokens[j].value == '(':
-                        needparen = True
-                    elif tokens[j].value == ')':
-                        break
-                    else:
-                        self.error(self.source,tokens[i].lineno,"Malformed defined()")
-                    j += 1
-                tokens[i].type = self.t_INTEGER
-                tokens[i].value = self.t_INTEGER_TYPE(result)
-                del tokens[i+1:j+1]
-            i += 1
-        tokens = self.expand_macros(tokens)
-        return self.evalexpr_expanded(tokens)
-
-    # ----------------------------------------------------------------------
-    # evalexpr_expanded()
-    #
-    # Helper for evalexpr that evaluates the expression that had its macros
-    # and defined(...) expressions expanded by evalexpr
-    # ----------------------------------------------------------------------
-
-    def evalexpr_expanded(self, tokens):
-        for i,t in enumerate(tokens):
-            if t.type == self.t_ID:
-                tokens[i] = copy.copy(t)
-                tokens[i].type = self.t_INTEGER
-                tokens[i].value = self.t_INTEGER_TYPE("0")
-            elif t.type == self.t_INTEGER:
-                tokens[i] = copy.copy(t)
-                # Strip off any trailing suffixes
-                tokens[i].value = str(tokens[i].value)
-                while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
-                    tokens[i].value = tokens[i].value[:-1]
-
-        return self.evalexpr_string("".join([str(x.value) for x in tokens]))
-
-    # ----------------------------------------------------------------------
-    # evalexpr_string()
-    #
-    # Helper for evalexpr that evaluates a string expression
-    # This implementation does basic C->python conversion and then uses eval()
-    # ----------------------------------------------------------------------
-    def evalexpr_string(self, expr):
-        expr = expr.replace("&&"," and ")
-        expr = expr.replace("||"," or ")
-        expr = expr.replace("!"," not ")
-        expr = expr.replace(" not ="," !=")
-        try:
-            result = eval(expr)
-        except Exception:
-            self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression")
-            result = 0
-        return result
-
-    # ----------------------------------------------------------------------
-    # parsegen()
-    #
-    # Parse an input string/
-    # ----------------------------------------------------------------------
-    def parsegen(self,input,source=None):
-
-        # Replace trigraph sequences
-        t = trigraph(input)
-        lines = self.group_lines(t)
-
-        if not source:
-            source = ""
-
-        self.define("__FILE__ \"%s\"" % source)
-
-        self.source = source
-        chunk = []
-        enable = True
-        iftrigger = False
-        ifstack = []
-
-        for x in lines:
-            for i,tok in enumerate(x):
-                if tok.type not in self.t_WS: break
-            if tok.value == '#':
-                # Preprocessor directive
-
-                # insert necessary whitespace instead of eaten tokens
-                for tok in x:
-                    if tok.type in self.t_WS and '\n' in tok.value:
-                        chunk.append(tok)
-
-                dirtokens = self.tokenstrip(x[i+1:])
-                if dirtokens:
-                    name = dirtokens[0].value
-                    args = self.tokenstrip(dirtokens[1:])
-                else:
-                    name = ""
-                    args = []
-
-                if name == 'define':
-                    if enable:
-                        for tok in self.expand_macros(chunk):
-                            yield tok
-                        chunk = []
-                        self.define(args)
-                elif name == 'include':
-                    if enable:
-                        for tok in self.expand_macros(chunk):
-                            yield tok
-                        chunk = []
-                        oldfile = self.macros['__FILE__']
-                        for tok in self.include(args):
-                            yield tok
-                        self.macros['__FILE__'] = oldfile
-                        self.source = source
-                elif name == 'undef':
-                    if enable:
-                        for tok in self.expand_macros(chunk):
-                            yield tok
-                        chunk = []
-                        self.undef(args)
-                elif name == 'ifdef':
-                    ifstack.append((enable,iftrigger))
-                    if enable:
-                        if not args[0].value in self.macros:
-                            enable = False
-                            iftrigger = False
-                        else:
-                            iftrigger = True
-                elif name == 'ifndef':
-                    ifstack.append((enable,iftrigger))
-                    if enable:
-                        if args[0].value in self.macros:
-                            enable = False
-                            iftrigger = False
-                        else:
-                            iftrigger = True
-                elif name == 'if':
-                    ifstack.append((enable,iftrigger))
-                    if enable:
-                        result = self.evalexpr(args)
-                        if not result:
-                            enable = False
-                            iftrigger = False
-                        else:
-                            iftrigger = True
-                elif name == 'elif':
-                    if ifstack:
-                        if ifstack[-1][0]:     # We only pay attention if outer "if" allows this
-                            if enable:         # If already true, we flip enable False
-                                enable = False
-                            elif not iftrigger:   # If False, but not triggered yet, we'll check expression
-                                result = self.evalexpr(args)
-                                if result:
-                                    enable  = True
-                                    iftrigger = True
-                    else:
-                        self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")
-
-                elif name == 'else':
-                    if ifstack:
-                        if ifstack[-1][0]:
-                            if enable:
-                                enable = False
-                            elif not iftrigger:
-                                enable = True
-                                iftrigger = True
-                    else:
-                        self.error(self.source,dirtokens[0].lineno,"Misplaced #else")
-
-                elif name == 'endif':
-                    if ifstack:
-                        enable,iftrigger = ifstack.pop()
-                    else:
-                        self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
-                else:
-                    # Unknown preprocessor directive
-                    pass
-
-            else:
-                # Normal text
-                if enable:
-                    chunk.extend(x)
-
-        for tok in self.expand_macros(chunk):
-            yield tok
-        chunk = []
-
-    # ----------------------------------------------------------------------
-    # include()
-    #
-    # Implementation of file-inclusion
-    # ----------------------------------------------------------------------
-
-    def include(self,tokens):
-        # Try to extract the filename and then process an include file
-        if not tokens:
-            return
-        if tokens:
-            if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
-                tokens = self.expand_macros(tokens)
-
-            if tokens[0].value == '<':
-                # Include <...>
-                i = 1
-                while i < len(tokens):
-                    if tokens[i].value == '>':
-                        break
-                    i += 1
-                else:
-                    print("Malformed #include <...>")
-                    return
-                filename = "".join([x.value for x in tokens[1:i]])
-                path = self.path + [""] + self.temp_path
-            elif tokens[0].type == self.t_STRING:
-                filename = tokens[0].value[1:-1]
-                path = self.temp_path + [""] + self.path
-            else:
-                print("Malformed #include statement")
-                return
-        for p in path:
-            iname = os.path.join(p,filename)
-            try:
-                data = self.read_include_file(iname)
-                dname = os.path.dirname(iname)
-                if dname:
-                    self.temp_path.insert(0,dname)
-                for tok in self.parsegen(data,filename):
-                    yield tok
-                if dname:
-                    del self.temp_path[0]
-                break
-            except IOError:
-                pass
-        else:
-            print("Couldn't find '%s'" % filename)
-
-    # ----------------------------------------------------------------------
-    # read_include_file()
-    #
-    # Reads a source file for inclusion using #include
-    # Could be overridden to e.g. customize encoding, limit access to
-    # certain paths on the filesystem, or provide the contents of system
-    # include files
-    # ----------------------------------------------------------------------
-
-    def read_include_file(self, filepath):
-        with open(filepath, 'r', encoding='utf-8', errors='surrogateescape') as file:
-            return file.read()
-
-    # ----------------------------------------------------------------------
-    # define()
-    #
-    # Define a new macro
-    # ----------------------------------------------------------------------
-
-    def define(self,tokens):
-        if isinstance(tokens,STRING_TYPES):
-            tokens = self.tokenize(tokens)
-
-        linetok = tokens
-        try:
-            name = linetok[0]
-            if len(linetok) > 1:
-                mtype = linetok[1]
-            else:
-                mtype = None
-            if not mtype:
-                m = Macro(name.value,[])
-                self.macros[name.value] = m
-            elif mtype.type in self.t_WS:
-                # A normal macro
-                m = Macro(name.value,self.tokenstrip(linetok[2:]))
-                self.macros[name.value] = m
-            elif mtype.value == '(':
-                # A macro with arguments
-                tokcount, args, positions = self.collect_args(linetok[1:])
-                variadic = False
-                for a in args:
-                    if variadic:
-                        print("No more arguments may follow a variadic argument")
-                        break
-                    astr = "".join([str(_i.value) for _i in a])
-                    if astr == "...":
-                        variadic = True
-                        a[0].type = self.t_ID
-                        a[0].value = '__VA_ARGS__'
-                        variadic = True
-                        del a[1:]
-                        continue
-                    elif astr[-3:] == "..." and a[0].type == self.t_ID:
-                        variadic = True
-                        del a[1:]
-                        # If, for some reason, "." is part of the identifier, strip off the name for the purposes
-                        # of macro expansion
-                        if a[0].value[-3:] == '...':
-                            a[0].value = a[0].value[:-3]
-                        continue
-                    if len(a) > 1 or a[0].type != self.t_ID:
-                        print("Invalid macro argument")
-                        break
-                else:
-                    mvalue = self.tokenstrip(linetok[1+tokcount:])
-                    i = 0
-                    while i < len(mvalue):
-                        if i+1 < len(mvalue):
-                            if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
-                                del mvalue[i]
-                                continue
-                            elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
-                                del mvalue[i+1]
-                        i += 1
-                    m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
-                    self.macro_prescan(m)
-                    self.macros[name.value] = m
-            else:
-                print("Bad macro definition")
-        except LookupError:
-            print("Bad macro definition")
-
-    # ----------------------------------------------------------------------
-    # undef()
-    #
-    # Undefine a macro
-    # ----------------------------------------------------------------------
-
-    def undef(self,tokens):
-        id = tokens[0].value
-        try:
-            del self.macros[id]
-        except LookupError:
-            pass
-
-    # ----------------------------------------------------------------------
-    # parse()
-    #
-    # Parse input text.
-    # ----------------------------------------------------------------------
-    def parse(self,input,source=None,ignore={}):
-        self.ignore = ignore
-        self.parser = self.parsegen(input,source)
-
-    # ----------------------------------------------------------------------
-    # token()
-    #
-    # Method to return individual tokens
-    # ----------------------------------------------------------------------
-    def token(self):
-        try:
-            while True:
-                tok = next(self.parser)
-                if tok.type not in self.ignore: return tok
-        except StopIteration:
-            self.parser = None
-            return None
-
-if __name__ == '__main__':
-    import ply.lex as lex
-    lexer = lex.lex()
-
-    # Run a preprocessor
-    import sys
-    with open(sys.argv[1]) as f:
-        input = f.read()
-
-    p = Preprocessor(lexer)
-    p.parse(input,sys.argv[1])
-    while True:
-        tok = p.token()
-        if not tok: break
-        print(p.source, tok)
diff --git a/ply/ctokens.py b/ply/ctokens.py
deleted file mode 100644
index b265e59..0000000
--- a/ply/ctokens.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# ----------------------------------------------------------------------
-# ctokens.py
-#
-# Token specifications for symbols in ANSI C and C++.  This file is
-# meant to be used as a library in other tokenizers.
-# ----------------------------------------------------------------------
-
-# Reserved words
-
-tokens = [
-    # Literals (identifier, integer constant, float constant, string constant, char const)
-    'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',
-
-    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
-    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
-    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
-    'LOR', 'LAND', 'LNOT',
-    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
-
-    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
-    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
-    'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
-
-    # Increment/decrement (++,--)
-    'INCREMENT', 'DECREMENT',
-
-    # Structure dereference (->)
-    'ARROW',
-
-    # Ternary operator (?)
-    'TERNARY',
-
-    # Delimeters ( ) [ ] { } , . ; :
-    'LPAREN', 'RPAREN',
-    'LBRACKET', 'RBRACKET',
-    'LBRACE', 'RBRACE',
-    'COMMA', 'PERIOD', 'SEMI', 'COLON',
-
-    # Ellipsis (...)
-    'ELLIPSIS',
-]
-
-# Operators
-t_PLUS             = r'\+'
-t_MINUS            = r'-'
-t_TIMES            = r'\*'
-t_DIVIDE           = r'/'
-t_MODULO           = r'%'
-t_OR               = r'\|'
-t_AND              = r'&'
-t_NOT              = r'~'
-t_XOR              = r'\^'
-t_LSHIFT           = r'<<'
-t_RSHIFT           = r'>>'
-t_LOR              = r'\|\|'
-t_LAND             = r'&&'
-t_LNOT             = r'!'
-t_LT               = r'<'
-t_GT               = r'>'
-t_LE               = r'<='
-t_GE               = r'>='
-t_EQ               = r'=='
-t_NE               = r'!='
-
-# Assignment operators
-
-t_EQUALS           = r'='
-t_TIMESEQUAL       = r'\*='
-t_DIVEQUAL         = r'/='
-t_MODEQUAL         = r'%='
-t_PLUSEQUAL        = r'\+='
-t_MINUSEQUAL       = r'-='
-t_LSHIFTEQUAL      = r'<<='
-t_RSHIFTEQUAL      = r'>>='
-t_ANDEQUAL         = r'&='
-t_OREQUAL          = r'\|='
-t_XOREQUAL         = r'\^='
-
-# Increment/decrement
-t_INCREMENT        = r'\+\+'
-t_DECREMENT        = r'--'
-
-# ->
-t_ARROW            = r'->'
-
-# ?
-t_TERNARY          = r'\?'
-
-# Delimeters
-t_LPAREN           = r'\('
-t_RPAREN           = r'\)'
-t_LBRACKET         = r'\['
-t_RBRACKET         = r'\]'
-t_LBRACE           = r'\{'
-t_RBRACE           = r'\}'
-t_COMMA            = r','
-t_PERIOD           = r'\.'
-t_SEMI             = r';'
-t_COLON            = r':'
-t_ELLIPSIS         = r'\.\.\.'
-
-# Identifiers
-t_ID = r'[A-Za-z_][A-Za-z0-9_]*'
-
-# Integer literal
-t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
-
-# Floating literal
-t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
-
-# String literal
-t_STRING = r'\"([^\\\n]|(\\.))*?\"'
-
-# Character constant 'c' or L'c'
-t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''
-
-# Comment (C-Style)
-def t_COMMENT(t):
-    r'/\*(.|\n)*?\*/'
-    t.lexer.lineno += t.value.count('\n')
-    return t
-
-# Comment (C++-Style)
-def t_CPPCOMMENT(t):
-    r'//.*\n'
-    t.lexer.lineno += 1
-    return t
diff --git a/ply/lex.py b/ply/lex.py
index bc9ed34..39095eb 100644
--- a/ply/lex.py
+++ b/ply/lex.py
@@ -1,7 +1,7 @@
 # -----------------------------------------------------------------------------
 # ply: lex.py
 #
-# Copyright (C) 2001-2019
+# Copyright (C) 2001-2020
 # David M. Beazley (Dabeaz LLC)
 # All rights reserved.
 #
@@ -33,7 +33,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # -----------------------------------------------------------------------------
 
-__version__    = '3.11'
+__version__    = '4.0'
 __tabversion__ = '3.10'
 
 import re
@@ -43,13 +43,8 @@ import copy
 import os
 import inspect
 
-# This tuple contains known string types
-try:
-    # Python 2.6
-    StringTypes = (types.StringType, types.UnicodeType)
-except AttributeError:
-    # Python 3.0
-    StringTypes = (str, bytes)
+# This tuple contains acceptable string types
+StringTypes = (str, bytes)
 
 # This regular expression is used to match valid token names
 _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
diff --git a/ply/yacc.py b/ply/yacc.py
index 534bb3e..a5024eb 100644
--- a/ply/yacc.py
+++ b/ply/yacc.py
@@ -1,7 +1,7 @@
 # -----------------------------------------------------------------------------
 # ply: yacc.py
 #
-# Copyright (C) 2001-2019
+# Copyright (C) 2001-2020
 # David M. Beazley (Dabeaz LLC)
 # All rights reserved.
 #
@@ -68,7 +68,7 @@ import os.path
 import inspect
 import warnings
 
-__version__    = '3.11'
+__version__    = '4.0'
 __tabversion__ = '3.10'
 
 #-----------------------------------------------------------------------------
@@ -93,12 +93,6 @@ resultlimit = 40               # Size limit of results when running in debug mod
 
 pickle_protocol = 0            # Protocol to use when writing pickle files
 
-# String type-checking compatibility
-if sys.version_info[0] < 3:
-    string_types = basestring
-else:
-    string_types = str
-
 MAXINT = sys.maxsize
 
 # This object is a stand-in for a logging object created by the
@@ -3029,7 +3023,7 @@ class ParserReflect(object):
     # Validate the start symbol
     def validate_start(self):
         if self.start is not None:
-            if not isinstance(self.start, string_types):
+            if not isinstance(self.start, str):
                 self.log.error("'start' must be a string")
 
     # Look for error handler
@@ -3115,12 +3109,12 @@ class ParserReflect(object):
                     self.error = True
                     return
                 assoc = p[0]
-                if not isinstance(assoc, string_types):
+                if not isinstance(assoc, str):
                     self.log.error('precedence associativity must be a string')
                     self.error = True
                     return
                 for term in p[1:]:
-                    if not isinstance(term, string_types):
+                    if not isinstance(term, str):
                         self.log.error('precedence items must be strings')
                         self.error = True
                         return