Diffstat (limited to 'ply')
-rw-r--r--  ply/__init__.py     2
-rw-r--r--  ply/cpp.py        974
-rw-r--r--  ply/ctokens.py    127
-rw-r--r--  ply/lex.py         13
-rw-r--r--  ply/yacc.py        16
5 files changed, 10 insertions, 1122 deletions
diff --git a/ply/__init__.py b/ply/__init__.py
index 23707c6..6f768b7 100644
--- a/ply/__init__.py
+++ b/ply/__init__.py
@@ -1,5 +1,5 @@
# PLY package
# Author: David Beazley (dave@dabeaz.com)
-__version__ = '3.11'
+__version__ = '4.0'
__all__ = ['lex','yacc']
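The only change to ply/__init__.py is the version bump from 3.11 to 4.0, which marks the switch to a Python-3-only codebase. Downstream code that must straddle both series can gate on the attribute this file defines; a minimal sketch (the pinning policy shown is illustrative, not part of this commit):

    import ply

    major = int(ply.__version__.split('.')[0])
    if major >= 4:
        # PLY 4.x: Python 3 only; cpp.py and ctokens.py are no longer shipped
        from ply import lex, yacc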
diff --git a/ply/cpp.py b/ply/cpp.py
deleted file mode 100644
index 50a44a1..0000000
--- a/ply/cpp.py
+++ /dev/null
@@ -1,974 +0,0 @@
-# -----------------------------------------------------------------------------
-# ply: cpp.py
-#
-# Copyright (C) 2001-2019
-# David M. Beazley (Dabeaz LLC)
-# All rights reserved.
-#
-# Latest version: https://github.com/dabeaz/ply
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-# * Neither the name of David Beazley or Dabeaz LLC may be used to
-# endorse or promote products derived from this software without
-# specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# -----------------------------------------------------------------------------
-
-# This module implements an ANSI-C style lexical preprocessor for PLY.
-# -----------------------------------------------------------------------------
-from __future__ import generators
-
-import sys
-
-# Some Python 3 compatibility shims
-if sys.version_info.major < 3:
-    STRING_TYPES = (str, unicode)
-else:
-    STRING_TYPES = str
-    xrange = range
-
-# -----------------------------------------------------------------------------
-# Default preprocessor lexer definitions. These tokens are enough to get
-# a basic preprocessor working. Other modules may import these if they want
-# -----------------------------------------------------------------------------
-
-tokens = (
-    'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND'
-)
-
-literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
-
-# Whitespace
-def t_CPP_WS(t):
-    r'\s+'
-    t.lexer.lineno += t.value.count("\n")
-    return t
-
-t_CPP_POUND = r'\#'
-t_CPP_DPOUND = r'\#\#'
-
-# Identifier
-t_CPP_ID = r'[A-Za-z_][\w_]*'
-
-# Integer literal
-def CPP_INTEGER(t):
-    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
-    return t
-
-t_CPP_INTEGER = CPP_INTEGER
-
-# Floating literal
-t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
-
-# String literal
-def t_CPP_STRING(t):
- r'\"([^\\\n]|(\\(.|\n)))*?\"'
- t.lexer.lineno += t.value.count("\n")
- return t
-
-# Character constant 'c' or L'c'
-def t_CPP_CHAR(t):
-    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
-    t.lexer.lineno += t.value.count("\n")
-    return t
-
-# Comment
-def t_CPP_COMMENT1(t):
-    r'(/\*(.|\n)*?\*/)'
-    ncr = t.value.count("\n")
-    t.lexer.lineno += ncr
-    # replace with one space or a number of '\n'
-    t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' '
-    return t
-
-# Line comment
-def t_CPP_COMMENT2(t):
-    r'(//.*?(\n|$))'
-    # replace with '\n'
-    t.type = 'CPP_WS'; t.value = '\n'
-    return t
-
-def t_error(t):
-    t.type = t.value[0]
-    t.value = t.value[0]
-    t.lexer.skip(1)
-    return t
-
-import re
-import copy
-import time
-import os.path
-
-# -----------------------------------------------------------------------------
-# trigraph()
-#
-# Given an input string, this function replaces all trigraph sequences.
-# The following mapping is used:
-#
-#     ??=    #
-#     ??/    \
-#     ??'    ^
-#     ??(    [
-#     ??)    ]
-#     ??!    |
-#     ??<    {
-#     ??>    }
-#     ??-    ~
-# -----------------------------------------------------------------------------
-
-_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
-_trigraph_rep = {
-    '=':'#',
-    '/':'\\',
-    "'":'^',
-    '(':'[',
-    ')':']',
-    '!':'|',
-    '<':'{',
-    '>':'}',
-    '-':'~'
-}
-
-def trigraph(input):
-    return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input)
-
-# ------------------------------------------------------------------
-# Macro object
-#
-# This object holds information about preprocessor macros
-#
-#    .name     - Macro name (string)
-#    .value    - Macro value (a list of tokens)
-#    .arglist  - List of argument names
-#    .variadic - Boolean indicating whether or not the macro is variadic
-#    .vararg   - Name of the variadic parameter
-#
-# When a macro is created, the macro replacement token sequence is
-# pre-scanned and used to create patch lists that are later used
-# during macro expansion
-# ------------------------------------------------------------------
-
-class Macro(object):
-    def __init__(self,name,value,arglist=None,variadic=False):
-        self.name = name
-        self.value = value
-        self.arglist = arglist
-        self.variadic = variadic
-        if variadic:
-            self.vararg = arglist[-1]
-        self.source = None
-
-# ------------------------------------------------------------------
-# Preprocessor object
-#
-# Object representing a preprocessor. Contains macro definitions,
-# include directories, and other information
-# ------------------------------------------------------------------
-
-class Preprocessor(object):
-    def __init__(self,lexer=None):
-        if lexer is None:
-            lexer = lex.lexer
-        self.lexer = lexer
-        self.macros = { }
-        self.path = []
-        self.temp_path = []
-
-        # Probe the lexer for selected tokens
-        self.lexprobe()
-
-        tm = time.localtime()
-        self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
-        self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
-        self.parser = None
-
-    # ----------------------------------------------------------------------
-    # tokenize()
-    #
-    # Utility function. Given a string of text, tokenize into a list of tokens
-    # ----------------------------------------------------------------------
-
-    def tokenize(self,text):
-        tokens = []
-        self.lexer.input(text)
-        while True:
-            tok = self.lexer.token()
-            if not tok: break
-            tokens.append(tok)
-        return tokens
-
-    # ----------------------------------------------------------------------
-    # error()
-    #
-    # Report a preprocessor error/warning of some kind
-    # ----------------------------------------------------------------------
-
-    def error(self,file,line,msg):
-        print("%s:%d %s" % (file,line,msg))
-
-    # ----------------------------------------------------------------------
-    # lexprobe()
-    #
-    # This method probes the preprocessor lexer object to discover
-    # the token types of symbols that are important to the preprocessor.
-    # If this works right, the preprocessor will simply "work"
-    # with any suitable lexer regardless of how tokens have been named.
-    # ----------------------------------------------------------------------
-
-    def lexprobe(self):
-
-        # Determine the token type for identifiers
-        self.lexer.input("identifier")
-        tok = self.lexer.token()
-        if not tok or tok.value != "identifier":
-            print("Couldn't determine identifier type")
-        else:
-            self.t_ID = tok.type
-
-        # Determine the token type for integers
-        self.lexer.input("12345")
-        tok = self.lexer.token()
-        if not tok or int(tok.value) != 12345:
-            print("Couldn't determine integer type")
-        else:
-            self.t_INTEGER = tok.type
-            self.t_INTEGER_TYPE = type(tok.value)
-
-        # Determine the token type for strings enclosed in double quotes
-        self.lexer.input("\"filename\"")
-        tok = self.lexer.token()
-        if not tok or tok.value != "\"filename\"":
-            print("Couldn't determine string type")
-        else:
-            self.t_STRING = tok.type
-
-        # Determine the token type for whitespace--if any
-        self.lexer.input("  ")
-        tok = self.lexer.token()
-        if not tok or tok.value != "  ":
-            self.t_SPACE = None
-        else:
-            self.t_SPACE = tok.type
-
-        # Determine the token type for newlines
-        self.lexer.input("\n")
-        tok = self.lexer.token()
-        if not tok or tok.value != "\n":
-            self.t_NEWLINE = None
-            print("Couldn't determine token for newlines")
-        else:
-            self.t_NEWLINE = tok.type
-
-        self.t_WS = (self.t_SPACE, self.t_NEWLINE)
-
-        # Check for other characters used by the preprocessor
-        chars = [ '<','>','#','##','\\','(',')',',','.']
-        for c in chars:
-            self.lexer.input(c)
-            tok = self.lexer.token()
-            if not tok or tok.value != c:
-                print("Unable to lex '%s' required for preprocessor" % c)
-
-    # ----------------------------------------------------------------------
-    # add_path()
-    #
-    # Adds a search path to the preprocessor.
-    # ----------------------------------------------------------------------
-
-    def add_path(self,path):
-        self.path.append(path)
-
-    # ----------------------------------------------------------------------
-    # group_lines()
-    #
-    # Given an input string, this function splits it into lines. Trailing whitespace
-    # is removed. Any line ending with \ is grouped with the next line. This
-    # function forms the lowest level of the preprocessor---grouping text into
-    # a line-by-line format.
-    # ----------------------------------------------------------------------
-
-    def group_lines(self,input):
-        lex = self.lexer.clone()
-        lines = [x.rstrip() for x in input.splitlines()]
-        for i in xrange(len(lines)):
-            j = i+1
-            while lines[i].endswith('\\') and (j < len(lines)):
-                lines[i] = lines[i][:-1]+lines[j]
-                lines[j] = ""
-                j += 1
-
-        input = "\n".join(lines)
-        lex.input(input)
-        lex.lineno = 1
-
-        current_line = []
-        while True:
-            tok = lex.token()
-            if not tok:
-                break
-            current_line.append(tok)
-            if tok.type in self.t_WS and '\n' in tok.value:
-                yield current_line
-                current_line = []
-
-        if current_line:
-            yield current_line
-
-    # ----------------------------------------------------------------------
-    # tokenstrip()
-    #
-    # Remove leading/trailing whitespace tokens from a token list
-    # ----------------------------------------------------------------------
-
-    def tokenstrip(self,tokens):
-        i = 0
-        while i < len(tokens) and tokens[i].type in self.t_WS:
-            i += 1
-        del tokens[:i]
-        i = len(tokens)-1
-        while i >= 0 and tokens[i].type in self.t_WS:
-            i -= 1
-        del tokens[i+1:]
-        return tokens
-
-
-    # ----------------------------------------------------------------------
-    # collect_args()
-    #
-    # Collects comma separated arguments from a list of tokens. The arguments
-    # must be enclosed in parentheses. Returns a tuple (tokencount,args,positions)
-    # where tokencount is the number of tokens consumed, args is a list of arguments,
-    # and positions is a list of integers containing the starting index of each
-    # argument. Each argument is represented by a list of tokens.
-    #
-    # When collecting arguments, leading and trailing whitespace is removed
-    # from each argument.
-    #
-    # This function properly handles nested parentheses and commas---these do not
-    # define new arguments.
-    # ----------------------------------------------------------------------
-
-    def collect_args(self,tokenlist):
-        args = []
-        positions = []
-        current_arg = []
-        nesting = 1
-        tokenlen = len(tokenlist)
-
-        # Search for the opening '('.
-        i = 0
-        while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
-            i += 1
-
-        if (i < tokenlen) and (tokenlist[i].value == '('):
-            positions.append(i+1)
-        else:
-            self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
-            return 0, [], []
-
-        i += 1
-
-        while i < tokenlen:
-            t = tokenlist[i]
-            if t.value == '(':
-                current_arg.append(t)
-                nesting += 1
-            elif t.value == ')':
-                nesting -= 1
-                if nesting == 0:
-                    if current_arg:
-                        args.append(self.tokenstrip(current_arg))
-                    positions.append(i)
-                    return i+1,args,positions
-                current_arg.append(t)
-            elif t.value == ',' and nesting == 1:
-                args.append(self.tokenstrip(current_arg))
-                positions.append(i+1)
-                current_arg = []
-            else:
-                current_arg.append(t)
-            i += 1
-
-        # Missing end argument
-        self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
-        return 0, [],[]
-
-    # ----------------------------------------------------------------------
-    # macro_prescan()
-    #
-    # Examine the macro value (token sequence) and identify patch points
-    # This is used to speed up macro expansion later on---we'll know
-    # right away where to apply patches to the value to form the expansion
-    # ----------------------------------------------------------------------
-
-    def macro_prescan(self,macro):
-        macro.patch = []             # Standard macro arguments
-        macro.str_patch = []         # String conversion expansion
-        macro.var_comma_patch = []   # Variadic macro comma patch
-        i = 0
-        while i < len(macro.value):
-            if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
-                argnum = macro.arglist.index(macro.value[i].value)
-                # Conversion of argument to a string
-                if i > 0 and macro.value[i-1].value == '#':
-                    macro.value[i] = copy.copy(macro.value[i])
-                    macro.value[i].type = self.t_STRING
-                    del macro.value[i-1]
-                    macro.str_patch.append((argnum,i-1))
-                    continue
-                # Concatenation
-                elif (i > 0 and macro.value[i-1].value == '##'):
-                    macro.patch.append(('c',argnum,i-1))
-                    del macro.value[i-1]
-                    i -= 1
-                    continue
-                elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
-                    macro.patch.append(('c',argnum,i))
-                    del macro.value[i + 1]
-                    continue
-                # Standard expansion
-                else:
-                    macro.patch.append(('e',argnum,i))
-            elif macro.value[i].value == '##':
-                if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
-                        ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
-                        (macro.value[i+1].value == macro.vararg):
-                    macro.var_comma_patch.append(i-1)
-            i += 1
-        macro.patch.sort(key=lambda x: x[2],reverse=True)
-
-    # ----------------------------------------------------------------------
-    # macro_expand_args()
-    #
-    # Given a Macro and list of arguments (each a token list), this method
-    # returns an expanded version of a macro. The return value is a token sequence
-    # representing the replacement macro tokens
-    # ----------------------------------------------------------------------
-
-    def macro_expand_args(self,macro,args,expanded):
-        # Make a copy of the macro token sequence
-        rep = [copy.copy(_x) for _x in macro.value]
-
-        # Make string expansion patches. These do not alter the length of the replacement sequence
-
-        str_expansion = {}
-        for argnum, i in macro.str_patch:
-            if argnum not in str_expansion:
-                str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
-            rep[i] = copy.copy(rep[i])
-            rep[i].value = str_expansion[argnum]
-
-        # Make the variadic macro comma patch. If the variadic macro argument is empty, we get rid of the preceding comma
-        comma_patch = False
-        if macro.variadic and not args[-1]:
-            for i in macro.var_comma_patch:
-                rep[i] = None
-            comma_patch = True
-
-        # Make all other patches. The order of these matters. It is assumed that the patch list
-        # has been sorted in reverse order of patch location since replacements will cause the
-        # size of the replacement sequence to expand from the patch point.
-
-        expanded_args = { }
-        for ptype, argnum, i in macro.patch:
-            # Concatenation. Argument is left unexpanded
-            if ptype == 'c':
-                rep[i:i+1] = args[argnum]
-            # Normal expansion. Argument is macro expanded first
-            elif ptype == 'e':
-                if argnum not in expanded_args:
-                    expanded_args[argnum] = self.expand_macros(args[argnum],expanded)
-                rep[i:i+1] = expanded_args[argnum]
-
-        # Get rid of removed comma if necessary
-        if comma_patch:
-            rep = [_i for _i in rep if _i]
-
-        return rep
-
-
-    # ----------------------------------------------------------------------
-    # expand_macros()
-    #
-    # Given a list of tokens, this function performs macro expansion.
-    # The expanded argument is a dictionary that contains macros already
-    # expanded. This is used to prevent infinite recursion.
-    # ----------------------------------------------------------------------
-
-    def expand_macros(self,tokens,expanded=None):
-        if expanded is None:
-            expanded = {}
-        i = 0
-        while i < len(tokens):
-            t = tokens[i]
-            if t.type == self.t_ID:
-                if t.value in self.macros and t.value not in expanded:
-                    # Yes, we found a macro match
-                    expanded[t.value] = True
-
-                    m = self.macros[t.value]
-                    if not m.arglist:
-                        # A simple macro
-                        ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
-                        for e in ex:
-                            e.lineno = t.lineno
-                        tokens[i:i+1] = ex
-                        i += len(ex)
-                    else:
-                        # A macro with arguments
-                        j = i + 1
-                        while j < len(tokens) and tokens[j].type in self.t_WS:
-                            j += 1
-                        if j < len(tokens) and tokens[j].value == '(':
-                            tokcount,args,positions = self.collect_args(tokens[j:])
-                            if not m.variadic and len(args) != len(m.arglist):
-                                self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
-                                i = j + tokcount
-                            elif m.variadic and len(args) < len(m.arglist)-1:
-                                if len(m.arglist) > 2:
-                                    self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
-                                else:
-                                    self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
-                                i = j + tokcount
-                            else:
-                                if m.variadic:
-                                    if len(args) == len(m.arglist)-1:
-                                        args.append([])
-                                    else:
-                                        args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
-                                        del args[len(m.arglist):]
-
-                                # Get macro replacement text
-                                rep = self.macro_expand_args(m,args,expanded)
-                                rep = self.expand_macros(rep,expanded)
-                                for r in rep:
-                                    r.lineno = t.lineno
-                                tokens[i:j+tokcount] = rep
-                                i += len(rep)
-                        else:
-                            # This is not a macro invocation. It is just a
-                            # word that happens to match the macro's name,
-                            # so move on to the next token.
-                            i += 1
-
-                    del expanded[t.value]
-                    continue
-                elif t.value == '__LINE__':
-                    t.type = self.t_INTEGER
-                    t.value = self.t_INTEGER_TYPE(t.lineno)
-
-            i += 1
-        return tokens
-
-    # ----------------------------------------------------------------------
-    # evalexpr()
-    #
-    # Evaluate an expression token sequence for the purposes of evaluating
-    # integral expressions.
-    # ----------------------------------------------------------------------
-
-    def evalexpr(self,tokens):
-        # tokens = tokenize(line)
-        # Search for defined macros
-        i = 0
-        while i < len(tokens):
-            if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
-                j = i + 1
-                needparen = False
-                result = "0L"
-                while j < len(tokens):
-                    if tokens[j].type in self.t_WS:
-                        j += 1
-                        continue
-                    elif tokens[j].type == self.t_ID:
-                        if tokens[j].value in self.macros:
-                            result = "1L"
-                        else:
-                            result = "0L"
-                        if not needparen: break
-                    elif tokens[j].value == '(':
-                        needparen = True
-                    elif tokens[j].value == ')':
-                        break
-                    else:
-                        self.error(self.source,tokens[i].lineno,"Malformed defined()")
-                    j += 1
-                tokens[i].type = self.t_INTEGER
-                tokens[i].value = self.t_INTEGER_TYPE(result)
-                del tokens[i+1:j+1]
-            i += 1
-        tokens = self.expand_macros(tokens)
-        return self.evalexpr_expanded(tokens)
-
-    # ----------------------------------------------------------------------
-    # evalexpr_expanded()
-    #
-    # Helper for evalexpr that evaluates the expression that had its macros
-    # and defined(...) expressions expanded by evalexpr
-    # ----------------------------------------------------------------------
-
-    def evalexpr_expanded(self, tokens):
-        for i,t in enumerate(tokens):
-            if t.type == self.t_ID:
-                tokens[i] = copy.copy(t)
-                tokens[i].type = self.t_INTEGER
-                tokens[i].value = self.t_INTEGER_TYPE("0")
-            elif t.type == self.t_INTEGER:
-                tokens[i] = copy.copy(t)
-                # Strip off any trailing suffixes
-                tokens[i].value = str(tokens[i].value)
-                while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
-                    tokens[i].value = tokens[i].value[:-1]
-
-        return self.evalexpr_string("".join([str(x.value) for x in tokens]))
-
-    # ----------------------------------------------------------------------
-    # evalexpr_string()
-    #
-    # Helper for evalexpr that evaluates a string expression
-    # This implementation does basic C->Python conversion and then uses eval()
-    # ----------------------------------------------------------------------
-    def evalexpr_string(self, expr):
-        expr = expr.replace("&&"," and ")
-        expr = expr.replace("||"," or ")
-        expr = expr.replace("!"," not ")
-        expr = expr.replace(" not ="," !=")
-        try:
-            result = eval(expr)
-        except Exception:
-            self.error(self.source,0,"Couldn't evaluate expression")
-            result = 0
-        return result
-
-    # ----------------------------------------------------------------------
-    # parsegen()
-    #
-    # Parse an input string.
-    # ----------------------------------------------------------------------
-    def parsegen(self,input,source=None):
-
-        # Replace trigraph sequences
-        t = trigraph(input)
-        lines = self.group_lines(t)
-
-        if not source:
-            source = ""
-
-        self.define("__FILE__ \"%s\"" % source)
-
-        self.source = source
-        chunk = []
-        enable = True
-        iftrigger = False
-        ifstack = []
-
-        for x in lines:
-            for i,tok in enumerate(x):
-                if tok.type not in self.t_WS: break
-            if tok.value == '#':
-                # Preprocessor directive
-
-                # insert necessary whitespace in place of the consumed tokens
-                for tok in x:
-                    if tok.type in self.t_WS and '\n' in tok.value:
-                        chunk.append(tok)
-
-                dirtokens = self.tokenstrip(x[i+1:])
-                if dirtokens:
-                    name = dirtokens[0].value
-                    args = self.tokenstrip(dirtokens[1:])
-                else:
-                    name = ""
-                    args = []
-
-                if name == 'define':
-                    if enable:
-                        for tok in self.expand_macros(chunk):
-                            yield tok
-                        chunk = []
-                        self.define(args)
-                elif name == 'include':
-                    if enable:
-                        for tok in self.expand_macros(chunk):
-                            yield tok
-                        chunk = []
-                        oldfile = self.macros['__FILE__']
-                        for tok in self.include(args):
-                            yield tok
-                        self.macros['__FILE__'] = oldfile
-                        self.source = source
-                elif name == 'undef':
-                    if enable:
-                        for tok in self.expand_macros(chunk):
-                            yield tok
-                        chunk = []
-                        self.undef(args)
-                elif name == 'ifdef':
-                    ifstack.append((enable,iftrigger))
-                    if enable:
-                        if not args[0].value in self.macros:
-                            enable = False
-                            iftrigger = False
-                        else:
-                            iftrigger = True
-                elif name == 'ifndef':
-                    ifstack.append((enable,iftrigger))
-                    if enable:
-                        if args[0].value in self.macros:
-                            enable = False
-                            iftrigger = False
-                        else:
-                            iftrigger = True
-                elif name == 'if':
-                    ifstack.append((enable,iftrigger))
-                    if enable:
-                        result = self.evalexpr(args)
-                        if not result:
-                            enable = False
-                            iftrigger = False
-                        else:
-                            iftrigger = True
-                elif name == 'elif':
-                    if ifstack:
-                        if ifstack[-1][0]:     # We only pay attention if outer "if" allows this
-                            if enable:         # If already true, we flip enable False
-                                enable = False
-                            elif not iftrigger:   # If False, but not triggered yet, we'll check expression
-                                result = self.evalexpr(args)
-                                if result:
-                                    enable = True
-                                    iftrigger = True
-                    else:
-                        self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")
-
-                elif name == 'else':
-                    if ifstack:
-                        if ifstack[-1][0]:
-                            if enable:
-                                enable = False
-                            elif not iftrigger:
-                                enable = True
-                                iftrigger = True
-                    else:
-                        self.error(self.source,dirtokens[0].lineno,"Misplaced #else")
-
-                elif name == 'endif':
-                    if ifstack:
-                        enable,iftrigger = ifstack.pop()
-                    else:
-                        self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
-                else:
-                    # Unknown preprocessor directive
-                    pass
-
-            else:
-                # Normal text
-                if enable:
-                    chunk.extend(x)
-
-        for tok in self.expand_macros(chunk):
-            yield tok
-        chunk = []
-
-    # ----------------------------------------------------------------------
-    # include()
-    #
-    # Implementation of file-inclusion
-    # ----------------------------------------------------------------------
-
-    def include(self,tokens):
-        # Try to extract the filename and then process an include file
-        if not tokens:
-            return
-        if tokens:
-            if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
-                tokens = self.expand_macros(tokens)
-
-            if tokens[0].value == '<':
-                # Include <...>
-                i = 1
-                while i < len(tokens):
-                    if tokens[i].value == '>':
-                        break
-                    i += 1
-                else:
-                    print("Malformed #include <...>")
-                    return
-                filename = "".join([x.value for x in tokens[1:i]])
-                path = self.path + [""] + self.temp_path
-            elif tokens[0].type == self.t_STRING:
-                filename = tokens[0].value[1:-1]
-                path = self.temp_path + [""] + self.path
-            else:
-                print("Malformed #include statement")
-                return
-        for p in path:
-            iname = os.path.join(p,filename)
-            try:
-                data = self.read_include_file(iname)
-                dname = os.path.dirname(iname)
-                if dname:
-                    self.temp_path.insert(0,dname)
-                for tok in self.parsegen(data,filename):
-                    yield tok
-                if dname:
-                    del self.temp_path[0]
-                break
-            except IOError:
-                pass
-        else:
-            print("Couldn't find '%s'" % filename)
-
-    # ----------------------------------------------------------------------
-    # read_include_file()
-    #
-    # Reads a source file for inclusion using #include
-    # Could be overridden to e.g. customize encoding, limit access to
-    # certain paths on the filesystem, or provide the contents of system
-    # include files
-    # ----------------------------------------------------------------------
-
-    def read_include_file(self, filepath):
-        with open(filepath, 'r', encoding='utf-8', errors='surrogateescape') as file:
-            return file.read()
-
-    # ----------------------------------------------------------------------
-    # define()
-    #
-    # Define a new macro
-    # ----------------------------------------------------------------------
-
-    def define(self,tokens):
-        if isinstance(tokens,STRING_TYPES):
-            tokens = self.tokenize(tokens)
-
-        linetok = tokens
-        try:
-            name = linetok[0]
-            if len(linetok) > 1:
-                mtype = linetok[1]
-            else:
-                mtype = None
-            if not mtype:
-                m = Macro(name.value,[])
-                self.macros[name.value] = m
-            elif mtype.type in self.t_WS:
-                # A normal macro
-                m = Macro(name.value,self.tokenstrip(linetok[2:]))
-                self.macros[name.value] = m
-            elif mtype.value == '(':
-                # A macro with arguments
-                tokcount, args, positions = self.collect_args(linetok[1:])
-                variadic = False
-                for a in args:
-                    if variadic:
-                        print("No more arguments may follow a variadic argument")
-                        break
-                    astr = "".join([str(_i.value) for _i in a])
-                    if astr == "...":
-                        variadic = True
-                        a[0].type = self.t_ID
-                        a[0].value = '__VA_ARGS__'
-                        variadic = True
-                        del a[1:]
-                        continue
-                    elif astr[-3:] == "..." and a[0].type == self.t_ID:
-                        variadic = True
-                        del a[1:]
-                        # If, for some reason, "." is part of the identifier, strip off the name for the purposes
-                        # of macro expansion
-                        if a[0].value[-3:] == '...':
-                            a[0].value = a[0].value[:-3]
-                        continue
-                    if len(a) > 1 or a[0].type != self.t_ID:
-                        print("Invalid macro argument")
-                        break
-                else:
-                    mvalue = self.tokenstrip(linetok[1+tokcount:])
-                    i = 0
-                    while i < len(mvalue):
-                        if i+1 < len(mvalue):
-                            if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
-                                del mvalue[i]
-                                continue
-                            elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
-                                del mvalue[i+1]
-                        i += 1
-                    m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
-                    self.macro_prescan(m)
-                    self.macros[name.value] = m
-            else:
-                print("Bad macro definition")
-        except LookupError:
-            print("Bad macro definition")
-
-    # ----------------------------------------------------------------------
-    # undef()
-    #
-    # Undefine a macro
-    # ----------------------------------------------------------------------
-
-    def undef(self,tokens):
-        id = tokens[0].value
-        try:
-            del self.macros[id]
-        except LookupError:
-            pass
-
-    # ----------------------------------------------------------------------
-    # parse()
-    #
-    # Parse input text.
-    # ----------------------------------------------------------------------
-    def parse(self,input,source=None,ignore={}):
-        self.ignore = ignore
-        self.parser = self.parsegen(input,source)
-
-    # ----------------------------------------------------------------------
-    # token()
-    #
-    # Method to return individual tokens
-    # ----------------------------------------------------------------------
-    def token(self):
-        try:
-            while True:
-                tok = next(self.parser)
-                if tok.type not in self.ignore: return tok
-        except StopIteration:
-            self.parser = None
-            return None
-
-if __name__ == '__main__':
-    import ply.lex as lex
-    lexer = lex.lex()
-
-    # Run a preprocessor
-    import sys
-    with open(sys.argv[1]) as f:
-        input = f.read()
-
-    p = Preprocessor(lexer)
-    p.parse(input,sys.argv[1])
-    while True:
-        tok = p.token()
-        if not tok: break
-        print(p.source, tok)
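cpp.py is removed outright in this commit rather than ported. Projects that still depend on the Preprocessor class can vendor the deleted file and drive it the same way its own __main__ block above does; a minimal sketch (the module name cpp_vendored is hypothetical):

    import ply.lex as lex
    import cpp_vendored as cpp   # hypothetical local copy of the removed cpp.py

    lexer = lex.lex(module=cpp)  # build a lexer from the module's CPP_* token rules
    p = cpp.Preprocessor(lexer)
    p.parse('#define SQUARE(x) ((x)*(x))\nSQUARE(3)\n', 'example.c')
    while True:
        tok = p.token()
        if tok is None:
            break
        print(tok.value, end='')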
diff --git a/ply/ctokens.py b/ply/ctokens.py
deleted file mode 100644
index b265e59..0000000
--- a/ply/ctokens.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# ----------------------------------------------------------------------
-# ctokens.py
-#
-# Token specifications for symbols in ANSI C and C++. This file is
-# meant to be used as a library in other tokenizers.
-# ----------------------------------------------------------------------
-
-# Reserved words
-
-tokens = [
-    # Literals (identifier, integer constant, float constant, string constant, char const)
-    'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',
-
-    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
-    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
-    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
-    'LOR', 'LAND', 'LNOT',
-    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
-
-    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
-    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
-    'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
-
-    # Increment/decrement (++,--)
-    'INCREMENT', 'DECREMENT',
-
-    # Structure dereference (->)
-    'ARROW',
-
-    # Ternary operator (?)
-    'TERNARY',
-
-    # Delimiters ( ) [ ] { } , . ; :
-    'LPAREN', 'RPAREN',
-    'LBRACKET', 'RBRACKET',
-    'LBRACE', 'RBRACE',
-    'COMMA', 'PERIOD', 'SEMI', 'COLON',
-
-    # Ellipsis (...)
-    'ELLIPSIS',
-]
-
-# Operators
-t_PLUS = r'\+'
-t_MINUS = r'-'
-t_TIMES = r'\*'
-t_DIVIDE = r'/'
-t_MODULO = r'%'
-t_OR = r'\|'
-t_AND = r'&'
-t_NOT = r'~'
-t_XOR = r'\^'
-t_LSHIFT = r'<<'
-t_RSHIFT = r'>>'
-t_LOR = r'\|\|'
-t_LAND = r'&&'
-t_LNOT = r'!'
-t_LT = r'<'
-t_GT = r'>'
-t_LE = r'<='
-t_GE = r'>='
-t_EQ = r'=='
-t_NE = r'!='
-
-# Assignment operators
-
-t_EQUALS = r'='
-t_TIMESEQUAL = r'\*='
-t_DIVEQUAL = r'/='
-t_MODEQUAL = r'%='
-t_PLUSEQUAL = r'\+='
-t_MINUSEQUAL = r'-='
-t_LSHIFTEQUAL = r'<<='
-t_RSHIFTEQUAL = r'>>='
-t_ANDEQUAL = r'&='
-t_OREQUAL = r'\|='
-t_XOREQUAL = r'\^='
-
-# Increment/decrement
-t_INCREMENT = r'\+\+'
-t_DECREMENT = r'--'
-
-# ->
-t_ARROW = r'->'
-
-# ?
-t_TERNARY = r'\?'
-
-# Delimiters
-t_LPAREN = r'\('
-t_RPAREN = r'\)'
-t_LBRACKET = r'\['
-t_RBRACKET = r'\]'
-t_LBRACE = r'\{'
-t_RBRACE = r'\}'
-t_COMMA = r','
-t_PERIOD = r'\.'
-t_SEMI = r';'
-t_COLON = r':'
-t_ELLIPSIS = r'\.\.\.'
-
-# Identifiers
-t_ID = r'[A-Za-z_][A-Za-z0-9_]*'
-
-# Integer literal
-t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
-
-# Floating literal
-t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
-
-# String literal
-t_STRING = r'\"([^\\\n]|(\\.))*?\"'
-
-# Character constant 'c' or L'c'
-t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''
-
-# Comment (C-Style)
-def t_COMMENT(t):
-    r'/\*(.|\n)*?\*/'
-    t.lexer.lineno += t.value.count('\n')
-    return t
-
-# Comment (C++-Style)
-def t_CPPCOMMENT(t):
-    r'//.*\n'
-    t.lexer.lineno += 1
-    return t
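ctokens.py was never a standalone lexer: it only supplies the tokens list and t_* rules for other modules to import. A vendored copy can still be used that way; a minimal sketch (the module name ctokens_vendored is hypothetical, and the extra rules cover what the library deliberately leaves out):

    import ply.lex as lex
    from ctokens_vendored import *   # brings in tokens, t_PLUS, t_ID, ...

    tokens = tokens + ['COMMENT', 'CPPCOMMENT']  # the two comment rules return these types
    t_ignore = ' \t'                             # the library defines no whitespace rule

    def t_error(t):                              # ...nor an error rule
        t.lexer.skip(1)

    lexer = lex.lex()
    lexer.input('a += b++; /* done */')
    for tok in lexer:
        print(tok.type, tok.value)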
diff --git a/ply/lex.py b/ply/lex.py
index bc9ed34..39095eb 100644
--- a/ply/lex.py
+++ b/ply/lex.py
@@ -1,7 +1,7 @@
# -----------------------------------------------------------------------------
# ply: lex.py
#
-# Copyright (C) 2001-2019
+# Copyright (C) 2001-2020
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
@@ -33,7 +33,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
-__version__ = '3.11'
+__version__ = '4.0'
__tabversion__ = '3.10'
import re
@@ -43,13 +43,8 @@ import copy
import os
import inspect
-# This tuple contains known string types
-try:
-    # Python 2.6
-    StringTypes = (types.StringType, types.UnicodeType)
-except AttributeError:
-    # Python 3.0
-    StringTypes = (str, bytes)
+# This tuple contains acceptable string types
+StringTypes = (str, bytes)
# This regular expression is used to match valid token names
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
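With the Python 2 branch gone, StringTypes collapses to (str, bytes). lex.py uses this tuple to tell string rules from function rules when it walks a module's t_* names; the standalone sketch below mirrors that dispatch (the two rules are illustrative, not taken from the diff):

    StringTypes = (str, bytes)

    t_PLUS = r'\+'           # a string rule: the attribute value is the regex

    def t_NUMBER(t):         # a function rule: the regex is the docstring
        r'\d+'
        t.value = int(t.value)
        return t

    for name, value in sorted(globals().items()):
        if name.startswith('t_'):
            kind = 'string rule' if isinstance(value, StringTypes) else 'function rule'
            print(name, '->', kind)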
diff --git a/ply/yacc.py b/ply/yacc.py
index 534bb3e..a5024eb 100644
--- a/ply/yacc.py
+++ b/ply/yacc.py
@@ -1,7 +1,7 @@
# -----------------------------------------------------------------------------
# ply: yacc.py
#
-# Copyright (C) 2001-2019
+# Copyright (C) 2001-2020
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
@@ -68,7 +68,7 @@ import os.path
import inspect
import warnings
-__version__ = '3.11'
+__version__ = '4.0'
__tabversion__ = '3.10'
#-----------------------------------------------------------------------------
@@ -93,12 +93,6 @@ resultlimit = 40 # Size limit of results when running in debug mod
pickle_protocol = 0 # Protocol to use when writing pickle files
-# String type-checking compatibility
-if sys.version_info[0] < 3:
-    string_types = basestring
-else:
-    string_types = str
-
MAXINT = sys.maxsize
# This object is a stand-in for a logging object created by the
@@ -3029,7 +3023,7 @@ class ParserReflect(object):
    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
-            if not isinstance(self.start, string_types):
+            if not isinstance(self.start, str):
                self.log.error("'start' must be a string")
    # Look for error handler
@@ -3115,12 +3109,12 @@ class ParserReflect(object):
                    self.error = True
                    return
                assoc = p[0]
-                if not isinstance(assoc, string_types):
+                if not isinstance(assoc, str):
                    self.log.error('precedence associativity must be a string')
                    self.error = True
                    return
                for term in p[1:]:
-                    if not isinstance(term, string_types):
+                    if not isinstance(term, str):
                        self.log.error('precedence items must be strings')
                        self.error = True
                        return
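The yacc.py change is mechanical: with Python 2 gone, the string_types alias collapses to str in the two validators shown above. Those checks look at a grammar module's start and precedence values; a minimal sketch of metadata that passes them (the calclex module and its token names are assumptions):

    import ply.yacc as yacc
    from calclex import tokens   # hypothetical lexer module defining NUMBER, PLUS, ...

    start = 'expression'         # validate_start(): must be a plain str

    precedence = (               # validate_precedence(): assoc and terms must be str
        ('left', 'PLUS', 'MINUS'),
        ('left', 'TIMES', 'DIVIDE'),
    )

    def p_expression_binop(p):
        '''expression : expression PLUS expression
                      | expression MINUS expression
                      | expression TIMES expression
                      | expression DIVIDE expression'''
        p[0] = (p[2], p[1], p[3])

    def p_expression_number(p):
        'expression : NUMBER'
        p[0] = p[1]

    parser = yacc.yacc()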