diff options
Diffstat (limited to 'ply')
-rw-r--r-- | ply/__init__.py | 2 | ||||
-rw-r--r-- | ply/cpp.py | 974 | ||||
-rw-r--r-- | ply/ctokens.py | 127 | ||||
-rw-r--r-- | ply/lex.py | 13 | ||||
-rw-r--r-- | ply/yacc.py | 16 |
5 files changed, 10 insertions, 1122 deletions
diff --git a/ply/__init__.py b/ply/__init__.py index 23707c6..6f768b7 100644 --- a/ply/__init__.py +++ b/ply/__init__.py @@ -1,5 +1,5 @@ # PLY package # Author: David Beazley (dave@dabeaz.com) -__version__ = '3.11' +__version__ = '4.0' __all__ = ['lex','yacc'] diff --git a/ply/cpp.py b/ply/cpp.py deleted file mode 100644 index 50a44a1..0000000 --- a/ply/cpp.py +++ /dev/null @@ -1,974 +0,0 @@ -# ----------------------------------------------------------------------------- -# ply: cpp.py -# -# Copyright (C) 2001-2019 -# David M. Beazley (Dabeaz LLC) -# All rights reserved. -# -# Latest version: https://github.com/dabeaz/ply -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of David Beazley or Dabeaz LLC may be used to -# endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ----------------------------------------------------------------------------- - -# This module implements an ANSI-C style lexical preprocessor for PLY. -# ----------------------------------------------------------------------------- -from __future__ import generators - -import sys - -# Some Python 3 compatibility shims -if sys.version_info.major < 3: - STRING_TYPES = (str, unicode) -else: - STRING_TYPES = str - xrange = range - -# ----------------------------------------------------------------------------- -# Default preprocessor lexer definitions. These tokens are enough to get -# a basic preprocessor working. Other modules may import these if they want -# ----------------------------------------------------------------------------- - -tokens = ( - 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND' -) - -literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\"" - -# Whitespace -def t_CPP_WS(t): - r'\s+' - t.lexer.lineno += t.value.count("\n") - return t - -t_CPP_POUND = r'\#' -t_CPP_DPOUND = r'\#\#' - -# Identifier -t_CPP_ID = r'[A-Za-z_][\w_]*' - -# Integer literal -def CPP_INTEGER(t): - r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)' - return t - -t_CPP_INTEGER = CPP_INTEGER - -# Floating literal -t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' - -# String literal -def t_CPP_STRING(t): - r'\"([^\\\n]|(\\(.|\n)))*?\"' - t.lexer.lineno += t.value.count("\n") - return t - -# Character constant 'c' or L'c' -def t_CPP_CHAR(t): - r'(L)?\'([^\\\n]|(\\(.|\n)))*?\'' - t.lexer.lineno += t.value.count("\n") - return t - -# Comment -def t_CPP_COMMENT1(t): - r'(/\*(.|\n)*?\*/)' - ncr = t.value.count("\n") - t.lexer.lineno += ncr - # replace with one space or a number of '\n' - t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' ' - return t - -# Line comment -def t_CPP_COMMENT2(t): - r'(//.*?(\n|$))' - # replace with '/n' - t.type = 'CPP_WS'; t.value = '\n' - return t - -def t_error(t): - t.type = t.value[0] - t.value = t.value[0] - t.lexer.skip(1) - return t - -import re -import copy -import time -import os.path - -# ----------------------------------------------------------------------------- -# trigraph() -# -# Given an input string, this function replaces all trigraph sequences. -# The following mapping is used: -# -# ??= # -# ??/ \ -# ??' ^ -# ??( [ -# ??) ] -# ??! | -# ??< { -# ??> } -# ??- ~ -# ----------------------------------------------------------------------------- - -_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''') -_trigraph_rep = { - '=':'#', - '/':'\\', - "'":'^', - '(':'[', - ')':']', - '!':'|', - '<':'{', - '>':'}', - '-':'~' -} - -def trigraph(input): - return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input) - -# ------------------------------------------------------------------ -# Macro object -# -# This object holds information about preprocessor macros -# -# .name - Macro name (string) -# .value - Macro value (a list of tokens) -# .arglist - List of argument names -# .variadic - Boolean indicating whether or not variadic macro -# .vararg - Name of the variadic parameter -# -# When a macro is created, the macro replacement token sequence is -# pre-scanned and used to create patch lists that are later used -# during macro expansion -# ------------------------------------------------------------------ - -class Macro(object): - def __init__(self,name,value,arglist=None,variadic=False): - self.name = name - self.value = value - self.arglist = arglist - self.variadic = variadic - if variadic: - self.vararg = arglist[-1] - self.source = None - -# ------------------------------------------------------------------ -# Preprocessor object -# -# Object representing a preprocessor. Contains macro definitions, -# include directories, and other information -# ------------------------------------------------------------------ - -class Preprocessor(object): - def __init__(self,lexer=None): - if lexer is None: - lexer = lex.lexer - self.lexer = lexer - self.macros = { } - self.path = [] - self.temp_path = [] - - # Probe the lexer for selected tokens - self.lexprobe() - - tm = time.localtime() - self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm)) - self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm)) - self.parser = None - - # ----------------------------------------------------------------------------- - # tokenize() - # - # Utility function. Given a string of text, tokenize into a list of tokens - # ----------------------------------------------------------------------------- - - def tokenize(self,text): - tokens = [] - self.lexer.input(text) - while True: - tok = self.lexer.token() - if not tok: break - tokens.append(tok) - return tokens - - # --------------------------------------------------------------------- - # error() - # - # Report a preprocessor error/warning of some kind - # ---------------------------------------------------------------------- - - def error(self,file,line,msg): - print("%s:%d %s" % (file,line,msg)) - - # ---------------------------------------------------------------------- - # lexprobe() - # - # This method probes the preprocessor lexer object to discover - # the token types of symbols that are important to the preprocessor. - # If this works right, the preprocessor will simply "work" - # with any suitable lexer regardless of how tokens have been named. - # ---------------------------------------------------------------------- - - def lexprobe(self): - - # Determine the token type for identifiers - self.lexer.input("identifier") - tok = self.lexer.token() - if not tok or tok.value != "identifier": - print("Couldn't determine identifier type") - else: - self.t_ID = tok.type - - # Determine the token type for integers - self.lexer.input("12345") - tok = self.lexer.token() - if not tok or int(tok.value) != 12345: - print("Couldn't determine integer type") - else: - self.t_INTEGER = tok.type - self.t_INTEGER_TYPE = type(tok.value) - - # Determine the token type for strings enclosed in double quotes - self.lexer.input("\"filename\"") - tok = self.lexer.token() - if not tok or tok.value != "\"filename\"": - print("Couldn't determine string type") - else: - self.t_STRING = tok.type - - # Determine the token type for whitespace--if any - self.lexer.input(" ") - tok = self.lexer.token() - if not tok or tok.value != " ": - self.t_SPACE = None - else: - self.t_SPACE = tok.type - - # Determine the token type for newlines - self.lexer.input("\n") - tok = self.lexer.token() - if not tok or tok.value != "\n": - self.t_NEWLINE = None - print("Couldn't determine token for newlines") - else: - self.t_NEWLINE = tok.type - - self.t_WS = (self.t_SPACE, self.t_NEWLINE) - - # Check for other characters used by the preprocessor - chars = [ '<','>','#','##','\\','(',')',',','.'] - for c in chars: - self.lexer.input(c) - tok = self.lexer.token() - if not tok or tok.value != c: - print("Unable to lex '%s' required for preprocessor" % c) - - # ---------------------------------------------------------------------- - # add_path() - # - # Adds a search path to the preprocessor. - # ---------------------------------------------------------------------- - - def add_path(self,path): - self.path.append(path) - - # ---------------------------------------------------------------------- - # group_lines() - # - # Given an input string, this function splits it into lines. Trailing whitespace - # is removed. Any line ending with \ is grouped with the next line. This - # function forms the lowest level of the preprocessor---grouping into text into - # a line-by-line format. - # ---------------------------------------------------------------------- - - def group_lines(self,input): - lex = self.lexer.clone() - lines = [x.rstrip() for x in input.splitlines()] - for i in xrange(len(lines)): - j = i+1 - while lines[i].endswith('\\') and (j < len(lines)): - lines[i] = lines[i][:-1]+lines[j] - lines[j] = "" - j += 1 - - input = "\n".join(lines) - lex.input(input) - lex.lineno = 1 - - current_line = [] - while True: - tok = lex.token() - if not tok: - break - current_line.append(tok) - if tok.type in self.t_WS and '\n' in tok.value: - yield current_line - current_line = [] - - if current_line: - yield current_line - - # ---------------------------------------------------------------------- - # tokenstrip() - # - # Remove leading/trailing whitespace tokens from a token list - # ---------------------------------------------------------------------- - - def tokenstrip(self,tokens): - i = 0 - while i < len(tokens) and tokens[i].type in self.t_WS: - i += 1 - del tokens[:i] - i = len(tokens)-1 - while i >= 0 and tokens[i].type in self.t_WS: - i -= 1 - del tokens[i+1:] - return tokens - - - # ---------------------------------------------------------------------- - # collect_args() - # - # Collects comma separated arguments from a list of tokens. The arguments - # must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions) - # where tokencount is the number of tokens consumed, args is a list of arguments, - # and positions is a list of integers containing the starting index of each - # argument. Each argument is represented by a list of tokens. - # - # When collecting arguments, leading and trailing whitespace is removed - # from each argument. - # - # This function properly handles nested parenthesis and commas---these do not - # define new arguments. - # ---------------------------------------------------------------------- - - def collect_args(self,tokenlist): - args = [] - positions = [] - current_arg = [] - nesting = 1 - tokenlen = len(tokenlist) - - # Search for the opening '('. - i = 0 - while (i < tokenlen) and (tokenlist[i].type in self.t_WS): - i += 1 - - if (i < tokenlen) and (tokenlist[i].value == '('): - positions.append(i+1) - else: - self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments") - return 0, [], [] - - i += 1 - - while i < tokenlen: - t = tokenlist[i] - if t.value == '(': - current_arg.append(t) - nesting += 1 - elif t.value == ')': - nesting -= 1 - if nesting == 0: - if current_arg: - args.append(self.tokenstrip(current_arg)) - positions.append(i) - return i+1,args,positions - current_arg.append(t) - elif t.value == ',' and nesting == 1: - args.append(self.tokenstrip(current_arg)) - positions.append(i+1) - current_arg = [] - else: - current_arg.append(t) - i += 1 - - # Missing end argument - self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments") - return 0, [],[] - - # ---------------------------------------------------------------------- - # macro_prescan() - # - # Examine the macro value (token sequence) and identify patch points - # This is used to speed up macro expansion later on---we'll know - # right away where to apply patches to the value to form the expansion - # ---------------------------------------------------------------------- - - def macro_prescan(self,macro): - macro.patch = [] # Standard macro arguments - macro.str_patch = [] # String conversion expansion - macro.var_comma_patch = [] # Variadic macro comma patch - i = 0 - while i < len(macro.value): - if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist: - argnum = macro.arglist.index(macro.value[i].value) - # Conversion of argument to a string - if i > 0 and macro.value[i-1].value == '#': - macro.value[i] = copy.copy(macro.value[i]) - macro.value[i].type = self.t_STRING - del macro.value[i-1] - macro.str_patch.append((argnum,i-1)) - continue - # Concatenation - elif (i > 0 and macro.value[i-1].value == '##'): - macro.patch.append(('c',argnum,i-1)) - del macro.value[i-1] - i -= 1 - continue - elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'): - macro.patch.append(('c',argnum,i)) - del macro.value[i + 1] - continue - # Standard expansion - else: - macro.patch.append(('e',argnum,i)) - elif macro.value[i].value == '##': - if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \ - ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \ - (macro.value[i+1].value == macro.vararg): - macro.var_comma_patch.append(i-1) - i += 1 - macro.patch.sort(key=lambda x: x[2],reverse=True) - - # ---------------------------------------------------------------------- - # macro_expand_args() - # - # Given a Macro and list of arguments (each a token list), this method - # returns an expanded version of a macro. The return value is a token sequence - # representing the replacement macro tokens - # ---------------------------------------------------------------------- - - def macro_expand_args(self,macro,args,expanded): - # Make a copy of the macro token sequence - rep = [copy.copy(_x) for _x in macro.value] - - # Make string expansion patches. These do not alter the length of the replacement sequence - - str_expansion = {} - for argnum, i in macro.str_patch: - if argnum not in str_expansion: - str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\") - rep[i] = copy.copy(rep[i]) - rep[i].value = str_expansion[argnum] - - # Make the variadic macro comma patch. If the variadic macro argument is empty, we get rid - comma_patch = False - if macro.variadic and not args[-1]: - for i in macro.var_comma_patch: - rep[i] = None - comma_patch = True - - # Make all other patches. The order of these matters. It is assumed that the patch list - # has been sorted in reverse order of patch location since replacements will cause the - # size of the replacement sequence to expand from the patch point. - - expanded_args = { } - for ptype, argnum, i in macro.patch: - # Concatenation. Argument is left unexpanded - if ptype == 'c': - rep[i:i+1] = args[argnum] - # Normal expansion. Argument is macro expanded first - elif ptype == 'e': - if argnum not in expanded_args: - expanded_args[argnum] = self.expand_macros(args[argnum],expanded) - rep[i:i+1] = expanded_args[argnum] - - # Get rid of removed comma if necessary - if comma_patch: - rep = [_i for _i in rep if _i] - - return rep - - - # ---------------------------------------------------------------------- - # expand_macros() - # - # Given a list of tokens, this function performs macro expansion. - # The expanded argument is a dictionary that contains macros already - # expanded. This is used to prevent infinite recursion. - # ---------------------------------------------------------------------- - - def expand_macros(self,tokens,expanded=None): - if expanded is None: - expanded = {} - i = 0 - while i < len(tokens): - t = tokens[i] - if t.type == self.t_ID: - if t.value in self.macros and t.value not in expanded: - # Yes, we found a macro match - expanded[t.value] = True - - m = self.macros[t.value] - if not m.arglist: - # A simple macro - ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded) - for e in ex: - e.lineno = t.lineno - tokens[i:i+1] = ex - i += len(ex) - else: - # A macro with arguments - j = i + 1 - while j < len(tokens) and tokens[j].type in self.t_WS: - j += 1 - if j < len(tokens) and tokens[j].value == '(': - tokcount,args,positions = self.collect_args(tokens[j:]) - if not m.variadic and len(args) != len(m.arglist): - self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist))) - i = j + tokcount - elif m.variadic and len(args) < len(m.arglist)-1: - if len(m.arglist) > 2: - self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1)) - else: - self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1)) - i = j + tokcount - else: - if m.variadic: - if len(args) == len(m.arglist)-1: - args.append([]) - else: - args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1] - del args[len(m.arglist):] - - # Get macro replacement text - rep = self.macro_expand_args(m,args,expanded) - rep = self.expand_macros(rep,expanded) - for r in rep: - r.lineno = t.lineno - tokens[i:j+tokcount] = rep - i += len(rep) - else: - # This is not a macro. It is just a word which - # equals to name of the macro. Hence, go to the - # next token. - i += 1 - - del expanded[t.value] - continue - elif t.value == '__LINE__': - t.type = self.t_INTEGER - t.value = self.t_INTEGER_TYPE(t.lineno) - - i += 1 - return tokens - - # ---------------------------------------------------------------------- - # evalexpr() - # - # Evaluate an expression token sequence for the purposes of evaluating - # integral expressions. - # ---------------------------------------------------------------------- - - def evalexpr(self,tokens): - # tokens = tokenize(line) - # Search for defined macros - i = 0 - while i < len(tokens): - if tokens[i].type == self.t_ID and tokens[i].value == 'defined': - j = i + 1 - needparen = False - result = "0L" - while j < len(tokens): - if tokens[j].type in self.t_WS: - j += 1 - continue - elif tokens[j].type == self.t_ID: - if tokens[j].value in self.macros: - result = "1L" - else: - result = "0L" - if not needparen: break - elif tokens[j].value == '(': - needparen = True - elif tokens[j].value == ')': - break - else: - self.error(self.source,tokens[i].lineno,"Malformed defined()") - j += 1 - tokens[i].type = self.t_INTEGER - tokens[i].value = self.t_INTEGER_TYPE(result) - del tokens[i+1:j+1] - i += 1 - tokens = self.expand_macros(tokens) - return self.evalexpr_expanded(tokens) - - # ---------------------------------------------------------------------- - # evalexpr_expanded() - # - # Helper for evalexpr that evaluates the expression that had its macros - # and defined(...) expressions expanded by evalexpr - # ---------------------------------------------------------------------- - - def evalexpr_expanded(self, tokens): - for i,t in enumerate(tokens): - if t.type == self.t_ID: - tokens[i] = copy.copy(t) - tokens[i].type = self.t_INTEGER - tokens[i].value = self.t_INTEGER_TYPE("0") - elif t.type == self.t_INTEGER: - tokens[i] = copy.copy(t) - # Strip off any trailing suffixes - tokens[i].value = str(tokens[i].value) - while tokens[i].value[-1] not in "0123456789abcdefABCDEF": - tokens[i].value = tokens[i].value[:-1] - - return self.evalexpr_string("".join([str(x.value) for x in tokens])) - - # ---------------------------------------------------------------------- - # evalexpr_string() - # - # Helper for evalexpr that evaluates a string expression - # This implementation does basic C->python conversion and then uses eval() - # ---------------------------------------------------------------------- - def evalexpr_string(self, expr): - expr = expr.replace("&&"," and ") - expr = expr.replace("||"," or ") - expr = expr.replace("!"," not ") - expr = expr.replace(" not ="," !=") - try: - result = eval(expr) - except Exception: - self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression") - result = 0 - return result - - # ---------------------------------------------------------------------- - # parsegen() - # - # Parse an input string/ - # ---------------------------------------------------------------------- - def parsegen(self,input,source=None): - - # Replace trigraph sequences - t = trigraph(input) - lines = self.group_lines(t) - - if not source: - source = "" - - self.define("__FILE__ \"%s\"" % source) - - self.source = source - chunk = [] - enable = True - iftrigger = False - ifstack = [] - - for x in lines: - for i,tok in enumerate(x): - if tok.type not in self.t_WS: break - if tok.value == '#': - # Preprocessor directive - - # insert necessary whitespace instead of eaten tokens - for tok in x: - if tok.type in self.t_WS and '\n' in tok.value: - chunk.append(tok) - - dirtokens = self.tokenstrip(x[i+1:]) - if dirtokens: - name = dirtokens[0].value - args = self.tokenstrip(dirtokens[1:]) - else: - name = "" - args = [] - - if name == 'define': - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - self.define(args) - elif name == 'include': - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - oldfile = self.macros['__FILE__'] - for tok in self.include(args): - yield tok - self.macros['__FILE__'] = oldfile - self.source = source - elif name == 'undef': - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - self.undef(args) - elif name == 'ifdef': - ifstack.append((enable,iftrigger)) - if enable: - if not args[0].value in self.macros: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 'ifndef': - ifstack.append((enable,iftrigger)) - if enable: - if args[0].value in self.macros: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 'if': - ifstack.append((enable,iftrigger)) - if enable: - result = self.evalexpr(args) - if not result: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 'elif': - if ifstack: - if ifstack[-1][0]: # We only pay attention if outer "if" allows this - if enable: # If already true, we flip enable False - enable = False - elif not iftrigger: # If False, but not triggered yet, we'll check expression - result = self.evalexpr(args) - if result: - enable = True - iftrigger = True - else: - self.error(self.source,dirtokens[0].lineno,"Misplaced #elif") - - elif name == 'else': - if ifstack: - if ifstack[-1][0]: - if enable: - enable = False - elif not iftrigger: - enable = True - iftrigger = True - else: - self.error(self.source,dirtokens[0].lineno,"Misplaced #else") - - elif name == 'endif': - if ifstack: - enable,iftrigger = ifstack.pop() - else: - self.error(self.source,dirtokens[0].lineno,"Misplaced #endif") - else: - # Unknown preprocessor directive - pass - - else: - # Normal text - if enable: - chunk.extend(x) - - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - - # ---------------------------------------------------------------------- - # include() - # - # Implementation of file-inclusion - # ---------------------------------------------------------------------- - - def include(self,tokens): - # Try to extract the filename and then process an include file - if not tokens: - return - if tokens: - if tokens[0].value != '<' and tokens[0].type != self.t_STRING: - tokens = self.expand_macros(tokens) - - if tokens[0].value == '<': - # Include <...> - i = 1 - while i < len(tokens): - if tokens[i].value == '>': - break - i += 1 - else: - print("Malformed #include <...>") - return - filename = "".join([x.value for x in tokens[1:i]]) - path = self.path + [""] + self.temp_path - elif tokens[0].type == self.t_STRING: - filename = tokens[0].value[1:-1] - path = self.temp_path + [""] + self.path - else: - print("Malformed #include statement") - return - for p in path: - iname = os.path.join(p,filename) - try: - data = self.read_include_file(iname) - dname = os.path.dirname(iname) - if dname: - self.temp_path.insert(0,dname) - for tok in self.parsegen(data,filename): - yield tok - if dname: - del self.temp_path[0] - break - except IOError: - pass - else: - print("Couldn't find '%s'" % filename) - - # ---------------------------------------------------------------------- - # read_include_file() - # - # Reads a source file for inclusion using #include - # Could be overridden to e.g. customize encoding, limit access to - # certain paths on the filesystem, or provide the contents of system - # include files - # ---------------------------------------------------------------------- - - def read_include_file(self, filepath): - with open(filepath, 'r', encoding='utf-8', errors='surrogateescape') as file: - return file.read() - - # ---------------------------------------------------------------------- - # define() - # - # Define a new macro - # ---------------------------------------------------------------------- - - def define(self,tokens): - if isinstance(tokens,STRING_TYPES): - tokens = self.tokenize(tokens) - - linetok = tokens - try: - name = linetok[0] - if len(linetok) > 1: - mtype = linetok[1] - else: - mtype = None - if not mtype: - m = Macro(name.value,[]) - self.macros[name.value] = m - elif mtype.type in self.t_WS: - # A normal macro - m = Macro(name.value,self.tokenstrip(linetok[2:])) - self.macros[name.value] = m - elif mtype.value == '(': - # A macro with arguments - tokcount, args, positions = self.collect_args(linetok[1:]) - variadic = False - for a in args: - if variadic: - print("No more arguments may follow a variadic argument") - break - astr = "".join([str(_i.value) for _i in a]) - if astr == "...": - variadic = True - a[0].type = self.t_ID - a[0].value = '__VA_ARGS__' - variadic = True - del a[1:] - continue - elif astr[-3:] == "..." and a[0].type == self.t_ID: - variadic = True - del a[1:] - # If, for some reason, "." is part of the identifier, strip off the name for the purposes - # of macro expansion - if a[0].value[-3:] == '...': - a[0].value = a[0].value[:-3] - continue - if len(a) > 1 or a[0].type != self.t_ID: - print("Invalid macro argument") - break - else: - mvalue = self.tokenstrip(linetok[1+tokcount:]) - i = 0 - while i < len(mvalue): - if i+1 < len(mvalue): - if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##': - del mvalue[i] - continue - elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS: - del mvalue[i+1] - i += 1 - m = Macro(name.value,mvalue,[x[0].value for x in args],variadic) - self.macro_prescan(m) - self.macros[name.value] = m - else: - print("Bad macro definition") - except LookupError: - print("Bad macro definition") - - # ---------------------------------------------------------------------- - # undef() - # - # Undefine a macro - # ---------------------------------------------------------------------- - - def undef(self,tokens): - id = tokens[0].value - try: - del self.macros[id] - except LookupError: - pass - - # ---------------------------------------------------------------------- - # parse() - # - # Parse input text. - # ---------------------------------------------------------------------- - def parse(self,input,source=None,ignore={}): - self.ignore = ignore - self.parser = self.parsegen(input,source) - - # ---------------------------------------------------------------------- - # token() - # - # Method to return individual tokens - # ---------------------------------------------------------------------- - def token(self): - try: - while True: - tok = next(self.parser) - if tok.type not in self.ignore: return tok - except StopIteration: - self.parser = None - return None - -if __name__ == '__main__': - import ply.lex as lex - lexer = lex.lex() - - # Run a preprocessor - import sys - with open(sys.argv[1]) as f: - input = f.read() - - p = Preprocessor(lexer) - p.parse(input,sys.argv[1]) - while True: - tok = p.token() - if not tok: break - print(p.source, tok) diff --git a/ply/ctokens.py b/ply/ctokens.py deleted file mode 100644 index b265e59..0000000 --- a/ply/ctokens.py +++ /dev/null @@ -1,127 +0,0 @@ -# ---------------------------------------------------------------------- -# ctokens.py -# -# Token specifications for symbols in ANSI C and C++. This file is -# meant to be used as a library in other tokenizers. -# ---------------------------------------------------------------------- - -# Reserved words - -tokens = [ - # Literals (identifier, integer constant, float constant, string constant, char const) - 'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER', - - # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) - 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO', - 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT', - 'LOR', 'LAND', 'LNOT', - 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE', - - # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) - 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL', - 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL', - - # Increment/decrement (++,--) - 'INCREMENT', 'DECREMENT', - - # Structure dereference (->) - 'ARROW', - - # Ternary operator (?) - 'TERNARY', - - # Delimeters ( ) [ ] { } , . ; : - 'LPAREN', 'RPAREN', - 'LBRACKET', 'RBRACKET', - 'LBRACE', 'RBRACE', - 'COMMA', 'PERIOD', 'SEMI', 'COLON', - - # Ellipsis (...) - 'ELLIPSIS', -] - -# Operators -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_MODULO = r'%' -t_OR = r'\|' -t_AND = r'&' -t_NOT = r'~' -t_XOR = r'\^' -t_LSHIFT = r'<<' -t_RSHIFT = r'>>' -t_LOR = r'\|\|' -t_LAND = r'&&' -t_LNOT = r'!' -t_LT = r'<' -t_GT = r'>' -t_LE = r'<=' -t_GE = r'>=' -t_EQ = r'==' -t_NE = r'!=' - -# Assignment operators - -t_EQUALS = r'=' -t_TIMESEQUAL = r'\*=' -t_DIVEQUAL = r'/=' -t_MODEQUAL = r'%=' -t_PLUSEQUAL = r'\+=' -t_MINUSEQUAL = r'-=' -t_LSHIFTEQUAL = r'<<=' -t_RSHIFTEQUAL = r'>>=' -t_ANDEQUAL = r'&=' -t_OREQUAL = r'\|=' -t_XOREQUAL = r'\^=' - -# Increment/decrement -t_INCREMENT = r'\+\+' -t_DECREMENT = r'--' - -# -> -t_ARROW = r'->' - -# ? -t_TERNARY = r'\?' - -# Delimeters -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LBRACKET = r'\[' -t_RBRACKET = r'\]' -t_LBRACE = r'\{' -t_RBRACE = r'\}' -t_COMMA = r',' -t_PERIOD = r'\.' -t_SEMI = r';' -t_COLON = r':' -t_ELLIPSIS = r'\.\.\.' - -# Identifiers -t_ID = r'[A-Za-z_][A-Za-z0-9_]*' - -# Integer literal -t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?' - -# Floating literal -t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?' - -# String literal -t_STRING = r'\"([^\\\n]|(\\.))*?\"' - -# Character constant 'c' or L'c' -t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\'' - -# Comment (C-Style) -def t_COMMENT(t): - r'/\*(.|\n)*?\*/' - t.lexer.lineno += t.value.count('\n') - return t - -# Comment (C++-Style) -def t_CPPCOMMENT(t): - r'//.*\n' - t.lexer.lineno += 1 - return t @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # ply: lex.py # -# Copyright (C) 2001-2019 +# Copyright (C) 2001-2020 # David M. Beazley (Dabeaz LLC) # All rights reserved. # @@ -33,7 +33,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- -__version__ = '3.11' +__version__ = '4.0' __tabversion__ = '3.10' import re @@ -43,13 +43,8 @@ import copy import os import inspect -# This tuple contains known string types -try: - # Python 2.6 - StringTypes = (types.StringType, types.UnicodeType) -except AttributeError: - # Python 3.0 - StringTypes = (str, bytes) +# This tuple contains acceptable string types +StringTypes = (str, bytes) # This regular expression is used to match valid token names _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') diff --git a/ply/yacc.py b/ply/yacc.py index 534bb3e..a5024eb 100644 --- a/ply/yacc.py +++ b/ply/yacc.py @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # ply: yacc.py # -# Copyright (C) 2001-2019 +# Copyright (C) 2001-2020 # David M. Beazley (Dabeaz LLC) # All rights reserved. # @@ -68,7 +68,7 @@ import os.path import inspect import warnings -__version__ = '3.11' +__version__ = '4.0' __tabversion__ = '3.10' #----------------------------------------------------------------------------- @@ -93,12 +93,6 @@ resultlimit = 40 # Size limit of results when running in debug mod pickle_protocol = 0 # Protocol to use when writing pickle files -# String type-checking compatibility -if sys.version_info[0] < 3: - string_types = basestring -else: - string_types = str - MAXINT = sys.maxsize # This object is a stand-in for a logging object created by the @@ -3029,7 +3023,7 @@ class ParserReflect(object): # Validate the start symbol def validate_start(self): if self.start is not None: - if not isinstance(self.start, string_types): + if not isinstance(self.start, str): self.log.error("'start' must be a string") # Look for error handler @@ -3115,12 +3109,12 @@ class ParserReflect(object): self.error = True return assoc = p[0] - if not isinstance(assoc, string_types): + if not isinstance(assoc, str): self.log.error('precedence associativity must be a string') self.error = True return for term in p[1:]: - if not isinstance(term, string_types): + if not isinstance(term, str): self.log.error('precedence items must be strings') self.error = True return |