summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Beazley <dave@dabeaz.com>2015-04-21 16:31:32 -0500
committerDavid Beazley <dave@dabeaz.com>2015-04-21 16:31:32 -0500
commitaf651673ba6117a0a5405055a92170fffd028106 (patch)
treeed62991309d8669dfea7c8c27ed7361f34a3adec
parentaa8f06c083194f64f2aaf68382675062adbdf196 (diff)
downloadply-af651673ba6117a0a5405055a92170fffd028106.tar.gz
Added optional support for defaulted states
-rw-r--r--CHANGES29
-rw-r--r--ply/lex.py2
-rw-r--r--ply/yacc.py157
3 files changed, 125 insertions, 63 deletions
diff --git a/CHANGES b/CHANGES
index ec6c71a..54ffe3b 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,34 @@
Version 3.5
---------------------
+04/21/15: beazley
+ Added optional support for defaulted_states in the parser.
+ A defaulted_state is a state where the only legal action is
+ a reduction of a single grammar rule across all valid input
+ tokens. For such states, the rule is reduced and the
+ reading of the next lookahead token is delayed until it is
+ actually needed at a later point in time.
+
+ This delay in consuming the next lookahead token is a potentially
+ important feature in advanced parsing applications that require tight
+ interaction between the lexer and the parser. For example, a grammar
+ rule change modify the lexer state upon reduction and have such changes
+ take effect before the next input token is read.
+
+ One potential danger of defaulted_states is that syntax errors might
+ be deferred to a a later point of processing than where they were detected
+ in the past. Thus, it's possible that your error handling could change
+ slightly over previous versions of PLY.
+
+ In order to enable defaulted_state support. You need to activate it
+ explicitly. For example:
+
+ parser = yacc.yacc()
+ parser.use_defaulted_states()
+
+04/21/15: beazley
+ Fixed debug logging in the parser. It wasn't properly reporting goto states
+ on grammar rule reductions.
+
04/20/15: beazley
Added actions to be defined to character literals (Issue #32). For example:
diff --git a/ply/lex.py b/ply/lex.py
index 7a7d18e..9bc9d09 100644
--- a/ply/lex.py
+++ b/ply/lex.py
@@ -115,7 +115,7 @@ class NullLogger(object):
class Lexer:
def __init__(self):
self.lexre = None # Master regular expression. This is a list of
- # tuples (re,findex) where re is a compiled
+ # tuples (re, findex) where re is a compiled
# regular expression and findex is a list
# mapping regex group numbers to rules
self.lexretext = None # Current regular expression strings
diff --git a/ply/yacc.py b/ply/yacc.py
index 22864aa..63d4877 100644
--- a/ply/yacc.py
+++ b/ply/yacc.py
@@ -288,6 +288,7 @@ class LRParser:
self.action = lrtab.lr_action
self.goto = lrtab.lr_goto
self.errorfunc = errorf
+ self.defaulted_states = { }
def errok(self):
self.errorok = True
@@ -300,6 +301,20 @@ class LRParser:
self.symstack.append(sym)
self.statestack.append(0)
+ # Defaulted state support (Experimental)
+ # This method identifies parser states where there is only one possible reduction action.
+ # For such states, the parser can make a choose to make a rule reduction without consuming
+ # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
+ # certain kinds of advanced parsing situations where the lexer and parser interact with
+ # each other or change states (i.e., manipulation of scope, lexer states, etc.).
+ #
+ # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
+ def use_defaulted_states(self):
+ for state, actions in self.action.items():
+ rules = list(actions.values())
+ if rules and rules[0] < 0 and all(rules[0] == rule for rule in rules):
+ self.defaulted_states[state] = rules[0]
+
def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
if debug or yaccdevel:
if isinstance(debug, int):
@@ -327,13 +342,14 @@ class LRParser:
def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
#--! parsedebug-start
- lookahead = None # Current lookahead symbol
- lookaheadstack = [] # Stack of lookahead symbols
- actions = self.action # Local reference to action table (to avoid lookup on self.)
- goto = self.goto # Local reference to goto table (to avoid lookup on self.)
- prod = self.productions # Local reference to production list (to avoid lookup on self.)
- pslice = YaccProduction(None) # Production object passed to grammar rules
- errorcount = 0 # Used during error recovery
+ lookahead = None # Current lookahead symbol
+ lookaheadstack = [] # Stack of lookahead symbols
+ actions = self.action # Local reference to action table (to avoid lookup on self.)
+ goto = self.goto # Local reference to goto table (to avoid lookup on self.)
+ prod = self.productions # Local reference to production list (to avoid lookup on self.)
+ defaulted_states = self.defaulted_states # Local reference to defaulted states
+ pslice = YaccProduction(None) # Production object passed to grammar rules
+ errorcount = 0 # Used during error recovery
#--! DEBUG
debug.info('PLY: PARSE DEBUG START')
@@ -388,24 +404,30 @@ class LRParser:
debug.debug('State : %s', state)
#--! DEBUG
- if not lookahead:
- if not lookaheadstack:
- lookahead = get_token() # Get the next token
- else:
- lookahead = lookaheadstack.pop()
+ if state not in defaulted_states:
if not lookahead:
- lookahead = YaccSymbol()
- lookahead.type = '$end'
+ if not lookaheadstack:
+ lookahead = get_token() # Get the next token
+ else:
+ lookahead = lookaheadstack.pop()
+ if not lookahead:
+ lookahead = YaccSymbol()
+ lookahead.type = '$end'
+
+ # Check the action table
+ ltype = lookahead.type
+ t = actions[state].get(ltype)
+ else:
+ t = defaulted_states[state]
+ #--! DEBUG
+ debug.debug('Defaulted state %s: Reduce using %d', state, -t)
+ #--! DEBUG
#--! DEBUG
debug.debug('Stack : %s',
('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
#--! DEBUG
- # Check the action table
- ltype = lookahead.type
- t = actions[state].get(ltype)
-
if t is not None:
if t > 0:
# shift a symbol on the stack
@@ -437,10 +459,12 @@ class LRParser:
#--! DEBUG
if plen:
- debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
- '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', -t)
+ debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
+ '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
+ goto[statestack[-1-plen]][pname])
else:
- debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], -t)
+ debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
+ goto[statestack[-1]][pname])
#--! DEBUG
@@ -652,13 +676,14 @@ class LRParser:
def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
#--! parseopt-start
- lookahead = None # Current lookahead symbol
- lookaheadstack = [] # Stack of lookahead symbols
- actions = self.action # Local reference to action table (to avoid lookup on self.)
- goto = self.goto # Local reference to goto table (to avoid lookup on self.)
- prod = self.productions # Local reference to production list (to avoid lookup on self.)
- pslice = YaccProduction(None) # Production object passed to grammar rules
- errorcount = 0 # Used during error recovery
+ lookahead = None # Current lookahead symbol
+ lookaheadstack = [] # Stack of lookahead symbols
+ actions = self.action # Local reference to action table (to avoid lookup on self.)
+ goto = self.goto # Local reference to goto table (to avoid lookup on self.)
+ prod = self.productions # Local reference to production list (to avoid lookup on self.)
+ defaulted_states = self.defaulted_states # Local reference to defaulted states
+ pslice = YaccProduction(None) # Production object passed to grammar rules
+ errorcount = 0 # Used during error recovery
# If no lexer was given, we will try to use the lex module
@@ -706,19 +731,22 @@ class LRParser:
# the next token off of the lookaheadstack or from the lexer
- if not lookahead:
- if not lookaheadstack:
- lookahead = get_token() # Get the next token
- else:
- lookahead = lookaheadstack.pop()
+ if state not in defaulted_states:
if not lookahead:
- lookahead = YaccSymbol()
- lookahead.type = '$end'
-
+ if not lookaheadstack:
+ lookahead = get_token() # Get the next token
+ else:
+ lookahead = lookaheadstack.pop()
+ if not lookahead:
+ lookahead = YaccSymbol()
+ lookahead.type = '$end'
+
+ # Check the action table
+ ltype = lookahead.type
+ t = actions[state].get(ltype)
+ else:
+ t = defaulted_states[state]
- # Check the action table
- ltype = lookahead.type
- t = actions[state].get(ltype)
if t is not None:
if t > 0:
@@ -941,13 +969,14 @@ class LRParser:
def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
#--! parseopt-notrack-start
- lookahead = None # Current lookahead symbol
- lookaheadstack = [] # Stack of lookahead symbols
- actions = self.action # Local reference to action table (to avoid lookup on self.)
- goto = self.goto # Local reference to goto table (to avoid lookup on self.)
- prod = self.productions # Local reference to production list (to avoid lookup on self.)
- pslice = YaccProduction(None) # Production object passed to grammar rules
- errorcount = 0 # Used during error recovery
+ lookahead = None # Current lookahead symbol
+ lookaheadstack = [] # Stack of lookahead symbols
+ actions = self.action # Local reference to action table (to avoid lookup on self.)
+ goto = self.goto # Local reference to goto table (to avoid lookup on self.)
+ prod = self.productions # Local reference to production list (to avoid lookup on self.)
+ defaulted_states = self.defaulted_states # Local reference to defaulted states
+ pslice = YaccProduction(None) # Production object passed to grammar rules
+ errorcount = 0 # Used during error recovery
# If no lexer was given, we will try to use the lex module
@@ -995,19 +1024,22 @@ class LRParser:
# the next token off of the lookaheadstack or from the lexer
- if not lookahead:
- if not lookaheadstack:
- lookahead = get_token() # Get the next token
- else:
- lookahead = lookaheadstack.pop()
+ if state not in defaulted_states:
if not lookahead:
- lookahead = YaccSymbol()
- lookahead.type = '$end'
-
+ if not lookaheadstack:
+ lookahead = get_token() # Get the next token
+ else:
+ lookahead = lookaheadstack.pop()
+ if not lookahead:
+ lookahead = YaccSymbol()
+ lookahead.type = '$end'
+
+ # Check the action table
+ ltype = lookahead.type
+ t = actions[state].get(ltype)
+ else:
+ t = defaulted_states[state]
- # Check the action table
- ltype = lookahead.type
- t = actions[state].get(ltype)
if t is not None:
if t > 0:
@@ -1495,7 +1527,7 @@ class Grammar(object):
try:
c = eval(s)
if (len(c) > 1):
- raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
+ raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
(file, line, s, prodname))
if c not in self.Terminals:
self.Terminals[c] = []
@@ -1511,7 +1543,7 @@ class Grammar(object):
if syms[-1] == '%prec':
raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
if syms[-2] != '%prec':
- raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
+ raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
(file, line))
precname = syms[-1]
prodprec = self.Precedence.get(precname)
@@ -1962,6 +1994,7 @@ class LRTable(object):
for p in self.lr_productions:
p.bind(pdict)
+
# -----------------------------------------------------------------------------
# === LR Generator ===
#
@@ -2550,7 +2583,7 @@ class LRGeneratedTable(LRTable):
else:
chosenp, rejectp = oldp, pp
self.rr_conflicts.append((st, chosenp, rejectp))
- log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
+ log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
a, st_actionp[a].number, st_actionp[a])
else:
raise LALRError('Unknown conflict in state %d' % st)
@@ -2943,7 +2976,7 @@ class ParserReflect(object):
counthash[name] = linen
else:
filename = inspect.getsourcefile(module)
- self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
+ self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
filename, linen, name, prev)
# Get the start symbol
@@ -3089,7 +3122,7 @@ class ParserReflect(object):
self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
self.error = True
elif not func.__doc__:
- self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
+ self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
file, line, func.__name__)
else:
try: