summary | refs | log | tree | commit | diff
path: root/ply/lex.py
diff options
context:
space:
mode:
author: Gabriel Araujo <gabrielaraujof@outlook.com.br> 2015-01-22 12:54:39 -0300
committer: Gabriel Araujo <gabrielaraujof@outlook.com.br> 2015-01-22 12:54:39 -0300
commit: bdd5da3e7d19e5bf1928dfebfd372b958b207ee9 (patch)
tree: 5b828e09f29cc68a3c574a5cfd581b943acba154 /ply/lex.py
parent: c423682c8a45a631035a2a8e46c976768a0cecb0 (diff)
download: ply-bdd5da3e7d19e5bf1928dfebfd372b958b207ee9.tar.gz
Added the option of defining EOF rules
One can define rules which will be triggered when the end of file (or input stream) is reached.
Diffstat (limited to 'ply/lex.py')
-rw-r--r-- ply/lex.py | 24
1 files changed, 24 insertions, 0 deletions
diff --git a/ply/lex.py b/ply/lex.py
index 8f05537..b384b7f 100644
--- a/ply/lex.py
+++ b/ply/lex.py
@@ -126,11 +126,13 @@ class Lexer:
self.lexstateinfo = None # State information
self.lexstateignore = {} # Dictionary of ignored characters for each state
self.lexstateerrorf = {} # Dictionary of error functions for each state
+ self.lexstateeoff = {} # Dictionary of eof functions for each state
self.lexreflags = 0 # Optional re compile flags
self.lexdata = None # Actual input data (as a string)
self.lexpos = 0 # Current position in input text
self.lexlen = 0 # Length of the input text
self.lexerrorf = None # Error rule (if any)
+ self.lexeoff = None # EOF rule (if any)
self.lextokens = None # List of valid tokens
self.lexignore = "" # Ignored characters
self.lexliterals = "" # Literal characters that can be passed through
@@ -267,6 +269,7 @@ class Lexer:
self.lexretext = self.lexstateretext[state]
self.lexignore = self.lexstateignore.get(state,"")
self.lexerrorf = self.lexstateerrorf.get(state,None)
+ self.lexeoff = self.lexstateeoff.get(state, None)
self.lexstate = state
# ------------------------------------------------------------
@@ -392,6 +395,17 @@ class Lexer:
self.lexpos = lexpos
raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])
+ if self.lexeoff:
+ tok = LexToken()
+ tok.type = "eof"
+ tok.value = ''
+ tok.lineno = self.lineno
+ tok.lexpos = lexpos
+ tok.lexer = self
+ self.lexpos = lexpos
+ newtok = self.lexeoff(tok)
+ return newtok
+
self.lexpos = lexpos + 1
if self.lexdata is None:
raise RuntimeError("No input string given with input()")
@@ -670,6 +684,7 @@ class LexerReflect(object):
self.strsym = { } # Symbols defined as strings
self.ignore = { } # Ignore strings by state
self.errorf = { } # Error functions by state
+ self.eoff = { } # EOF functions by state
for s in self.stateinfo:
self.funcsym[s] = []
@@ -689,6 +704,9 @@ class LexerReflect(object):
if tokname == 'error':
for s in states:
self.errorf[s] = t
+ elif tokname == 'eof':
+ for s in states:
+ self.eoff[s] = t
elif tokname == 'ignore':
line = func_code(t).co_firstlineno
file = func_code(t).co_filename
@@ -986,6 +1004,12 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now
if not lexobj.lexerrorf:
errorlog.warning("No t_error rule is defined")
+ # Set up eof functions
+ lexobj.lexstateeoff = linfo.eoff
+ lexobj.lexeoff = linfo.eoff.get("INITIAL", None)
+ if not lexobj.lexeoff:
+ errorlog.warning("No t_eof rule is defined")
+
# Check state information for ignore and error rules
for s,stype in stateinfo.items():
if stype == 'exclusive':