diff options
author | Gabriel Araujo <gabrielaraujof@outlook.com.br> | 2015-01-22 12:54:39 -0300 |
---|---|---|
committer | Gabriel Araujo <gabrielaraujof@outlook.com.br> | 2015-01-22 12:54:39 -0300 |
commit | bdd5da3e7d19e5bf1928dfebfd372b958b207ee9 (patch) | |
tree | 5b828e09f29cc68a3c574a5cfd581b943acba154 /ply/lex.py | |
parent | c423682c8a45a631035a2a8e46c976768a0cecb0 (diff) | |
download | ply-bdd5da3e7d19e5bf1928dfebfd372b958b207ee9.tar.gz |
Added the option of defining EOF rules
One can define rules which will be triggered when the end of file (or
input stream) is reached.
Diffstat (limited to 'ply/lex.py')
-rw-r--r-- | ply/lex.py | 24 |
1 file changed, 24 insertions, 0 deletions
```diff
@@ -126,11 +126,13 @@ class Lexer:
         self.lexstateinfo = None      # State information
         self.lexstateignore = {}      # Dictionary of ignored characters for each state
         self.lexstateerrorf = {}      # Dictionary of error functions for each state
+        self.lexstateeoff = {}        # Dictionary of eof functions for each state
         self.lexreflags = 0           # Optional re compile flags
         self.lexdata = None           # Actual input data (as a string)
         self.lexpos = 0               # Current position in input text
         self.lexlen = 0               # Length of the input text
         self.lexerrorf = None         # Error rule (if any)
+        self.lexeoff = None           # EOF rule (if any)
         self.lextokens = None         # List of valid tokens
         self.lexignore = ""           # Ignored characters
         self.lexliterals = ""         # Literal characters that can be passed through
@@ -267,6 +269,7 @@ class Lexer:
         self.lexretext = self.lexstateretext[state]
         self.lexignore = self.lexstateignore.get(state,"")
         self.lexerrorf = self.lexstateerrorf.get(state,None)
+        self.lexeoff = self.lexstateeoff.get(state, None)
         self.lexstate = state
 
     # ------------------------------------------------------------
@@ -392,6 +395,17 @@ class Lexer:
                 self.lexpos = lexpos
                 raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])
 
+        if self.lexeoff:
+            tok = LexToken()
+            tok.type = "eof"
+            tok.value = ''
+            tok.lineno = self.lineno
+            tok.lexpos = lexpos
+            tok.lexer = self
+            self.lexpos = lexpos
+            newtok = self.lexeoff(tok)
+            return newtok
+
         self.lexpos = lexpos + 1
         if self.lexdata is None:
             raise RuntimeError("No input string given with input()")
@@ -670,6 +684,7 @@ class LexerReflect(object):
         self.strsym = { }        # Symbols defined as strings
         self.ignore = { }        # Ignore strings by state
         self.errorf = { }        # Error functions by state
+        self.eoff = { }          # EOF functions by state
 
         for s in self.stateinfo:
             self.funcsym[s] = []
@@ -689,6 +704,9 @@ class LexerReflect(object):
                 if tokname == 'error':
                     for s in states:
                         self.errorf[s] = t
+                elif tokname == 'eof':
+                    for s in states:
+                        self.eoff[s] = t
                 elif tokname == 'ignore':
                     line = func_code(t).co_firstlineno
                     file = func_code(t).co_filename
@@ -986,6 +1004,12 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now
     if not lexobj.lexerrorf:
         errorlog.warning("No t_error rule is defined")
 
+    # Set up eof functions
+    lexobj.lexstateeoff = linfo.eoff
+    lexobj.lexeoff = linfo.eoff.get("INITIAL", None)
+    if not lexobj.lexeoff:
+        errorlog.warning("No t_eof rule is defined")
+
     # Check state information for ignore and error rules
     for s,stype in stateinfo.items():
         if stype == 'exclusive':
```