summaryrefslogtreecommitdiff
path: root/scripts/find_error.py
diff options
context:
space:
mode:
authorGeorg Brandl <georg@python.org>2010-02-18 11:19:11 +0100
committerGeorg Brandl <georg@python.org>2010-02-18 11:19:11 +0100
commitefba3200cf5d5bd3a50239531eef40eb2ef55fb5 (patch)
tree36f3a1403439fad1ce48fc3bad31437ee47a5476 /scripts/find_error.py
parent629031ea2df80c98801636a447b46c6ea8ef98a3 (diff)
downloadpygments-efba3200cf5d5bd3a50239531eef40eb2ef55fb5.tar.gz
Make the find_error script more usable: make it possible to inspect the state stack for simple regex lexers, and make the output prettier.
Diffstat (limited to 'scripts/find_error.py')
-rwxr-xr-x[-rw-r--r--]scripts/find_error.py98
1 files changed, 90 insertions, 8 deletions
diff --git a/scripts/find_error.py b/scripts/find_error.py
index a3cdad4f..82914b50 100644..100755
--- a/scripts/find_error.py
+++ b/scripts/find_error.py
@@ -18,38 +18,120 @@ try:
import pygments
except ImportError:
# try parent path
- sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+
from pygments import highlight
+from pygments.lexer import RegexLexer
from pygments.lexers import get_lexer_for_filename, get_lexer_by_name
-from pygments.token import Error
+from pygments.token import Error, _TokenType
+
+
+class DebuggingRegexLexer(RegexLexer):
+ """Make the state stack, position and current match instance attributes."""
+
+ def get_tokens_unprocessed(self, text, stack=('root',)):
+ """
+ Split ``text`` into (tokentype, text) pairs.
+
+        ``stack`` is the initial stack (default: ``['root']``)
+ """
+ self.pos = 0
+ tokendefs = self._tokens
+ self.statestack = list(stack)
+ statetokens = tokendefs[self.statestack[-1]]
+ while 1:
+ for rexmatch, action, new_state in statetokens:
+ self.m = rexmatch(text, self.pos)
+ if self.m:
+ if type(action) is _TokenType:
+ yield self.pos, action, self.m.group()
+ else:
+ for item in action(self, self.m):
+ yield item
+ self.pos = self.m.end()
+ if new_state is not None:
+ # state transition
+ if isinstance(new_state, tuple):
+ for state in new_state:
+ if state == '#pop':
+ self.statestack.pop()
+ elif state == '#push':
+ self.statestack.append(self.statestack[-1])
+ else:
+ self.statestack.append(state)
+ elif isinstance(new_state, int):
+ # pop
+ del self.statestack[new_state:]
+ elif new_state == '#push':
+ self.statestack.append(self.statestack[-1])
+ else:
+ assert False, 'wrong state def: %r' % new_state
+ statetokens = tokendefs[self.statestack[-1]]
+ break
+ else:
+ try:
+ if text[self.pos] == '\n':
+ # at EOL, reset state to 'root'
+ self.pos += 1
+ self.statestack = ['root']
+ statetokens = tokendefs['root']
+ yield self.pos, Text, u'\n'
+ continue
+ yield self.pos, Error, text[self.pos]
+ self.pos += 1
+ except IndexError:
+ break
+
def main(fn):
try:
- lx = get_lexer_for_filename(fn)
+ lx = get_lexer_for_filename(os.path.basename(fn))
except ValueError:
try:
- name, rest = fn.split("_", 1)
+ name, rest = fn.split('_', 1)
lx = get_lexer_by_name(name)
except ValueError:
raise AssertionError('no lexer found for file %r' % fn)
+ debug_lexer = False
+ # does not work for e.g. ExtendedRegexLexers
+ if lx.__class__.__bases__ == (RegexLexer,):
+ lx.__class__.__bases__ = (DebuggingRegexLexer,)
+ debug_lexer = True
+ lno = 1
text = file(fn, 'U').read()
text = text.strip('\n') + '\n'
text = text.decode('latin1')
ntext = []
+ states = []
for type, val in lx.get_tokens(text):
+ lno += val.count('\n')
if type == Error:
- print "Error parsing", fn
- print "\n".join([' ' + repr(x) for x in ntext[-num:]])
- print `val` + "<<<"
+ print 'Error parsing', fn, 'on line', lno
+ print 'Previous tokens' + (debug_lexer and ' and states' or '') + ':'
+ for i in range(len(ntext) - num, len(ntext)):
+ reprs = map(repr, ntext[i])
+ print ' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0],
+ if debug_lexer:
+ print ' ' + ' ' * (29-len(reprs[0])) + repr(states[i]),
+ print
+ print 'Error token:'
+ l = len(repr(val))
+ print ' ' + repr(val),
+ if debug_lexer:
+ print ' ' * (60-l) + repr(lx.statestack),
+ print
+ print
return 1
ntext.append((type,val))
+ if debug_lexer:
+ states.append(lx.statestack[:])
return 0
num = 10
-if __name__ == "__main__":
+if __name__ == '__main__':
if sys.argv[1][:2] == '-n':
num = int(sys.argv[1][2:])
del sys.argv[1]