summaryrefslogtreecommitdiff
path: root/pygments/lexer.py
diff options
context:
space:
mode:
author: Georg Brandl <georg@python.org> 2014-09-20 11:10:38 +0200
committer: Georg Brandl <georg@python.org> 2014-09-20 11:10:38 +0200
commit: 208aca963e68029156bf208bfa82bbaeb1558505 (patch)
tree: 6fd5bed7470ea76404b618c1c7e2d0ebc07b5e36 /pygments/lexer.py
parent: 58b53e304650a69a9ecb2115f67eb59005773949 (diff)
download: pygments-208aca963e68029156bf208bfa82bbaeb1558505.tar.gz
Add a profiling lexer subclass.
Diffstat (limited to 'pygments/lexer.py')
-rw-r--r--pygments/lexer.py61
1 files changed, 59 insertions, 2 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 46f9cb37..0531dcde 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -9,7 +9,11 @@
:license: BSD, see LICENSE for details.
"""
+from __future__ import print_function
+
import re
+import sys
+import time
import itertools
from pygments.filter import apply_filters, Filter
@@ -417,7 +421,7 @@ class RegexLexerMeta(LexerMeta):
self.tokens on the first instantiation.
"""
- def _process_regex(cls, regex, rflags):
+ def _process_regex(cls, regex, rflags, state):
"""Preprocess the regular expression component of a token definition."""
if isinstance(regex, words):
return re.compile(regex_opt(regex.words, prefix=regex.prefix,
@@ -491,7 +495,7 @@ class RegexLexerMeta(LexerMeta):
assert type(tdef) is tuple, "wrong rule def %r" % tdef
try:
- rex = cls._process_regex(tdef[0], rflags)
+ rex = cls._process_regex(tdef[0], rflags, state)
except Exception as err:
raise ValueError("uncompilable regex %r in state %r of %r: %s" %
(tdef[0], state, cls, err))
@@ -804,3 +808,56 @@ def do_insertions(insertions, tokens):
except StopIteration:
insleft = False
break # not strictly necessary
+
+
+class ProfilingRegexLexerMeta(RegexLexerMeta):
+    """Metaclass for ProfilingRegexLexer that records per-regex timing data.
+
+    Each rule regex is replaced by a wrapper function that counts its calls
+    and accumulates the wall-clock time spent matching.
+    """
+
+    def _process_regex(cls, regex, rflags, state):
+        """Return a timing wrapper around the compiled form of *regex*.
+
+        The wrapper updates the ``[ncalls, total_seconds]`` entry stored
+        under ``(state, pattern)`` in the last dict of ``cls._prof_data``.
+        """
+        pattern = regex
+        if isinstance(regex, words):
+            pattern = regex_opt(regex.words, prefix=regex.prefix,
+                                suffix=regex.suffix)
+        do_match = re.compile(pattern, rflags).match
+
+        def match_func(text, pos, endpos=sys.maxsize):
+            # looked up on every call: the last element of _prof_data is
+            # the record of whichever lexing run is currently active
+            stats = cls._prof_data[-1].setdefault((state, pattern), [0, 0.0])
+            started = time.time()
+            result = do_match(text, pos, endpos)
+            finished = time.time()
+            stats[0] += 1
+            stats[1] += finished - started
+            return result
+        return match_func
+
+
+@add_metaclass(ProfilingRegexLexerMeta)
+class ProfilingRegexLexer(RegexLexer):
+    """Drop-in replacement for RegexLexer that does profiling of its regexes.
+
+    After every complete ``get_tokens_unprocessed`` run, a table with one row
+    per ``(state, regex)`` pair is printed to standard output.
+    """
+
+    # stack of {(state, regex): [ncalls, total_seconds]} dicts; one dict is
+    # pushed for each get_tokens_unprocessed() call that is in progress
+    _prof_data = []
+    _prof_sort_index = 4  # defaults to time per call
+
+    @staticmethod
+    def _prof_regex_label(rex):
+        """Return *rex* as display text of at most 65 characters."""
+        label = repr(rex)
+        # Remove the unicode marker and the enclosing quotes by position.
+        # str.strip('u\'') treats its argument as a set of characters and
+        # would also eat "u"s that belong to the pattern ('menu' -> men).
+        if label[:1] in ('u', 'U'):
+            label = label[1:]
+        if len(label) >= 2 and label[0] == label[-1] and label[0] in '\'"':
+            label = label[1:-1]
+        # repr() doubled every backslash; undo that for readability
+        return label.replace('\\\\', '\\')[:65]
+
+    def get_tokens_unprocessed(self, text, stack=('root',)):
+        """Yield the tokens of RegexLexer, then print the profiling table.
+
+        The table is only printed when lexing runs to completion; if it
+        raises or the generator is closed early, the profiling record is
+        discarded and nothing is printed.
+        """
+        # this needs to be a stack, since using(this) will produce nested calls
+        prof_stack = self.__class__._prof_data
+        prof_stack.append({})
+        try:
+            for tok in RegexLexer.get_tokens_unprocessed(self, text, stack):
+                yield tok
+        finally:
+            # always unwind, otherwise an aborted run would leave a stale
+            # record on the class-level stack
+            rawdata = prof_stack.pop()
+        data = sorted(((s, self._prof_regex_label(r),
+                        n, 1000 * t,
+                        # n is 0 if the only match call never returned
+                        (1000 * t / n) if n else 0.0)
+                       for ((s, r), (n, t)) in rawdata.items()),
+                      key=lambda x: x[self._prof_sort_index],
+                      reverse=True)
+        sum_total = sum(x[3] for x in data)
+
+        print()
+        print('Profiling result for %s lexing %d chars in %.3f ms' %
+              (self.__class__.__name__, len(text), sum_total))
+        print('=' * 110)
+        print('%-20s %-64s ncalls tottime percall' % ('state', 'regex'))
+        print('-' * 110)
+        for d in data:
+            print('%-20s %-65s %5d %8.4f %8.4f' % d)
+        print('=' * 110)