| author    | Georg Brandl <georg@python.org>                             | 2014-09-20 11:10:38 +0200 |
|-----------|-------------------------------------------------------------|---------------------------|
| committer | Georg Brandl <georg@python.org>                             | 2014-09-20 11:10:38 +0200 |
| commit    | 208aca963e68029156bf208bfa82bbaeb1558505 (patch)            |                           |
| tree      | 6fd5bed7470ea76404b618c1c7e2d0ebc07b5e36 /pygments/lexer.py |                           |
| parent    | 58b53e304650a69a9ecb2115f67eb59005773949 (diff)             |                           |
| download  | pygments-208aca963e68029156bf208bfa82bbaeb1558505.tar.gz    |                           |
Add a profiling lexer subclass.
Diffstat (limited to 'pygments/lexer.py')
| -rw-r--r-- | pygments/lexer.py | 61 |
|------------|-------------------|----|

1 file changed, 59 insertions, 2 deletions
```diff
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 46f9cb37..0531dcde 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -9,7 +9,11 @@
     :license: BSD, see LICENSE for details.
 """
 
+from __future__ import print_function
+
 import re
+import sys
+import time
 import itertools
 
 from pygments.filter import apply_filters, Filter
@@ -417,7 +421,7 @@ class RegexLexerMeta(LexerMeta):
     self.tokens on the first instantiation.
     """
 
-    def _process_regex(cls, regex, rflags):
+    def _process_regex(cls, regex, rflags, state):
         """Preprocess the regular expression component of a token definition."""
         if isinstance(regex, words):
             return re.compile(regex_opt(regex.words, prefix=regex.prefix,
@@ -491,7 +495,7 @@ class RegexLexerMeta(LexerMeta):
             assert type(tdef) is tuple, "wrong rule def %r" % tdef
 
             try:
-                rex = cls._process_regex(tdef[0], rflags)
+                rex = cls._process_regex(tdef[0], rflags, state)
             except Exception as err:
                 raise ValueError("uncompilable regex %r in state %r of %r: %s" %
                                  (tdef[0], state, cls, err))
@@ -804,3 +808,56 @@ def do_insertions(insertions, tokens):
         except StopIteration:
             insleft = False
             break  # not strictly necessary
+
+
+class ProfilingRegexLexerMeta(RegexLexerMeta):
+    """Metaclass for ProfilingRegexLexer, collects regex timing info."""
+
+    def _process_regex(cls, regex, rflags, state):
+        if isinstance(regex, words):
+            rex = regex_opt(regex.words, prefix=regex.prefix,
+                            suffix=regex.suffix)
+        else:
+            rex = regex
+        compiled = re.compile(rex, rflags)
+
+        def match_func(text, pos, endpos=sys.maxsize):
+            info = cls._prof_data[-1].setdefault((state, rex), [0, 0.0])
+            t0 = time.time()
+            res = compiled.match(text, pos, endpos)
+            t1 = time.time()
+            info[0] += 1
+            info[1] += t1 - t0
+            return res
+        return match_func
+
+
+@add_metaclass(ProfilingRegexLexerMeta)
+class ProfilingRegexLexer(RegexLexer):
+    """Drop-in replacement for RegexLexer that does profiling of its regexes."""
+
+    _prof_data = []
+    _prof_sort_index = 4  # defaults to time per call
+
+    def get_tokens_unprocessed(self, text, stack=('root',)):
+        # this needs to be a stack, since using(this) will produce nested calls
+        self.__class__._prof_data.append({})
+        for tok in RegexLexer.get_tokens_unprocessed(self, text, stack):
+            yield tok
+        rawdata = self.__class__._prof_data.pop()
+        data = sorted(((s, repr(r).strip('u\'').replace('\\\\', '\\')[:65],
+                        n, 1000 * t, 1000 * t / n)
+                       for ((s, r), (n, t)) in rawdata.items()),
+                      key=lambda x: x[self._prof_sort_index],
+                      reverse=True)
+        sum_total = sum(x[3] for x in data)
+
+        print()
+        print('Profiling result for %s lexing %d chars in %.3f ms' %
+              (self.__class__.__name__, len(text), sum_total))
+        print('=' * 110)
+        print('%-20s %-64s ncalls  tottime  percall' % ('state', 'regex'))
+        print('-' * 110)
+        for d in data:
+            print('%-20s %-65s %5d %8.4f %8.4f' % d)
+        print('=' * 110)
```
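Since the commit message is terse, here is a minimal usage sketch, not part of the commit itself: to profile a concrete lexer's regexes, mix `ProfilingRegexLexer` in ahead of the target lexer, so that `ProfilingRegexLexerMeta` compiles the target's token definitions into the timing `match_func` wrappers. `ProfilingPythonLexer` and the sample input below are made up for illustration; any plain `RegexLexer` subclass should work the same way.

```python
# Minimal usage sketch (assumed names: ProfilingPythonLexer and the sample
# input are illustrative; PythonLexer is an arbitrary RegexLexer subclass).
from pygments.lexer import ProfilingRegexLexer
from pygments.lexers import PythonLexer


class ProfilingPythonLexer(ProfilingRegexLexer, PythonLexer):
    """ProfilingRegexLexer comes first in the MRO so that its metaclass
    compiles PythonLexer's token definitions with timing wrappers and its
    get_tokens_unprocessed pushes/pops the profiling data frame."""


code = "def greet(name):\n    return 'Hello, %s!' % name\n"

# The report is printed to stdout once the token generator is exhausted.
for token in ProfilingPythonLexer().get_tokens(code):
    pass
```

The per-run data lives on a stack (`_prof_data`) so that nested lexing via `using(this)` collects into its own frame and gets its own report; the output columns (`ncalls`, `tottime`, `percall`) are sorted by `_prof_sort_index`, which defaults to column 4, time per call.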