Add a profiling lexer subclass.

author: Georg Brandl <georg@python.org> 2014-09-20 11:10:38 +0200
committer: Georg Brandl <georg@python.org> 2014-09-20 11:10:38 +0200
commit: 208aca963e68029156bf208bfa82bbaeb1558505 (patch)
tree: 6fd5bed7470ea76404b618c1c7e2d0ebc07b5e36 /pygments/lexer.py
parent: 58b53e304650a69a9ecb2115f67eb59005773949 (diff)
download: pygments-208aca963e68029156bf208bfa82bbaeb1558505.tar.gz
1 file changed, 59 insertions, 2 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 46f9cb37..0531dcde 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -9,7 +9,11 @@
     :license: BSD, see LICENSE for details.
 """
 
+from __future__ import print_function
+
 import re
+import sys
+import time
 import itertools
 
 from pygments.filter import apply_filters, Filter
@@ -417,7 +421,7 @@ class RegexLexerMeta(LexerMeta):
     self.tokens on the first instantiation.
     """
 
-    def _process_regex(cls, regex, rflags):
+    def _process_regex(cls, regex, rflags, state):
         """Preprocess the regular expression component of a token definition."""
         if isinstance(regex, words):
             return re.compile(regex_opt(regex.words, prefix=regex.prefix,
@@ -491,7 +495,7 @@ class RegexLexerMeta(LexerMeta):
             assert type(tdef) is tuple, "wrong rule def %r" % tdef
 
             try:
-                rex = cls._process_regex(tdef[0], rflags)
+                rex = cls._process_regex(tdef[0], rflags, state)
             except Exception as err:
                 raise ValueError("uncompilable regex %r in state %r of %r: %s" %
                                  (tdef[0], state, cls, err))
@@ -804,3 +808,56 @@ def do_insertions(insertions, tokens):
         except StopIteration:
             insleft = False
             break  # not strictly necessary
+
+
+class ProfilingRegexLexerMeta(RegexLexerMeta):
+    """Metaclass for ProfilingRegexLexer, collects regex timing info."""
+
+    def _process_regex(cls, regex, rflags, state):  # returns a match callable for one rule of `state`
+        if isinstance(regex, words):  # words instance: build the pattern with regex_opt
+            rex = regex_opt(regex.words, prefix=regex.prefix,
+                            suffix=regex.suffix)
+        else:
+            rex = regex  # anything else is passed to re.compile unchanged
+        compiled = re.compile(rex, rflags)
+
+        def match_func(text, pos, endpos=sys.maxsize):  # stands in for compiled.match; counts and times each call
+            info = cls._prof_data[-1].setdefault((state, rex), [0, 0.0])  # [ncalls, seconds] in the newest dict
+            t0 = time.time()
+            res = compiled.match(text, pos, endpos)
+            t1 = time.time()
+            info[0] += 1
+            info[1] += t1 - t0  # wall-clock seconds as reported by time.time()
+            return res
+        return match_func
+
+
+@add_metaclass(ProfilingRegexLexerMeta)
+class ProfilingRegexLexer(RegexLexer):
+    """Drop-in replacement for RegexLexer that does profiling of its regexes."""
+
+    _prof_data = []  # class-level list of {(state, regex): [ncalls, seconds]} dicts, pushed/popped below
+    _prof_sort_index = 4  # column of the result row used for sorting; defaults to time per call
+
+    def get_tokens_unprocessed(self, text, stack=('root',)):  # yields the RegexLexer tokens, then prints a report
+        # this needs to be a stack, since using(this) will produce nested calls
+        self.__class__._prof_data.append({})  # fresh record for this call
+        for tok in RegexLexer.get_tokens_unprocessed(self, text, stack):
+            yield tok
+        rawdata = self.__class__._prof_data.pop()  # NOTE(review): only reached if the generator is fully consumed
+        data = sorted(((s, repr(r).strip('u\'').replace('\\\\', '\\')[:65],  # NOTE(review): strip() also eats a regex's own leading/trailing u or '
+                        n, 1000 * t, 1000 * t / n)  # seconds -> milliseconds: total and per call
+                       for ((s, r), (n, t)) in rawdata.items()),  # row: (state, regex, ncalls, tottime, percall)
+                      key=lambda x: x[self._prof_sort_index],
+                      reverse=True)  # largest value of the sort column first
+        sum_total = sum(x[3] for x in data)  # sum of the per-regex total times, in ms
+
+        print()  # the report is written with print()
+        print('Profiling result for %s lexing %d chars in %.3f ms' %
+              (self.__class__.__name__, len(text), sum_total))
+        print('=' * 110)
+        print('%-20s %-64s ncalls  tottime  percall' % ('state', 'regex'))
+        print('-' * 110)
+        for d in data:  # one line per (state, regex) pair that was tried
+            print('%-20s %-65s %5d %8.4f %8.4f' % d)
+        print('=' * 110)
author	Georg Brandl <georg@python.org>	2014-09-20 11:10:38 +0200
committer	Georg Brandl <georg@python.org>	2014-09-20 11:10:38 +0200
commit	208aca963e68029156bf208bfa82bbaeb1558505 (patch)
tree	6fd5bed7470ea76404b618c1c7e2d0ebc07b5e36 /pygments/lexer.py
parent	58b53e304650a69a9ecb2115f67eb59005773949 (diff)
download	pygments-208aca963e68029156bf208bfa82bbaeb1558505.tar.gz