Optimized HtmlFormatter to run ~10x faster for long lines

Changed string concatenation to list extend/join to reduce memory allocations.

How to reproduce:

Test data:

    $ echo '{' $(for i in {1..10000}; do printf '"key%d":"value %d", ' $i $i; done) '}' > /tmp/one-line.json

Before:

    $ time ./pygmentize -O encoding=utf-8 -O stripnl=False -f html -l 'json' > /dev/null < /tmp/one-line.json
    real 0m5.597s
    user 0m4.400s
    sys 0m1.158s

After:

    $ time ./pygmentize -O encoding=utf-8 -O stripnl=False -f html -l 'json' > /dev/null < /tmp/one-line.json
    real 0m0.347s
    user 0m0.309s
    sys 0m0.029s
author: Giedrius Dubinskas <d.giedrius+bitbucket@gmail.com> 2015-10-16 21:27:23 +0300
committer: Giedrius Dubinskas <d.giedrius+bitbucket@gmail.com> 2015-10-16 21:27:23 +0300
commit: 2a74e18cb1b8b7cc2848e236a636e49be46ed165 (patch)
tree: 344dbfeaea3065552ee69db2d863c30dfccfce17 /pygments/formatters
parent: 3462d42d897351070e3003b7d686a61a5ec5ad3d (diff)
download: pygments-2a74e18cb1b8b7cc2848e236a636e49be46ed165.tar.gz
1 files changed, 12 insertions, 11 deletions
diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py
index b22be54f..b03a4bd5 100644
--- a/pygments/formatters/html.py
+++ b/pygments/formatters/html.py
@@ -711,7 +711,7 @@ class HtmlFormatter(Formatter):
         tagsfile = self.tagsfile
 
         lspan = ''
-        line = ''
+        line = []
         for ttype, value in tokensource:
             if nocls:
                 cclass = getcls(ttype)
@@ -742,30 +742,31 @@ class HtmlFormatter(Formatter):
             for part in parts[:-1]:
                 if line:
                     if lspan != cspan:
-                        line += (lspan and '</span>') + cspan + part + \
-                                (cspan and '</span>') + lsep
+                        line.extend(((lspan and '</span>'), cspan, part,
+                                (cspan and '</span>'), lsep))
                     else: # both are the same
-                        line += part + (lspan and '</span>') + lsep
-                    yield 1, line
-                    line = ''
+                        line.extend((part, (lspan and '</span>'), lsep))
+                    yield 1, ''.join(line)
+                    line = []
                 elif part:
-                    yield 1, cspan + part + (cspan and '</span>') + lsep
+                    yield 1, ''.join((cspan, part, (cspan and '</span>'), lsep))
                 else:
                     yield 1, lsep
             # for the last line
             if line and parts[-1]:
                 if lspan != cspan:
-                    line += (lspan and '</span>') + cspan + parts[-1]
+                    line.extend(((lspan and '</span>'), cspan, parts[-1]))
                     lspan = cspan
                 else:
-                    line += parts[-1]
+                    line.append(parts[-1])
             elif parts[-1]:
-                line = cspan + parts[-1]
+                line = [cspan, parts[-1]]
                 lspan = cspan
             # else we neither have to open a new span nor set lspan
 
         if line:
-            yield 1, line + (lspan and '</span>') + lsep
+            line.extend(((lspan and '</span>'), lsep))
+            yield 1, ''.join(line)
 
     def _lookup_ctag(self, token):
         entry = ctags.TagEntry()
author	Giedrius Dubinskas <d.giedrius+bitbucket@gmail.com>	2015-10-16 21:27:23 +0300
committer	Giedrius Dubinskas <d.giedrius+bitbucket@gmail.com>	2015-10-16 21:27:23 +0300
commit	2a74e18cb1b8b7cc2848e236a636e49be46ed165 (patch)
tree	344dbfeaea3065552ee69db2d863c30dfccfce17 /pygments/formatters
parent	3462d42d897351070e3003b7d686a61a5ec5ad3d (diff)
download	pygments-2a74e18cb1b8b7cc2848e236a636e49be46ed165.tar.gz