diff options
author | Giedrius Dubinskas <d.giedrius+bitbucket@gmail.com> | 2015-10-16 21:27:23 +0300 |
---|---|---|
committer | Giedrius Dubinskas <d.giedrius+bitbucket@gmail.com> | 2015-10-16 21:27:23 +0300 |
commit | 2a74e18cb1b8b7cc2848e236a636e49be46ed165 (patch) | |
tree | 344dbfeaea3065552ee69db2d863c30dfccfce17 /pygments/formatters | |
parent | 3462d42d897351070e3003b7d686a61a5ec5ad3d (diff) | |
download | pygments-2a74e18cb1b8b7cc2848e236a636e49be46ed165.tar.gz |
Optimized HtmlFormatter to run ~10x faster for long lines
Changed string concatenation to list extend/join to reduce memory allocations.
How to reproduce:
Test data:
$ echo '{' $(for i in {1..10000}; do printf '"key%d":"value %d", ' $i $i; done) '}' > /tmp/one-line.json
Before:
$ time ./pygmentize -O encoding=utf-8 -O stripnl=False -f html -l 'json' > /dev/null < /tmp/one-line.json
real 0m5.597s
user 0m4.400s
sys 0m1.158s
After:
$ time ./pygmentize -O encoding=utf-8 -O stripnl=False -f html -l 'json' > /dev/null < /tmp/one-line.json
real 0m0.347s
user 0m0.309s
sys 0m0.029s
Diffstat (limited to 'pygments/formatters')
-rw-r--r-- | pygments/formatters/html.py | 23 |
1 file changed, 12 insertions, 11 deletions
```diff
diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py
index b22be54f..b03a4bd5 100644
--- a/pygments/formatters/html.py
+++ b/pygments/formatters/html.py
@@ -711,7 +711,7 @@ class HtmlFormatter(Formatter):
         tagsfile = self.tagsfile
 
         lspan = ''
-        line = ''
+        line = []
         for ttype, value in tokensource:
             if nocls:
                 cclass = getcls(ttype)
@@ -742,30 +742,31 @@ class HtmlFormatter(Formatter):
             for part in parts[:-1]:
                 if line:
                     if lspan != cspan:
-                        line += (lspan and '</span>') + cspan + part + \
-                            (cspan and '</span>') + lsep
+                        line.extend(((lspan and '</span>'), cspan, part,
+                                     (cspan and '</span>'), lsep))
                     else:  # both are the same
-                        line += part + (lspan and '</span>') + lsep
-                    yield 1, line
-                    line = ''
+                        line.extend((part, (lspan and '</span>'), lsep))
+                    yield 1, ''.join(line)
+                    line = []
                 elif part:
-                    yield 1, cspan + part + (cspan and '</span>') + lsep
+                    yield 1, ''.join((cspan, part, (cspan and '</span>'), lsep))
                 else:
                     yield 1, lsep
             # for the last line
             if line and parts[-1]:
                 if lspan != cspan:
-                    line += (lspan and '</span>') + cspan + parts[-1]
+                    line.extend(((lspan and '</span>'), cspan, parts[-1]))
                     lspan = cspan
                 else:
-                    line += parts[-1]
+                    line.append(parts[-1])
             elif parts[-1]:
-                line = cspan + parts[-1]
+                line = [cspan, parts[-1]]
                 lspan = cspan
             # else we neither have to open a new span nor set lspan
 
         if line:
-            yield 1, line + (lspan and '</span>') + lsep
+            line.extend(((lspan and '</span>'), lsep))
+            yield 1, ''.join(line)
 
     def _lookup_ctag(self, token):
         entry = ctags.TagEntry()
```