summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgbrandl <devnull@localhost>2007-01-29 17:05:44 +0100
committergbrandl <devnull@localhost>2007-01-29 17:05:44 +0100
commit536e7dc0adab42a76dcb48ed07e3c317e4a95e10 (patch)
tree6d78d860742f436b5953aae96a48bf2c2bcc1be0
parente7ab63f9c55fe71d55386779127704166e313917 (diff)
downloadpygments-536e7dc0adab42a76dcb48ed07e3c317e4a95e10.tar.gz
[svn] HTML formatter overhaul.
-rw-r--r--CHANGES4
-rw-r--r--TODO3
-rw-r--r--pygments/formatters/html.py240
-rw-r--r--tests/test_html_formatter.py22
4 files changed, 155 insertions, 114 deletions
diff --git a/CHANGES b/CHANGES
index de28c70b..c8175a13 100644
--- a/CHANGES
+++ b/CHANGES
@@ -5,6 +5,10 @@ Version 0.7 (in development)
----------------------------
(codename to be selected, released Feb XX, 2007)
+- Made the HTML formatter more flexible and easily subclassable, so that
+ custom wrappers (e.g. alternate line number markup) are simple to
+ implement.
+
- Added an `outencoding` option to all formatters, making it possible
to override the `encoding` (which is used by lexers and formatters) when
using the command line interface. Also, if using the terminal formatter
diff --git a/TODO b/TODO
index ad64ca1a..b67950cf 100644
--- a/TODO
+++ b/TODO
@@ -39,7 +39,4 @@ for 0.7
for 0.8 -- 1.0
--------------
-- more setuptools entrypoints (html formatter etc.)
- see paste script's Commands
-
- add folding? would require more language-aware parsers...
diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py
index f44053c7..368d65a2 100644
--- a/pygments/formatters/html.py
+++ b/pygments/formatters/html.py
@@ -5,13 +5,11 @@
Formatter for HTML output.
- TODO: this is a bit of a mess and not subclassable ATM.
-
:copyright: 2006 by Georg Brandl, Armin Ronacher.
:license: BSD, see LICENSE for more details.
"""
import sys, os
-import cStringIO
+import StringIO
from pygments.formatter import Formatter
from pygments.token import Token, Text, STANDARD_TYPES
@@ -54,7 +52,7 @@ def _get_ttype_class(ttype):
return fname + aname
-DOC_TEMPLATE = '''\
+DOC_HEADER = '''\
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
@@ -70,14 +68,10 @@ td.linenos { background-color: #f0f0f0; padding-right: 10px; }
<body>
<h2>%(title)s</h2>
-%(code)s
-
-</body>
-</html>
'''
-DOC_TEMPLATE_EXTERNALCSS = '''\
+DOC_HEADER_EXTERNALCSS = '''\
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
@@ -90,8 +84,9 @@ DOC_TEMPLATE_EXTERNALCSS = '''\
<body>
<h2>%(title)s</h2>
-%(code)s
+'''
+DOC_FOOTER = '''\
</body>
</html>
'''
@@ -288,23 +283,93 @@ class HtmlFormatter(Formatter):
(arg, self.style.background_color, text_style))
return '\n'.join(lines)
- def _format_nowrap(self, tokensource, outfile, lnos=False):
+ def _wrap_full(self, inner, outfile):
+ if self.cssfile:
+ try:
+ filename = outfile.name
+ cssfilename = os.path.join(os.path.dirname(filename), self.cssfile)
+ except AttributeError:
+ print >>sys.stderr, 'Note: Cannot determine output file name, ' \
+ 'using current directory as base for the CSS file name'
+ cssfilename = self.cssfile
+ # write CSS file
+ try:
+ cf = open(cssfilename, "w")
+ cf.write(CSSFILE_TEMPLATE %
+ {'styledefs': self.get_style_defs('body')})
+ cf.close()
+ except IOError, err:
+ err.strerror = 'Error writing CSS file: ' + err.strerror
+ raise
+
+ yield 0, (DOC_HEADER_EXTERNALCSS %
+ dict(title = self.title,
+ cssfile = self.cssfile,
+ encoding = self.encoding))
+ else:
+ yield 0, (DOC_HEADER %
+ dict(title = self.title,
+ styledefs = self.get_style_defs('body'),
+ encoding = self.encoding))
+
+ for t, line in inner:
+ yield t, line
+ yield 0, DOC_FOOTER
+
+ def _wrap_linenos(self, inner):
+ dummyoutfile = StringIO.StringIO()
lncount = 0
+ for t, line in inner:
+ if t:
+ lncount += 1
+ dummyoutfile.write(line)
+
+ fl = self.linenostart
+ mw = len(str(lncount + fl - 1))
+ sp = self.linenospecial
+ st = self.linenostep
+ if sp:
+ ls = '\n'.join([(i%st == 0 and
+ (i%sp == 0 and '<span class="special">%*d</span>'
+ or '%*d') % (mw, i)
+ or '')
+ for i in range(fl, fl + lncount)])
+ else:
+ ls = '\n'.join([(i%st == 0 and ('%*d' % (mw, i)) or '')
+ for i in range(fl, fl + lncount)])
+
+ yield 0, ('<table><tr><td class="linenos"><pre>' +
+ ls + '</pre></td><td class="code">')
+ yield 0, dummyoutfile.getvalue()
+ yield 0, '</td></tr></table>'
+
+ def _wrap_div(self, inner):
+ yield 0, ('<div' + (self.cssclass and ' class="%s" ' % self.cssclass)
+ + (self.cssstyles and ' style="%s"' % self.cssstyles) + '>')
+ for tup in inner:
+ yield tup
+ yield 0, '</div>\n'
+
+ def _wrap_pre(self, inner):
+ yield 0, '<pre>'
+ for tup in inner:
+ yield tup
+ yield 0, '</pre>'
+
+ def _format_lines(self, tokensource):
+ """
+ Just format the tokens, without any wrapping tags.
+ Yield individual lines.
+ """
nocls = self.noclasses
enc = self.encoding
# for <span style=""> lookup only
getcls = self.ttype2class.get
c2s = self.class2style
- write = outfile.write
lspan = ''
+ line = ''
for ttype, value in tokensource:
- if enc:
- value = value.encode(enc)
- htmlvalue = escape_html(value)
- if lnos:
- lncount += value.count("\n")
-
if nocls:
cclass = getcls(ttype)
while cclass is None:
@@ -315,97 +380,56 @@ class HtmlFormatter(Formatter):
cls = self._get_css_class(ttype)
cspan = cls and '<span class="%s">' % cls
- if cspan == lspan:
- if not cspan:
- write(htmlvalue)
+ if enc:
+ value = value.encode(enc)
+
+ parts = escape_html(value).split('\n')
+
+ # for all but the last line
+ for part in parts[:-1]:
+ if line:
+ if lspan != cspan:
+ line += '</span>' + cspan + part + (cspan and '</span>') + '\n'
+ else: # both are the same
+ line += part + (lspan and '</span>') + '\n'
+ yield 1, line
+ line = ''
else:
- write(htmlvalue.replace('\n', '</span>\n' + cspan))
- elif htmlvalue: # if no value, leave old span open
- if lspan:
- write('</span>')
- lspan = cspan
- if cspan:
- htmlvalue = htmlvalue.replace('\n', '</span>\n' + cspan)
- write(cspan + htmlvalue)
+ yield 1, cspan + part + (cspan and '</span>') + '\n'
+ # for the last line
+ if line:
+ if lspan != cspan:
+ line += '</span>' + cspan + parts[-1]
+ lspan = cspan
else:
- write(htmlvalue)
- if lspan:
- write('</span>')
- return lncount
+ line += parts[-1]
+ else:
+ line = cspan + parts[-1]
+
+ if line:
+ yield 1, line + (lspan and '</span>') + '\n'
def format(self, tokensource, outfile):
- if self.nowrap:
- self._format_nowrap(tokensource, outfile)
- return
-
- realoutfile = outfile
- lnos = self.linenos
- full = self.full
-
- div = ('<div' + (self.cssclass and ' class="%s" ' % self.cssclass)
- + (self.cssstyles and ' style="%s"' % self.cssstyles) + '>')
- if full or lnos:
- outfile = cStringIO.StringIO()
- else:
- outfile.write(div)
-
- outfile.write('<pre>')
- lncount = self._format_nowrap(tokensource, outfile, lnos)
- outfile.write('</pre>')
-
- ret = ''
- if lnos:
- fl = self.linenostart
- mw = len(str(lncount + fl - 1))
- sp = self.linenospecial
- st = self.linenostep
- if sp:
- ls = '\n'.join([(i%st == 0 and
- (i%sp == 0 and '<span class="special">%*d</span>'
- or '%*d') % (mw, i)
- or '')
- for i in range(fl, fl + lncount)])
- else:
- ls = '\n'.join([(i%st == 0 and ('%*d' % (mw, i)) or '')
- for i in range(fl, fl + lncount)])
-
- ret = div + ('<table><tr>'
- '<td class="linenos"><pre>' +
- ls + '</pre></td><td class="code">')
- ret += outfile.getvalue()
- ret += '</td></tr></table>'
-
- if full:
- if not ret:
- ret = div + outfile.getvalue() + '</div>\n'
- if self.cssfile:
- try:
- filename = realoutfile.name
- cssfilename = os.path.join(os.path.dirname(filename), self.cssfile)
- except AttributeError:
- print >>sys.stderr, 'Note: Cannot determine output file name, ' \
- 'using current directory as base for the CSS file name'
- cssfilename = self.cssfile
- realoutfile.write(DOC_TEMPLATE_EXTERNALCSS %
- dict(title = self.title,
- cssfile = self.cssfile,
- encoding = self.encoding,
- code = ret))
- try:
- cf = open(cssfilename, "w")
- cf.write(CSSFILE_TEMPLATE % {'styledefs':
- self.get_style_defs('body')})
- cf.close()
- except IOError, err:
- err.strerror = 'Error writing CSS file: ' + err.strerror
- raise
- else:
- realoutfile.write(DOC_TEMPLATE %
- dict(title = self.title,
- styledefs = self.get_style_defs('body'),
- encoding = self.encoding,
- code = ret))
- elif lnos:
- realoutfile.write(ret + '</div>\n')
- else:
- realoutfile.write('</div>\n')
+ """
+ The formatting process uses several nested generators; which of
+ them are used is determined by the user's options.
+
+ Each generator should take at least one argument, ``inner``,
+ and wrap the pieces of text generated by ``inner``.
+
+ Always yield 2-tuples: (core, text). If ``core`` is 1, the text
+ is part of the original tokensource being highlighted; if it is
+ 0, the text is some piece of wrapping. This makes it possible to
+ use several different wrappers that process the original source
+ linewise, e.g. line number generators.
+ """
+ source = self._format_lines(tokensource)
+ if not self.nowrap:
+ source = self._wrap_div(self._wrap_pre(source))
+ if self.linenos:
+ source = self._wrap_linenos(source)
+ if self.full:
+ source = self._wrap_full(source, outfile)
+
+ for t, piece in source:
+ outfile.write(piece)
diff --git a/tests/test_html_formatter.py b/tests/test_html_formatter.py
index 7e25aeaf..29d484c1 100644
--- a/tests/test_html_formatter.py
+++ b/tests/test_html_formatter.py
@@ -7,15 +7,31 @@
:license: BSD, see LICENSE for more details.
"""
+import os
import unittest
import StringIO
import random
from pygments import lexers, formatters
from pygments.token import _TokenType
+from pygments.formatters import HtmlFormatter
+from pygments.lexers import PythonLexer
class HtmlFormatterTest(unittest.TestCase):
- def test_external_css(self):
- # TODO: write this test.
- pass
+# TODO: write this test.
+# def test_external_css(self):
+# pass
+
+ def test_all_options(self):
+ tokensource = list(PythonLexer().get_tokens(file(os.path.join(testdir, testfile)).read()))
+
+ for optdict in [dict(nowrap=True),
+ dict(linenos=True),
+ dict(linenos=True, full=True),
+ dict(linenos=True, full=True, noclasses=True)]:
+
+ outfile = StringIO.StringIO()
+ fmt = HtmlFormatter(**optdict)
+ fmt.format(tokensource, outfile)
+