diff options
author | gbrandl <devnull@localhost> | 2006-12-20 21:14:44 +0100 |
---|---|---|
committer | gbrandl <devnull@localhost> | 2006-12-20 21:14:44 +0100 |
commit | 1d634b6950bfac268f6d05a673fd48094738f01e (patch) | |
tree | 2788e9f4315a10392eee6371860c6045f8f74e31 /pygments | |
parent | 25ad3be9474211bb8a652deadea00677886dadf5 (diff) | |
download | pygments-1d634b6950bfac268f6d05a673fd48094738f01e.tar.gz |
[svn] Improve Unicode handling without encoding.
Diffstat (limited to 'pygments')
-rw-r--r-- | pygments/__init__.py | 10 | ||||
-rw-r--r-- | pygments/formatter.py | 10 | ||||
-rw-r--r-- | pygments/formatters/bbcode.py | 4 | ||||
-rw-r--r-- | pygments/formatters/html.py | 5 | ||||
-rw-r--r-- | pygments/formatters/latex.py | 5 | ||||
-rw-r--r-- | pygments/formatters/other.py | 8 | ||||
-rw-r--r-- | pygments/formatters/rtf.py | 5 | ||||
-rw-r--r-- | pygments/formatters/terminal.py | 4 | ||||
-rw-r--r-- | pygments/lexers/compiled.py | 2 |
9 files changed, 42 insertions, 11 deletions
diff --git a/pygments/__init__.py b/pygments/__init__.py index 6b55ce85..0c03ef89 100644 --- a/pygments/__init__.py +++ b/pygments/__init__.py @@ -33,7 +33,8 @@ __all__ = ['lex', 'format', 'highlight'] import sys, os -from cStringIO import StringIO +from StringIO import StringIO +from cStringIO import StringIO as CStringIO def lex(code, lexer): @@ -51,10 +52,13 @@ def format(tokens, formatter, outfile=None): with a ``write`` method), the result will be written to it, otherwise it is returned as a string. """ - realoutfile = outfile or StringIO() - formatter.format(tokens, realoutfile) if not outfile: + # if we want Unicode output, we have to use Python StringIO + realoutfile = formatter.encoding and CStringIO() or StringIO() + formatter.format(tokens, realoutfile) return realoutfile.getvalue() + else: + formatter.format(tokens, outfile) def highlight(code, lexer, formatter, outfile=None): diff --git a/pygments/formatter.py b/pygments/formatter.py index c750f0d8..41959565 100644 --- a/pygments/formatter.py +++ b/pygments/formatter.py @@ -41,14 +41,20 @@ class Formatter(object): ``encoding`` If given, must be an encoding name. This will be used to convert the Unicode token strings to byte strings in the - output (default: 'latin1'). + output. If it is "" or None, Unicode strings will be written + to the output file, which most file-like objects do not + support (default: None). """ + #: If True, this formatter outputs Unicode strings when no encoding + #: option is given. + unicodeoutput = True + def __init__(self, **options): self.style = _lookup_style(options.get('style', 'default')) self.full = get_bool_opt(options, 'full', False) self.title = options.get('title', '') - self.encoding = options.get('encoding', 'latin1') + self.encoding = options.get('encoding', None) or None self.options = options def get_style_defs(self, arg=''): diff --git a/pygments/formatters/bbcode.py b/pygments/formatters/bbcode.py index 0e9338bd..d9b96485 100644 --- a/pygments/formatters/bbcode.py +++ b/pygments/formatters/bbcode.py @@ -72,11 +72,13 @@ class BBCodeFormatter(Formatter): if self._mono: outfile.write('[font=monospace]') + enc = self.encoding lastval = '' lasttype = None for ttype, value in tokensource: - value = value.encode(self.encoding) + if enc: + value = value.encode(enc) while ttype not in self.styles: ttype = ttype.parent if ttype == lasttype: diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py index 3a5bafb1..f72046ef 100644 --- a/pygments/formatters/html.py +++ b/pygments/formatters/html.py @@ -226,6 +226,7 @@ class HtmlFormatter(Formatter): def _format_nowrap(self, tokensource, outfile, lnos=False): lncount = 0 nocls = self.noclasses + enc = self.encoding # for <span style=""> lookup only getcls = self.ttype2class.get c2s = self.class2style @@ -233,7 +234,9 @@ class HtmlFormatter(Formatter): write = outfile.write lspan = '' for ttype, value in tokensource: - htmlvalue = escape_html(value.encode(self.encoding)) + if enc: + value = value.encode(enc) + htmlvalue = escape_html(value) if lnos: lncount += value.count("\n") diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py index 305bb174..d3d4f0b5 100644 --- a/pygments/formatters/latex.py +++ b/pygments/formatters/latex.py @@ -149,6 +149,7 @@ class LatexFormatter(Formatter): def format(self, tokensource, outfile): # TODO: add support for background colors + enc = self.encoding if self.full: realoutfile = outfile @@ -165,7 +166,9 @@ class LatexFormatter(Formatter): outfile.write(']\n') for ttype, value in tokensource: - value = escape_tex(value.encode(self.encoding)) + if enc: + value = value.encode(enc) + value = escape_tex(value) cmd = self.ttype2cmd.get(ttype) while cmd is None: ttype = ttype.parent diff --git a/pygments/formatters/other.py b/pygments/formatters/other.py index 4d6bd204..f6101848 100644 --- a/pygments/formatters/other.py +++ b/pygments/formatters/other.py @@ -20,8 +20,12 @@ class NullFormatter(Formatter): Output the text unchanged without any formatting. """ def format(self, tokensource, outfile): + enc = self.encoding for ttype, value in tokensource: - outfile.write(value.encode(self.encoding)) + if enc: + outfile.write(value.encode(enc)) + else: + outfile.write(value) class RawTokenFormatter(Formatter): @@ -37,6 +41,8 @@ class RawTokenFormatter(Formatter): the given compression algorithm (default: ''). """ + unicodeoutput = False + def __init__(self, **options): Formatter.__init__(self, **options) self.compress = options.get('compress', '') diff --git a/pygments/formatters/rtf.py b/pygments/formatters/rtf.py index 4eaa929b..1aa0b9b8 100644 --- a/pygments/formatters/rtf.py +++ b/pygments/formatters/rtf.py @@ -18,6 +18,8 @@ __all__ = ['RtfFormatter'] class RtfFormatter(Formatter): """Output RTF (Rich Text Format).""" + unicodeoutput = False + def __init__(self, **options): """ Additional options accepted: @@ -67,6 +69,9 @@ class RtfFormatter(Formatter): return text.replace('\n', '\\par\n') def format(self, tokensource, outfile): + if not self.encoding: + outfile.write(u'') + outfile.write(r'{\rtf1\ansi\deff0' r'{\fonttbl{\f0\fmodern\fprq1\fcharset0%s;}}{\colortbl;' % (self.fontface and ' ' + self._escape(self.fontface) or '')) diff --git a/pygments/formatters/terminal.py b/pygments/formatters/terminal.py index b0a134b8..375e9cd1 100644 --- a/pygments/formatters/terminal.py +++ b/pygments/formatters/terminal.py @@ -73,8 +73,10 @@ class TerminalFormatter(Formatter): self.colorscheme = options.get('colorscheme', None) or TERMINAL_COLORS def format(self, tokensource, outfile): + enc = self.encoding for ttype, value in tokensource: - value = value.encode(self.encoding) + if enc: + value = value.encode(enc) color = self.colorscheme.get(ttype) while color is None: ttype = ttype[:-1] diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py index 82997d3d..8d25b26b 100644 --- a/pygments/lexers/compiled.py +++ b/pygments/lexers/compiled.py @@ -147,7 +147,7 @@ class CppLexer(RegexLexer): (r'(\d+\.\d*|\.\d+)', Number.Float), (r'\d+', Number.Integer), (r'[~!%^&*+=|?:<>/-]', Operator), - (r'[()\[\],.]', Punctuation), + (r'[()\[\],.;]', Punctuation), (r'(asm|auto|break|case|catch|const|const_cast|continue|' r'default|delete|do|dynamic_cast|else|enum|explicit|export|' r'extern|for|friend|goto|if|mutable|namespace|new|operator|' |