summary | refs | log | tree | commit | diff
path: root/pygments
diff options
context:
space:
mode:
author: gbrandl <devnull@localhost> 2006-12-20 21:14:44 +0100
committer: gbrandl <devnull@localhost> 2006-12-20 21:14:44 +0100
commit: 1d634b6950bfac268f6d05a673fd48094738f01e (patch)
tree: 2788e9f4315a10392eee6371860c6045f8f74e31 /pygments
parent: 25ad3be9474211bb8a652deadea00677886dadf5 (diff)
download: pygments-1d634b6950bfac268f6d05a673fd48094738f01e.tar.gz
[svn] Improve Unicode handling without encoding.
Diffstat (limited to 'pygments')
-rw-r--r-- pygments/__init__.py 10
-rw-r--r-- pygments/formatter.py 10
-rw-r--r-- pygments/formatters/bbcode.py 4
-rw-r--r-- pygments/formatters/html.py 5
-rw-r--r-- pygments/formatters/latex.py 5
-rw-r--r-- pygments/formatters/other.py 8
-rw-r--r-- pygments/formatters/rtf.py 5
-rw-r--r-- pygments/formatters/terminal.py 4
-rw-r--r-- pygments/lexers/compiled.py 2
9 files changed, 42 insertions, 11 deletions
diff --git a/pygments/__init__.py b/pygments/__init__.py
index 6b55ce85..0c03ef89 100644
--- a/pygments/__init__.py
+++ b/pygments/__init__.py
@@ -33,7 +33,8 @@ __all__ = ['lex', 'format', 'highlight']
import sys, os
-from cStringIO import StringIO
+from StringIO import StringIO
+from cStringIO import StringIO as CStringIO
def lex(code, lexer):
@@ -51,10 +52,13 @@ def format(tokens, formatter, outfile=None):
with a ``write`` method), the result will be written to it, otherwise
it is returned as a string.
"""
- realoutfile = outfile or StringIO()
- formatter.format(tokens, realoutfile)
if not outfile:
+ # if we want Unicode output, we have to use Python StringIO
+ realoutfile = formatter.encoding and CStringIO() or StringIO()
+ formatter.format(tokens, realoutfile)
return realoutfile.getvalue()
+ else:
+ formatter.format(tokens, outfile)
def highlight(code, lexer, formatter, outfile=None):
diff --git a/pygments/formatter.py b/pygments/formatter.py
index c750f0d8..41959565 100644
--- a/pygments/formatter.py
+++ b/pygments/formatter.py
@@ -41,14 +41,20 @@ class Formatter(object):
``encoding``
If given, must be an encoding name. This will be used to
convert the Unicode token strings to byte strings in the
- output (default: 'latin1').
+ output. If it is "" or None, Unicode strings will be written
+ to the output file, which most file-like objects do not
+ support (default: None).
"""
+ #: If True, this formatter outputs Unicode strings when no encoding
+ #: option is given.
+ unicodeoutput = True
+
def __init__(self, **options):
self.style = _lookup_style(options.get('style', 'default'))
self.full = get_bool_opt(options, 'full', False)
self.title = options.get('title', '')
- self.encoding = options.get('encoding', 'latin1')
+ self.encoding = options.get('encoding', None) or None
self.options = options
def get_style_defs(self, arg=''):
diff --git a/pygments/formatters/bbcode.py b/pygments/formatters/bbcode.py
index 0e9338bd..d9b96485 100644
--- a/pygments/formatters/bbcode.py
+++ b/pygments/formatters/bbcode.py
@@ -72,11 +72,13 @@ class BBCodeFormatter(Formatter):
if self._mono:
outfile.write('[font=monospace]')
+ enc = self.encoding
lastval = ''
lasttype = None
for ttype, value in tokensource:
- value = value.encode(self.encoding)
+ if enc:
+ value = value.encode(enc)
while ttype not in self.styles:
ttype = ttype.parent
if ttype == lasttype:
diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py
index 3a5bafb1..f72046ef 100644
--- a/pygments/formatters/html.py
+++ b/pygments/formatters/html.py
@@ -226,6 +226,7 @@ class HtmlFormatter(Formatter):
def _format_nowrap(self, tokensource, outfile, lnos=False):
lncount = 0
nocls = self.noclasses
+ enc = self.encoding
# for <span style=""> lookup only
getcls = self.ttype2class.get
c2s = self.class2style
@@ -233,7 +234,9 @@ class HtmlFormatter(Formatter):
write = outfile.write
lspan = ''
for ttype, value in tokensource:
- htmlvalue = escape_html(value.encode(self.encoding))
+ if enc:
+ value = value.encode(enc)
+ htmlvalue = escape_html(value)
if lnos:
lncount += value.count("\n")
diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py
index 305bb174..d3d4f0b5 100644
--- a/pygments/formatters/latex.py
+++ b/pygments/formatters/latex.py
@@ -149,6 +149,7 @@ class LatexFormatter(Formatter):
def format(self, tokensource, outfile):
# TODO: add support for background colors
+ enc = self.encoding
if self.full:
realoutfile = outfile
@@ -165,7 +166,9 @@ class LatexFormatter(Formatter):
outfile.write(']\n')
for ttype, value in tokensource:
- value = escape_tex(value.encode(self.encoding))
+ if enc:
+ value = value.encode(enc)
+ value = escape_tex(value)
cmd = self.ttype2cmd.get(ttype)
while cmd is None:
ttype = ttype.parent
diff --git a/pygments/formatters/other.py b/pygments/formatters/other.py
index 4d6bd204..f6101848 100644
--- a/pygments/formatters/other.py
+++ b/pygments/formatters/other.py
@@ -20,8 +20,12 @@ class NullFormatter(Formatter):
Output the text unchanged without any formatting.
"""
def format(self, tokensource, outfile):
+ enc = self.encoding
for ttype, value in tokensource:
- outfile.write(value.encode(self.encoding))
+ if enc:
+ outfile.write(value.encode(enc))
+ else:
+ outfile.write(value)
class RawTokenFormatter(Formatter):
@@ -37,6 +41,8 @@ class RawTokenFormatter(Formatter):
the given compression algorithm (default: '').
"""
+ unicodeoutput = False
+
def __init__(self, **options):
Formatter.__init__(self, **options)
self.compress = options.get('compress', '')
diff --git a/pygments/formatters/rtf.py b/pygments/formatters/rtf.py
index 4eaa929b..1aa0b9b8 100644
--- a/pygments/formatters/rtf.py
+++ b/pygments/formatters/rtf.py
@@ -18,6 +18,8 @@ __all__ = ['RtfFormatter']
class RtfFormatter(Formatter):
"""Output RTF (Rich Text Format)."""
+ unicodeoutput = False
+
def __init__(self, **options):
"""
Additional options accepted:
@@ -67,6 +69,9 @@ class RtfFormatter(Formatter):
return text.replace('\n', '\\par\n')
def format(self, tokensource, outfile):
+ if not self.encoding:
+ outfile.write(u'')
+
outfile.write(r'{\rtf1\ansi\deff0'
r'{\fonttbl{\f0\fmodern\fprq1\fcharset0%s;}}{\colortbl;' %
(self.fontface and ' ' + self._escape(self.fontface) or ''))
diff --git a/pygments/formatters/terminal.py b/pygments/formatters/terminal.py
index b0a134b8..375e9cd1 100644
--- a/pygments/formatters/terminal.py
+++ b/pygments/formatters/terminal.py
@@ -73,8 +73,10 @@ class TerminalFormatter(Formatter):
self.colorscheme = options.get('colorscheme', None) or TERMINAL_COLORS
def format(self, tokensource, outfile):
+ enc = self.encoding
for ttype, value in tokensource:
- value = value.encode(self.encoding)
+ if enc:
+ value = value.encode(enc)
color = self.colorscheme.get(ttype)
while color is None:
ttype = ttype[:-1]
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index 82997d3d..8d25b26b 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -147,7 +147,7 @@ class CppLexer(RegexLexer):
(r'(\d+\.\d*|\.\d+)', Number.Float),
(r'\d+', Number.Integer),
(r'[~!%^&*+=|?:<>/-]', Operator),
- (r'[()\[\],.]', Punctuation),
+ (r'[()\[\],.;]', Punctuation),
(r'(asm|auto|break|case|catch|const|const_cast|continue|'
r'default|delete|do|dynamic_cast|else|enum|explicit|export|'
r'extern|for|friend|goto|if|mutable|namespace|new|operator|'