summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Georg Brandl <georg@python.org> 2014-11-06 13:28:14 +0100
committer Georg Brandl <georg@python.org> 2014-11-06 13:28:14 +0100
commit 01f13cc2e664ebab1815fbaca35f3815160a8d56 (patch)
tree b116184640587b02bd5498b757b103cbd07a09ce
parent 8c0814068d229cfbf67f9e3a070bcdaa089c7ffa (diff)
download pygments-01f13cc2e664ebab1815fbaca35f3815160a8d56.tar.gz
Fix Raw token lexer/formatter with new default encoding.
-rw-r--r-- pygments/formatters/other.py 1
-rw-r--r-- pygments/lexers/special.py 3
2 files changed, 3 insertions, 1 deletions
diff --git a/pygments/formatters/other.py b/pygments/formatters/other.py
index 6100f70b..ea8138c7 100644
--- a/pygments/formatters/other.py
+++ b/pygments/formatters/other.py
@@ -64,6 +64,7 @@ class RawTokenFormatter(Formatter):
Formatter.__init__(self, **options)
# We ignore self.encoding if it is set, since it gets set for lexer
# and formatter if given with -Oencoding on the command line.
+ # The RawTokenFormatter outputs only ASCII. Override here.
self.encoding = 'ascii' # let pygments.format() do the right thing
self.compress = get_choice_opt(options, 'compress',
['', 'none', 'gz', 'bz2'], '')
diff --git a/pygments/lexers/special.py b/pygments/lexers/special.py
index 9ea2e22c..bd869044 100644
--- a/pygments/lexers/special.py
+++ b/pygments/lexers/special.py
@@ -36,6 +36,7 @@ _ttype_cache = {}
line_re = re.compile(b'.*?\n')
+
class RawTokenLexer(Lexer):
"""
Recreate a token stream formatted with the `RawTokenFormatter`. This
@@ -82,7 +83,7 @@ class RawTokenLexer(Lexer):
try:
ttypestr, val = match.group().split(b'\t', 1)
except ValueError:
- val = match.group().decode(self.encoding)
+ val = match.group().decode('ascii', 'replace')
ttype = Error
else:
ttype = _ttype_cache.get(ttypestr)