diff options
author | Tim Hatch <tim@timhatch.com> | 2014-05-17 09:19:35 -0700 |
---|---|---|
committer | Tim Hatch <tim@timhatch.com> | 2014-05-17 09:19:35 -0700 |
commit | a62cdcfaa5a260274303cb93b92a3f2e2ce3be98 (patch) | |
tree | e7d2b4cd4e10a6e43fe4b2dc4e03ab2e827ebc20 /pygments/formatters/rtf.py | |
parent | 70a10a2e423d9729b62c7b56faca28889c0d688a (diff) | |
parent | ff12540907fe9d98bf02c9508a171659457b14b2 (diff) | |
download | pygments-a62cdcfaa5a260274303cb93b92a3f2e2ce3be98.tar.gz |
Merged in timgilbert/pygments-main/clj-keyword-fix (pull request #326)
Tweaking clojure keyword lexing
Diffstat (limited to 'pygments/formatters/rtf.py')
-rw-r--r-- | pygments/formatters/rtf.py | 33 |
1 files changed, 18 insertions, 15 deletions
diff --git a/pygments/formatters/rtf.py b/pygments/formatters/rtf.py index 9d87e8f1..cf65a927 100644 --- a/pygments/formatters/rtf.py +++ b/pygments/formatters/rtf.py @@ -10,7 +10,7 @@ """ from pygments.formatter import Formatter -from pygments.util import get_int_opt +from pygments.util import get_int_opt, _surrogatepair __all__ = ['RtfFormatter'] @@ -22,6 +22,10 @@ class RtfFormatter(Formatter): documents with color information and other useful stuff. Perfect for Copy and Paste into Microsoft® Word® documents. + Please note that ``encoding`` and ``outencoding`` options are ignored. + The RTF format is ASCII natively, but handles unicode characters correctly + thanks to escape sequences. + .. versionadded:: 0.6 Additional options accepted: @@ -74,28 +78,27 @@ class RtfFormatter(Formatter): # escape text text = self._escape(text) - if self.encoding in ('utf-8', 'utf-16', 'utf-32'): - encoding = 'iso-8859-15' - else: - encoding = self.encoding or 'iso-8859-15' buf = [] for c in text: - if ord(c) > 128: - ansic = c.encode(encoding, 'ignore') or '?' - if ord(ansic) > 128: - ansic = '\\\'%x' % ord(ansic) - else: - ansic = c - buf.append(r'\ud{\u%d%s}' % (ord(c), ansic)) - else: + cn = ord(c) + if cn < (2**7): + # ASCII character buf.append(str(c)) + elif (2**7) <= cn < (2**16): + # single unicode escape sequence + buf.append(r'{\u%d}' % cn) + elif (2**16) <= cn: + # RTF limits unicode to 16 bits. + # Force surrogate pairs + h,l = _surrogatepair(cn) + buf.append(r'{\u%d}{\u%d}' % (h,l)) return ''.join(buf).replace('\n', '\\par\n') def format_unencoded(self, tokensource, outfile): # rtf 1.8 header - outfile.write(r'{\rtf1\ansi\deff0' + outfile.write(r'{\rtf1\ansi\uc0\deff0' r'{\fonttbl{\f0\fmodern\fprq1\fcharset0%s;}}' r'{\colortbl;' % (self.fontface and ' ' + self._escape(self.fontface) or @@ -114,7 +117,7 @@ class RtfFormatter(Formatter): int(color[4:6], 16) )) offset += 1 - outfile.write(r'}\f0') + outfile.write(r'}\f0 ') if self.fontsize: outfile.write(r'\fs%d' % (self.fontsize)) |