Merged in timgilbert/pygments-main/clj-keyword-fix (pull request #326)

Tweaking clojure keyword lexing
author: Tim Hatch <tim@timhatch.com> 2014-05-17 09:19:35 -0700
committer: Tim Hatch <tim@timhatch.com> 2014-05-17 09:19:35 -0700
commit: a62cdcfaa5a260274303cb93b92a3f2e2ce3be98 (patch)
tree: e7d2b4cd4e10a6e43fe4b2dc4e03ab2e827ebc20 /pygments/formatters/rtf.py
parent: 70a10a2e423d9729b62c7b56faca28889c0d688a (diff)
parent: ff12540907fe9d98bf02c9508a171659457b14b2 (diff)
download: pygments-a62cdcfaa5a260274303cb93b92a3f2e2ce3be98.tar.gz
1 file changed, 18 insertions, 15 deletions
diff --git a/pygments/formatters/rtf.py b/pygments/formatters/rtf.py
index 9d87e8f1..cf65a927 100644
--- a/pygments/formatters/rtf.py
+++ b/pygments/formatters/rtf.py
@@ -10,7 +10,7 @@
 """
 
 from pygments.formatter import Formatter
-from pygments.util import get_int_opt
+from pygments.util import get_int_opt, _surrogatepair
 
 
 __all__ = ['RtfFormatter']
@@ -22,6 +22,10 @@ class RtfFormatter(Formatter):
     documents with color information and other useful stuff. Perfect for Copy and
     Paste into Microsoft® Word® documents.
 
+    Please note that ``encoding`` and ``outencoding`` options are ignored.
+    The RTF format is ASCII natively, but handles unicode characters correctly
+    thanks to escape sequences.
+
     .. versionadded:: 0.6
 
     Additional options accepted:
@@ -74,28 +78,27 @@ class RtfFormatter(Formatter):
 
         # escape text
         text = self._escape(text)
-        if self.encoding in ('utf-8', 'utf-16', 'utf-32'):
-            encoding = 'iso-8859-15'
-        else:
-            encoding = self.encoding or 'iso-8859-15'
 
         buf = []
         for c in text:
-            if ord(c) > 128:
-                ansic = c.encode(encoding, 'ignore') or '?'
-                if ord(ansic) > 128:
-                    ansic = '\\\'%x' % ord(ansic)
-                else:
-                    ansic = c
-                buf.append(r'\ud{\u%d%s}' % (ord(c), ansic))
-            else:
+            cn = ord(c)
+            if cn < (2**7):
+                # ASCII character
                 buf.append(str(c))
+            elif (2**7) <= cn < (2**16):
+                # single unicode escape sequence
+                buf.append(r'{\u%d}' % cn)
+            elif (2**16) <= cn:
+                # RTF limits unicode to 16 bits.
+                # Force surrogate pairs
+                h,l = _surrogatepair(cn)
+                buf.append(r'{\u%d}{\u%d}' % (h,l))
 
         return ''.join(buf).replace('\n', '\\par\n')
 
     def format_unencoded(self, tokensource, outfile):
         # rtf 1.8 header
-        outfile.write(r'{\rtf1\ansi\deff0'
+        outfile.write(r'{\rtf1\ansi\uc0\deff0'
                       r'{\fonttbl{\f0\fmodern\fprq1\fcharset0%s;}}'
                       r'{\colortbl;' % (self.fontface and
                                         ' ' + self._escape(self.fontface) or
@@ -114,7 +117,7 @@ class RtfFormatter(Formatter):
                         int(color[4:6], 16)
                     ))
                     offset += 1
-        outfile.write(r'}\f0')
+        outfile.write(r'}\f0 ')
         if self.fontsize:
             outfile.write(r'\fs%d' % (self.fontsize))
author	Tim Hatch <tim@timhatch.com>	2014-05-17 09:19:35 -0700
committer	Tim Hatch <tim@timhatch.com>	2014-05-17 09:19:35 -0700
commit	a62cdcfaa5a260274303cb93b92a3f2e2ce3be98 (patch)
tree	e7d2b4cd4e10a6e43fe4b2dc4e03ab2e827ebc20 /pygments/formatters/rtf.py
parent	70a10a2e423d9729b62c7b56faca28889c0d688a (diff)
parent	ff12540907fe9d98bf02c9508a171659457b14b2 (diff)
download	pygments-a62cdcfaa5a260274303cb93b92a3f2e2ce3be98.tar.gz