2 files changed, 78 insertions, 14 deletions
diff --git a/pygments/formatters/rtf.py b/pygments/formatters/rtf.py
index 4b03f8a7..b6a60df3 100644
--- a/pygments/formatters/rtf.py
+++ b/pygments/formatters/rtf.py
@@ -22,6 +22,10 @@ class RtfFormatter(Formatter):
     documents with color information and other useful stuff. Perfect for Copy and
     Paste into Microsoft® Word® documents.
 
+    Please note that the ``encoding`` and ``outencoding`` options are ignored.
+    The RTF format is natively ASCII, but handles Unicode characters correctly
+    thanks to escape sequences.
+
     .. versionadded:: 0.6
 
     Additional options accepted:
@@ -74,28 +78,32 @@ class RtfFormatter(Formatter):
 
         # escape text
         text = self._escape(text)
-        if self.encoding in ('utf-8', 'utf-16', 'utf-32'):
-            encoding = 'iso-8859-15'
-        else:
-            encoding = self.encoding or 'iso-8859-15'
 
         buf = []
         for c in text:
-            if ord(c) > 128:
-                ansic = c.encode(encoding, 'ignore')
-                if ansic and ord(ansic) > 128:
-                    ansic = '\\\'%x' % ord(ansic)
-                else:
-                    ansic = '?'
-                buf.append(r'\ud{\u%d%s}' % (ord(c), ansic))
-            else:
+            cn = ord(c)
+            if cn < (2**7):
+                # ASCII character: emitted unchanged
                 buf.append(str(c))
+            elif (2**7) <= cn < (2**16):
+                # fits in 16 bits: a single unicode escape sequence
+                buf.append(r'{\u%d}' % cn)
+            elif (2**16) <= cn:
+                # RTF limits unicode escapes to 16 bits, so a
+                # code point above U+FFFF is written as its
+                # UTF-16 surrogate pair (high half, then low).
+                # Floor division keeps both halves integers.
+                # From definition D28 of:
+                # http://www.unicode.org/book/ch03.pdf
+                high = ((cn - 0x10000) // 0x400) + 0xD800
+                low = ((cn - 0x10000) % 0x400) + 0xDC00
+                buf.append(r'{\u%d}{\u%d}' % (high, low))
 
         return ''.join(buf).replace('\n', '\\par\n')
 
     def format_unencoded(self, tokensource, outfile):
         # rtf 1.8 header
-        outfile.write(r'{\rtf1\ansi\deff0'
+        outfile.write(r'{\rtf1\ansi\uc0\deff0'
                       r'{\fonttbl{\f0\fmodern\fprq1\fcharset0%s;}}'
                       r'{\colortbl;' % (self.fontface and
                                         ' ' + self._escape(self.fontface) or
@@ -114,7 +122,7 @@ class RtfFormatter(Formatter):
                         int(color[4:6], 16)
                     ))
                     offset += 1
-        outfile.write(r'}\f0')
+        outfile.write(r'}\f0 ')  # NOTE(review): space presumably ends the \f0 control word; confirm
         if self.fontsize:
             outfile.write(r'\fs%d' % (self.fontsize))
 
diff --git a/tests/test_rtf_formatter.py b/tests/test_rtf_formatter.py
new file mode 100644
index 00000000..ca500786
--- /dev/null
+++ b/tests/test_rtf_formatter.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+"""
+    Pygments RTF formatter tests
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import unittest
+
+from pygments.util import StringIO
+from pygments.formatters import RtfFormatter
+from pygments.lexers.special import TextLexer
+
+class RtfFormatterTest(unittest.TestCase):
+    """Check the RTF markup that RtfFormatter emits for plain-text input."""
+    def format_rtf(self, t):
+        """Lex *t* with TextLexer and return RtfFormatter's output as a string."""
+        tokensource = list(TextLexer().get_tokens(t))
+        buf = StringIO()
+        RtfFormatter().format(tokensource, buf)
+        result = buf.getvalue()
+        buf.close()
+        return result
+
+    def test_rtf_header(self):
+        """Even empty input produces output starting with the rtf1/ansi/uc0 header."""
+        result = self.format_rtf(u'')
+        self.assertTrue(result.startswith(r'{\rtf1\ansi\uc0'))
+
+    def test_ascii_characters(self):
+        """ASCII text passes through unchanged, followed by a paragraph break."""
+        result = self.format_rtf(u'a b c d ~')
+        self.assertTrue(result.endswith(r'a b c d ~\par' '\n' r'}'))
+
+    def test_escape_characters(self):
+        """A backslash and opening braces are emitted backslash-escaped."""
+        result = self.format_rtf(u'\\ {{')
+        self.assertTrue(result.endswith(r'\\ \{\{\par' '\n' r'}'))
+
+    def test_single_characters(self):
+        """Non-ASCII characters that fit in 16 bits become one escape each."""
+        result = self.format_rtf(u'â € ¤ каждой')
+        expected = (r'{\u226} {\u8364} {\u164} '
+                    r'{\u1082}{\u1072}{\u1078}{\u1076}{\u1086}{\u1081}'
+                    r'\par' '\n' r'}')
+        self.assertTrue(result.endswith(expected))
+
+    def test_double_characters(self):
+        """A variation-selector sequence and a character above 16 bits give two escapes."""
+        result = self.format_rtf(u'က 힣 ↕ ↕︎ 鼖')
+        expected = (r'{\u4096} {\u55203} {\u8597} '
+                    r'{\u8597}{\u65038} {\u55422}{\u56859}'
+                    r'\par' '\n' r'}')
+        self.assertTrue(result.endswith(expected))