summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- AUTHORS 1
-rw-r--r-- pygments/formatters/rtf.py 33
-rw-r--r-- pygments/util.py 5
-rw-r--r-- tests/string_asserts.py 22
-rw-r--r-- tests/test_rtf_formatter.py 109
-rw-r--r-- tests/test_string_asserts.py 39
6 files changed, 194 insertions, 15 deletions
diff --git a/AUTHORS b/AUTHORS
index 5b15e8c7..1bedef4b 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -122,6 +122,7 @@ Other contributors, listed alphabetically, are:
* Ronny Pfannschmidt -- BBCode lexer
* Benjamin Peterson -- Test suite refactoring
* Dominik Picheta -- Nimrod lexer
+* Andrew Pinkham -- RTF Formatter Refactoring
* Clément Prévost -- UrbiScript lexer
* raichoo -- Idris lexer
* Kashif Rasul -- CUDA lexer
diff --git a/pygments/formatters/rtf.py b/pygments/formatters/rtf.py
index 4b03f8a7..cf65a927 100644
--- a/pygments/formatters/rtf.py
+++ b/pygments/formatters/rtf.py
@@ -10,7 +10,7 @@
"""
from pygments.formatter import Formatter
-from pygments.util import get_int_opt
+from pygments.util import get_int_opt, _surrogatepair
__all__ = ['RtfFormatter']
@@ -22,6 +22,10 @@ class RtfFormatter(Formatter):
documents with color information and other useful stuff. Perfect for Copy and
Paste into Microsoft® Word® documents.
+ Please note that ``encoding`` and ``outencoding`` options are ignored.
+ The RTF format is ASCII natively, but handles unicode characters correctly
+ thanks to escape sequences.
+
.. versionadded:: 0.6
Additional options accepted:
@@ -74,28 +78,27 @@ class RtfFormatter(Formatter):
# escape text
text = self._escape(text)
- if self.encoding in ('utf-8', 'utf-16', 'utf-32'):
- encoding = 'iso-8859-15'
- else:
- encoding = self.encoding or 'iso-8859-15'
buf = []
for c in text:
- if ord(c) > 128:
- ansic = c.encode(encoding, 'ignore')
- if ansic and ord(ansic) > 128:
- ansic = '\\\'%x' % ord(ansic)
- else:
- ansic = '?'
- buf.append(r'\ud{\u%d%s}' % (ord(c), ansic))
- else:
+ cn = ord(c)
+ if cn < (2**7):
+ # ASCII character
buf.append(str(c))
+ elif (2**7) <= cn < (2**16):
+ # single unicode escape sequence
+ buf.append(r'{\u%d}' % cn)
+ elif (2**16) <= cn:
+ # RTF limits unicode to 16 bits.
+ # Force surrogate pairs
+ h,l = _surrogatepair(cn)
+ buf.append(r'{\u%d}{\u%d}' % (h,l))
return ''.join(buf).replace('\n', '\\par\n')
def format_unencoded(self, tokensource, outfile):
# rtf 1.8 header
- outfile.write(r'{\rtf1\ansi\deff0'
+ outfile.write(r'{\rtf1\ansi\uc0\deff0'
r'{\fonttbl{\f0\fmodern\fprq1\fcharset0%s;}}'
r'{\colortbl;' % (self.fontface and
' ' + self._escape(self.fontface) or
@@ -114,7 +117,7 @@ class RtfFormatter(Formatter):
int(color[4:6], 16)
))
offset += 1
- outfile.write(r'}\f0')
+ outfile.write(r'}\f0 ')
if self.fontsize:
outfile.write(r'\fs%d' % (self.fontsize))
diff --git a/pygments/util.py b/pygments/util.py
index c302900f..5dc6981f 100644
--- a/pygments/util.py
+++ b/pygments/util.py
@@ -208,6 +208,11 @@ def looks_like_xml(text):
# Python narrow build compatibility
def _surrogatepair(c):
+ # Split a Unicode code point that needs more than 16 bits (above
+ # U+FFFF) into its UTF-16 (high, low) surrogate pair; 0xd7c0 is
+ # 0xd800 - (0x10000 >> 10), folding the offset into the constant.
+ # From example D28 of:
+ # http://www.unicode.org/book/ch03.pdf
return (0xd7c0 + (c >> 10), (0xdc00 + (c & 0x3ff)))
def unirange(a, b):
diff --git a/tests/string_asserts.py b/tests/string_asserts.py
new file mode 100644
index 00000000..025a5281
--- /dev/null
+++ b/tests/string_asserts.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+"""
+ Pygments string assert utility
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+class StringTests(object):
+    """Mixin providing prefix and suffix assertions for string values.
+
+    The test classes combine it with ``unittest.TestCase``; the methods
+    themselves only need ``haystack`` to offer ``startswith``/``endswith``.
+    """
+
+    def assertStartsWith(self, haystack, needle, msg=None):
+        """Raise ``AssertionError`` unless ``haystack`` starts with ``needle``.
+
+        ``msg`` replaces the default failure message when it is not None.
+        """
+        if haystack.startswith(needle):
+            return
+        if msg is None:
+            # Format only on failure: a passing check must never raise
+            # because one of its arguments cannot be formatted.
+            msg = "'{}' does not start with '{}'".format(haystack, needle)
+        raise AssertionError(msg)
+
+    def assertEndsWith(self, haystack, needle, msg=None):
+        """Raise ``AssertionError`` unless ``haystack`` ends with ``needle``.
+
+        ``msg`` replaces the default failure message when it is not None.
+        """
+        if haystack.endswith(needle):
+            return
+        if msg is None:
+            # Format only on failure, for the same reason as above.
+            msg = "'{}' does not end with '{}'".format(haystack, needle)
+        raise AssertionError(msg)
diff --git a/tests/test_rtf_formatter.py b/tests/test_rtf_formatter.py
new file mode 100644
index 00000000..30b136fd
--- /dev/null
+++ b/tests/test_rtf_formatter.py
@@ -0,0 +1,109 @@
+# -*- coding: utf-8 -*-
+"""
+ Pygments RTF formatter tests
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import unittest
+from string_asserts import StringTests
+
+from pygments.util import StringIO
+from pygments.formatters import RtfFormatter
+from pygments.lexers.special import TextLexer
+
+class RtfFormatterTest(StringTests, unittest.TestCase):
+    """Check the document framing and text escaping written by RtfFormatter."""
+
+    # Trailer every formatted document is expected to end with.
+    foot = (r'\par' '\n' r'}')
+
+    def _escape(self, string):
+        """Return ``string`` with each newline replaced by a visible escape.
+
+        Keeps multi-line values on a single line inside failure messages.
+        """
+        return string.replace("\n", r"\n")
+
+    def _build_message(self, *args, **kwargs):
+        """Build the failure message for a body-text comparison.
+
+        Keyword arguments (all optional):
+
+        ``t``        -- the text that was formatted
+        ``expected`` -- the RTF expected for ``t``, without the footer
+        ``result``   -- the complete formatter output
+        ``string``   -- alternative template using the ``{t}``,
+                        ``{expected}`` and ``{result}`` fields
+
+        Only the part of ``result`` that sits directly before the footer
+        and is as long as ``expected`` is included in the message.
+        """
+        string = kwargs.get('string', None)
+        t = self._escape(kwargs.get('t', ''))
+        expected = self._escape(kwargs.get('expected', ''))
+        result = self._escape(kwargs.get('result', ''))
+
+        if string is None:
+            string = (u"The expected output of '{t}'\n"
+                      u"\t\tShould be '{expected}'\n"
+                      u"\t\tActually outputs '{result}'\n"
+                      u"\t(WARNING: Partial Output of Result!)")
+
+        # Negative offsets: cut the footer off the end, then step back
+        # over as many characters as the expected text has.
+        end = -(len(self._escape(self.foot)))
+        start = end - len(expected)
+
+        return string.format(t=t,
+                             result=result[start:end],
+                             expected=expected)
+
+    def format_rtf(self, t):
+        """Lex ``t`` with TextLexer and return the RtfFormatter output."""
+        tokensource = list(TextLexer().get_tokens(t))
+        fmt = RtfFormatter()
+        buf = StringIO()
+        fmt.format(tokensource, buf)
+        result = buf.getvalue()
+        buf.close()
+        return result
+
+    def _assert_body(self, t, expected):
+        """Assert that formatting ``t`` yields ``expected`` before the footer.
+
+        The test is skipped when the footer itself is wrong, because the
+        body is located relative to it; test_rtf_footer reports that case.
+        """
+        result = self.format_rtf(t)
+        if not result.endswith(self.foot):
+            # skipTest() raises SkipTest.  Returning unittest.skip(...)
+            # would only hand back an unused decorator and let the test
+            # pass without checking anything.
+            self.skipTest('RTF Footer incorrect')
+        msg = self._build_message(t=t, result=result, expected=expected)
+        self.assertEndsWith(result, expected + self.foot, msg)
+
+    def test_rtf_header(self):
+        t = u''
+        result = self.format_rtf(t)
+        expected = r'{\rtf1\ansi\uc0'
+        msg = (u"RTF documents are expected to start with '{expected}'\n"
+               u"\t\tStarts instead with '{result}'\n"
+               u"\t(WARNING: Partial Output of Result!)".format(
+                   expected=expected,
+                   result=result[:len(expected)]))
+        self.assertStartsWith(result, expected, msg)
+
+    def test_rtf_footer(self):
+        t = u''
+        result = self.format_rtf(t)
+        expected = self.foot
+        msg = (u"RTF documents are expected to end with '{expected}'\n"
+               u"\t\tEnds instead with '{result}'\n"
+               u"\t(WARNING: Partial Output of Result!)".format(
+                   expected=self._escape(expected),
+                   result=self._escape(result[-len(expected):])))
+        self.assertEndsWith(result, expected, msg)
+
+    def test_ascii_characters(self):
+        # Plain ASCII must pass through unchanged.
+        self._assert_body(u'a b c d ~', r'a b c d ~')
+
+    def test_escape_characters(self):
+        # Backslash and brace are RTF syntax and must be backslash-escaped.
+        self._assert_body(u'\\ {{', r'\\ \{\{')
+
+    def test_single_characters(self):
+        # Non-ASCII characters below U+8000: one decimal \u escape each.
+        self._assert_body(
+            u'â € ¤ каждой',
+            r'{\u226} {\u8364} {\u164} '
+            r'{\u1082}{\u1072}{\u1078}{\u1076}{\u1086}{\u1081}')
+
+    def test_double_characters(self):
+        # Written as escapes so the test does not depend on an editor or
+        # transport preserving the invisible U+FE0E variation selector or
+        # the astral U+2FA1B, which must come out as a surrogate pair.
+        self._assert_body(
+            u'\u1000 \ud7a3 \u2195 \u2195\ufe0e \U0002fa1b',
+            r'{\u4096} {\u55203} {\u8597} '
+            r'{\u8597}{\u65038} {\u55422}{\u56859}')
diff --git a/tests/test_string_asserts.py b/tests/test_string_asserts.py
new file mode 100644
index 00000000..0beed15c
--- /dev/null
+++ b/tests/test_string_asserts.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+"""
+ Pygments string assert utility tests
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import unittest
+from string_asserts import StringTests
+
+class TestStringTests(StringTests, unittest.TestCase):
+    """Exercise the StringTests assertions on matching and failing input."""
+
+    def test_startswith_correct(self):
+        # A real prefix is accepted without raising.
+        self.assertStartsWith("AAA", "A")
+
+    # @unittest.expectedFailure not supported by nose
+    def test_startswith_incorrect(self):
+        # A string that is not a prefix must be rejected.
+        self.assertRaises(AssertionError,
+                          self.assertStartsWith, "AAA", "B")
+
+    # @unittest.expectedFailure not supported by nose
+    def test_startswith_short(self):
+        # A needle longer than the haystack can never be a prefix.
+        self.assertRaises(AssertionError,
+                          self.assertStartsWith, "A", "AA")
+
+    def test_endswith_correct(self):
+        # A real suffix is accepted without raising.
+        self.assertEndsWith("AAA", "A")
+
+    # @unittest.expectedFailure not supported by nose
+    def test_endswith_incorrect(self):
+        # A string that is not a suffix must be rejected.
+        self.assertRaises(AssertionError,
+                          self.assertEndsWith, "AAA", "B")
+
+    # @unittest.expectedFailure not supported by nose
+    def test_endswith_short(self):
+        # A needle longer than the haystack can never be a suffix.
+        self.assertRaises(AssertionError,
+                          self.assertEndsWith, "A", "AA")