diff options
-rw-r--r-- | AUTHORS | 1 | ||||
-rw-r--r-- | pygments/formatters/rtf.py | 33 | ||||
-rw-r--r-- | pygments/util.py | 5 | ||||
-rw-r--r-- | tests/string_asserts.py | 22 | ||||
-rw-r--r-- | tests/test_rtf_formatter.py | 109 | ||||
-rw-r--r-- | tests/test_string_asserts.py | 39 |
6 files changed, 194 insertions, 15 deletions
@@ -122,6 +122,7 @@ Other contributors, listed alphabetically, are: * Ronny Pfannschmidt -- BBCode lexer * Benjamin Peterson -- Test suite refactoring * Dominik Picheta -- Nimrod lexer +* Andrew Pinkham -- RTF Formatter Refactoring * Clément Prévost -- UrbiScript lexer * raichoo -- Idris lexer * Kashif Rasul -- CUDA lexer diff --git a/pygments/formatters/rtf.py b/pygments/formatters/rtf.py index 4b03f8a7..cf65a927 100644 --- a/pygments/formatters/rtf.py +++ b/pygments/formatters/rtf.py @@ -10,7 +10,7 @@ """ from pygments.formatter import Formatter -from pygments.util import get_int_opt +from pygments.util import get_int_opt, _surrogatepair __all__ = ['RtfFormatter'] @@ -22,6 +22,10 @@ class RtfFormatter(Formatter): documents with color information and other useful stuff. Perfect for Copy and Paste into Microsoft® Word® documents. + Please note that ``encoding`` and ``outencoding`` options are ignored. + The RTF format is ASCII natively, but handles unicode characters correctly + thanks to escape sequences. + .. versionadded:: 0.6 Additional options accepted: @@ -74,28 +78,27 @@ class RtfFormatter(Formatter): # escape text text = self._escape(text) - if self.encoding in ('utf-8', 'utf-16', 'utf-32'): - encoding = 'iso-8859-15' - else: - encoding = self.encoding or 'iso-8859-15' buf = [] for c in text: - if ord(c) > 128: - ansic = c.encode(encoding, 'ignore') - if ansic and ord(ansic) > 128: - ansic = '\\\'%x' % ord(ansic) - else: - ansic = '?' - buf.append(r'\ud{\u%d%s}' % (ord(c), ansic)) - else: + cn = ord(c) + if cn < (2**7): + # ASCII character buf.append(str(c)) + elif (2**7) <= cn < (2**16): + # single unicode escape sequence + buf.append(r'{\u%d}' % cn) + elif (2**16) <= cn: + # RTF limits unicode to 16 bits. + # Force surrogate pairs + h,l = _surrogatepair(cn) + buf.append(r'{\u%d}{\u%d}' % (h,l)) return ''.join(buf).replace('\n', '\\par\n') def format_unencoded(self, tokensource, outfile): # rtf 1.8 header - outfile.write(r'{\rtf1\ansi\deff0' + outfile.write(r'{\rtf1\ansi\uc0\deff0' r'{\fonttbl{\f0\fmodern\fprq1\fcharset0%s;}}' r'{\colortbl;' % (self.fontface and ' ' + self._escape(self.fontface) or @@ -114,7 +117,7 @@ class RtfFormatter(Formatter): int(color[4:6], 16) )) offset += 1 - outfile.write(r'}\f0') + outfile.write(r'}\f0 ') if self.fontsize: outfile.write(r'\fs%d' % (self.fontsize)) diff --git a/pygments/util.py b/pygments/util.py index c302900f..5dc6981f 100644 --- a/pygments/util.py +++ b/pygments/util.py @@ -208,6 +208,11 @@ def looks_like_xml(text): # Python narrow build compatibility def _surrogatepair(c): + # Given a unicode character code + # with length greater than 16 bits, + # return the two 16 bit surrogate pair. + # From example D28 of: + # http://www.unicode.org/book/ch03.pdf return (0xd7c0 + (c >> 10), (0xdc00 + (c & 0x3ff))) def unirange(a, b): diff --git a/tests/string_asserts.py b/tests/string_asserts.py new file mode 100644 index 00000000..025a5281 --- /dev/null +++ b/tests/string_asserts.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +""" + Pygments string assert utility + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +class StringTests(object): + + def assertStartsWith(self, haystack, needle, msg=None): + if msg is None: + msg = "'{}' does not start with '{}'".format(haystack, needle) + if not haystack.startswith(needle): + raise(AssertionError(msg)) + + def assertEndsWith(self, haystack, needle, msg=None): + if msg is None: + msg = "'{}' does not end with '{}'".format(haystack, needle) + if not haystack.endswith(needle): + raise(AssertionError(msg)) diff --git a/tests/test_rtf_formatter.py b/tests/test_rtf_formatter.py new file mode 100644 index 00000000..30b136fd --- /dev/null +++ b/tests/test_rtf_formatter.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +""" + Pygments RTF formatter tests + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import unittest +from string_asserts import StringTests + +from pygments.util import StringIO +from pygments.formatters import RtfFormatter +from pygments.lexers.special import TextLexer + +class RtfFormatterTest(StringTests, unittest.TestCase): + foot = (r'\par' '\n' r'}') + + def _escape(self, string): + return(string.replace("\n", r"\n")) + + def _build_message(self, *args, **kwargs): + string = kwargs.get('string', None) + t = self._escape(kwargs.get('t', '')) + expected = self._escape(kwargs.get('expected', '')) + result = self._escape(kwargs.get('result', '')) + + if string is None: + string = (u"The expected output of '{t}'\n" + u"\t\tShould be '{expected}'\n" + u"\t\tActually outputs '{result}'\n" + u"\t(WARNING: Partial Output of Result!)") + + end = -(len(self._escape(self.foot))) + start = end-len(expected) + + return string.format(t=t, + result = result[start:end], + expected = expected) + + def format_rtf(self, t): + tokensource = list(TextLexer().get_tokens(t)) + fmt = RtfFormatter() + buf = StringIO() + fmt.format(tokensource, buf) + result = buf.getvalue() + buf.close() + return result + + def test_rtf_header(self): + t = u'' + result = self.format_rtf(t) + expected = r'{\rtf1\ansi\uc0' + msg = (u"RTF documents are expected to start with '{expected}'\n" + u"\t\tStarts intead with '{result}'\n" + u"\t(WARNING: Partial Output of Result!)".format( + expected = expected, + result = result[:len(expected)])) + self.assertStartsWith(result, expected, msg) + + def test_rtf_footer(self): + t = u'' + result = self.format_rtf(t) + expected = self.foot + msg = (u"RTF documents are expected to end with '{expected}'\n" + u"\t\tEnds intead with '{result}'\n" + u"\t(WARNING: Partial Output of Result!)".format( + expected = self._escape(expected), + result = self._escape(result[-len(expected):]))) + self.assertEndsWith(result, expected, msg) + + def test_ascii_characters(self): + t = u'a b c d ~' + result = self.format_rtf(t) + expected = (r'a b c d ~') + if not result.endswith(self.foot): + return(unittest.skip('RTF Footer incorrect')) + msg = self._build_message(t=t, result=result, expected=expected) + self.assertEndsWith(result, expected+self.foot, msg) + + def test_escape_characters(self): + t = u'\ {{' + result = self.format_rtf(t) + expected = (r'\\ \{\{') + if not result.endswith(self.foot): + return(unittest.skip('RTF Footer incorrect')) + msg = self._build_message(t=t, result=result, expected=expected) + self.assertEndsWith(result, expected+self.foot, msg) + + def test_single_characters(self): + t = u'â € ¤ каждой' + result = self.format_rtf(t) + expected = (r'{\u226} {\u8364} {\u164} ' + r'{\u1082}{\u1072}{\u1078}{\u1076}{\u1086}{\u1081}') + if not result.endswith(self.foot): + return(unittest.skip('RTF Footer incorrect')) + msg = self._build_message(t=t, result=result, expected=expected) + self.assertEndsWith(result, expected+self.foot, msg) + + def test_double_characters(self): + t = u'က 힣 ↕ ↕︎ 鼖' + result = self.format_rtf(t) + expected = (r'{\u4096} {\u55203} {\u8597} ' + r'{\u8597}{\u65038} {\u55422}{\u56859}') + if not result.endswith(self.foot): + return(unittest.skip('RTF Footer incorrect')) + msg = self._build_message(t=t, result=result, expected=expected) + self.assertEndsWith(result, expected+self.foot, msg) diff --git a/tests/test_string_asserts.py b/tests/test_string_asserts.py new file mode 100644 index 00000000..0beed15c --- /dev/null +++ b/tests/test_string_asserts.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +""" + Pygments string assert utility tests + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import unittest +from string_asserts import StringTests + +class TestStringTests(StringTests, unittest.TestCase): + + def test_startswith_correct(self): + self.assertStartsWith("AAA", "A") + + # @unittest.expectedFailure not supported by nose + def test_startswith_incorrect(self): + with self.assertRaises(AssertionError): + self.assertStartsWith("AAA", "B") + + # @unittest.expectedFailure not supported by nose + def test_startswith_short(self): + with self.assertRaises(AssertionError): + self.assertStartsWith("A", "AA") + + def test_endswith_correct(self): + self.assertEndsWith("AAA", "A") + + # @unittest.expectedFailure not supported by nose + def test_endswith_incorrect(self): + with self.assertRaises(AssertionError): + self.assertEndsWith("AAA", "B") + + # @unittest.expectedFailure not supported by nose + def test_endswith_short(self): + with self.assertRaises(AssertionError): + self.assertEndsWith("A", "AA") |