diff options
author | Georg Brandl <georg@python.org> | 2014-10-07 16:29:05 +0200 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2014-10-07 16:29:05 +0200 |
commit | b8100205450bf23cca7efdf569112391e355ee35 (patch) | |
tree | 88d2ab792377fec5506d186e60f2e5e6e89cd820 | |
parent | a6cf012f0d3eb16412e9aad23c44e4a8309f22f1 (diff) | |
download | pygments-b8100205450bf23cca7efdf569112391e355ee35.tar.gz |
Closes #1028: fix filters to return Unicode strings
-rw-r--r-- | pygments/filters/__init__.py | 12 | ||||
-rw-r--r-- | tests/test_basic_api.py | 23 |
2 files changed, 22 insertions, 13 deletions
```diff
diff --git a/pygments/filters/__init__.py b/pygments/filters/__init__.py
index 162c61ef..2685c784 100644
--- a/pygments/filters/__init__.py
+++ b/pygments/filters/__init__.py
@@ -239,16 +239,16 @@ class VisibleWhitespaceFilter(Filter):
                 setattr(self, name, (opt and default or ''))
         tabsize = get_int_opt(options, 'tabsize', 8)
         if self.tabs:
-            self.tabs += ' '*(tabsize-1)
+            self.tabs += ' ' * (tabsize - 1)
         if self.newlines:
             self.newlines += '\n'
         self.wstt = get_bool_opt(options, 'wstokentype', True)
 
     def filter(self, lexer, stream):
         if self.wstt:
-            spaces = self.spaces or ' '
-            tabs = self.tabs or '\t'
-            newlines = self.newlines or '\n'
+            spaces = self.spaces or u' '
+            tabs = self.tabs or u'\t'
+            newlines = self.newlines or u'\n'
             regex = re.compile(r'\s')
             def replacefunc(wschar):
                 if wschar == ' ':
@@ -298,7 +298,7 @@ class GobbleFilter(Filter):
         if left < len(value):
             return value[left:], 0
         else:
-            return '', left - len(value)
+            return u'', left - len(value)
 
     def filter(self, lexer, stream):
         n = self.n
@@ -309,7 +309,7 @@ class GobbleFilter(Filter):
             (parts[0], left) = self.gobble(parts[0], left)
             for i in range(1, len(parts)):
                 (parts[i], left) = self.gobble(parts[i], n)
-            value = '\n'.join(parts)
+            value = u'\n'.join(parts)
 
             if value != '':
                 yield ttype, value
diff --git a/tests/test_basic_api.py b/tests/test_basic_api.py
index 0beb0171..e0df3447 100644
--- a/tests/test_basic_api.py
+++ b/tests/test_basic_api.py
@@ -257,18 +257,27 @@ def test_styles():
 class FiltersTest(unittest.TestCase):
 
     def test_basic(self):
-        filter_args = {
-            'whitespace': {'spaces': True, 'tabs': True, 'newlines': True},
-            'highlight': {'names': ['isinstance', 'lexers', 'x']},
-        }
-        for x in filters.FILTERS:
+        filters_args = [
+            ('whitespace', {'spaces': True, 'tabs': True, 'newlines': True}),
+            ('whitespace', {'wstokentype': False, 'spaces': True}),
+            ('highlight', {'names': ['isinstance', 'lexers', 'x']}),
+            ('codetagify', {'codetags': 'API'}),
+            ('keywordcase', {'case': 'capitalize'}),
+            ('raiseonerror', {}),
+            ('gobble', {'n': 4}),
+            ('tokenmerge', {}),
+        ]
+        for x, args in filters_args:
             lx = lexers.PythonLexer()
-            lx.add_filter(x, **filter_args.get(x, {}))
+            lx.add_filter(x, **args)
             with open(TESTFILE, 'rb') as fp:
                 text = fp.read().decode('utf-8')
             tokens = list(lx.get_tokens(text))
+            self.assertTrue(all(isinstance(t[1], text_type)
+                                for t in tokens),
+                            '%s filter did not return Unicode' % x)
             roundtext = ''.join([t[1] for t in tokens])
-            if x not in ('whitespace', 'keywordcase'):
+            if x not in ('whitespace', 'keywordcase', 'gobble'):
                 # these filters change the text
                 self.assertEqual(roundtext, text,
                                  "lexer roundtrip with %s filter failed" % x)
```