diff options
-rw-r--r-- | mako/ext/extract.py | 12 | ||||
-rw-r--r-- | test/ext/test_babelplugin.py | 13 | ||||
-rw-r--r-- | test/templates/gettext_cp1251.mako | 1 | ||||
-rw-r--r-- | test/templates/gettext_utf8.mako | 1 |
4 files changed, 24 insertions, 3 deletions
diff --git a/mako/ext/extract.py b/mako/ext/extract.py index 313c088..8dd2e96 100644 --- a/mako/ext/extract.py +++ b/mako/ext/extract.py @@ -16,6 +16,7 @@ class MessageExtractor(object): def extract_nodes(self, nodes): translator_comments = [] in_translator_comments = False + input_encoding = self.config['encoding'] or 'ascii' comment_tags = list( filter(None, re.split(r'\s+', self.config['comment-tags']))) @@ -76,13 +77,18 @@ class MessageExtractor(object): comment[1] for comment in translator_comments] if isinstance(code, compat.text_type): - code = code.encode('ascii', 'backslashreplace') + code = code.encode(input_encoding, 'backslashreplace') used_translator_comments = False - code = compat.byte_buffer(code) + # We add extra newline to work around a pybabel bug + # (see python-babel/babel#274, parse_encoding dies if the first + # input string of the input is non-ascii) + # Also, because we added it, we have to subtract one from + # node.lineno + code = compat.byte_buffer(compat.b('\n') + code) for message in self.process_python( - code, node.lineno, translator_strings): + code, node.lineno - 1, translator_strings): yield message used_translator_comments = True diff --git a/test/ext/test_babelplugin.py b/test/ext/test_babelplugin.py index 3789b58..abce70a 100644 --- a/test/ext/test_babelplugin.py +++ b/test/ext/test_babelplugin.py @@ -78,3 +78,16 @@ class ExtractMakoTestCase(TemplateTest): (99, '_', 'No action at a distance.', []), ] self.assertEqual(expected, messages) + + @skip() + def test_extract_utf8(self): + mako_tmpl = open(os.path.join(template_base, 'gettext_utf8.mako'), 'rb') + message = next(extract(mako_tmpl, {'_', None}, [], {'encoding': 'utf-8'})) + assert message == (1, '_', u'K\xf6ln', []) + + @skip() + def test_extract_cp1251(self): + mako_tmpl = open(os.path.join(template_base, 'gettext_cp1251.mako'), 'rb') + message = next(extract(mako_tmpl, {'_', None}, [], {'encoding': 'cp1251'})) + # "test" in Russian.
File encoding is cp1251 (aka "windows-1251") + assert message == (1, '_', u'\u0442\u0435\u0441\u0442', []) diff --git a/test/templates/gettext_cp1251.mako b/test/templates/gettext_cp1251.mako new file mode 100644 index 0000000..9341d93 --- /dev/null +++ b/test/templates/gettext_cp1251.mako @@ -0,0 +1 @@ +${_("тест")} diff --git a/test/templates/gettext_utf8.mako b/test/templates/gettext_utf8.mako new file mode 100644 index 0000000..761f946 --- /dev/null +++ b/test/templates/gettext_utf8.mako @@ -0,0 +1 @@ +${_("Köln")} |