1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
# ext/extract.py
# Copyright 2006-2020 the Mako authors and contributors <see AUTHORS file>
#
# This module is part of Mako and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from io import BytesIO
from io import StringIO
import re
from mako import lexer
from mako import parsetree
class MessageExtractor:
use_bytes = True
def process_file(self, fileobj):
template_node = lexer.Lexer(
fileobj.read(), input_encoding=self.config["encoding"]
).parse()
for extracted in self.extract_nodes(template_node.get_children()):
yield extracted
def extract_nodes(self, nodes):
translator_comments = []
in_translator_comments = False
input_encoding = self.config["encoding"] or "ascii"
comment_tags = list(
filter(None, re.split(r"\s+", self.config["comment-tags"]))
)
for node in nodes:
child_nodes = None
if (
in_translator_comments
and isinstance(node, parsetree.Text)
and not node.content.strip()
):
# Ignore whitespace within translator comments
continue
if isinstance(node, parsetree.Comment):
value = node.text.strip()
if in_translator_comments:
translator_comments.extend(
self._split_comment(node.lineno, value)
)
continue
for comment_tag in comment_tags:
if value.startswith(comment_tag):
in_translator_comments = True
translator_comments.extend(
self._split_comment(node.lineno, value)
)
continue
if isinstance(node, parsetree.DefTag):
code = node.function_decl.code
child_nodes = node.nodes
elif isinstance(node, parsetree.BlockTag):
code = node.body_decl.code
child_nodes = node.nodes
elif isinstance(node, parsetree.CallTag):
code = node.code.code
child_nodes = node.nodes
elif isinstance(node, parsetree.PageTag):
code = node.body_decl.code
elif isinstance(node, parsetree.CallNamespaceTag):
code = node.expression
child_nodes = node.nodes
elif isinstance(node, parsetree.ControlLine):
if node.isend:
in_translator_comments = False
continue
code = node.text
elif isinstance(node, parsetree.Code):
in_translator_comments = False
code = node.code.code
elif isinstance(node, parsetree.Expression):
code = node.code.code
else:
continue
# Comments don't apply unless they immediately precede the message
if (
translator_comments
and translator_comments[-1][0] < node.lineno - 1
):
translator_comments = []
translator_strings = [
comment[1] for comment in translator_comments
]
if isinstance(code, str) and self.use_bytes:
code = code.encode(input_encoding, "backslashreplace")
used_translator_comments = False
# We add extra newline to work around a pybabel bug
# (see python-babel/babel#274, parse_encoding dies if the first
# input string of the input is non-ascii)
# Also, because we added it, we have to subtract one from
# node.lineno
if self.use_bytes:
code = BytesIO(b"\n" + code)
else:
code = StringIO("\n" + code)
for message in self.process_python(
code, node.lineno - 1, translator_strings
):
yield message
used_translator_comments = True
if used_translator_comments:
translator_comments = []
in_translator_comments = False
if child_nodes:
for extracted in self.extract_nodes(child_nodes):
yield extracted
@staticmethod
def _split_comment(lineno, comment):
"""Return the multiline comment at lineno split into a list of
comment line numbers and the accompanying comment line"""
return [
(lineno + index, line)
for index, line in enumerate(comment.splitlines())
]
|