mako/ext/extract.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

# ext/extract.py
# Copyright 2006-2022 the Mako authors and contributors <see AUTHORS file>
#
# This module is part of Mako and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php

from io import BytesIO
from io import StringIO
import re

from mako import lexer
from mako import parsetree


class MessageExtractor:
    use_bytes = True

    def process_file(self, fileobj):
        template_node = lexer.Lexer(
            fileobj.read(), input_encoding=self.config["encoding"]
        ).parse()
        yield from self.extract_nodes(template_node.get_children())

    def extract_nodes(self, nodes):
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config["encoding"] or "ascii"
        comment_tags = list(
            filter(None, re.split(r"\s+", self.config["comment-tags"]))
        )

        for node in nodes:
            child_nodes = None
            if (
                in_translator_comments
                and isinstance(node, parsetree.Text)
                and not node.content.strip()
            ):
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                if in_translator_comments:
                    translator_comments.extend(
                        self._split_comment(node.lineno, value)
                    )
                    continue
                for comment_tag in comment_tags:
                    if value.startswith(comment_tag):
                        in_translator_comments = True
                        translator_comments.extend(
                            self._split_comment(node.lineno, value)
                        )
                continue

            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the message
            if (
                translator_comments
                and translator_comments[-1][0] < node.lineno - 1
            ):
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments
            ]

            if isinstance(code, str) and self.use_bytes:
                code = code.encode(input_encoding, "backslashreplace")

            used_translator_comments = False
            # We add extra newline to work around a pybabel bug
            # (see python-babel/babel#274, parse_encoding dies if the first
            # input string of the input is non-ascii)
            # Also, because we added it, we have to subtract one from
            # node.lineno
            if self.use_bytes:
                code = BytesIO(b"\n" + code)
            else:
                code = StringIO("\n" + code)

            for message in self.process_python(
                code, node.lineno - 1, translator_strings
            ):
                yield message
                used_translator_comments = True

            if used_translator_comments:
                translator_comments = []
            in_translator_comments = False

            if child_nodes:
                yield from self.extract_nodes(child_nodes)

    @staticmethod
    def _split_comment(lineno, comment):
        """Return the multiline comment at lineno split into a list of
        comment line numbers and the accompanying comment line"""
        return [
            (lineno + index, line)
            for index, line in enumerate(comment.splitlines())
        ]