horizon/utils/babel_extract_angular.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170

# -*- encoding: utf-8 -*-
# Copyright 2015, Rackspace, US, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

from html import parser
import re

from oslo_utils import encodeutils


# regex to find filter translation expressions
filter_regex = re.compile(
    r"""{\$\s*(::)?\s*('([^']|\\')+'|"([^"]|\\")+")\s*\|\s*translate\s*\$}"""
)

# browser innerHTML decodes some html entities automatically, so when
# we extract the msgid and want to match what Javascript sees, we need
# to leave some entities alone, but decode all the rest. Add entries
# to HTML_ENTITIES as necessary.
HTML_ENTITY_PASSTHROUGH = {'amp', 'gt', 'lt'}
HTML_ENTITY_DECODED = {
    'reg': u'®',
    'times': u'×'
}


class AngularGettextHTMLParser(parser.HTMLParser):
    """Parse HTML to find translate directives.

    Currently this parses for these forms of translation:

    <translate>content</translate>
        The content will be translated. Angular value templating will be
        recognised and transformed into gettext-familiar translation
        strings (i.e. "{$ expression $}" becomes "%(expression)")
    <p translate>content</p>
        The content will be translated. As above.
    {$ 'content' | translate $}
        The string will be translated, minus expression handling (i.e. just
        bare strings are allowed.)
    {$ ::'content' | translate $}
        The string will be translated. As above.
    """

    def __init__(self):
        super().__init__(
            convert_charrefs=False
        )

        self.in_translate = False
        self.inner_tags = []
        self.data = ''
        self.strings = []
        self.line = 0
        self.plural = False
        self.plural_form = ''
        self.comments = []

    def handle_starttag(self, tag, attrs):
        self.line = self.getpos()[0]
        if tag == 'translate' or \
                (attrs and 'translate' in [attr[0] for attr in attrs]):
            self.in_translate = True
            self.plural_form = ''
            for attr, value in attrs:
                if attr == 'translate-plural':
                    self.plural = True
                    self.plural_form = value
                if attr == 'translate-comment':
                    self.comments.append(value)
        elif self.in_translate:
            s = tag
            if attrs:
                s += ' ' + ' '.join('%s="%s"' % a for a in attrs)
            self.data += '<%s>' % s
            self.inner_tags.append(tag)
        else:
            for attr in attrs:
                if not attr[1]:
                    continue
                for match in filter_regex.findall(attr[1]):
                    if match:
                        self.strings.append(
                            (self.line, u'gettext', match[1][1:-1], [])
                        )

    def handle_data(self, data):
        if self.in_translate:
            self.data += data
        else:
            for match in filter_regex.findall(data):
                self.strings.append(
                    (self.line, u'gettext', match[1][1:-1], [])
                )

    def handle_entityref(self, name):
        if self.in_translate:
            if name in HTML_ENTITY_PASSTHROUGH:
                self.data += '&%s;' % name
            else:
                self.data += HTML_ENTITY_DECODED[name]

    def handle_charref(self, name):
        if self.in_translate:
            self.data += '&#%s;' % name

    def handle_comment(self, comment):
        if self.in_translate:
            self.data += '<!--%s-->' % comment

    def handle_endtag(self, tag):
        if self.in_translate:
            if self.inner_tags:
                tag = self.inner_tags.pop()
                self.data += "</%s>" % tag
                return
            if self.plural_form:
                messages = (
                    self.data.strip(),
                    self.plural_form
                )
                func_name = u'ngettext'
            else:
                messages = self.data.strip()
                func_name = u'gettext'
            self.strings.append(
                (self.line, func_name, messages, self.comments)
            )
            self.in_translate = False
            self.data = ''
            self.comments = []


def extract_angular(fileobj, keywords, comment_tags, options):
    """Extract messages from angular template (HTML) files.

    It extract messages from angular template (HTML) files that use
    angular-gettext translate directive as per
    https://angular-gettext.rocketeer.be/

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: This is a standard parameter so it isaccepted but ignored.

    :param comment_tags: This is a standard parameter so it is accepted but
                        ignored.
    :param options: Another standard parameter that is accepted but ignored.
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    """

    parser = AngularGettextHTMLParser()

    for line in fileobj:
        parser.feed(encodeutils.safe_decode(line))

    for string in parser.strings:
        yield(string)