summaryrefslogtreecommitdiff
path: root/src/pip/_vendor/html5lib/filters/lint.py
blob: fcc07eec5b2e5ea926fa8b2af199e14c9cac50dd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from __future__ import absolute_import, division, unicode_literals

from pip._vendor.six import text_type

from . import base
from ..constants import namespaces, voidElements

from ..constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)


class Filter(base.Filter):
    """Lints the token stream for errors

    If it finds any errors, it'll raise an ``AssertionError``.

    """
    def __init__(self, source, require_matching_tags=True):
        """Creates a Filter

        :arg source: the source token stream

        :arg require_matching_tags: whether or not to require matching tags

        """
        super(Filter, self).__init__(source)
        self.require_matching_tags = require_matching_tags

    def __iter__(self):
        open_elements = []
        for token in base.Filter.__iter__(self):
            type = token["type"]
            if type in ("StartTag", "EmptyTag"):
                namespace = token["namespace"]
                name = token["name"]
                assert namespace is None or isinstance(namespace, text_type)
                assert namespace != ""
                assert isinstance(name, text_type)
                assert name != ""
                assert isinstance(token["data"], dict)
                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                    assert type == "EmptyTag"
                else:
                    assert type == "StartTag"
                if type == "StartTag" and self.require_matching_tags:
                    open_elements.append((namespace, name))
                for (namespace, name), value in token["data"].items():
                    assert namespace is None or isinstance(namespace, text_type)
                    assert namespace != ""
                    assert isinstance(name, text_type)
                    assert name != ""
                    assert isinstance(value, text_type)

            elif type == "EndTag":
                namespace = token["namespace"]
                name = token["name"]
                assert namespace is None or isinstance(namespace, text_type)
                assert namespace != ""
                assert isinstance(name, text_type)
                assert name != ""
                if (not namespace or namespace == namespaces["html"]) and name in voidElements:
                    assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
                elif self.require_matching_tags:
                    start = open_elements.pop()
                    assert start == (namespace, name)

            elif type == "Comment":
                data = token["data"]
                assert isinstance(data, text_type)

            elif type in ("Characters", "SpaceCharacters"):
                data = token["data"]
                assert isinstance(data, text_type)
                assert data != ""
                if type == "SpaceCharacters":
                    assert data.strip(spaceCharacters) == ""

            elif type == "Doctype":
                name = token["name"]
                assert name is None or isinstance(name, text_type)
                assert token["publicId"] is None or isinstance(name, text_type)
                assert token["systemId"] is None or isinstance(name, text_type)

            elif type == "Entity":
                assert isinstance(token["name"], text_type)

            elif type == "SerializerError":
                assert isinstance(token["data"], text_type)

            else:
                assert False, "Unknown token type: %(type)s" % {"type": type}

            yield token