summaryrefslogtreecommitdiff
path: root/src/lxml/cssselect.py
blob: e8ce5c8e10d1a196ac29b7085365538fcff4622b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""CSS Selectors based on XPath.

This module supports selecting XML/HTML tags based on CSS selectors.
See the `CSSSelector` class for details.

This is a thin wrapper around cssselect 0.7 or later.
"""

from __future__ import absolute_import

from . import etree
try:
    import cssselect as external_cssselect
except ImportError:
    raise ImportError(
        'cssselect does not seem to be installed. '
        'See https://pypi.org/project/cssselect/')


SelectorSyntaxError = external_cssselect.SelectorSyntaxError
ExpressionError = external_cssselect.ExpressionError
SelectorError = external_cssselect.SelectorError


__all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError',
           'CSSSelector']


class LxmlTranslator(external_cssselect.GenericTranslator):
    """
    A custom CSS selector to XPath translator with lxml-specific extensions.
    """
    def xpath_contains_function(self, xpath, function):
        # Defined there, removed in later drafts:
        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
        if function.argument_types() not in (['STRING'], ['IDENT']):
            raise ExpressionError(
                "Expected a single string or ident for :contains(), got %r"
                % function.arguments)
        value = function.arguments[0].value
        return xpath.add_condition(
            'contains(__lxml_internal_css:lower-case(string(.)), %s)'
            % self.xpath_literal(value.lower()))


class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
    """
    lxml extensions + HTML support.
    """


def _make_lower_case(context, s):
    return s.lower()

ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
ns.prefix = '__lxml_internal_css'
ns['lower-case'] = _make_lower_case


class CSSSelector(etree.XPath):
    """A CSS selector.

    Usage::

        >>> from lxml import etree, cssselect
        >>> select = cssselect.CSSSelector("a tag > child")

        >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
        >>> [ el.tag for el in select(root) ]
        ['child']

    To use CSS namespaces, you need to pass a prefix-to-namespace
    mapping as ``namespaces`` keyword argument::

        >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
        ...                                   namespaces={'rdf': rdfns})

        >>> rdf = etree.XML((
        ...     '<root xmlns:rdf="%s">'
        ...       '<rdf:Description>blah</rdf:Description>'
        ...     '</root>') % rdfns)
        >>> [(el.tag, el.text) for el in select_ns(rdf)]
        [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]

    """
    def __init__(self, css, namespaces=None, translator='xml'):
        if translator == 'xml':
            translator = LxmlTranslator()
        elif translator == 'html':
            translator = LxmlHTMLTranslator()
        elif translator == 'xhtml':
            translator = LxmlHTMLTranslator(xhtml=True)
        path = translator.css_to_xpath(css)
        super(CSSSelector, self).__init__(path, namespaces=namespaces)
        self.css = css

    def __repr__(self):
        return '<%s %x for %r>' % (
            self.__class__.__name__,
            abs(id(self)),
            self.css)