numpydoc/xref.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197

import re

# When sphinx (including the napoleon extension) parses the parameters
# section of a docstring, it converts the information into field lists.
# Some items in the list are for the parameter type. When the type fields
# are processed, the text is split and some tokens are turned into
# pending_xref nodes. These nodes are responsible for creating links.
#
# numpydoc does not create field lists, so the type information is
# not placed into fields that can be processed to make links. Instead,
# when parsing the type information we identify tokens that are
# link-worthy and wrap them in a :obj: role.

# Note: we never split on commas that are not followed by a space;
# doing so risks creating bad reST markup.

# Recognises a (possibly dotted) identifier path, anchored at both ends,
# with an optional single leading "~" or ".".
QUALIFIED_NAME_RE = re.compile(
    # e.g. int, numpy.array, ~numpy.array, .class_in_current_module
    r"^"
    r"[~\.]?"  # optional "~" or "." prefix
    r"[a-zA-Z_]\w*"  # first identifier component
    r"(?:\.[a-zA-Z_]\w*)*"  # zero or more further ".identifier" components
    r"$"
)

# Separator pattern for bracketed type expressions. The single capturing
# group makes ``split`` keep the separators in the resulting list.
CONTAINER_SPLIT_RE = re.compile(
    # splits dict[str, int] into
    #    ['dict', '[', 'str', ', ', 'int', ']', '']
    # A separator is any one bracket character (with surrounding
    # whitespace), or a comma followed by whitespace.
    r"(\s*[\[\]\(\)\{\}]\s*|,\s+)"
)

# Detects a closing bracket that is immediately followed by a word
# character; text containing this is not split on brackets.
CONTAINER_SPLIT_REJECT_RE = re.compile(
    # Leads to bad markup e.g.
    # {int}qualified_name
    r"[\]\)\}]\w"
)

# Isolates double-backtick literals (shortest match between a pair of
# "``" delimiters); the capturing group keeps them in the split output.
DOUBLE_QUOTE_SPLIT_RE = re.compile(
    # splits 'callable ``f(x0, *args)`` or ``f(x0, y0, *args)``' into
    #    ['callable ', '``f(x0, *args)``', ' or ', '``f(x0, y0, *args)``', '']
    r"(``.+?``)"
)

# Isolates text of the form :name:`...`, where the closing backtick is the
# first one not preceded by a backslash.
ROLE_SPLIT_RE = re.compile(
    # splits to preserve reST roles
    r"(:\w+:`.+?(?<!\\)`)"
)

# Isolates text enclosed in a pair of single backticks (shortest match).
SINGLE_QUOTE_SPLIT_RE = re.compile(
    # splits to preserve single-backtick quoted expressions
    r"(`.+?`)"
)

# Separator pattern for plain text. The alternatives are tried in order, so
# ' or ' and ' | ' are captured as one separator rather than as whitespace
# runs around a word.
TEXT_SPLIT_RE = re.compile(
    # splits on ' or ', ' | ', ', ' and ' '
    r"(\s+or\s+|\s+\|\s+|,\s+|\s+)"
)

# The six bracket characters; make_xref tests text against this set to
# decide whether to take the bracket-splitting path.
CONTAINER_CHARS = set("[](){}")

# Save people some time and add some common standard aliases.
# Each key is a term as it may appear in a type description; each value is
# either a complete reST role or a bare qualified name (e.g. "numpy.ndarray").
# NOTE(review): presumably used as (part of) the ``xref_aliases`` mapping
# passed to make_xref -- the caller is not visible here, confirm.
DEFAULT_LINKS = {
    # Python
    "None": ":data:`python:None`",
    "bool": ":ref:`bool <python:bltin-boolean-values>`",
    "boolean": ":ref:`bool <python:bltin-boolean-values>`",
    "True": ":data:`python:True`",
    "False": ":data:`python:False`",
    "list": ":class:`python:list`",
    "tuple": ":class:`python:tuple`",
    "str": ":class:`python:str`",
    "string": ":class:`python:str`",
    "dict": ":class:`python:dict`",
    "float": ":class:`python:float`",
    "int": ":class:`python:int`",
    "callable": ":func:`python:callable`",
    "iterable": ":term:`python:iterable`",
    "sequence": ":term:`python:sequence`",
    "contextmanager": ":func:`python:contextlib.contextmanager`",
    "namedtuple": ":func:`python:collections.namedtuple`",
    "generator": ":term:`python:generator`",
    # NumPy
    "array": "numpy.ndarray",
    "ndarray": "numpy.ndarray",
    "np.ndarray": "numpy.ndarray",
    "array-like": ":term:`numpy:array_like`",
    "array_like": ":term:`numpy:array_like`",
    "scalar": ":ref:`scalar <numpy:arrays.scalars>`",
    "RandomState": "numpy.random.RandomState",
    "np.random.RandomState": "numpy.random.RandomState",
    "np.inf": ":data:`numpy.inf`",
    "np.nan": ":data:`numpy.nan`",
    "numpy": ":mod:`numpy`",
}


def make_xref(param_type, xref_aliases, xref_ignore):
    """Turn the link-worthy parts of a type description into reST roles.

    A term that looks like a qualified name is wrapped in the ``:obj:``
    role; anything else is split on literals, roles, brackets or plain-text
    separators and each piece is processed recursively.

    Parameters
    ----------
    param_type : str
        The type description to process.
    xref_aliases : dict
        Maps abbreviations and aliases to their replacement (a fully
        qualified name or a ready-made role). A `param_type` found here is
        substituted before any further processing.
    xref_ignore : set or "all"
        Words that must not be cross-referenced. The string ``"all"``
        (compared case-insensitively) means: leave every term that has no
        alias unwrapped.

    Returns
    -------
    out : str
        The text with qualified names and aliased terms wrapped in roles
        where applicable.

    Raises
    ------
    TypeError
        If `xref_ignore` is a string other than ``"all"``.
    """
    # Normalise ``xref_ignore`` into a set of skipped words plus a flag
    # saying whether un-aliased names get the default :obj: wrapping.
    if isinstance(xref_ignore, str):
        if xref_ignore.lower() != "all":
            raise TypeError(f"xref_ignore must be a set or 'all', got {xref_ignore}")
        skipped_words, link_unknown = set(), False
    else:
        skipped_words, link_unknown = xref_ignore, True

    # Keep the text as written for display; from here on ``param_type`` is
    # the alias target when an alias exists.
    title = param_type
    if param_type in xref_aliases:
        param_type = xref_aliases[param_type]

    # A lone qualified name needs no splitting.
    if QUALIFIED_NAME_RE.match(param_type) and param_type not in skipped_words:
        if param_type != title:
            return f":obj:`{title} <{param_type}>`"
        return f":obj:`{param_type}`" if link_unknown else param_type

    def _split_and_link(text, splitter):
        """
        Cut `text` at every match of `splitter`, keep the matched pieces
        verbatim, run make_xref on the remaining pieces and glue everything
        back together. Text without any match is returned unchanged.
        """
        pieces = splitter.split(text)
        if len(pieces) <= 1:
            return text

        last_index = len(pieces) - 1
        rebuilt = []
        for index, piece in enumerate(pieces):
            if splitter.match(piece):
                rebuilt.append(piece)
                continue
            linked = make_xref(piece, xref_aliases, xref_ignore)
            # An opening bracket directly after a role is bad markup, so
            # :role:`type`( becomes :role:`type`\(
            if (
                linked.endswith("`")
                and index < last_index
                and pieces[index + 1][0] in "([{"
            ):
                linked += "\\"
            rebuilt.append(linked)
        return "".join(rebuilt)

    # Peel off the constructs that must stay intact, most specific first so
    # the cases cannot interfere with each other: double-backtick literals,
    # then roles, then single-backtick expressions. Everything around the
    # protected construct is fed back through make_xref.
    protected_constructs = (
        ("``", DOUBLE_QUOTE_SPLIT_RE),
        (":`", ROLE_SPLIT_RE),
        ("`", SINGLE_QUOTE_SPLIT_RE),
    )
    for marker, splitter in protected_constructs:
        if marker in param_type:
            return _split_and_link(param_type, splitter)

    # Text containing any of '[](){}' is split on brackets, unless a closing
    # bracket is glued to a following word, in which case it is left alone.
    if not CONTAINER_CHARS.isdisjoint(param_type):
        if CONTAINER_SPLIT_REJECT_RE.search(param_type):
            return param_type
        splitter = CONTAINER_SPLIT_RE
    else:
        # Only the common plain-text separators remain.
        splitter = TEXT_SPLIT_RE
    return _split_and_link(param_type, splitter)