import re

# When sphinx (including the napoleon extension) parses the parameters
# section of a docstring, it converts the information into field lists.
# Some items in the list are for the parameter type. When the type fields
# are processed, the text is split and some tokens are turned into
# pending_xref nodes. These nodes are responsible for creating links.
#
# numpydoc does not create field lists, so the type information is
# not placed into fields that can be processed to make links. Instead,
# when parsing the type information we identify tokens that are link
# worthy and wrap them around a :obj: role.

# Note: we never split on commas that are not followed by a space.
# You risk creating bad rst markup if you do so.

QUALIFIED_NAME_RE = re.compile(
    # e.g. int, numpy.array, ~numpy.array, .class_in_current_module
    r"^"
    r"[~\.]?"
    r"[a-zA-Z_]\w*"
    r"(?:\.[a-zA-Z_]\w*)*"
    r"$"
)

CONTAINER_SPLIT_RE = re.compile(
    # splits 'dict(str, int)' into
    #    ['dict', '(', 'str', ', ', 'int', ')', '']
    r"(\s*[\[\]\(\)\{\}]\s*|,\s+)"
)

CONTAINER_SPLIT_REJECT_RE = re.compile(
    # Leads to bad markup e.g.
    # {int}qualified_name
    r"[\]\)\}]\w"
)

DOUBLE_QUOTE_SPLIT_RE = re.compile(
    # splits 'callable ``f(x0, *args)`` or ``f(x0, y0, *args)``' into
    # ['callable ', '``f(x0, *args)``', ' or ', '``f(x0, y0, *args)``', '']
    r"(``.+?``)"
)

ROLE_SPLIT_RE = re.compile(
    # splits to preserve ReST roles; the lookbehind keeps an escaped
    # backtick (\`) from terminating the role text early
    r"(:\w+:`.+?(?<!\\)`)"
)

SINGLE_QUOTE_SPLIT_RE = re.compile(
    # splits to preserve quoted expressions
    r"(`.+?`)"
)

TEXT_SPLIT_RE = re.compile(
    # splits on ' or ', ' | ', ', ' and ' '
    r"( or | \| |, | )"
)

CONTAINER_CHARS = set("[](){}")

# Save people some time and add alias for commonly used type names.
DEFAULT_LINKS = {
    # Python
    "None": ":data:`python:None`",
    "bool": ":ref:`bool <python:bltin-boolean-values>`",
    "boolean": ":ref:`bool <python:bltin-boolean-values>`",
    "True": ":data:`python:True`",
    "False": ":data:`python:False`",
    "list": ":class:`python:list`",
    "tuple": ":class:`python:tuple`",
    "str": ":class:`python:str`",
    "string": ":class:`python:str`",
    "dict": ":class:`python:dict`",
    "float": ":class:`python:float`",
    "int": ":class:`python:int`",
    "callable": ":func:`python:callable`",
    "iterable": ":term:`python:iterable`",
    "sequence": ":term:`python:sequence`",
    "contextmanager": ":func:`python:contextlib.contextmanager`",
    "namedtuple": ":func:`python:collections.namedtuple`",
    "generator": ":term:`python:generator`",
    # NumPy
    "array": "numpy.ndarray",
    "ndarray": "numpy.ndarray",
    "np.ndarray": "numpy.ndarray",
    "array-like": ":term:`numpy:array_like`",
    "array_like": ":term:`numpy:array_like`",
    "scalar": ":ref:`scalar <numpy:arrays.scalars>`",
    "RandomState": "numpy.random.RandomState",
    "np.random.RandomState": "numpy.random.RandomState",
    "np.inf": ":data:`numpy.inf`",
    "np.nan": ":data:`numpy.nan`",
    "numpy": ":mod:`numpy`",
}


def make_xref(param_type, xref_aliases, xref_ignore):
    """Parse and apply appropriate sphinx role(s) to `param_type`.

    The :obj: role is the default.

    Parameters
    ----------
    param_type : str
        text
    xref_aliases : dict
        Mapping used to resolve common abbreviations and aliases
        to fully qualified names that can be cross-referenced.
    xref_ignore : set or "all"
        A set containing words not to cross-reference. Instead of a
        set, the string 'all' can be given to ignore all unrecognized
        terms. Unrecognized terms include those that are not in
        `xref_aliases` and are not already wrapped in a reST role.

    Returns
    -------
    out : str
        Text with fully-qualified names and terms that may be wrapped
        in a ``:obj:`` role.
    """
    ignore_set = xref_ignore
    wrap_unknown = True
    if isinstance(xref_ignore, str):
        if xref_ignore.lower() == "all":
            wrap_unknown = False
            ignore_set = set()
        else:
            raise TypeError(f"xref_ignore must be a set or 'all', got {xref_ignore}")

    if param_type in xref_aliases:
        link, title = xref_aliases[param_type], param_type
        param_type = link
    else:
        link = title = param_type

    if QUALIFIED_NAME_RE.match(link) and link not in ignore_set:
        if link != title:
            return f":obj:`{title} <{link}>`"
        if wrap_unknown:
            return f":obj:`{link}`"
        return link

    def _split_and_apply_re(s, pattern):
        """
        Split string using the regex pattern,
        apply main function to the parts that do not match the pattern,
        combine the results
        """
        results = []
        tokens = pattern.split(s)
        n = len(tokens)
        if n > 1:
            for i, tok in enumerate(tokens):
                if pattern.match(tok):
                    results.append(tok)
                else:
                    res = make_xref(tok, xref_aliases, xref_ignore)
                    # Opening brackets immediately after a role is
                    # bad markup. Detect that and add backslash.
                    # :role:`type`( to :role:`type`\(
                    if res and res[-1] == "`" and i < n - 1:
                        next_char = tokens[i + 1][0]
                        if next_char in "([{":
                            res += "\\"
                    results.append(res)

            return "".join(results)
        return s

    # The cases are dealt with in an order that prevents conflict.
    # Then the strategy is:
    #   - Identify a pattern we are not interested in
    #   - split off the pattern
    #   - re-apply the function to the other parts
    #   - join the results with the pattern

    # Unsplittable literal
    if "``" in param_type:
        return _split_and_apply_re(param_type, DOUBLE_QUOTE_SPLIT_RE)

    # Any roles
    if ":`" in param_type:
        return _split_and_apply_re(param_type, ROLE_SPLIT_RE)

    # Any quoted expressions
    if "`" in param_type:
        return _split_and_apply_re(param_type, SINGLE_QUOTE_SPLIT_RE)

    # Any sort of bracket '[](){}'
    if any(c in CONTAINER_CHARS for c in param_type):
        if CONTAINER_SPLIT_REJECT_RE.search(param_type):
            return param_type
        return _split_and_apply_re(param_type, CONTAINER_SPLIT_RE)

    # Common splitter tokens
    return _split_and_apply_re(param_type, TEXT_SPLIT_RE)