diff options
author | Adam Turner <9087854+aa-turner@users.noreply.github.com> | 2022-06-16 21:46:54 +0100 |
---|---|---|
committer | Adam Turner <9087854+aa-turner@users.noreply.github.com> | 2022-06-16 21:46:54 +0100 |
commit | 7e38d2dad808e6f3995d7266a78390a8ccc37781 (patch) | |
tree | dec683a1a86f1e20cac6892c6993ce8523d73179 /sphinx | |
parent | ce31e1c0c7b32f6be93186e0fef076ef65ff0b05 (diff) | |
parent | 6ab15a047cf9d3957aa884f5aa5b20e98f039f6e (diff) | |
download | sphinx-git-7e38d2dad808e6f3995d7266a78390a8ccc37781.tar.gz |
Merge branch '5.x'
# Conflicts:
# .github/workflows/builddoc.yml
# .github/workflows/lint.yml
# sphinx/registry.py
Diffstat (limited to 'sphinx')
-rw-r--r-- | sphinx/application.py | 64 | ||||
-rw-r--r-- | sphinx/builders/__init__.py | 16 | ||||
-rw-r--r-- | sphinx/builders/html/__init__.py | 18 | ||||
-rw-r--r-- | sphinx/cmd/quickstart.py | 5 | ||||
-rw-r--r-- | sphinx/config.py | 1 | ||||
-rw-r--r-- | sphinx/domains/std.py | 50 | ||||
-rw-r--r-- | sphinx/ext/autodoc/importer.py | 7 | ||||
-rw-r--r-- | sphinx/pycode/ast.py | 9 | ||||
-rw-r--r-- | sphinx/registry.py | 18 | ||||
-rw-r--r-- | sphinx/search/en.py | 7 | ||||
-rw-r--r-- | sphinx/search/zh.py | 9 | ||||
-rw-r--r-- | sphinx/themes/agogo/layout.html | 2 | ||||
-rw-r--r-- | sphinx/themes/agogo/static/agogo.css_t | 6 | ||||
-rw-r--r-- | sphinx/util/console.py | 5 | ||||
-rw-r--r-- | sphinx/util/inspect.py | 4 | ||||
-rw-r--r-- | sphinx/util/logging.py | 7 | ||||
-rw-r--r-- | sphinx/util/parallel.py | 8 | ||||
-rw-r--r-- | sphinx/util/stemmer/__init__.py | 63 | ||||
-rw-r--r-- | sphinx/util/stemmer/porter.py | 406 | ||||
-rw-r--r-- | sphinx/util/typing.py | 5 |
20 files changed, 211 insertions, 499 deletions
diff --git a/sphinx/application.py b/sphinx/application.py index 0aceff56b..218801322 100644 --- a/sphinx/application.py +++ b/sphinx/application.py @@ -133,9 +133,6 @@ class Sphinx: self.phase = BuildPhase.INITIALIZATION self.verbosity = verbosity self.extensions: Dict[str, Extension] = {} - self.builder: Optional[Builder] = None - self.env: Optional[BuildEnvironment] = None - self.project: Optional[Project] = None self.registry = SphinxComponentRegistry() # validate provided directories @@ -246,10 +243,16 @@ class Sphinx: # create the project self.project = Project(self.srcdir, self.config.source_suffix) + + # set up the build environment + self.env = self._init_env(freshenv) + # create the builder self.builder = self.create_builder(buildername) - # set up the build environment - self._init_env(freshenv) + + # build environment post-initialisation, after creating the builder + self._post_init_env() + # set up the builder self._init_builder() @@ -281,20 +284,34 @@ class Sphinx: else: logger.info(__('not available for built-in messages')) - def _init_env(self, freshenv: bool) -> None: + def _init_env(self, freshenv: bool) -> BuildEnvironment: filename = path.join(self.doctreedir, ENV_PICKLE_FILENAME) if freshenv or not os.path.exists(filename): - self.env = BuildEnvironment(self) - self.env.find_files(self.config, self.builder) + return self._create_fresh_env() else: - try: - with progress_message(__('loading pickled environment')): - with open(filename, 'rb') as f: - self.env = pickle.load(f) - self.env.setup(self) - except Exception as err: - logger.info(__('failed: %s'), err) - self._init_env(freshenv=True) + return self._load_existing_env(filename) + + def _create_fresh_env(self) -> BuildEnvironment: + env = BuildEnvironment(self) + self._fresh_env_used = True + return env + + def _load_existing_env(self, filename: str) -> BuildEnvironment: + try: + with progress_message(__('loading pickled environment')): + with open(filename, 'rb') as f: + env = pickle.load(f) + env.setup(self) + self._fresh_env_used = False + except Exception as err: + logger.info(__('failed: %s'), err) + env = self._create_fresh_env() + return env + + def _post_init_env(self) -> None: + if self._fresh_env_used: + self.env.find_files(self.config, self.builder) + del self._fresh_env_used def preload_builder(self, name: str) -> None: self.registry.preload_builder(self, name) @@ -304,10 +321,11 @@ class Sphinx: logger.info(__('No builder selected, using default: html')) name = 'html' - return self.registry.create_builder(self, name) + return self.registry.create_builder(self, name, self.env) def _init_builder(self) -> None: - self.builder.set_environment(self.env) + if not hasattr(self.builder, "env"): + self.builder.set_environment(self.env) self.builder.init() self.events.emit('builder-inited') @@ -984,8 +1002,9 @@ class Sphinx: kwargs['defer'] = 'defer' self.registry.add_js_file(filename, priority=priority, **kwargs) - if hasattr(self.builder, 'add_js_file'): - self.builder.add_js_file(filename, priority=priority, **kwargs) # type: ignore + if hasattr(self, 'builder') and hasattr(self.builder, 'add_js_file'): + self.builder.add_js_file(filename, # type: ignore[attr-defined] + priority=priority, **kwargs) def add_css_file(self, filename: str, priority: int = 500, **kwargs: Any) -> None: """Register a stylesheet to include in the HTML output. @@ -1045,8 +1064,9 @@ class Sphinx: """ logger.debug('[app] adding stylesheet: %r', filename) self.registry.add_css_files(filename, priority=priority, **kwargs) - if hasattr(self.builder, 'add_css_file'): - self.builder.add_css_file(filename, priority=priority, **kwargs) # type: ignore + if hasattr(self, 'builder') and hasattr(self.builder, 'add_css_file'): + self.builder.add_css_file(filename, # type: ignore[attr-defined] + priority=priority, **kwargs) def add_latex_package(self, packagename: str, options: str = None, after_hyperref: bool = False) -> None: diff --git a/sphinx/builders/__init__.py b/sphinx/builders/__init__.py index d8500e11b..9705ba894 100644 --- a/sphinx/builders/__init__.py +++ b/sphinx/builders/__init__.py @@ -3,6 +3,7 @@ import codecs import pickle import time +import warnings from os import path from typing import (TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Type, Union) @@ -11,6 +12,7 @@ from docutils import nodes from docutils.nodes import Node from sphinx.config import Config +from sphinx.deprecation import RemovedInSphinx70Warning from sphinx.environment import CONFIG_CHANGED_REASON, CONFIG_OK, BuildEnvironment from sphinx.environment.adapters.asset import ImageAdapter from sphinx.errors import SphinxError @@ -75,7 +77,7 @@ class Builder: #: The builder supports data URIs or not. supported_data_uri_images = False - def __init__(self, app: "Sphinx") -> None: + def __init__(self, app: "Sphinx", env: BuildEnvironment = None) -> None: self.srcdir = app.srcdir self.confdir = app.confdir self.outdir = app.outdir @@ -83,7 +85,14 @@ class Builder: ensuredir(self.doctreedir) self.app: Sphinx = app - self.env: Optional[BuildEnvironment] = None + if env is not None: + self.env: BuildEnvironment = env + self.env.set_versioning_method(self.versioning_method, + self.versioning_compare) + elif env is not Ellipsis: + # ... is passed by SphinxComponentRegistry.create_builder to not show two warnings. + warnings.warn("The 'env' argument to Builder will be required from Sphinx 7.", + RemovedInSphinx70Warning, stacklevel=2) self.events: EventManager = app.events self.config: Config = app.config self.tags: Tags = app.tags @@ -105,6 +114,9 @@ class Builder: def set_environment(self, env: BuildEnvironment) -> None: """Store BuildEnvironment object.""" + warnings.warn("Builder.set_environment is deprecated, pass env to " + "'Builder.__init__()' instead.", + RemovedInSphinx70Warning, stacklevel=2) self.env = env self.env.set_versioning_method(self.versioning_method, self.versioning_compare) diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py index b76739523..7737a1d38 100644 --- a/sphinx/builders/html/__init__.py +++ b/sphinx/builders/html/__init__.py @@ -26,6 +26,7 @@ from sphinx.builders import Builder from sphinx.config import Config from sphinx.deprecation import RemovedInSphinx70Warning, deprecated_alias from sphinx.domains import Domain, Index, IndexEntry +from sphinx.environment import BuildEnvironment from sphinx.environment.adapters.asset import ImageAdapter from sphinx.environment.adapters.indexentries import IndexEntries from sphinx.environment.adapters.toctree import TocTree @@ -51,6 +52,17 @@ INVENTORY_FILENAME = 'objects.inv' logger = logging.getLogger(__name__) return_codes_re = re.compile('[\r\n]+') +DOMAIN_INDEX_TYPE = Tuple[ + # Index name (e.g. py-modindex) + str, + # Index class + Type[Index], + # list of (heading string, list of index entries) pairs. + List[Tuple[str, List[IndexEntry]]], + # whether sub-entries should start collapsed + bool +] + def get_stable_hash(obj: Any) -> str: """ @@ -197,10 +209,10 @@ class StandaloneHTMLBuilder(Builder): download_support = True # enable download role imgpath: str = None - domain_indices: List[Tuple[str, Type[Index], List[Tuple[str, List[IndexEntry]]], bool]] = [] # NOQA + domain_indices: List[DOMAIN_INDEX_TYPE] = [] - def __init__(self, app: Sphinx) -> None: - super().__init__(app) + def __init__(self, app: Sphinx, env: BuildEnvironment = None) -> None: + super().__init__(app, env) # CSS files self.css_files: List[Stylesheet] = [] diff --git a/sphinx/cmd/quickstart.py b/sphinx/cmd/quickstart.py index 47853c90d..610052ea9 100644 --- a/sphinx/cmd/quickstart.py +++ b/sphinx/cmd/quickstart.py @@ -7,11 +7,14 @@ import sys import time from collections import OrderedDict from os import path -from typing import Any, Callable, Dict, List, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Union # try to import readline, unix specific enhancement try: import readline + if TYPE_CHECKING and sys.platform == "win32": # always false, for type checking + raise ImportError + if readline.__doc__ and 'libedit' in readline.__doc__: readline.parse_and_bind("bind ^I rl_complete") USE_LIBEDIT = True diff --git a/sphinx/config.py b/sphinx/config.py index 8c6dcfe32..318173f27 100644 --- a/sphinx/config.py +++ b/sphinx/config.py @@ -140,6 +140,7 @@ class Config: 'smartquotes_excludes': ({'languages': ['ja'], 'builders': ['man', 'text']}, 'env', []), + 'option_emphasise_placeholders': (False, 'env', []), } def __init__(self, config: Dict[str, Any] = {}, overrides: Dict[str, Any] = {}) -> None: diff --git a/sphinx/domains/std.py b/sphinx/domains/std.py index d5c962dc8..88a4d28cb 100644 --- a/sphinx/domains/std.py +++ b/sphinx/domains/std.py @@ -15,7 +15,7 @@ from sphinx.addnodes import desc_signature, pending_xref from sphinx.directives import ObjectDescription from sphinx.domains import Domain, ObjType from sphinx.locale import _, __ -from sphinx.roles import XRefRole +from sphinx.roles import EmphasizedLiteral, XRefRole from sphinx.util import docname_join, logging, ws_re from sphinx.util.docutils import SphinxDirective from sphinx.util.nodes import clean_astext, make_id, make_refnode @@ -34,6 +34,8 @@ option_desc_re = re.compile(r'((?:/|--|-|\+)?[^\s=]+)(=?\s*.*)') # RE for grammar tokens token_re = re.compile(r'`((~?\w*:)?\w+)`', re.U) +samp_role = EmphasizedLiteral() + class GenericObject(ObjectDescription[str]): """ @@ -170,15 +172,41 @@ class Cmdoption(ObjectDescription[str]): location=signode) continue optname, args = m.groups() - if optname.endswith('[') and args.endswith(']'): + if optname[-1] == '[' and args[-1] == ']': # optional value surrounded by brackets (ex. foo[=bar]) optname = optname[:-1] args = '[' + args if count: - signode += addnodes.desc_addname(', ', ', ') + if self.env.config.option_emphasise_placeholders: + signode += addnodes.desc_sig_punctuation(',', ',') + signode += addnodes.desc_sig_space() + else: + signode += addnodes.desc_addname(', ', ', ') signode += addnodes.desc_name(optname, optname) - signode += addnodes.desc_addname(args, args) + if self.env.config.option_emphasise_placeholders: + add_end_bracket = False + if not args: + continue + if args[0] == '[' and args[-1] == ']': + add_end_bracket = True + signode += addnodes.desc_sig_punctuation('[', '[') + args = args[1:-1] + if args[0] == ' ': + signode += addnodes.desc_sig_space() + args = args.strip() + if args[0] == '=': + signode += addnodes.desc_sig_punctuation('=', '=') + args = args[1:] + for part in samp_role.parse(args): + if isinstance(part, nodes.Text): + signode += nodes.Text(part.astext()) + else: + signode += part + if add_end_bracket: + signode += addnodes.desc_sig_punctuation(']', ']') + else: + signode += addnodes.desc_addname(args, args) if not count: firstname = optname signode['allnames'] = [optname] @@ -573,11 +601,11 @@ class StandardDomain(Domain): } dangling_warnings = { - 'term': 'term not in glossary: %(target)s', - 'numref': 'undefined label: %(target)s', - 'keyword': 'unknown keyword: %(target)s', - 'doc': 'unknown document: %(target)s', - 'option': 'unknown option: %(target)s', + 'term': 'term not in glossary: %(target)r', + 'numref': 'undefined label: %(target)r', + 'keyword': 'unknown keyword: %(target)r', + 'doc': 'unknown document: %(target)r', + 'option': 'unknown option: %(target)r', } # node_class -> (figtype, title_getter) @@ -1072,9 +1100,9 @@ def warn_missing_reference(app: "Sphinx", domain: Domain, node: pending_xref else: target = node['reftarget'] if target not in domain.anonlabels: # type: ignore - msg = __('undefined label: %s') + msg = __('undefined label: %r') else: - msg = __('Failed to create a cross reference. A title or caption not found: %s') + msg = __('Failed to create a cross reference. A title or caption not found: %r') logger.warning(msg % target, location=node, type='ref', subtype=node['reftype']) return True diff --git a/sphinx/ext/autodoc/importer.py b/sphinx/ext/autodoc/importer.py index d392ae75d..977cfbba4 100644 --- a/sphinx/ext/autodoc/importer.py +++ b/sphinx/ext/autodoc/importer.py @@ -3,7 +3,7 @@ import importlib import traceback import warnings -from typing import Any, Callable, Dict, List, NamedTuple, Optional +from typing import TYPE_CHECKING, Any, Callable, Dict, List, NamedTuple, Optional from sphinx.ext.autodoc.mock import ismock, undecorate from sphinx.pycode import ModuleAnalyzer, PycodeError @@ -11,10 +11,7 @@ from sphinx.util import logging from sphinx.util.inspect import (getannotations, getmro, getslots, isclass, isenumclass, safe_getattr) -if False: - # For type annotation - from typing import Type # NOQA - +if TYPE_CHECKING: from sphinx.ext.autodoc import ObjectMember logger = logging.getLogger(__name__) diff --git a/sphinx/pycode/ast.py b/sphinx/pycode/ast.py index 755116475..d4646f0b7 100644 --- a/sphinx/pycode/ast.py +++ b/sphinx/pycode/ast.py @@ -141,6 +141,9 @@ class _UnparseVisitor(ast.NodeVisitor): return "%s.%s" % (self.visit(node.value), node.attr) def visit_BinOp(self, node: ast.BinOp) -> str: + # Special case ``**`` to not have surrounding spaces. + if isinstance(node.op, ast.Pow): + return "".join(map(self.visit, (node.left, node.op, node.right))) return " ".join(self.visit(e) for e in [node.left, node.op, node.right]) def visit_BoolOp(self, node: ast.BoolOp) -> str: @@ -202,7 +205,11 @@ class _UnparseVisitor(ast.NodeVisitor): return "%s[%s]" % (self.visit(node.value), self.visit(node.slice)) def visit_UnaryOp(self, node: ast.UnaryOp) -> str: - return "%s %s" % (self.visit(node.op), self.visit(node.operand)) + # UnaryOp is one of {UAdd, USub, Invert, Not}, which refer to ``+x``, + # ``-x``, ``~x``, and ``not x``. Only Not needs a space. + if isinstance(node.op, ast.Not): + return "%s %s" % (self.visit(node.op), self.visit(node.operand)) + return "%s%s" % (self.visit(node.op), self.visit(node.operand)) def visit_Tuple(self, node: ast.Tuple) -> str: if len(node.elts) == 0: diff --git a/sphinx/registry.py b/sphinx/registry.py index 87864b311..d08ba71a7 100644 --- a/sphinx/registry.py +++ b/sphinx/registry.py @@ -1,6 +1,7 @@ """Sphinx component registry.""" import traceback +import warnings from importlib import import_module from types import MethodType from typing import TYPE_CHECKING, Any, Callable, Dict, Iterator, List, Tuple, Type, Union @@ -19,6 +20,7 @@ except ImportError: from sphinx.builders import Builder from sphinx.config import Config +from sphinx.deprecation import RemovedInSphinx70Warning from sphinx.domains import Domain, Index, ObjType from sphinx.domains.std import GenericObject, Target from sphinx.environment import BuildEnvironment @@ -146,11 +148,23 @@ class SphinxComponentRegistry: self.load_extension(app, entry_point.module) - def create_builder(self, app: "Sphinx", name: str) -> Builder: + def create_builder(self, app: "Sphinx", name: str, + env: BuildEnvironment = None) -> Builder: if name not in self.builders: raise SphinxError(__('Builder name %s not registered') % name) - return self.builders[name](app) + try: + return self.builders[name](app, env) + except TypeError: + warnings.warn( + f"The custom builder {name} defines a custom __init__ method without the " + f"'env'argument. Report this bug to the developers of your custom builder, " + f"this is likely not a issue with Sphinx. The 'env' argument will be required " + f"from Sphinx 7.", RemovedInSphinx70Warning, stacklevel=2) + builder = self.builders[name](app, env=...) # type: ignore[arg-type] + if env is not None: + builder.set_environment(env) + return builder def add_domain(self, domain: Type[Domain], override: bool = False) -> None: logger.debug('[app] adding domain: %r', domain) diff --git a/sphinx/search/en.py b/sphinx/search/en.py index 53cd917dc..19bd9f019 100644 --- a/sphinx/search/en.py +++ b/sphinx/search/en.py @@ -2,8 +2,9 @@ from typing import Dict +import snowballstemmer + from sphinx.search import SearchLanguage -from sphinx.util.stemmer import get_stemmer english_stopwords = set(""" a and are as at @@ -211,7 +212,7 @@ class SearchEnglish(SearchLanguage): stopwords = english_stopwords def init(self, options: Dict) -> None: - self.stemmer = get_stemmer() + self.stemmer = snowballstemmer.stemmer('porter') def stem(self, word: str) -> str: - return self.stemmer.stem(word.lower()) + return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/search/zh.py b/sphinx/search/zh.py index 700c2683f..86f612d5d 100644 --- a/sphinx/search/zh.py +++ b/sphinx/search/zh.py @@ -4,8 +4,9 @@ import os import re from typing import Dict, List +import snowballstemmer + from sphinx.search import SearchLanguage -from sphinx.util.stemmer import get_stemmer try: import jieba @@ -230,7 +231,7 @@ class SearchChinese(SearchLanguage): if dict_path and os.path.isfile(dict_path): jieba.load_userdict(dict_path) - self.stemmer = get_stemmer() + self.stemmer = snowballstemmer.stemmer('english') def split(self, input: str) -> List[str]: chinese: List[str] = [] @@ -252,8 +253,8 @@ class SearchChinese(SearchLanguage): should_not_be_stemmed = ( word in self.latin_terms and len(word) >= 3 and - len(self.stemmer.stem(word.lower())) < 3 + len(self.stemmer.stemWord(word.lower())) < 3 ) if should_not_be_stemmed: return word.lower() - return self.stemmer.stem(word.lower()) + return self.stemmer.stemWord(word.lower()) diff --git a/sphinx/themes/agogo/layout.html b/sphinx/themes/agogo/layout.html index e89657ba1..d76050c9b 100644 --- a/sphinx/themes/agogo/layout.html +++ b/sphinx/themes/agogo/layout.html @@ -36,7 +36,7 @@ {%- macro agogo_sidebar() %} {%- block sidebartoc %} <h3>{{ _('Table of Contents') }}</h3> - {{ toctree() }} + {{ toctree(includehidden=True) }} {%- endblock %} {%- block sidebarsearch %} <div role="search"> diff --git a/sphinx/themes/agogo/static/agogo.css_t b/sphinx/themes/agogo/static/agogo.css_t index 14c5e52ce..53c4c3848 100644 --- a/sphinx/themes/agogo/static/agogo.css_t +++ b/sphinx/themes/agogo/static/agogo.css_t @@ -273,12 +273,6 @@ div.document ol { div.sidebar, aside.sidebar { - width: {{ theme_sidebarwidth|todim }}; - {%- if theme_rightsidebar|tobool %} - float: right; - {%- else %} - float: left; - {%- endif %} font-size: .9em; } diff --git a/sphinx/util/console.py b/sphinx/util/console.py index abdbf4219..88b208470 100644 --- a/sphinx/util/console.py +++ b/sphinx/util/console.py @@ -23,6 +23,9 @@ def terminal_safe(s: str) -> str: def get_terminal_width() -> int: """Borrowed from the py lib.""" + if sys.platform == "win32": + # For static typing, as fcntl & termios never exist on Windows. + return int(os.environ.get('COLUMNS', 80)) - 1 try: import fcntl import struct @@ -32,7 +35,7 @@ def get_terminal_width() -> int: terminal_width = width except Exception: # FALLBACK - terminal_width = int(os.environ.get('COLUMNS', "80")) - 1 + terminal_width = int(os.environ.get('COLUMNS', 80)) - 1 return terminal_width diff --git a/sphinx/util/inspect.py b/sphinx/util/inspect.py index a807ceb83..3d89a4f6e 100644 --- a/sphinx/util/inspect.py +++ b/sphinx/util/inspect.py @@ -28,10 +28,6 @@ else: MethodDescriptorType = type(str.join) WrapperDescriptorType = type(dict.__dict__['fromkeys']) -if False: - # For type annotation - from typing import Type # NOQA - logger = logging.getLogger(__name__) memory_address_re = re.compile(r' at 0x[0-9a-f]{8,16}(?=>)', re.IGNORECASE) diff --git a/sphinx/util/logging.py b/sphinx/util/logging.py index 37fa672af..d43116f87 100644 --- a/sphinx/util/logging.py +++ b/sphinx/util/logging.py @@ -12,6 +12,7 @@ from docutils.utils import get_source_line from sphinx.errors import SphinxWarning from sphinx.util.console import colorize +from sphinx.util.osutil import abspath if TYPE_CHECKING: from sphinx.application import Sphinx @@ -381,8 +382,8 @@ class WarningSuppressor(logging.Filter): super().__init__() def filter(self, record: logging.LogRecord) -> bool: - type = getattr(record, 'type', None) - subtype = getattr(record, 'subtype', None) + type = getattr(record, 'type', '') + subtype = getattr(record, 'subtype', '') try: suppress_warnings = self.app.config.suppress_warnings @@ -514,6 +515,8 @@ class WarningLogRecordTranslator(SphinxLogRecordTranslator): def get_node_location(node: Node) -> Optional[str]: (source, line) = get_source_line(node) + if source: + source = abspath(source) if source and line: return "%s:%s" % (source, line) elif source: diff --git a/sphinx/util/parallel.py b/sphinx/util/parallel.py index e4bd852b0..193d2a80d 100644 --- a/sphinx/util/parallel.py +++ b/sphinx/util/parallel.py @@ -1,6 +1,7 @@ """Parallel building utilities.""" import os +import sys import time import traceback from math import sqrt @@ -16,6 +17,11 @@ from sphinx.util import logging logger = logging.getLogger(__name__) +if sys.platform != "win32": + ForkProcess = multiprocessing.context.ForkProcess +else: + # For static typing, as ForkProcess doesn't exist on Windows + ForkProcess = multiprocessing.process.BaseProcess # our parallel functionality only works for the forking Process parallel_available = multiprocessing and os.name == 'posix' @@ -49,7 +55,7 @@ class ParallelTasks: # task arguments self._args: Dict[int, Optional[List[Any]]] = {} # list of subprocesses (both started and waiting) - self._procs: Dict[int, multiprocessing.context.ForkProcess] = {} + self._procs: Dict[int, ForkProcess] = {} # list of receiving pipe connections of running subprocesses self._precvs: Dict[int, Any] = {} # list of receiving pipe connections of waiting subprocesses diff --git a/sphinx/util/stemmer/__init__.py b/sphinx/util/stemmer/__init__.py index ff6c365c7..6d27592d8 100644 --- a/sphinx/util/stemmer/__init__.py +++ b/sphinx/util/stemmer/__init__.py @@ -1,37 +1,62 @@ """Word stemming utilities for Sphinx.""" -from sphinx.util.stemmer.porter import PorterStemmer +import warnings -try: - from Stemmer import Stemmer as _PyStemmer - PYSTEMMER = True -except ImportError: - PYSTEMMER = False +import snowballstemmer + +from sphinx.deprecation import RemovedInSphinx70Warning + + +class PorterStemmer: + def __init__(self): + warnings.warn(f"{self.__class__.__name__} is deprecated, use " + "snowballstemmer.stemmer('porter') instead.", + RemovedInSphinx70Warning, stacklevel=2) + self.stemmer = snowballstemmer.stemmer('porter') + + def stem(self, p: str, i: int, j: int) -> str: + warnings.warn(f"{self.__class__.__name__}.stem() is deprecated, use " + "snowballstemmer.stemmer('porter').stemWord() instead.", + RemovedInSphinx70Warning, stacklevel=2) + return self.stemmer.stemWord(p) class BaseStemmer: + def __init__(self): + warnings.warn(f"{self.__class__.__name__} is deprecated, use " + "snowballstemmer.stemmer('porter') instead.", + RemovedInSphinx70Warning, stacklevel=3) + def stem(self, word: str) -> str: - raise NotImplementedError() + raise NotImplementedError class PyStemmer(BaseStemmer): - def __init__(self) -> None: - self.stemmer = _PyStemmer('porter') + def __init__(self): # NoQA + super().__init__() + self.stemmer = snowballstemmer.stemmer('porter') def stem(self, word: str) -> str: + warnings.warn(f"{self.__class__.__name__}.stem() is deprecated, use " + "snowballstemmer.stemmer('porter').stemWord() instead.", + RemovedInSphinx70Warning, stacklevel=2) return self.stemmer.stemWord(word) -class StandardStemmer(PorterStemmer, BaseStemmer): - """All those porter stemmer implementations look hideous; - make at least the stem method nicer. - """ - def stem(self, word: str) -> str: # type: ignore - return super().stem(word, 0, len(word) - 1) +class StandardStemmer(BaseStemmer): + def __init__(self): # NoQA + super().__init__() + self.stemmer = snowballstemmer.stemmer('porter') + + def stem(self, word: str) -> str: + warnings.warn(f"{self.__class__.__name__}.stem() is deprecated, use " + "snowballstemmer.stemmer('porter').stemWord() instead.", + RemovedInSphinx70Warning, stacklevel=2) + return self.stemmer.stemWord(word) def get_stemmer() -> BaseStemmer: - if PYSTEMMER: - return PyStemmer() - else: - return StandardStemmer() + warnings.warn("get_stemmer() is deprecated, use " + "snowballstemmer.stemmer('porter') instead.", + RemovedInSphinx70Warning, stacklevel=2) + return PyStemmer() diff --git a/sphinx/util/stemmer/porter.py b/sphinx/util/stemmer/porter.py deleted file mode 100644 index c4f89eb95..000000000 --- a/sphinx/util/stemmer/porter.py +++ /dev/null @@ -1,406 +0,0 @@ -"""Porter Stemming Algorithm - -This is the Porter stemming algorithm, ported to Python from the -version coded up in ANSI C by the author. It may be be regarded -as canonical, in that it follows the algorithm presented in - -Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, -no. 3, pp 130-137, - -only differing from it at the points made --DEPARTURE-- below. - -See also https://tartarus.org/martin/PorterStemmer/ - -The algorithm as described in the paper could be exactly replicated -by adjusting the points of DEPARTURE, but this is barely necessary, -because (a) the points of DEPARTURE are definitely improvements, and -(b) no encoding of the Porter stemmer I have seen is anything like -as exact as this version, even with the points of DEPARTURE! - -Release 1: January 2001 - -:author: Vivake Gupta <v@nano.com>. -:license: Public Domain ("can be used free of charge for any purpose"). -""" - - -class PorterStemmer: - - def __init__(self) -> None: - """The main part of the stemming algorithm starts here. - b is a buffer holding a word to be stemmed. The letters are in b[k0], - b[k0+1] ... ending at b[k]. In fact k0 = 0 in this demo program. k is - readjusted downwards as the stemming progresses. Zero termination is - not in fact used in the algorithm. - - Note that only lower case sequences are stemmed. Forcing to lower case - should be done before stem(...) is called. - """ - - self.b = "" # buffer for word to be stemmed - self.k = 0 - self.k0 = 0 - self.j = 0 # j is a general offset into the string - - def cons(self, i: int) -> int: - """cons(i) is TRUE <=> b[i] is a consonant.""" - if self.b[i] == 'a' or self.b[i] == 'e' or self.b[i] == 'i' \ - or self.b[i] == 'o' or self.b[i] == 'u': - return 0 - if self.b[i] == 'y': - if i == self.k0: - return 1 - else: - return (not self.cons(i - 1)) - return 1 - - def m(self) -> int: - """m() measures the number of consonant sequences between k0 and j. - if c is a consonant sequence and v a vowel sequence, and <..> - indicates arbitrary presence, - - <c><v> gives 0 - <c>vc<v> gives 1 - <c>vcvc<v> gives 2 - <c>vcvcvc<v> gives 3 - .... - """ - n = 0 - i = self.k0 - while 1: - if i > self.j: - return n - if not self.cons(i): - break - i = i + 1 - i = i + 1 - while 1: - while 1: - if i > self.j: - return n - if self.cons(i): - break - i = i + 1 - i = i + 1 - n = n + 1 - while 1: - if i > self.j: - return n - if not self.cons(i): - break - i = i + 1 - i = i + 1 - - def vowelinstem(self) -> int: - """vowelinstem() is TRUE <=> k0,...j contains a vowel""" - for i in range(self.k0, self.j + 1): - if not self.cons(i): - return 1 - return 0 - - def doublec(self, j: int) -> int: - """doublec(j) is TRUE <=> j,(j-1) contain a double consonant.""" - if j < (self.k0 + 1): - return 0 - if (self.b[j] != self.b[j - 1]): - return 0 - return self.cons(j) - - def cvc(self, i: int) -> int: - """cvc(i) is TRUE <=> i-2,i-1,i has the form - consonant - vowel - consonant - and also if the second c is not w,x or y. this is used when trying to - restore an e at the end of a short e.g. - - cav(e), lov(e), hop(e), crim(e), but - snow, box, tray. - """ - if i < (self.k0 + 2) or not self.cons(i) or self.cons(i - 1) \ - or not self.cons(i - 2): - return 0 - ch = self.b[i] - if ch in ('w', 'x', 'y'): - return 0 - return 1 - - def ends(self, s: str) -> int: - """ends(s) is TRUE <=> k0,...k ends with the string s.""" - length = len(s) - if s[length - 1] != self.b[self.k]: # tiny speed-up - return 0 - if length > (self.k - self.k0 + 1): - return 0 - if self.b[self.k - length + 1:self.k + 1] != s: - return 0 - self.j = self.k - length - return 1 - - def setto(self, s: str) -> None: - """setto(s) sets (j+1),...k to the characters in the string s, - readjusting k.""" - length = len(s) - self.b = self.b[:self.j + 1] + s + self.b[self.j + length + 1:] - self.k = self.j + length - - def r(self, s: str) -> None: - """r(s) is used further down.""" - if self.m() > 0: - self.setto(s) - - def step1ab(self) -> None: - """step1ab() gets rid of plurals and -ed or -ing. e.g. - - caresses -> caress - ponies -> poni - ties -> ti - caress -> caress - cats -> cat - - feed -> feed - agreed -> agree - disabled -> disable - - matting -> mat - mating -> mate - meeting -> meet - milling -> mill - messing -> mess - - meetings -> meet - """ - if self.b[self.k] == 's': - if self.ends("sses"): - self.k = self.k - 2 - elif self.ends("ies"): - self.setto("i") - elif self.b[self.k - 1] != 's': - self.k = self.k - 1 - if self.ends("eed"): - if self.m() > 0: - self.k = self.k - 1 - elif (self.ends("ed") or self.ends("ing")) and self.vowelinstem(): - self.k = self.j - if self.ends("at"): - self.setto("ate") - elif self.ends("bl"): - self.setto("ble") - elif self.ends("iz"): - self.setto("ize") - elif self.doublec(self.k): - self.k = self.k - 1 - ch = self.b[self.k] - if ch in ('l', 's', 'z'): - self.k = self.k + 1 - elif (self.m() == 1 and self.cvc(self.k)): - self.setto("e") - - def step1c(self) -> None: - """step1c() turns terminal y to i when there is another vowel in - the stem.""" - if (self.ends("y") and self.vowelinstem()): - self.b = self.b[:self.k] + 'i' + self.b[self.k + 1:] - - def step2(self) -> None: - """step2() maps double suffices to single ones. - so -ization ( = -ize plus -ation) maps to -ize etc. note that the - string before the suffix must give m() > 0. - """ - if self.b[self.k - 1] == 'a': - if self.ends("ational"): - self.r("ate") - elif self.ends("tional"): - self.r("tion") - elif self.b[self.k - 1] == 'c': - if self.ends("enci"): - self.r("ence") - elif self.ends("anci"): - self.r("ance") - elif self.b[self.k - 1] == 'e': - if self.ends("izer"): - self.r("ize") - elif self.b[self.k - 1] == 'l': - if self.ends("bli"): - self.r("ble") # --DEPARTURE-- - # To match the published algorithm, replace this phrase with - # if self.ends("abli"): self.r("able") - elif self.ends("alli"): - self.r("al") - elif self.ends("entli"): - self.r("ent") - elif self.ends("eli"): - self.r("e") - elif self.ends("ousli"): - self.r("ous") - elif self.b[self.k - 1] == 'o': - if self.ends("ization"): - self.r("ize") - elif self.ends("ation"): - self.r("ate") - elif self.ends("ator"): - self.r("ate") - elif self.b[self.k - 1] == 's': - if self.ends("alism"): - self.r("al") - elif self.ends("iveness"): - self.r("ive") - elif self.ends("fulness"): - self.r("ful") - elif self.ends("ousness"): - self.r("ous") - elif self.b[self.k - 1] == 't': - if self.ends("aliti"): - self.r("al") - elif self.ends("iviti"): - self.r("ive") - elif self.ends("biliti"): - self.r("ble") - elif self.b[self.k - 1] == 'g': # --DEPARTURE-- - if self.ends("logi"): - self.r("log") - # To match the published algorithm, delete this phrase - - def step3(self) -> None: - """step3() dels with -ic-, -full, -ness etc. similar strategy - to step2.""" - if self.b[self.k] == 'e': - if self.ends("icate"): - self.r("ic") - elif self.ends("ative"): - self.r("") - elif self.ends("alize"): - self.r("al") - elif self.b[self.k] == 'i': - if self.ends("iciti"): - self.r("ic") - elif self.b[self.k] == 'l': - if self.ends("ical"): - self.r("ic") - elif self.ends("ful"): - self.r("") - elif self.b[self.k] == 's': - if self.ends("ness"): - self.r("") - - def step4(self) -> None: - """step4() takes off -ant, -ence etc., in context <c>vcvc<v>.""" - if self.b[self.k - 1] == 'a': - if self.ends("al"): - pass - else: - return - elif self.b[self.k - 1] == 'c': - if self.ends("ance"): - pass - elif self.ends("ence"): - pass - else: - return - elif self.b[self.k - 1] == 'e': - if self.ends("er"): - pass - else: - return - elif self.b[self.k - 1] == 'i': - if self.ends("ic"): - pass - else: - return - elif self.b[self.k - 1] == 'l': - if self.ends("able"): - pass - elif self.ends("ible"): - pass - else: - return - elif self.b[self.k - 1] == 'n': - if self.ends("ant"): - pass - elif self.ends("ement"): - pass - elif self.ends("ment"): - pass - elif self.ends("ent"): - pass - else: - return - elif self.b[self.k - 1] == 'o': - if self.ends("ion") and (self.b[self.j] == 's' or - self.b[self.j] == 't'): - pass - elif self.ends("ou"): - pass - # takes care of -ous - else: - return - elif self.b[self.k - 1] == 's': - if self.ends("ism"): - pass - else: - return - elif self.b[self.k - 1] == 't': - if self.ends("ate"): - pass - elif self.ends("iti"): - pass - else: - return - elif self.b[self.k - 1] == 'u': - if self.ends("ous"): - pass - else: - return - elif self.b[self.k - 1] == 'v': - if self.ends("ive"): - pass - else: - return - elif self.b[self.k - 1] == 'z': - if self.ends("ize"): - pass - else: - return - else: - return - if self.m() > 1: - self.k = self.j - - def step5(self) -> None: - """step5() removes a final -e if m() > 1, and changes -ll to -l if - m() > 1. - """ - self.j = self.k - if self.b[self.k] == 'e': - a = self.m() - if a > 1 or (a == 1 and not self.cvc(self.k - 1)): - self.k = self.k - 1 - if self.b[self.k] == 'l' and self.doublec(self.k) and self.m() > 1: - self.k = self.k - 1 - - def stem(self, p: str, i: int, j: int) -> str: - """In stem(p,i,j), p is a char pointer, and the string to be stemmed - is from p[i] to p[j] inclusive. Typically i is zero and j is the - offset to the last character of a string, (p[j+1] == '\0'). The - stemmer adjusts the characters p[i] ... p[j] and returns the new - end-point of the string, k. Stemming never increases word length, so - i <= k <= j. To turn the stemmer into a module, declare 'stem' as - extern, and delete the remainder of this file. - """ - # copy the parameters into statics - self.b = p - self.k = j - self.k0 = i - if self.k <= self.k0 + 1: - return self.b # --DEPARTURE-- - - # With this line, strings of length 1 or 2 don't go through the - # stemming process, although no mention is made of this in the - # published algorithm. Remove the line to match the published - # algorithm. - - self.step1ab() - self.step1c() - self.step2() - self.step3() - self.step4() - self.step5() - return self.b[self.k0:self.k + 1] diff --git a/sphinx/util/typing.py b/sphinx/util/typing.py index 62bd462b8..430d22d16 100644 --- a/sphinx/util/typing.py +++ b/sphinx/util/typing.py @@ -31,11 +31,6 @@ try: except ImportError: UnionType = None -if False: - # For type annotation - from typing import Type # NOQA # for python3.5.1 - - # builtin classes that have incorrect __module__ INVALID_BUILTIN_CLASSES = { Struct: 'struct.Struct', # Before Python 3.9 |