summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric McDonald <221418+emcd@users.noreply.github.com>2022-11-18 02:05:24 -0800
committerGitHub <noreply@github.com>2022-11-18 11:05:24 +0100
commit7b82cc82c857b78316b3f8cc13f702eea8704efb (patch)
tree8dd1de4b50dacadeb8064bad445bee52a55060b4
parent83d9fdc5dbc4bd3afb00fc57357c8b255d102d78 (diff)
downloadpylint-git-7b82cc82c857b78316b3f8cc13f702eea8704efb.tar.gz
Deduplicate module file paths to prevent redundant scans. (#7747)
* Use dict for 'expand_modules' result rather than list. With 'path' as the key, we get deduplication for free and do not need to reprocess the list for deduplication later. * Fix deduplication to account for CLI args marker. * Fix corner case with CLI arg flag handling during deduplication. * Add 'deduplication' to custom Pyenchant dict. Closes #6242 Closes #4053 Co-authored-by: Eric McDonald <emcd@users.noreply.github.com> Co-authored-by: Pierre Sassoulas <pierre.sassoulas@gmail.com>
-rw-r--r--.pyenchant_pylint_custom_dict.txt1
-rw-r--r--doc/whatsnew/fragments/6242.bugfix4
-rw-r--r--pylint/lint/expand_modules.py30
-rw-r--r--pylint/lint/pylinter.py4
-rw-r--r--tests/lint/unittest_expand_modules.py81
-rw-r--r--tests/test_pylint_runners.py23
6 files changed, 109 insertions, 34 deletions
diff --git a/.pyenchant_pylint_custom_dict.txt b/.pyenchant_pylint_custom_dict.txt
index 2b8e846a5..29f42e332 100644
--- a/.pyenchant_pylint_custom_dict.txt
+++ b/.pyenchant_pylint_custom_dict.txt
@@ -74,6 +74,7 @@ cyclomatic
dataclass
datetime
debian
+deduplication
deepcopy
defaultdicts
defframe
diff --git a/doc/whatsnew/fragments/6242.bugfix b/doc/whatsnew/fragments/6242.bugfix
new file mode 100644
index 000000000..25d323e7e
--- /dev/null
+++ b/doc/whatsnew/fragments/6242.bugfix
@@ -0,0 +1,4 @@
+Pylint will now filter duplicates given to it before linting. The output should
+be the same whether a file is given/discovered multiple times or not.
+
+Closes #6242, #4053
diff --git a/pylint/lint/expand_modules.py b/pylint/lint/expand_modules.py
index 91887cd92..e43208dea 100644
--- a/pylint/lint/expand_modules.py
+++ b/pylint/lint/expand_modules.py
@@ -66,11 +66,11 @@ def expand_modules(
ignore_list: list[str],
ignore_list_re: list[Pattern[str]],
ignore_list_paths_re: list[Pattern[str]],
-) -> tuple[list[ModuleDescriptionDict], list[ErrorDescriptionDict]]:
+) -> tuple[dict[str, ModuleDescriptionDict], list[ErrorDescriptionDict]]:
"""Take a list of files/modules/packages and return the list of tuple
(file, module name) which have to be actually checked.
"""
- result: list[ModuleDescriptionDict] = []
+ result: dict[str, ModuleDescriptionDict] = {}
errors: list[ErrorDescriptionDict] = []
path = sys.path.copy()
@@ -120,15 +120,17 @@ def expand_modules(
is_namespace = modutils.is_namespace(spec)
is_directory = modutils.is_directory(spec)
if not is_namespace:
- result.append(
- {
+ if filepath in result:
+ # Always set arg flag if module explicitly given.
+ result[filepath]["isarg"] = True
+ else:
+ result[filepath] = {
"path": filepath,
"name": modname,
"isarg": True,
"basepath": filepath,
"basename": modname,
}
- )
has_init = (
not (modname.endswith(".__init__") or modname == "__init__")
and os.path.basename(filepath) == "__init__.py"
@@ -148,13 +150,13 @@ def expand_modules(
subfilepath, is_namespace, path=additional_search_path
)
submodname = ".".join(modpath)
- result.append(
- {
- "path": subfilepath,
- "name": submodname,
- "isarg": False,
- "basepath": filepath,
- "basename": modname,
- }
- )
+ # Preserve arg flag if module is also explicitly given.
+ isarg = subfilepath in result and result[subfilepath]["isarg"]
+ result[subfilepath] = {
+ "path": subfilepath,
+ "name": submodname,
+ "isarg": isarg,
+ "basepath": filepath,
+ "basename": modname,
+ }
return result, errors
diff --git a/pylint/lint/pylinter.py b/pylint/lint/pylinter.py
index ac8dee3bc..70f27c7c0 100644
--- a/pylint/lint/pylinter.py
+++ b/pylint/lint/pylinter.py
@@ -879,12 +879,12 @@ class PyLinter(
The returned generator yield one item for each Python module that should be linted.
"""
- for descr in self._expand_files(files_or_modules):
+ for descr in self._expand_files(files_or_modules).values():
name, filepath, is_arg = descr["name"], descr["path"], descr["isarg"]
if self.should_analyze_file(name, filepath, is_argument=is_arg):
yield FileItem(name, filepath, descr["basename"])
- def _expand_files(self, modules: Sequence[str]) -> list[ModuleDescriptionDict]:
+ def _expand_files(self, modules: Sequence[str]) -> dict[str, ModuleDescriptionDict]:
"""Get modules and errors from a list of modules and handle errors."""
result, errors = expand_modules(
modules,
diff --git a/tests/lint/unittest_expand_modules.py b/tests/lint/unittest_expand_modules.py
index 1883db543..88f058b1e 100644
--- a/tests/lint/unittest_expand_modules.py
+++ b/tests/lint/unittest_expand_modules.py
@@ -45,6 +45,14 @@ this_file_from_init = {
"path": EXPAND_MODULES,
}
+this_file_from_init_deduplicated = {
+ "basename": "lint",
+ "basepath": INIT_PATH,
+ "isarg": True,
+ "name": "lint.unittest_expand_modules",
+ "path": EXPAND_MODULES,
+}
+
unittest_lint = {
"basename": "lint",
"basepath": INIT_PATH,
@@ -77,7 +85,6 @@ test_caching = {
"path": str(TEST_DIRECTORY / "lint/test_caching.py"),
}
-
init_of_package = {
"basename": "lint",
"basepath": INIT_PATH,
@@ -87,6 +94,20 @@ init_of_package = {
}
+def _list_expected_package_modules(
+ deduplicating: bool = False,
+) -> tuple[dict[str, object], ...]:
+ """Generates reusable list of modules for our package."""
+ return (
+ init_of_package,
+ test_caching,
+ test_pylinter,
+ test_utils,
+ this_file_from_init_deduplicated if deduplicating else this_file_from_init,
+ unittest_lint,
+ )
+
+
class TestExpandModules(CheckerTestCase):
"""Test the expand_modules function while allowing options to be set."""
@@ -102,23 +123,19 @@ class TestExpandModules(CheckerTestCase):
@pytest.mark.parametrize(
"files_or_modules,expected",
[
- ([__file__], [this_file]),
+ ([__file__], {this_file["path"]: this_file}),
(
[str(Path(__file__).parent)],
- [
- init_of_package,
- test_caching,
- test_pylinter,
- test_utils,
- this_file_from_init,
- unittest_lint,
- ],
+ {
+ module["path"]: module # pylint: disable=unsubscriptable-object
+ for module in _list_expected_package_modules()
+ },
),
],
)
@set_config(ignore_paths="")
def test_expand_modules(
- self, files_or_modules: list[str], expected: list[ModuleDescriptionDict]
+ self, files_or_modules: list[str], expected: dict[str, ModuleDescriptionDict]
) -> None:
"""Test expand_modules with the default value of ignore-paths."""
ignore_list: list[str] = []
@@ -129,25 +146,54 @@ class TestExpandModules(CheckerTestCase):
ignore_list_re,
self.linter.config.ignore_paths,
)
- modules.sort(key=lambda d: d["name"])
assert modules == expected
assert not errors
@pytest.mark.parametrize(
"files_or_modules,expected",
[
- ([__file__], []),
+ ([__file__, __file__], {this_file["path"]: this_file}),
+ (
+ [EXPAND_MODULES, str(Path(__file__).parent), EXPAND_MODULES],
+ {
+ module["path"]: module # pylint: disable=unsubscriptable-object
+ for module in _list_expected_package_modules(deduplicating=True)
+ },
+ ),
+ ],
+ )
+ @set_config(ignore_paths="")
+ def test_expand_modules_deduplication(
+ self, files_or_modules: list[str], expected: dict[str, ModuleDescriptionDict]
+ ) -> None:
+ """Test expand_modules deduplication."""
+ ignore_list: list[str] = []
+ ignore_list_re: list[re.Pattern[str]] = []
+ modules, errors = expand_modules(
+ files_or_modules,
+ ignore_list,
+ ignore_list_re,
+ self.linter.config.ignore_paths,
+ )
+ assert modules == expected
+ assert not errors
+
+ @pytest.mark.parametrize(
+ "files_or_modules,expected",
+ [
+ ([__file__], {}),
(
[str(Path(__file__).parent)],
- [
- init_of_package,
- ],
+ {
+ module["path"]: module # pylint: disable=unsubscriptable-object
+ for module in (init_of_package,)
+ },
),
],
)
@set_config(ignore_paths=".*/lint/.*")
def test_expand_modules_with_ignore(
- self, files_or_modules: list[str], expected: list[ModuleDescriptionDict]
+ self, files_or_modules: list[str], expected: dict[str, ModuleDescriptionDict]
) -> None:
"""Test expand_modules with a non-default value of ignore-paths."""
ignore_list: list[str] = []
@@ -158,6 +204,5 @@ class TestExpandModules(CheckerTestCase):
ignore_list_re,
self.linter.config.ignore_paths,
)
- modules.sort(key=lambda d: d["name"])
assert modules == expected
assert not errors
diff --git a/tests/test_pylint_runners.py b/tests/test_pylint_runners.py
index 936de8fd6..8e68f8eaa 100644
--- a/tests/test_pylint_runners.py
+++ b/tests/test_pylint_runners.py
@@ -5,8 +5,10 @@
from __future__ import annotations
+import contextlib
import os
import pathlib
+import shlex
import sys
from collections.abc import Sequence
from io import BufferedReader
@@ -57,6 +59,27 @@ def test_runner_with_arguments(runner: _RunCallable, tmpdir: LocalPath) -> None:
assert err.value.code == 0
+def test_pylint_argument_deduplication(
+ tmpdir: LocalPath, tests_directory: pathlib.Path
+) -> None:
+ """Check that the Pylint runner does not over-report on duplicate
+ arguments.
+
+ See https://github.com/PyCQA/pylint/issues/6242 and
+ https://github.com/PyCQA/pylint/issues/4053
+ """
+ filepath = str(tests_directory / "functional/t/too/too_many_branches.py")
+ testargs = shlex.split("--report n --score n --max-branches 13")
+ testargs.extend([filepath] * 4)
+ exit_stack = contextlib.ExitStack()
+ exit_stack.enter_context(tmpdir.as_cwd())
+ exit_stack.enter_context(patch.object(sys, "argv", testargs))
+ err = exit_stack.enter_context(pytest.raises(SystemExit))
+ with exit_stack:
+ run_pylint(testargs)
+ assert err.value.code == 0
+
+
def test_pylint_run_jobs_equal_zero_dont_crash_with_cpu_fraction(
tmpdir: LocalPath,
) -> None: