summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniël van Noord <13665637+DanielNoord@users.noreply.github.com>2021-12-30 15:14:20 +0100
committerGitHub <noreply@github.com>2021-12-30 15:14:20 +0100
commit019794b808271d45f86a7014e9c91cb04458a47a (patch)
tree84a3de7a71d581d3c373b1ff6720a52886251fda
parent3d3d48f80418d26ea64c73d510483b0a9e0a0698 (diff)
downloadpylint-git-019794b808271d45f86a7014e9c91cb04458a47a.tar.gz
Use ``dill`` to pickle when run in parallel mode (#5609)
-rw-r--r--ChangeLog3
-rw-r--r--doc/whatsnew/2.13.rst3
-rw-r--r--pylint/lint/parallel.py49
-rw-r--r--setup.cfg4
-rw-r--r--tests/test_check_parallel.py25
5 files changed, 65 insertions, 19 deletions
diff --git a/ChangeLog b/ChangeLog
index 87cf8b765..19a3b73ea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -11,6 +11,9 @@ Release date: TBA
..
Put new features here and also in 'doc/whatsnew/2.13.rst'
+* When run in parallel mode ``pylint`` now pickles the data passed to subprocesses with
+ the ``dill`` package. The ``dill`` package has therefore been added as a dependency.
+
* ``used-before-assignment`` now considers that assignments in a try block
may not have occurred when the except or finally blocks are executed.
diff --git a/doc/whatsnew/2.13.rst b/doc/whatsnew/2.13.rst
index bf38d767e..9510cd54a 100644
--- a/doc/whatsnew/2.13.rst
+++ b/doc/whatsnew/2.13.rst
@@ -32,6 +32,9 @@ Extensions
Other Changes
=============
+* When run in parallel mode ``pylint`` now pickles the data passed to subprocesses with
+ the ``dill`` package. The ``dill`` package has therefore been added as a dependency.
+
* By default, pylint does no longer take files starting with ``.#`` into account. Those are
considered `emacs file locks`_. This behavior can be reverted by redefining the
``ignore-patterns`` option.
diff --git a/pylint/lint/parallel.py b/pylint/lint/parallel.py
index eaacb710d..6dbeed983 100644
--- a/pylint/lint/parallel.py
+++ b/pylint/lint/parallel.py
@@ -3,7 +3,18 @@
import collections
import functools
-from typing import Any, DefaultDict, Iterable, List, Tuple
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ DefaultDict,
+ Iterable,
+ List,
+ Sequence,
+ Tuple,
+ Union,
+)
+
+import dill
from pylint import reporters
from pylint.lint.utils import _patch_sys_path
@@ -16,6 +27,9 @@ try:
except ImportError:
multiprocessing = None # type: ignore[assignment]
+if TYPE_CHECKING:
+ from pylint.lint import PyLinter
+
# PyLinter object used by worker processes when checking files using multiprocessing
# should only be used by the worker processes
_worker_linter = None
@@ -33,9 +47,16 @@ def _get_new_args(message):
return (message.msg_id, message.symbol, location, message.msg, message.confidence)
-def _worker_initialize(linter, arguments=None):
+def _worker_initialize(
+ linter: bytes, arguments: Union[None, str, Sequence[str]] = None
+) -> None:
+ """Function called to initialize a worker for a Process within a multiprocessing Pool
+
+ :param linter: A linter-class (PyLinter) instance pickled with dill
+ :param arguments: File or module name(s) to lint and to be added to sys.path
+ """
global _worker_linter # pylint: disable=global-statement
- _worker_linter = linter
+ _worker_linter = dill.loads(linter)
# On the worker process side the messages are just collected and passed back to
# parent process as _worker_check_file function's return value
@@ -97,26 +118,24 @@ def _merge_mapreduce_data(linter, all_mapreduce_data):
checker.reduce_map_data(linter, collated_map_reduce_data[checker.name])
-def check_parallel(linter, jobs, files: Iterable[FileItem], arguments=None):
+def check_parallel(
+ linter: "PyLinter",
+ jobs: int,
+ files: Iterable[FileItem],
+ arguments: Union[None, str, Sequence[str]] = None,
+) -> None:
"""Use the given linter to lint the files with given amount of workers (jobs)
This splits the work filestream-by-filestream. If you need to do work across
multiple files, as in the similarity-checker, then inherit from MapReduceMixin and
- implement the map/reduce mixin functionality"""
- # The reporter does not need to be passed to worker processes, i.e. the reporter does
- original_reporter = linter.reporter
- linter.reporter = None
-
+ implement the map/reduce mixin functionality.
+ """
# The linter is inherited by all the pool's workers, i.e. the linter
# is identical to the linter object here. This is required so that
# a custom PyLinter object can be used.
initializer = functools.partial(_worker_initialize, arguments=arguments)
pool = multiprocessing.Pool( # pylint: disable=consider-using-with
- jobs, initializer=initializer, initargs=[linter]
+ jobs, initializer=initializer, initargs=[dill.dumps(linter)]
)
- # ...and now when the workers have inherited the linter, the actual reporter
- # can be set back here on the parent process so that results get stored into
- # correct reporter
- linter.set_reporter(original_reporter)
linter.open()
try:
all_stats = []
@@ -141,7 +160,7 @@ def check_parallel(linter, jobs, files: Iterable[FileItem], arguments=None):
msg = Message(
msg[0], msg[1], MessageLocationTuple(*msg[2]), msg[3], msg[4]
)
- linter.reporter.handle_message(msg) # type: ignore[attr-defined] # linter.set_reporter() call above makes linter have a reporter attr
+ linter.reporter.handle_message(msg)
all_stats.append(stats)
all_mapreduce_data[worker_idx].append(mapreduce_data)
linter.msg_status |= msg_status
diff --git a/setup.cfg b/setup.cfg
index 2a0bb250a..8b438e645 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -43,6 +43,7 @@ project_urls =
[options]
packages = find:
install_requires =
+ dill>=0.2
platformdirs>=2.2.0
astroid>=2.9.0,<2.10 # (You should also upgrade requirements_test_min.txt)
isort>=4.2.5,<6
@@ -123,3 +124,6 @@ ignore_missing_imports = True
[mypy-git.*]
ignore_missing_imports = True
+
+[mypy-dill]
+ignore_missing_imports = True
diff --git a/tests/test_check_parallel.py b/tests/test_check_parallel.py
index 01fc352dc..4fa49d148 100644
--- a/tests/test_check_parallel.py
+++ b/tests/test_check_parallel.py
@@ -11,9 +11,12 @@
# pylint: disable=protected-access,missing-function-docstring,no-self-use
+import argparse
+import multiprocessing
import os
from typing import List
+import dill
import pytest
from astroid import nodes
@@ -174,8 +177,22 @@ class TestCheckParallelFramework:
def test_worker_initialize(self) -> None:
linter = PyLinter(reporter=Reporter())
- worker_initialize(linter=linter)
- assert pylint.lint.parallel._worker_linter == linter
+ worker_initialize(linter=dill.dumps(linter))
+ assert isinstance(pylint.lint.parallel._worker_linter, type(linter))
+
+ def test_worker_initialize_pickling(self) -> None:
+ """Test that we can pickle objects that standard pickling in multiprocessing can't.
+
+ See:
+ https://stackoverflow.com/questions/8804830/python-multiprocessing-picklingerror-cant-pickle-type-function
+ https://github.com/PyCQA/pylint/pull/5584
+ """
+ linter = PyLinter(reporter=Reporter())
+ linter.attribute = argparse.ArgumentParser() # type: ignore[attr-defined]
+ pool = multiprocessing.Pool( # pylint: disable=consider-using-with
+ 2, initializer=worker_initialize, initargs=[dill.dumps(linter)]
+ )
+ pool.imap_unordered(print, [1, 2])
def test_worker_check_single_file_uninitialised(self) -> None:
pylint.lint.parallel._worker_linter = None
@@ -186,7 +203,7 @@ class TestCheckParallelFramework:
def test_worker_check_single_file_no_checkers(self) -> None:
linter = PyLinter(reporter=Reporter())
- worker_initialize(linter=linter)
+ worker_initialize(linter=dill.dumps(linter))
(
_, # proc-id
@@ -225,7 +242,7 @@ class TestCheckParallelFramework:
def test_worker_check_sequential_checker(self) -> None:
"""Same as test_worker_check_single_file_no_checkers with SequentialTestChecker"""
linter = PyLinter(reporter=Reporter())
- worker_initialize(linter=linter)
+ worker_initialize(linter=dill.dumps(linter))
# Add the only checker we care about in this test
linter.register_checker(SequentialTestChecker(linter))