diff options
author | Frank Harrison <frank@doublethefish.com> | 2020-03-26 11:41:22 +0000 |
---|---|---|
committer | Pierre Sassoulas <pierre.sassoulas@gmail.com> | 2021-01-02 09:56:39 +0100 |
commit | b41e8d940dbd0a92d2805a99eb0f97c01f620197 (patch) | |
tree | b0b0ca5d36531b469e4573c17c79b3ab0bd4c37c /tests/test_check_parallel.py | |
parent | 579b58d3583fb0efac58aaa8e4d63f6dcb05b0bb (diff) | |
download | pylint-git-b41e8d940dbd0a92d2805a99eb0f97c01f620197.tar.gz |
mapreduce| Fixes -jN for map/reduce Checkers (e.g. SimilarChecker)
This integrates the map/reduce functionality into lint.check_process().
We previously had `map` being invoked, here we add `reduce` support.
We do this by collecting the map-data by worker and then passing it to a
reducer function on the Checker object, if available - determined by
whether they conform to the `mapreduce_checker.MapReduceMixin` mixin
interface or not.
This allows Checker objects to function across file-streams when using
multiprocessing/-j2+. For example SimilarChecker needs to be able to
compare data across all files.
The tests, that we also add here, check that a Checker instance returns
and reports expected data and errors, such as error-messages and stats -
at least in an exit-ok (0) situation.
On a personal note, as we are copying more data across process
boundaries, I suspect that the memory implications of this might cause
issues for large projects already running with -jN and duplicate code
detection on. That said, given that it takes a long time to perform
lints of large code bases that is an issue for the [near?] future and
likely to be part of the performance work. Either way, let's get it
working first and deal with memory and performance considerations later
- I say this as there are many quick wins we can make here, e.g.
file-batching, hashing lines, data compression and so on.
Diffstat (limited to 'tests/test_check_parallel.py')
-rw-r--r-- | tests/test_check_parallel.py | 27 |
1 files changed, 21 insertions, 6 deletions
diff --git a/tests/test_check_parallel.py b/tests/test_check_parallel.py index e8f67f4b6..c45b0b3b9 100644 --- a/tests/test_check_parallel.py +++ b/tests/test_check_parallel.py @@ -103,9 +103,17 @@ class TestCheckParallelFramework: def test_worker_check_single_file_no_checkers(self): linter = PyLinter(reporter=Reporter()) worker_initialize(linter=linter) - (name, _, _, msgs, stats, msg_status) = worker_check_single_file( - _gen_file_data() - ) + + ( + _, # proc-id + name, + _, # file_path + _, # base_name + msgs, + stats, + msg_status, + _, # mapreduce_data + ) = worker_check_single_file(_gen_file_data()) assert name == "--test-file_data-name-0--" assert [] == msgs no_errors_status = 0 @@ -140,9 +148,16 @@ class TestCheckParallelFramework: # Add the only checker we care about in this test linter.register_checker(SequentialTestChecker(linter)) - (name, _, _, msgs, stats, msg_status) = worker_check_single_file( - _gen_file_data() - ) + ( + _, # proc-id + name, + _, # file_path + _, # base_name + msgs, + stats, + msg_status, + _, # mapreduce_data + ) = worker_check_single_file(_gen_file_data()) # Ensure we return the same data as the single_file_no_checkers test assert name == "--test-file_data-name-0--" |