diff options
author | Frank Harrison <frank@doublethefish.com> | 2020-03-26 10:50:55 +0000 |
---|---|---|
committer | Pierre Sassoulas <pierre.sassoulas@gmail.com> | 2021-01-02 09:56:39 +0100 |
commit | 579b58d3583fb0efac58aaa8e4d63f6dcb05b0bb (patch) | |
tree | 38a4cfd80e1a6cc9418d5551bd68de2bef2a3509 | |
parent | 854a7f55823206b8bd9d91095c3ca12d78d63f2a (diff) | |
download | pylint-git-579b58d3583fb0efac58aaa8e4d63f6dcb05b0bb.tar.gz |
mapreduce| Adds map/reduce functionality to SimilarChecker
Before adding a new mixin, this proves the concept works, adding tests as
examples of how it would work in the main linter.
The idea here is that, because `check_parallel()` uses a multiprocess
`map` function, the natural follow-on is to use a `reduce`
paradigm. This should demonstrate that.
-rw-r--r-- | pylint/checkers/similar.py | 30 | ||||
-rw-r--r-- | tests/checkers/unittest_similar.py | 139 | ||||
-rw-r--r-- | tests/input/similar_lines_a.py | 63 | ||||
-rw-r--r-- | tests/input/similar_lines_b.py | 36 |
4 files changed, 267 insertions, 1 deletions
diff --git a/pylint/checkers/similar.py b/pylint/checkers/similar.py index 82f79e8cc..3ac071bb3 100644 --- a/pylint/checkers/similar.py +++ b/pylint/checkers/similar.py @@ -160,6 +160,20 @@ class Similar: for lineset2 in self.linesets[idx + 1 :]: yield from self._find_common(lineset, lineset2) + def get_map_data(self): + """Returns the data we can use for a map/reduce process + + In this case we are returning this instance's Linesets, that is all file + information that will later be used for vectorisation. + """ + return self.linesets + + def combine_mapreduce_data(self, linesets_collection): + """Reduces and recombines data into a format that we can report on + + The partner function of get_map_data()""" + self.linesets = [line for lineset in linesets_collection for line in lineset] + def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports): """return lines with leading/trailing whitespace and any ignored code @@ -352,7 +366,7 @@ class SimilarChecker(BaseChecker, Similar): def set_option(self, optname, value, action=None, optdict=None): """method called to set an option (registered in the options list) - overridden to report options setting to Similar + Overridden to report options setting to Similar """ BaseChecker.set_option(self, optname, value, action, optdict) if optname == "min-similarity-lines": @@ -402,6 +416,20 @@ class SimilarChecker(BaseChecker, Similar): stats["nb_duplicated_lines"] = duplicated stats["percent_duplicated_lines"] = total and duplicated * 100.0 / total + def get_map_data(self): + """ Passthru override """ + return Similar.get_map_data(self) + + @classmethod + def reduce_map_data(cls, linter, data): + """Reduces and recombines data into a format that we can report on + + The partner function of get_map_data()""" + recombined = SimilarChecker(linter) + recombined.open() + Similar.combine_mapreduce_data(recombined, linesets_collection=data) + recombined.close() + def register(linter): """required method to auto 
register this checker """ diff --git a/tests/checkers/unittest_similar.py b/tests/checkers/unittest_similar.py index ed4af2f5c..ebc5c3ba1 100644 --- a/tests/checkers/unittest_similar.py +++ b/tests/checkers/unittest_similar.py @@ -21,6 +21,8 @@ from pathlib import Path import pytest from pylint.checkers import similar +from pylint.lint import PyLinter +from pylint.testutils import GenericTestReporter as Reporter INPUT = Path(__file__).parent / ".." / "input" SIMILAR1 = str(INPUT / "similar1") @@ -234,3 +236,140 @@ def test_no_args(): assert ex.code == 1 else: pytest.fail("not system exit") + + +def test_get_map_data(): + """Tests that a SimilarChecker respects the MapReduceMixin interface""" + linter = PyLinter(reporter=Reporter()) + + # Add a parallel checker to ensure it can map and reduce + linter.register_checker(similar.SimilarChecker(linter)) + + source_streams = ( + str(INPUT / "similar_lines_a.py"), + str(INPUT / "similar_lines_b.py"), + ) + expected_linelists = ( + ( + "", + "", + "", + "", + "", + "", + "def adipiscing(elit):", + 'etiam = "id"', + 'dictum = "purus,"', + 'vitae = "pretium"', + 'neque = "Vivamus"', + 'nec = "ornare"', + 'tortor = "sit"', + "return etiam, dictum, vitae, neque, nec, tortor", + "", + "", + "class Amet:", + "def similar_function_3_lines(self, tellus):", + "agittis = 10", + "tellus *= 300", + "return agittis, tellus", + "", + "def lorem(self, ipsum):", + 'dolor = "sit"', + 'amet = "consectetur"', + "return (lorem, dolor, amet)", + "", + "def similar_function_5_lines(self, similar):", + "some_var = 10", + "someother_var *= 300", + 'fusce = "sit"', + 'amet = "tortor"', + "return some_var, someother_var, fusce, amet", + "", + 'def __init__(self, moleskie, lectus="Mauris", ac="pellentesque"):', + 'metus = "ut"', + 'lobortis = "urna."', + 'Integer = "nisl"', + '(mauris,) = "interdum"', + 'non = "odio"', + 'semper = "aliquam"', + 'malesuada = "nunc."', + 'iaculis = "dolor"', + 'facilisis = "ultrices"', + 'vitae = "ut."', + "", + 
"return (", + "metus,", + "lobortis,", + "Integer,", + "mauris,", + "non,", + "semper,", + "malesuada,", + "iaculis,", + "facilisis,", + "vitae,", + ")", + "", + "def similar_function_3_lines(self, tellus):", + "agittis = 10", + "tellus *= 300", + "return agittis, tellus", + ), + ( + "", + "", + "", + "", + "", + "", + "", + "class Nulla:", + 'tortor = "ultrices quis porta in"', + 'sagittis = "ut tellus"', + "", + "def pulvinar(self, blandit, metus):", + "egestas = [mauris for mauris in zip(blandit, metus)]", + "neque = (egestas, blandit)", + "", + "def similar_function_5_lines(self, similar):", + "some_var = 10", + "someother_var *= 300", + 'fusce = "sit"', + 'amet = "tortor"', + 'iaculis = "dolor"', + "return some_var, someother_var, fusce, amet, iaculis, iaculis", + "", + "", + "def tortor(self):", + "ultrices = 2", + 'quis = ultricies * "porta"', + "return ultricies, quis", + "", + "", + "class Commodo:", + "def similar_function_3_lines(self, tellus):", + "agittis = 10", + "tellus *= 300", + 'laoreet = "commodo "', + "return agittis, tellus, laoreet", + ), + ) + + data = [] + + # Manually perform a 'map' type function + for source_fname in source_streams: + sim = similar.SimilarChecker(linter) + with open(source_fname) as stream: + sim.append_stream(source_fname, stream) + # The map bit, can you tell? ;) + data.extend(sim.get_map_data()) + + assert len(expected_linelists) == len(data) + for source_fname, expected_lines, lineset_obj in zip( + source_streams, expected_linelists, data + ): + assert source_fname == lineset_obj.name + # There doesn't seem to be a faster way of doing this, yet. 
+ lines = (line for idx, line in lineset_obj.enumerate_stripped()) + assert tuple(expected_lines) == tuple(lines) diff --git a/tests/input/similar_lines_a.py b/tests/input/similar_lines_a.py new file mode 100644 index 000000000..65a72a79d --- /dev/null +++ b/tests/input/similar_lines_a.py @@ -0,0 +1,63 @@ +""" A file designed to have lines of similarity when compared to similar_lines_b + +We use lorm-ipsum to generate 'random' code. """ +# Copyright (c) 2020 Frank Harrison <frank@doublethefish.com> + + +def adipiscing(elit): + etiam = "id" + dictum = "purus," + vitae = "pretium" + neque = "Vivamus" + nec = "ornare" + tortor = "sit" + return etiam, dictum, vitae, neque, nec, tortor + + +class Amet: + def similar_function_3_lines(self, tellus): # line same #1 + agittis = 10 # line same #2 + tellus *= 300 # line same #3 + return agittis, tellus # line diff + + def lorem(self, ipsum): + dolor = "sit" + amet = "consectetur" + return (lorem, dolor, amet) + + def similar_function_5_lines(self, similar): # line same #1 + some_var = 10 # line same #2 + someother_var *= 300 # line same #3 + fusce = "sit" # line same #4 + amet = "tortor" # line same #5 + return some_var, someother_var, fusce, amet # line diff + + def __init__(self, moleskie, lectus="Mauris", ac="pellentesque"): + metus = "ut" + lobortis = "urna." + Integer = "nisl" + (mauris,) = "interdum" + non = "odio" + semper = "aliquam" + malesuada = "nunc." + iaculis = "dolor" + facilisis = "ultrices" + vitae = "ut." 
+ + return ( + metus, + lobortis, + Integer, + mauris, + non, + semper, + malesuada, + iaculis, + facilisis, + vitae, + ) + + def similar_function_3_lines(self, tellus): # line same #1 + agittis = 10 # line same #2 + tellus *= 300 # line same #3 + return agittis, tellus # line diff diff --git a/tests/input/similar_lines_b.py b/tests/input/similar_lines_b.py new file mode 100644 index 000000000..21634883d --- /dev/null +++ b/tests/input/similar_lines_b.py @@ -0,0 +1,36 @@ +""" The sister file of similar_lines_a, another file designed to have lines of +similarity when compared to its sister file + +As with the sister file, we use lorm-ipsum to generate 'random' code. """ +# Copyright (c) 2020 Frank Harrison <frank@doublethefish.com> + + +class Nulla: + tortor = "ultrices quis porta in" + sagittis = "ut tellus" + + def pulvinar(self, blandit, metus): + egestas = [mauris for mauris in zip(blandit, metus)] + neque = (egestas, blandit) + + def similar_function_5_lines(self, similar): # line same #1 + some_var = 10 # line same #2 + someother_var *= 300 # line same #3 + fusce = "sit" # line same #4 + amet = "tortor" # line same #5 + iaculis = "dolor" # line diff + return some_var, someother_var, fusce, amet, iaculis, iaculis # line diff + + +def tortor(self): + ultrices = 2 + quis = ultricies * "porta" + return ultricies, quis + + +class Commodo: + def similar_function_3_lines(self, tellus): # line same #1 + agittis = 10 # line same #2 + tellus *= 300 # line same #3 + laoreet = "commodo " # line diff + return agittis, tellus, laoreet # line diff |