summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrank Harrison <frank@doublethefish.com>2020-03-26 10:50:55 +0000
committerPierre Sassoulas <pierre.sassoulas@gmail.com>2021-01-02 09:56:39 +0100
commit579b58d3583fb0efac58aaa8e4d63f6dcb05b0bb (patch)
tree38a4cfd80e1a6cc9418d5551bd68de2bef2a3509
parent854a7f55823206b8bd9d91095c3ca12d78d63f2a (diff)
downloadpylint-git-579b58d3583fb0efac58aaa8e4d63f6dcb05b0bb.tar.gz
mapreduce| Adds map/reduce functionality to SimilarChecker
Before adding a new mixin, this proves that the concept works, adding tests as examples of how it would work in the main linter. The idea here is that, because `check_parallel()` uses a multiprocess `map` function, the natural follow-on is to use a `reduce` paradigm. This commit should demonstrate that.
-rw-r--r--pylint/checkers/similar.py30
-rw-r--r--tests/checkers/unittest_similar.py139
-rw-r--r--tests/input/similar_lines_a.py63
-rw-r--r--tests/input/similar_lines_b.py36
4 files changed, 267 insertions, 1 deletions
diff --git a/pylint/checkers/similar.py b/pylint/checkers/similar.py
index 82f79e8cc..3ac071bb3 100644
--- a/pylint/checkers/similar.py
+++ b/pylint/checkers/similar.py
@@ -160,6 +160,20 @@ class Similar:
for lineset2 in self.linesets[idx + 1 :]:
yield from self._find_common(lineset, lineset2)
+ def get_map_data(self):
+ """Returns the data we can use for a map/reduce process
+
+ In this case we are returning this instance's Linesets, that is all file
+ information that will later be used for vectorisation.
+ """
+ return self.linesets
+
+ def combine_mapreduce_data(self, linesets_collection):
+ """Reduces and recombines data into a format that we can report on
+
+ The partner function of get_map_data()"""
+ self.linesets = [line for lineset in linesets_collection for line in lineset]
+
def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
"""return lines with leading/trailing whitespace and any ignored code
@@ -352,7 +366,7 @@ class SimilarChecker(BaseChecker, Similar):
def set_option(self, optname, value, action=None, optdict=None):
"""method called to set an option (registered in the options list)
- overridden to report options setting to Similar
+ Overridden to report options setting to Similar
"""
BaseChecker.set_option(self, optname, value, action, optdict)
if optname == "min-similarity-lines":
@@ -402,6 +416,20 @@ class SimilarChecker(BaseChecker, Similar):
stats["nb_duplicated_lines"] = duplicated
stats["percent_duplicated_lines"] = total and duplicated * 100.0 / total
+ def get_map_data(self):
+ """ Passthru override """
+ return Similar.get_map_data(self)
+
+ @classmethod
+ def reduce_map_data(cls, linter, data):
+ """Reduces and recombines data into a format that we can report on
+
+ The partner function of get_map_data()"""
+ recombined = SimilarChecker(linter)
+ recombined.open()
+ Similar.combine_mapreduce_data(recombined, linesets_collection=data)
+ recombined.close()
+
def register(linter):
"""required method to auto register this checker """
diff --git a/tests/checkers/unittest_similar.py b/tests/checkers/unittest_similar.py
index ed4af2f5c..ebc5c3ba1 100644
--- a/tests/checkers/unittest_similar.py
+++ b/tests/checkers/unittest_similar.py
@@ -21,6 +21,8 @@ from pathlib import Path
import pytest
from pylint.checkers import similar
+from pylint.lint import PyLinter
+from pylint.testutils import GenericTestReporter as Reporter
INPUT = Path(__file__).parent / ".." / "input"
SIMILAR1 = str(INPUT / "similar1")
@@ -234,3 +236,140 @@ def test_no_args():
assert ex.code == 1
else:
pytest.fail("not system exit")
+
+
+def test_get_map_data():
+ """Tests that a SimilarChecker respects the MapReduceMixin interface"""
+ linter = PyLinter(reporter=Reporter())
+
+ # Add a parallel checker to ensure it can map and reduce
+ linter.register_checker(similar.SimilarChecker(linter))
+
+ source_streams = (
+ str(INPUT / "similar_lines_a.py"),
+ str(INPUT / "similar_lines_b.py"),
+ )
+ expected_linelists = (
+ (
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "def adipiscing(elit):",
+ 'etiam = "id"',
+ 'dictum = "purus,"',
+ 'vitae = "pretium"',
+ 'neque = "Vivamus"',
+ 'nec = "ornare"',
+ 'tortor = "sit"',
+ "return etiam, dictum, vitae, neque, nec, tortor",
+ "",
+ "",
+ "class Amet:",
+ "def similar_function_3_lines(self, tellus):",
+ "agittis = 10",
+ "tellus *= 300",
+ "return agittis, tellus",
+ "",
+ "def lorem(self, ipsum):",
+ 'dolor = "sit"',
+ 'amet = "consectetur"',
+ "return (lorem, dolor, amet)",
+ "",
+ "def similar_function_5_lines(self, similar):",
+ "some_var = 10",
+ "someother_var *= 300",
+ 'fusce = "sit"',
+ 'amet = "tortor"',
+ "return some_var, someother_var, fusce, amet",
+ "",
+ 'def __init__(self, moleskie, lectus="Mauris", ac="pellentesque"):',
+ 'metus = "ut"',
+ 'lobortis = "urna."',
+ 'Integer = "nisl"',
+ '(mauris,) = "interdum"',
+ 'non = "odio"',
+ 'semper = "aliquam"',
+ 'malesuada = "nunc."',
+ 'iaculis = "dolor"',
+ 'facilisis = "ultrices"',
+ 'vitae = "ut."',
+ "",
+ "return (",
+ "metus,",
+ "lobortis,",
+ "Integer,",
+ "mauris,",
+ "non,",
+ "semper,",
+ "malesuada,",
+ "iaculis,",
+ "facilisis,",
+ "vitae,",
+ ")",
+ "",
+ "def similar_function_3_lines(self, tellus):",
+ "agittis = 10",
+ "tellus *= 300",
+ "return agittis, tellus",
+ ),
+ (
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "class Nulla:",
+ 'tortor = "ultrices quis porta in"',
+ 'sagittis = "ut tellus"',
+ "",
+ "def pulvinar(self, blandit, metus):",
+ "egestas = [mauris for mauris in zip(blandit, metus)]",
+ "neque = (egestas, blandit)",
+ "",
+ "def similar_function_5_lines(self, similar):",
+ "some_var = 10",
+ "someother_var *= 300",
+ 'fusce = "sit"',
+ 'amet = "tortor"',
+ 'iaculis = "dolor"',
+ "return some_var, someother_var, fusce, amet, iaculis, iaculis",
+ "",
+ "",
+ "def tortor(self):",
+ "ultrices = 2",
+ 'quis = ultricies * "porta"',
+ "return ultricies, quis",
+ "",
+ "",
+ "class Commodo:",
+ "def similar_function_3_lines(self, tellus):",
+ "agittis = 10",
+ "tellus *= 300",
+ 'laoreet = "commodo "',
+ "return agittis, tellus, laoreet",
+ ),
+ )
+
+ data = []
+
+ # Manually perform a 'map' type function
+ for source_fname in source_streams:
+ sim = similar.SimilarChecker(linter)
+ with open(source_fname) as stream:
+ sim.append_stream(source_fname, stream)
+ # The map bit, can you tell? ;)
+ data.extend(sim.get_map_data())
+
+ assert len(expected_linelists) == len(data)
+ for source_fname, expected_lines, lineset_obj in zip(
+ source_streams, expected_linelists, data
+ ):
+ assert source_fname == lineset_obj.name
+ # There doesn't seem to be a faster way of doing this, yet.
+ lines = (line for idx, line in lineset_obj.enumerate_stripped())
+ assert tuple(expected_lines) == tuple(lines)
diff --git a/tests/input/similar_lines_a.py b/tests/input/similar_lines_a.py
new file mode 100644
index 000000000..65a72a79d
--- /dev/null
+++ b/tests/input/similar_lines_a.py
@@ -0,0 +1,63 @@
+""" A file designed to have lines of similarity when compared to similar_lines_b
+
+We use lorm-ipsum to generate 'random' code. """
+# Copyright (c) 2020 Frank Harrison <frank@doublethefish.com>
+
+
+def adipiscing(elit):
+ etiam = "id"
+ dictum = "purus,"
+ vitae = "pretium"
+ neque = "Vivamus"
+ nec = "ornare"
+ tortor = "sit"
+ return etiam, dictum, vitae, neque, nec, tortor
+
+
+class Amet:
+ def similar_function_3_lines(self, tellus): # line same #1
+ agittis = 10 # line same #2
+ tellus *= 300 # line same #3
+ return agittis, tellus # line diff
+
+ def lorem(self, ipsum):
+ dolor = "sit"
+ amet = "consectetur"
+ return (lorem, dolor, amet)
+
+ def similar_function_5_lines(self, similar): # line same #1
+ some_var = 10 # line same #2
+ someother_var *= 300 # line same #3
+ fusce = "sit" # line same #4
+ amet = "tortor" # line same #5
+ return some_var, someother_var, fusce, amet # line diff
+
+ def __init__(self, moleskie, lectus="Mauris", ac="pellentesque"):
+ metus = "ut"
+ lobortis = "urna."
+ Integer = "nisl"
+ (mauris,) = "interdum"
+ non = "odio"
+ semper = "aliquam"
+ malesuada = "nunc."
+ iaculis = "dolor"
+ facilisis = "ultrices"
+ vitae = "ut."
+
+ return (
+ metus,
+ lobortis,
+ Integer,
+ mauris,
+ non,
+ semper,
+ malesuada,
+ iaculis,
+ facilisis,
+ vitae,
+ )
+
+ def similar_function_3_lines(self, tellus): # line same #1
+ agittis = 10 # line same #2
+ tellus *= 300 # line same #3
+ return agittis, tellus # line diff
diff --git a/tests/input/similar_lines_b.py b/tests/input/similar_lines_b.py
new file mode 100644
index 000000000..21634883d
--- /dev/null
+++ b/tests/input/similar_lines_b.py
@@ -0,0 +1,36 @@
+""" The sister file of similar_lines_a, another file designed to have lines of
+similarity when compared to its sister file
+
+As with the sister file, we use lorm-ipsum to generate 'random' code. """
+# Copyright (c) 2020 Frank Harrison <frank@doublethefish.com>
+
+
+class Nulla:
+ tortor = "ultrices quis porta in"
+ sagittis = "ut tellus"
+
+ def pulvinar(self, blandit, metus):
+ egestas = [mauris for mauris in zip(blandit, metus)]
+ neque = (egestas, blandit)
+
+ def similar_function_5_lines(self, similar): # line same #1
+ some_var = 10 # line same #2
+ someother_var *= 300 # line same #3
+ fusce = "sit" # line same #4
+ amet = "tortor" # line same #5
+ iaculis = "dolor" # line diff
+ return some_var, someother_var, fusce, amet, iaculis, iaculis # line diff
+
+
+def tortor(self):
+ ultrices = 2
+ quis = ultricies * "porta"
+ return ultricies, quis
+
+
+class Commodo:
+ def similar_function_3_lines(self, tellus): # line same #1
+ agittis = 10 # line same #2
+ tellus *= 300 # line same #3
+ laoreet = "commodo " # line diff
+ return agittis, tellus, laoreet # line diff