Diffstat (limited to 'pylint/checkers/similar.py')
 pylint/checkers/similar.py | 218
 1 file changed, 139 insertions(+), 79 deletions(-)
diff --git a/pylint/checkers/similar.py b/pylint/checkers/similar.py
index a53058fd7..369d0ce37 100644
--- a/pylint/checkers/similar.py
+++ b/pylint/checkers/similar.py
@@ -30,8 +30,13 @@ from pylint.reporters.ureports.nodes import Table
class Similar:
"""finds copy-pasted lines of code in a project"""
- def __init__(self, min_lines=4, ignore_comments=False,
- ignore_docstrings=False, ignore_imports=False):
+ def __init__(
+ self,
+ min_lines=4,
+ ignore_comments=False,
+ ignore_docstrings=False,
+ ignore_imports=False,
+ ):
self.min_lines = min_lines
self.ignore_comments = ignore_comments
self.ignore_docstrings = ignore_docstrings
@@ -45,11 +50,15 @@ class Similar:
else:
readlines = decoding_stream(stream, encoding).readlines
try:
- self.linesets.append(LineSet(streamid,
- readlines(),
- self.ignore_comments,
- self.ignore_docstrings,
- self.ignore_imports))
+ self.linesets.append(
+ LineSet(
+ streamid,
+ readlines(),
+ self.ignore_comments,
+ self.ignore_docstrings,
+ self.ignore_imports,
+ )
+ )
except UnicodeDecodeError:
pass
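
As a hedged aside for readers of this hunk: the class above is typically driven directly, one stream per file, roughly as in the sketch below. The file names are placeholders and the call pattern mirrors Run() at the bottom of this module; nothing here is added by this diff.

    from pylint.checkers.similar import Similar

    # Illustrative only: build the duplicate finder, feed it one stream per
    # file, then let run() print the similar blocks and the TOTAL summary.
    sim = Similar(min_lines=4, ignore_comments=True, ignore_docstrings=True)
    for filename in ("a.py", "b.py"):      # hypothetical input files
        with open(filename) as stream:
            sim.append_stream(filename, stream)
    sim.run()
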
@@ -87,13 +96,18 @@ class Similar:
for lineset, idx in couples:
print("==%s:%s" % (lineset.name, idx))
# pylint: disable=W0631
- for line in lineset._real_lines[idx:idx+num]:
+ for line in lineset._real_lines[idx : idx + num]:
print(" ", line.rstrip())
- nb_lignes_dupliquees += num * (len(couples)-1)
+ nb_lignes_dupliquees += num * (len(couples) - 1)
nb_total_lignes = sum([len(lineset) for lineset in self.linesets])
- print("TOTAL lines=%s duplicates=%s percent=%.2f" \
- % (nb_total_lignes, nb_lignes_dupliquees,
- nb_lignes_dupliquees*100. / nb_total_lignes))
+ print(
+ "TOTAL lines=%s duplicates=%s percent=%.2f"
+ % (
+ nb_total_lignes,
+ nb_lignes_dupliquees,
+ nb_lignes_dupliquees * 100. / nb_total_lignes,
+ )
+ )
def _find_common(self, lineset1, lineset2):
"""find similarities in the two given linesets"""
@@ -108,7 +122,8 @@ class Similar:
for index2 in find(lineset1[index1]):
non_blank = 0
for num, ((_, line1), (_, line2)) in enumerate(
- zip(lines1(index1), lines2(index2))):
+ zip(lines1(index1), lines2(index2))
+ ):
if line1 != line2:
if non_blank > min_lines:
yield num, lineset1, index1, lineset2, index2
@@ -129,10 +144,11 @@ class Similar:
product
"""
for idx, lineset in enumerate(self.linesets[:-1]):
- for lineset2 in self.linesets[idx+1:]:
+ for lineset2 in self.linesets[idx + 1 :]:
for sim in self._find_common(lineset, lineset2):
yield sim
+
def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
"""return lines with leading/trailing whitespace and any ignored code
features removed
@@ -143,38 +159,44 @@ def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
for line in lines:
line = line.strip()
if ignore_docstrings:
- if not docstring and \
- (line.startswith('"""') or line.startswith("'''")):
+ if not docstring and (line.startswith('"""') or line.startswith("'''")):
docstring = line[:3]
line = line[3:]
if docstring:
if line.endswith(docstring):
docstring = None
- line = ''
+ line = ""
if ignore_imports:
if line.startswith("import ") or line.startswith("from "):
- line = ''
+ line = ""
if ignore_comments:
# XXX should use regex in checkers/format to avoid cutting
# at a "#" in a string
- line = line.split('#', 1)[0].strip()
+ line = line.split("#", 1)[0].strip()
strippedlines.append(line)
return strippedlines
class LineSet:
"""Holds and indexes all the lines of a single source file"""
- def __init__(self, name, lines, ignore_comments=False,
- ignore_docstrings=False, ignore_imports=False):
+
+ def __init__(
+ self,
+ name,
+ lines,
+ ignore_comments=False,
+ ignore_docstrings=False,
+ ignore_imports=False,
+ ):
self.name = name
self._real_lines = lines
- self._stripped_lines = stripped_lines(lines, ignore_comments,
- ignore_docstrings,
- ignore_imports)
+ self._stripped_lines = stripped_lines(
+ lines, ignore_comments, ignore_docstrings, ignore_imports
+ )
self._index = self._mk_index()
def __str__(self):
- return '<Lineset for %s>' % self.name
+ return "<Lineset for %s>" % self.name
def __len__(self):
return len(self._real_lines)
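
A quick, hedged illustration of stripped_lines as reformatted above (made-up input; blanked lines keep their slot so indices still line up with the real source):

    from pylint.checkers.similar import stripped_lines

    source = [
        "import os\n",
        "x = 1  # set x\n",
        '"""one-line docstring"""\n',
    ]
    print(stripped_lines(source, ignore_comments=True,
                         ignore_docstrings=True, ignore_imports=True))
    # expected, per the logic in this hunk: ['', 'x = 1', '']
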
@@ -198,7 +220,7 @@ class LineSet:
else:
lines = self._stripped_lines
for line in lines:
- #if line:
+ # if line:
yield idx, line
idx += 1
@@ -215,18 +237,23 @@ class LineSet:
return index
-MSGS = {'R0801': ('Similar lines in %s files\n%s',
- 'duplicate-code',
- 'Indicates that a set of similar lines has been detected '
- 'among multiple file. This usually means that the code should '
- 'be refactored to avoid this duplication.')}
+MSGS = {
+ "R0801": (
+ "Similar lines in %s files\n%s",
+ "duplicate-code",
+ "Indicates that a set of similar lines has been detected "
+ "among multiple file. This usually means that the code should "
+ "be refactored to avoid this duplication.",
+ )
+}
+
def report_similarities(sect, stats, old_stats):
"""make a layout with some stats about duplication"""
- lines = ['', 'now', 'previous', 'difference']
- lines += table_lines_from_stats(stats, old_stats,
- ('nb_duplicated_lines',
- 'percent_duplicated_lines'))
+ lines = ["", "now", "previous", "difference"]
+ lines += table_lines_from_stats(
+ stats, old_stats, ("nb_duplicated_lines", "percent_duplicated_lines")
+ )
sect.append(Table(children=lines, cols=4, rheaders=1, cheaders=1))
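
For context on the reporter just above (the values below are invented; only the key names come from this module): close() further down fills two entries in the linter stats, and report_similarities compares them with the previous run.

    # Hypothetical stats mappings, shaped like those written by close():
    stats = {"nb_duplicated_lines": 12, "percent_duplicated_lines": 1.9}
    old_stats = {"nb_duplicated_lines": 30, "percent_duplicated_lines": 4.6}
    # report_similarities(sect, stats, old_stats) would then append a
    # four-column Table: ['', 'now', 'previous', 'difference'] plus one
    # row per duplication metric.
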
@@ -239,34 +266,57 @@ class SimilarChecker(BaseChecker, Similar):
__implements__ = (IRawChecker,)
# configuration section name
- name = 'similarities'
+ name = "similarities"
# messages
msgs = MSGS
# configuration options
# for available dict keys/values see the optik parser 'add_option' method
- options = (('min-similarity-lines', # type: ignore
- {'default' : 4, 'type' : "int", 'metavar' : '<int>',
- 'help' : 'Minimum lines number of a similarity.'}),
- ('ignore-comments',
- {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
- 'help': 'Ignore comments when computing similarities.'}
- ),
- ('ignore-docstrings',
- {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
- 'help': 'Ignore docstrings when computing similarities.'}
- ),
- ('ignore-imports',
- {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
- 'help': 'Ignore imports when computing similarities.'}
- ),
- )
+ options = (
+ (
+ "min-similarity-lines", # type: ignore
+ {
+ "default": 4,
+ "type": "int",
+ "metavar": "<int>",
+ "help": "Minimum lines number of a similarity.",
+ },
+ ),
+ (
+ "ignore-comments",
+ {
+ "default": True,
+ "type": "yn",
+ "metavar": "<y or n>",
+ "help": "Ignore comments when computing similarities.",
+ },
+ ),
+ (
+ "ignore-docstrings",
+ {
+ "default": True,
+ "type": "yn",
+ "metavar": "<y or n>",
+ "help": "Ignore docstrings when computing similarities.",
+ },
+ ),
+ (
+ "ignore-imports",
+ {
+ "default": False,
+ "type": "yn",
+ "metavar": "<y or n>",
+ "help": "Ignore imports when computing similarities.",
+ },
+ ),
+ )
# reports
- reports = (('RP0801', 'Duplication', report_similarities),) # type: ignore
+ reports = (("RP0801", "Duplication", report_similarities),) # type: ignore
def __init__(self, linter=None):
BaseChecker.__init__(self, linter)
- Similar.__init__(self, min_lines=4,
- ignore_comments=True, ignore_docstrings=True)
+ Similar.__init__(
+ self, min_lines=4, ignore_comments=True, ignore_docstrings=True
+ )
self.stats = None
def set_option(self, optname, value, action=None, optdict=None):
@@ -275,20 +325,21 @@ class SimilarChecker(BaseChecker, Similar):
overridden to report options setting to Similar
"""
BaseChecker.set_option(self, optname, value, action, optdict)
- if optname == 'min-similarity-lines':
+ if optname == "min-similarity-lines":
self.min_lines = self.config.min_similarity_lines
- elif optname == 'ignore-comments':
+ elif optname == "ignore-comments":
self.ignore_comments = self.config.ignore_comments
- elif optname == 'ignore-docstrings':
+ elif optname == "ignore-docstrings":
self.ignore_docstrings = self.config.ignore_docstrings
- elif optname == 'ignore-imports':
+ elif optname == "ignore-imports":
self.ignore_imports = self.config.ignore_imports
def open(self):
"""init the checkers: reset linesets and statistics information"""
self.linesets = []
- self.stats = self.linter.add_stats(nb_duplicated_lines=0,
- percent_duplicated_lines=0)
+ self.stats = self.linter.add_stats(
+ nb_duplicated_lines=0, percent_duplicated_lines=0
+ )
def process_module(self, node):
"""process a module
@@ -298,9 +349,7 @@ class SimilarChecker(BaseChecker, Similar):
stream must implement the readlines method
"""
with node.stream() as stream:
- self.append_stream(self.linter.current_name,
- stream,
- node.file_encoding)
+ self.append_stream(self.linter.current_name, stream, node.file_encoding)
def close(self):
"""compute and display similarities on closing (i.e. end of parsing)"""
@@ -313,49 +362,59 @@ class SimilarChecker(BaseChecker, Similar):
msg.append("==%s:%s" % (lineset.name, idx))
msg.sort()
# pylint: disable=W0631
- for line in lineset._real_lines[idx:idx+num]:
+ for line in lineset._real_lines[idx : idx + num]:
msg.append(line.rstrip())
- self.add_message('R0801', args=(len(couples), '\n'.join(msg)))
+ self.add_message("R0801", args=(len(couples), "\n".join(msg)))
duplicated += num * (len(couples) - 1)
- stats['nb_duplicated_lines'] = duplicated
- stats['percent_duplicated_lines'] = total and duplicated * 100. / total
+ stats["nb_duplicated_lines"] = duplicated
+ stats["percent_duplicated_lines"] = total and duplicated * 100. / total
def register(linter):
"""required method to auto register this checker """
linter.register_checker(SimilarChecker(linter))
+
def usage(status=0):
"""display command line usage information"""
print("finds copy pasted blocks in a set of files")
print()
- print('Usage: symilar [-d|--duplicates min_duplicated_lines] \
-[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...')
+ print(
+ "Usage: symilar [-d|--duplicates min_duplicated_lines] \
+[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1..."
+ )
sys.exit(status)
+
def Run(argv=None):
"""standalone command line access point"""
if argv is None:
argv = sys.argv[1:]
from getopt import getopt
- s_opts = 'hdi'
- l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
- 'ignore-docstrings')
+
+ s_opts = "hdi"
+ l_opts = (
+ "help",
+ "duplicates=",
+ "ignore-comments",
+ "ignore-imports",
+ "ignore-docstrings",
+ )
min_lines = 4
ignore_comments = False
ignore_docstrings = False
ignore_imports = False
opts, args = getopt(argv, s_opts, l_opts)
for opt, val in opts:
- if opt in ('-d', '--duplicates'):
+ if opt in ("-d", "--duplicates"):
min_lines = int(val)
- elif opt in ('-h', '--help'):
+ elif opt in ("-h", "--help"):
usage()
- elif opt in ('-i', '--ignore-comments'):
+ elif opt in ("-i", "--ignore-comments"):
ignore_comments = True
- elif opt in ('--ignore-docstrings',):
+ elif opt in ("--ignore-docstrings",):
ignore_docstrings = True
- elif opt in ('--ignore-imports',):
+ elif opt in ("--ignore-imports",):
ignore_imports = True
if not args:
usage(1)
@@ -366,5 +425,6 @@ def Run(argv=None):
sim.run()
sys.exit(0)
-if __name__ == '__main__':
+
+if __name__ == "__main__":
Run()
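
Finally, a hedged example of the standalone entry point as it reads after this change; the paths are placeholders, and note that Run() always finishes with sys.exit().

    from pylint.checkers.similar import Run

    # Equivalent to: symilar --duplicates 5 --ignore-comments pkg/a.py pkg/b.py
    Run(["--duplicates", "5", "--ignore-comments", "pkg/a.py", "pkg/b.py"])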