Diffstat (limited to 'pylint/checkers/similar.py')
 pylint/checkers/similar.py | 218
 1 file changed, 139 insertions(+), 79 deletions(-)
diff --git a/pylint/checkers/similar.py b/pylint/checkers/similar.py
index a53058fd7..369d0ce37 100644
--- a/pylint/checkers/similar.py
+++ b/pylint/checkers/similar.py
@@ -30,8 +30,13 @@ from pylint.reporters.ureports.nodes import Table
class Similar:
"""finds copy-pasted lines of code in a project"""
- def __init__(self, min_lines=4, ignore_comments=False,
- ignore_docstrings=False, ignore_imports=False):
+ def __init__(
+ self,
+ min_lines=4,
+ ignore_comments=False,
+ ignore_docstrings=False,
+ ignore_imports=False,
+ ):
self.min_lines = min_lines
self.ignore_comments = ignore_comments
self.ignore_docstrings = ignore_docstrings
@@ -45,11 +50,15 @@ class Similar:
else:
readlines = decoding_stream(stream, encoding).readlines
try:
- self.linesets.append(LineSet(streamid,
- readlines(),
- self.ignore_comments,
- self.ignore_docstrings,
- self.ignore_imports))
+ self.linesets.append(
+ LineSet(
+ streamid,
+ readlines(),
+ self.ignore_comments,
+ self.ignore_docstrings,
+ self.ignore_imports,
+ )
+ )
except UnicodeDecodeError:
pass
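
As a hedged aside for readers of this hunk: the class above is typically driven directly, one stream per file, roughly as in the sketch below. The file names are placeholders and the call pattern mirrors Run() at the bottom of this module; nothing here is added by this diff.

    from pylint.checkers.similar import Similar

    # Illustrative only: build the duplicate finder, feed it one stream per
    # file, then let run() print the similar blocks and the TOTAL summary.
    sim = Similar(min_lines=4, ignore_comments=True, ignore_docstrings=True)
    for filename in ("a.py", "b.py"):      # hypothetical input files
        with open(filename) as stream:
            sim.append_stream(filename, stream)
    sim.run()
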
@@ -87,13 +96,18 @@ class Similar:
for lineset, idx in couples:
print("==%s:%s" % (lineset.name, idx))
# pylint: disable=W0631
- for line in lineset._real_lines[idx:idx+num]:
+ for line in lineset._real_lines[idx : idx + num]:
print(" ", line.rstrip())
- nb_lignes_dupliquees += num * (len(couples)-1)
+ nb_lignes_dupliquees += num * (len(couples) - 1)
nb_total_lignes = sum([len(lineset) for lineset in self.linesets])
- print("TOTAL lines=%s duplicates=%s percent=%.2f" \
- % (nb_total_lignes, nb_lignes_dupliquees,
- nb_lignes_dupliquees*100. / nb_total_lignes))
+ print(
+ "TOTAL lines=%s duplicates=%s percent=%.2f"
+ % (
+ nb_total_lignes,
+ nb_lignes_dupliquees,
+ nb_lignes_dupliquees * 100. / nb_total_lignes,
+ )
+ )
def _find_common(self, lineset1, lineset2):
"""find similarities in the two given linesets"""
@@ -108,7 +122,8 @@ class Similar:
for index2 in find(lineset1[index1]):
non_blank = 0
for num, ((_, line1), (_, line2)) in enumerate(
- zip(lines1(index1), lines2(index2))):
+ zip(lines1(index1), lines2(index2))
+ ):
if line1 != line2:
if non_blank > min_lines:
yield num, lineset1, index1, lineset2, index2
@@ -129,10 +144,11 @@ class Similar:
product
"""
for idx, lineset in enumerate(self.linesets[:-1]):
- for lineset2 in self.linesets[idx+1:]:
+ for lineset2 in self.linesets[idx + 1 :]:
for sim in self._find_common(lineset, lineset2):
yield sim
+
def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
"""return lines with leading/trailing whitespace and any ignored code
features removed
@@ -143,38 +159,44 @@ def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
for line in lines:
line = line.strip()
if ignore_docstrings:
- if not docstring and \
- (line.startswith('"""') or line.startswith("'''")):
+ if not docstring and (line.startswith('"""') or line.startswith("'''")):
docstring = line[:3]
line = line[3:]
if docstring:
if line.endswith(docstring):
docstring = None
- line = ''
+ line = ""
if ignore_imports:
if line.startswith("import ") or line.startswith("from "):
- line = ''
+ line = ""
if ignore_comments:
# XXX should use regex in checkers/format to avoid cutting
# at a "#" in a string
- line = line.split('#', 1)[0].strip()
+ line = line.split("#", 1)[0].strip()
strippedlines.append(line)
return strippedlines
class LineSet:
"""Holds and indexes all the lines of a single source file"""
- def __init__(self, name, lines, ignore_comments=False,
- ignore_docstrings=False, ignore_imports=False):
+
+ def __init__(
+ self,
+ name,
+ lines,
+ ignore_comments=False,
+ ignore_docstrings=False,
+ ignore_imports=False,
+ ):
self.name = name
self._real_lines = lines
- self._stripped_lines = stripped_lines(lines, ignore_comments,
- ignore_docstrings,
- ignore_imports)
+ self._stripped_lines = stripped_lines(
+ lines, ignore_comments, ignore_docstrings, ignore_imports
+ )
self._index = self._mk_index()
def __str__(self):
- return '<Lineset for %s>' % self.name
+ return "<Lineset for %s>" % self.name
def __len__(self):
return len(self._real_lines)
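
A quick, hedged illustration of stripped_lines as reformatted above (made-up input; blanked lines keep their slot so indices still line up with the real source):

    from pylint.checkers.similar import stripped_lines

    source = [
        "import os\n",
        "x = 1  # set x\n",
        '"""one-line docstring"""\n',
    ]
    print(stripped_lines(source, ignore_comments=True,
                         ignore_docstrings=True, ignore_imports=True))
    # expected, per the logic in this hunk: ['', 'x = 1', '']
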
@@ -198,7 +220,7 @@ class LineSet:
else:
lines = self._stripped_lines
for line in lines:
- #if line:
+ # if line:
yield idx, line
idx += 1
@@ -215,18 +237,23 @@ class LineSet:
return index
-MSGS = {'R0801': ('Similar lines in %s files\n%s',
- 'duplicate-code',
- 'Indicates that a set of similar lines has been detected '
- 'among multiple file. This usually means that the code should '
- 'be refactored to avoid this duplication.')}
+MSGS = {
+ "R0801": (
+ "Similar lines in %s files\n%s",
+ "duplicate-code",
+ "Indicates that a set of similar lines has been detected "
+ "among multiple file. This usually means that the code should "
+ "be refactored to avoid this duplication.",
+ )
+}
+
def report_similarities(sect, stats, old_stats):
"""make a layout with some stats about duplication"""
- lines = ['', 'now', 'previous', 'difference']
- lines += table_lines_from_stats(stats, old_stats,
- ('nb_duplicated_lines',
- 'percent_duplicated_lines'))
+ lines = ["", "now", "previous", "difference"]
+ lines += table_lines_from_stats(
+ stats, old_stats, ("nb_duplicated_lines", "percent_duplicated_lines")
+ )
sect.append(Table(children=lines, cols=4, rheaders=1, cheaders=1))
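
For context on the reporter just above (the values below are invented; only the key names come from this module): close() further down fills two entries in the linter stats, and report_similarities compares them with the previous run.

    # Hypothetical stats mappings, shaped like those written by close():
    stats = {"nb_duplicated_lines": 12, "percent_duplicated_lines": 1.9}
    old_stats = {"nb_duplicated_lines": 30, "percent_duplicated_lines": 4.6}
    # report_similarities(sect, stats, old_stats) would then append a
    # four-column Table: ['', 'now', 'previous', 'difference'] plus one
    # row per duplication metric.
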
@@ -239,34 +266,57 @@ class SimilarChecker(BaseChecker, Similar):
__implements__ = (IRawChecker,)
# configuration section name
- name = 'similarities'
+ name = "similarities"
# messages
msgs = MSGS
# configuration options
# for available dict keys/values see the optik parser 'add_option' method
- options = (('min-similarity-lines', # type: ignore
- {'default' : 4, 'type' : "int", 'metavar' : '<int>',
- 'help' : 'Minimum lines number of a similarity.'}),
- ('ignore-comments',
- {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
- 'help': 'Ignore comments when computing similarities.'}
- ),
- ('ignore-docstrings',
- {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
- 'help': 'Ignore docstrings when computing similarities.'}
- ),
- ('ignore-imports',
- {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
- 'help': 'Ignore imports when computing similarities.'}
- ),
- )
+ options = (
+ (
+ "min-similarity-lines", # type: ignore
+ {
+ "default": 4,
+ "type": "int",
+ "metavar": "<int>",
+ "help": "Minimum lines number of a similarity.",
+ },
+ ),
+ (
+ "ignore-comments",
+ {
+ "default": True,
+ "type": "yn",
+ "metavar": "<y or n>",
+ "help": "Ignore comments when computing similarities.",
+ },
+ ),
+ (
+ "ignore-docstrings",
+ {
+ "default": True,
+ "type": "yn",
+ "metavar": "<y or n>",
+ "help": "Ignore docstrings when computing similarities.",
+ },
+ ),
+ (
+ "ignore-imports",
+ {
+ "default": False,
+ "type": "yn",
+ "metavar": "<y or n>",
+ "help": "Ignore imports when computing similarities.",
+ },
+ ),
+ )
# reports
- reports = (('RP0801', 'Duplication', report_similarities),) # type: ignore
+ reports = (("RP0801", "Duplication", report_similarities),) # type: ignore
def __init__(self, linter=None):
BaseChecker.__init__(self, linter)
- Similar.__init__(self, min_lines=4,
- ignore_comments=True, ignore_docstrings=True)
+ Similar.__init__(
+ self, min_lines=4, ignore_comments=True, ignore_docstrings=True
+ )
self.stats = None
def set_option(self, optname, value, action=None, optdict=None):
@@ -275,20 +325,21 @@ class SimilarChecker(BaseChecker, Similar):
overridden to report options setting to Similar
"""
BaseChecker.set_option(self, optname, value, action, optdict)
- if optname == 'min-similarity-lines':
+ if optname == "min-similarity-lines":
self.min_lines = self.config.min_similarity_lines
- elif optname == 'ignore-comments':
+ elif optname == "ignore-comments":
self.ignore_comments = self.config.ignore_comments
- elif optname == 'ignore-docstrings':
+ elif optname == "ignore-docstrings":
self.ignore_docstrings = self.config.ignore_docstrings
- elif optname == 'ignore-imports':
+ elif optname == "ignore-imports":
self.ignore_imports = self.config.ignore_imports
def open(self):
"""init the checkers: reset linesets and statistics information"""
self.linesets = []
- self.stats = self.linter.add_stats(nb_duplicated_lines=0,
- percent_duplicated_lines=0)
+ self.stats = self.linter.add_stats(
+ nb_duplicated_lines=0, percent_duplicated_lines=0
+ )
def process_module(self, node):
"""process a module
@@ -298,9 +349,7 @@ class SimilarChecker(BaseChecker, Similar):
stream must implement the readlines method
"""
with node.stream() as stream:
- self.append_stream(self.linter.current_name,
- stream,
- node.file_encoding)
+ self.append_stream(self.linter.current_name, stream, node.file_encoding)
def close(self):
"""compute and display similarities on closing (i.e. end of parsing)"""
@@ -313,49 +362,59 @@ class SimilarChecker(BaseChecker, Similar):
msg.append("==%s:%s" % (lineset.name, idx))
msg.sort()
# pylint: disable=W0631
- for line in lineset._real_lines[idx:idx+num]:
+ for line in lineset._real_lines[idx : idx + num]:
msg.append(line.rstrip())
- self.add_message('R0801', args=(len(couples), '\n'.join(msg)))
+ self.add_message("R0801", args=(len(couples), "\n".join(msg)))
duplicated += num * (len(couples) - 1)
- stats['nb_duplicated_lines'] = duplicated
- stats['percent_duplicated_lines'] = total and duplicated * 100. / total
+ stats["nb_duplicated_lines"] = duplicated
+ stats["percent_duplicated_lines"] = total and duplicated * 100. / total
def register(linter):
"""required method to auto register this checker """
linter.register_checker(SimilarChecker(linter))
+
def usage(status=0):
"""display command line usage information"""
print("finds copy pasted blocks in a set of files")
print()
- print('Usage: symilar [-d|--duplicates min_duplicated_lines] \
-[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...')
+ print(
+ "Usage: symilar [-d|--duplicates min_duplicated_lines] \
+[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1..."
+ )
sys.exit(status)
+
def Run(argv=None):
"""standalone command line access point"""
if argv is None:
argv = sys.argv[1:]
from getopt import getopt
- s_opts = 'hdi'
- l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
- 'ignore-docstrings')
+
+ s_opts = "hdi"
+ l_opts = (
+ "help",
+ "duplicates=",
+ "ignore-comments",
+ "ignore-imports",
+ "ignore-docstrings",
+ )
min_lines = 4
ignore_comments = False
ignore_docstrings = False
ignore_imports = False
opts, args = getopt(argv, s_opts, l_opts)
for opt, val in opts:
- if opt in ('-d', '--duplicates'):
+ if opt in ("-d", "--duplicates"):
min_lines = int(val)
- elif opt in ('-h', '--help'):
+ elif opt in ("-h", "--help"):
usage()
- elif opt in ('-i', '--ignore-comments'):
+ elif opt in ("-i", "--ignore-comments"):
ignore_comments = True
- elif opt in ('--ignore-docstrings',):
+ elif opt in ("--ignore-docstrings",):
ignore_docstrings = True
- elif opt in ('--ignore-imports',):
+ elif opt in ("--ignore-imports",):
ignore_imports = True
if not args:
usage(1)
@@ -366,5 +425,6 @@ def Run(argv=None):
sim.run()
sys.exit(0)
-if __name__ == '__main__':
+
+if __name__ == "__main__":
Run()
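
Finally, a hedged example of the standalone entry point as it reads after this change; the paths are placeholders, and note that Run() always finishes with sys.exit().

    from pylint.checkers.similar import Run

    # Equivalent to: symilar --duplicates 5 --ignore-comments pkg/a.py pkg/b.py
    Run(["--duplicates", "5", "--ignore-comments", "pkg/a.py", "pkg/b.py"])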