diff options
author | Ry4an Brase <ry4an-hg@ry4an.org> | 2012-09-29 23:47:36 -0400 |
---|---|---|
committer | Ry4an Brase <ry4an-hg@ry4an.org> | 2012-09-29 23:47:36 -0400 |
commit | 468da1ceac36ce829ecc423be2173ff6b178dcdb (patch) | |
tree | 279de5ea9a4313194ae5027593f3a8a1b3daf9f9 | |
parent | 18cc2a2b8c8943cf58b36be232b3c5a8219900c5 (diff) | |
download | pylint-git-468da1ceac36ce829ecc423be2173ff6b178dcdb.tar.gz |
Add --ignore-imports option to similarity checking. Closes #106534.
Additionally:
- add access to existing --ignore-docstrings option to symilar command line
- add access to new --ignore-imports option to symilar command line
- add test for existing --ignore-docstrings feature
- add test for new --ignore-imports feature
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | checkers/similar.py | 39 | ||||
-rw-r--r-- | test/input/similar1 | 41 | ||||
-rw-r--r-- | test/input/similar2 | 41 | ||||
-rw-r--r-- | test/test_similar.py | 93 |
5 files changed, 156 insertions, 61 deletions
@@ -2,6 +2,9 @@ ChangeLog for PyLint ==================== -- + * #106534: add --ignore-imports option to code similarity checking + and 'symilar' command line tool (patch by Ry4an Brase) + * #104571: check for anomalous backslash escape, introducing new W1401 and W1402 messages (patch by Martin Pool) diff --git a/checkers/similar.py b/checkers/similar.py index ed4f614c1..6c1b8938f 100644 --- a/checkers/similar.py +++ b/checkers/similar.py @@ -29,10 +29,11 @@ class Similar: """finds copy-pasted lines of code in a project""" def __init__(self, min_lines=4, ignore_comments=False, - ignore_docstrings=False): + ignore_docstrings=False, ignore_imports=False): self.min_lines = min_lines self.ignore_comments = ignore_comments self.ignore_docstrings = ignore_docstrings + self.ignore_imports = ignore_imports self.linesets = [] def append_stream(self, streamid, stream): @@ -41,7 +42,8 @@ class Similar: self.linesets.append(LineSet(streamid, stream.readlines(), self.ignore_comments, - self.ignore_docstrings)) + self.ignore_docstrings, + self.ignore_imports)) def run(self): """start looking for similarities and display results on stdout""" @@ -123,7 +125,11 @@ class Similar: for sim in self._find_common(lineset, lineset2): yield sim -def stripped_lines(lines, ignore_comments, ignore_docstrings): +def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports): + """return lines with leading/trailing whitespace and any ignored code + features removed + """ + strippedlines = [] docstring = None for line in lines: @@ -137,6 +143,9 @@ def stripped_lines(lines, ignore_comments, ignore_docstrings): if line.endswith(docstring): docstring = None line = '' + if ignore_imports: + if line.startswith("import ") or line.startswith("from "): + line = '' if ignore_comments: # XXX should use regex in checkers/format to avoid cutting # at a "#" in a string @@ -147,11 +156,12 @@ def stripped_lines(lines, ignore_comments, ignore_docstrings): class LineSet: """Holds and indexes all 
the lines of a single source file""" def __init__(self, name, lines, ignore_comments=False, - ignore_docstrings=False): + ignore_docstrings=False, ignore_imports=False): self.name = name self._real_lines = lines self._stripped_lines = stripped_lines(lines, ignore_comments, - ignore_docstrings) + ignore_docstrings, + ignore_imports) self._index = self._mk_index() def __str__(self): @@ -236,6 +246,10 @@ class SimilarChecker(BaseChecker, Similar): {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>', 'help': 'Ignore docstrings when computing similarities.'} ), + ('ignore-imports', + {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>', + 'help': 'Ignore imports when computing similarities.'} + ), ) # reports reports = ( ('RP0801', 'Duplication', report_similarities), ) @@ -258,6 +272,8 @@ class SimilarChecker(BaseChecker, Similar): self.ignore_comments = self.config.ignore_comments elif optname == 'ignore-docstrings': self.ignore_docstrings = self.config.ignore_docstrings + elif optname == 'ignore-imports': + self.ignore_imports = self.config.ignore_imports def open(self): """init the checkers: reset linesets and statistics information""" @@ -302,7 +318,7 @@ def usage(status=0): print "finds copy pasted blocks in a set of files" print print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \ -[-i|--ignore-comments] file1...' +[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...' 
sys.exit(status) def Run(argv=None): @@ -311,9 +327,12 @@ def Run(argv=None): argv = sys.argv[1:] from getopt import getopt s_opts = 'hdi' - l_opts = ('help', 'duplicates=', 'ignore-comments') + l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports', + 'ignore-docstrings') min_lines = 4 ignore_comments = False + ignore_docstrings = False + ignore_imports = False opts, args = getopt(argv, s_opts, l_opts) for opt, val in opts: if opt in ('-d', '--duplicates'): @@ -322,9 +341,13 @@ def Run(argv=None): usage() elif opt in ('-i', '--ignore-comments'): ignore_comments = True + elif opt in ('--ignore-docstrings'): + ignore_docstrings = True + elif opt in ('--ignore-imports'): + ignore_imports = True if not args: usage(1) - sim = Similar(min_lines, ignore_comments) + sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports) for filename in args: sim.append_stream(filename, open(filename)) sim.run() diff --git a/test/input/similar1 b/test/input/similar1 index d1604d306..2b04ee2f5 100644 --- a/test/input/similar1 +++ b/test/input/similar1 @@ -1,19 +1,22 @@ -this file is used -to check the similar -command line tool - -see the similar2 file which is almost the -same file as this one. -more than 4 -identical lines should -be # ignore comments ! -detected - - -héhéhéh - - - - - -Yo ! +import one +from two import two +three +four +five +six # comments optionally ignored +seven +eight +nine +''' ten +eleven +twelve ''' +thirteen +fourteen +fifteen + + + + +sixteen +seventeen +eighteen diff --git a/test/input/similar2 b/test/input/similar2 index 56f9844fd..77f5f1ed6 100644 --- a/test/input/similar2 +++ b/test/input/similar2 @@ -1,19 +1,22 @@ -this file is used -to check the similar -command line tool - -see the similar1 file which is almost the -same file as this one. -more than 4 -identical lines should -be -detected - - -hohohoh - - - - - -Yo ! 
+import one +from two import two +three +four +five +six +seven +eight +nine +''' ten +ELEVEN +twelve ''' +thirteen +fourteen +FIFTEEN + + + + +sixteen +seventeen +eighteen diff --git a/test/test_similar.py b/test/test_similar.py index 91299df73..d797a5f46 100644 --- a/test/test_similar.py +++ b/test/test_similar.py @@ -24,24 +24,63 @@ class SimilarTC(TestCase): finally: sys.stdout = sys.__stdout__ self.assertMultiLineEqual(output.strip(), (""" -7 similar lines in 2 files -==%s:5 -==%s:5 - same file as this one. - more than 4 - identical lines should - be - detected - - -TOTAL lines=38 duplicates=7 percent=18.42 +10 similar lines in 2 files +==%s:0 +==%s:0 + import one + from two import two + three + four + five + six + seven + eight + nine + ''' ten +TOTAL lines=44 duplicates=10 percent=22.73 """ % (SIMILAR1, SIMILAR2)).strip()) - def test_dont_ignore_comments(self): + def test_ignore_docsrings(self): sys.stdout = StringIO() try: - similar.Run([SIMILAR1, SIMILAR2]) + similar.Run(['--ignore-docstrings', SIMILAR1, SIMILAR2]) + except SystemExit, ex: + self.assertEqual(ex.code, 0) + output = sys.stdout.getvalue() + else: + self.fail('not system exit') + finally: + sys.stdout = sys.__stdout__ + self.assertMultiLineEqual(output.strip(), (""" +8 similar lines in 2 files +==%s:6 +==%s:6 + seven + eight + nine + ''' ten + ELEVEN + twelve ''' + thirteen + fourteen + +5 similar lines in 2 files +==%s:0 +==%s:0 + import one + from two import two + three + four + five +TOTAL lines=44 duplicates=13 percent=29.55 +""" % ((SIMILAR1, SIMILAR2) * 2)).strip()) + + + def test_ignore_imports(self): + sys.stdout = StringIO() + try: + similar.Run(['--ignore-imports', SIMILAR1, SIMILAR2]) except SystemExit, ex: self.assertEqual(ex.code, 0) output = sys.stdout.getvalue() @@ -50,8 +89,32 @@ TOTAL lines=38 duplicates=7 percent=18.42 finally: sys.stdout = sys.__stdout__ self.assertMultiLineEqual(output.strip(), """ -TOTAL lines=38 duplicates=0 percent=0.00 - """.strip()) +TOTAL lines=44 
duplicates=0 percent=0.00 +""".strip()) + + + def test_ignore_nothing(self): + sys.stdout = StringIO() + try: + similar.Run([SIMILAR1, SIMILAR2]) + except SystemExit, ex: + self.assertEqual(ex.code, 0) + output = sys.stdout.getvalue() + else: + self.fail('not system exit') + finally: + sys.stdout = sys.__stdout__ + self.assertMultiLineEqual(output.strip(), (""" +5 similar lines in 2 files +==%s:0 +==%s:0 + import one + from two import two + three + four + five +TOTAL lines=44 duplicates=5 percent=11.36 +""" % (SIMILAR1, SIMILAR2)).strip()) def test_help(self): sys.stdout = StringIO() |