summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ry4an Brase <ry4an-hg@ry4an.org> 2012-09-29 23:47:36 -0400
committer: Ry4an Brase <ry4an-hg@ry4an.org> 2012-09-29 23:47:36 -0400
commit: 468da1ceac36ce829ecc423be2173ff6b178dcdb (patch)
tree: 279de5ea9a4313194ae5027593f3a8a1b3daf9f9
parent: 18cc2a2b8c8943cf58b36be232b3c5a8219900c5 (diff)
download: pylint-git-468da1ceac36ce829ecc423be2173ff6b178dcdb.tar.gz
Add --ignore-imports option to similarity checking. Closes #106534.
Additionally:
- add access to existing --ignore-docstrings option to symilar command line
- add access to new --ignore-imports option to symilar command line
- add test for existing --ignore-docstrings feature
- add test for new --ignore-imports feature
-rw-r--r--  ChangeLog             3
-rw-r--r--  checkers/similar.py  39
-rw-r--r--  test/input/similar1  41
-rw-r--r--  test/input/similar2  41
-rw-r--r--  test/test_similar.py 93
5 files changed, 156 insertions, 61 deletions
diff --git a/ChangeLog b/ChangeLog
index 80a4ad594..c46bf4ab2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,9 @@ ChangeLog for PyLint
====================
--
+ * #106534: add --ignore-imports option to code similarity checking
+ and 'symilar' command line tool (patch by Ry4an Brase)
+
* #104571: check for anomalous backslash escape, introducing new
W1401 and W1402 messages (patch by Martin Pool)
diff --git a/checkers/similar.py b/checkers/similar.py
index ed4f614c1..6c1b8938f 100644
--- a/checkers/similar.py
+++ b/checkers/similar.py
@@ -29,10 +29,11 @@ class Similar:
"""finds copy-pasted lines of code in a project"""
def __init__(self, min_lines=4, ignore_comments=False,
- ignore_docstrings=False):
+ ignore_docstrings=False, ignore_imports=False):
self.min_lines = min_lines
self.ignore_comments = ignore_comments
self.ignore_docstrings = ignore_docstrings
+ self.ignore_imports = ignore_imports
self.linesets = []
def append_stream(self, streamid, stream):
@@ -41,7 +42,8 @@ class Similar:
self.linesets.append(LineSet(streamid,
stream.readlines(),
self.ignore_comments,
- self.ignore_docstrings))
+ self.ignore_docstrings,
+ self.ignore_imports))
def run(self):
"""start looking for similarities and display results on stdout"""
@@ -123,7 +125,11 @@ class Similar:
for sim in self._find_common(lineset, lineset2):
yield sim
-def stripped_lines(lines, ignore_comments, ignore_docstrings):
+def stripped_lines(lines, ignore_comments, ignore_docstrings, ignore_imports):
+ """return lines with leading/trailing whitespace and any ignored code
+ features removed
+ """
+
strippedlines = []
docstring = None
for line in lines:
@@ -137,6 +143,9 @@ def stripped_lines(lines, ignore_comments, ignore_docstrings):
if line.endswith(docstring):
docstring = None
line = ''
+ if ignore_imports:
+ if line.startswith("import ") or line.startswith("from "):
+ line = ''
if ignore_comments:
# XXX should use regex in checkers/format to avoid cutting
# at a "#" in a string
@@ -147,11 +156,12 @@ def stripped_lines(lines, ignore_comments, ignore_docstrings):
class LineSet:
"""Holds and indexes all the lines of a single source file"""
def __init__(self, name, lines, ignore_comments=False,
- ignore_docstrings=False):
+ ignore_docstrings=False, ignore_imports=False):
self.name = name
self._real_lines = lines
self._stripped_lines = stripped_lines(lines, ignore_comments,
- ignore_docstrings)
+ ignore_docstrings,
+ ignore_imports)
self._index = self._mk_index()
def __str__(self):
@@ -236,6 +246,10 @@ class SimilarChecker(BaseChecker, Similar):
{'default' : True, 'type' : 'yn', 'metavar' : '<y or n>',
'help': 'Ignore docstrings when computing similarities.'}
),
+ ('ignore-imports',
+ {'default' : False, 'type' : 'yn', 'metavar' : '<y or n>',
+ 'help': 'Ignore imports when computing similarities.'}
+ ),
)
# reports
reports = ( ('RP0801', 'Duplication', report_similarities), )
@@ -258,6 +272,8 @@ class SimilarChecker(BaseChecker, Similar):
self.ignore_comments = self.config.ignore_comments
elif optname == 'ignore-docstrings':
self.ignore_docstrings = self.config.ignore_docstrings
+ elif optname == 'ignore-imports':
+ self.ignore_imports = self.config.ignore_imports
def open(self):
"""init the checkers: reset linesets and statistics information"""
@@ -302,7 +318,7 @@ def usage(status=0):
print "finds copy pasted blocks in a set of files"
print
print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \
-[-i|--ignore-comments] file1...'
+[-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] file1...'
sys.exit(status)
def Run(argv=None):
@@ -311,9 +327,12 @@ def Run(argv=None):
argv = sys.argv[1:]
from getopt import getopt
s_opts = 'hdi'
- l_opts = ('help', 'duplicates=', 'ignore-comments')
+ l_opts = ('help', 'duplicates=', 'ignore-comments', 'ignore-imports',
+ 'ignore-docstrings')
min_lines = 4
ignore_comments = False
+ ignore_docstrings = False
+ ignore_imports = False
opts, args = getopt(argv, s_opts, l_opts)
for opt, val in opts:
if opt in ('-d', '--duplicates'):
@@ -322,9 +341,13 @@ def Run(argv=None):
usage()
elif opt in ('-i', '--ignore-comments'):
ignore_comments = True
+ elif opt in ('--ignore-docstrings'):
+ ignore_docstrings = True
+ elif opt in ('--ignore-imports'):
+ ignore_imports = True
if not args:
usage(1)
- sim = Similar(min_lines, ignore_comments)
+ sim = Similar(min_lines, ignore_comments, ignore_docstrings, ignore_imports)
for filename in args:
sim.append_stream(filename, open(filename))
sim.run()
diff --git a/test/input/similar1 b/test/input/similar1
index d1604d306..2b04ee2f5 100644
--- a/test/input/similar1
+++ b/test/input/similar1
@@ -1,19 +1,22 @@
-this file is used
-to check the similar
-command line tool
-
-see the similar2 file which is almost the
-same file as this one.
-more than 4
-identical lines should
-be # ignore comments !
-detected
-
-
-héhéhéh
-
-
-
-
-
-Yo !
+import one
+from two import two
+three
+four
+five
+six # comments optionally ignored
+seven
+eight
+nine
+''' ten
+eleven
+twelve '''
+thirteen
+fourteen
+fifteen
+
+
+
+
+sixteen
+seventeen
+eighteen
diff --git a/test/input/similar2 b/test/input/similar2
index 56f9844fd..77f5f1ed6 100644
--- a/test/input/similar2
+++ b/test/input/similar2
@@ -1,19 +1,22 @@
-this file is used
-to check the similar
-command line tool
-
-see the similar1 file which is almost the
-same file as this one.
-more than 4
-identical lines should
-be
-detected
-
-
-hohohoh
-
-
-
-
-
-Yo !
+import one
+from two import two
+three
+four
+five
+six
+seven
+eight
+nine
+''' ten
+ELEVEN
+twelve '''
+thirteen
+fourteen
+FIFTEEN
+
+
+
+
+sixteen
+seventeen
+eighteen
diff --git a/test/test_similar.py b/test/test_similar.py
index 91299df73..d797a5f46 100644
--- a/test/test_similar.py
+++ b/test/test_similar.py
@@ -24,24 +24,63 @@ class SimilarTC(TestCase):
finally:
sys.stdout = sys.__stdout__
self.assertMultiLineEqual(output.strip(), ("""
-7 similar lines in 2 files
-==%s:5
-==%s:5
- same file as this one.
- more than 4
- identical lines should
- be
- detected
-
-
-TOTAL lines=38 duplicates=7 percent=18.42
+10 similar lines in 2 files
+==%s:0
+==%s:0
+ import one
+ from two import two
+ three
+ four
+ five
+ six
+ seven
+ eight
+ nine
+ ''' ten
+TOTAL lines=44 duplicates=10 percent=22.73
""" % (SIMILAR1, SIMILAR2)).strip())
- def test_dont_ignore_comments(self):
+ def test_ignore_docsrings(self):
sys.stdout = StringIO()
try:
- similar.Run([SIMILAR1, SIMILAR2])
+ similar.Run(['--ignore-docstrings', SIMILAR1, SIMILAR2])
+ except SystemExit, ex:
+ self.assertEqual(ex.code, 0)
+ output = sys.stdout.getvalue()
+ else:
+ self.fail('not system exit')
+ finally:
+ sys.stdout = sys.__stdout__
+ self.assertMultiLineEqual(output.strip(), ("""
+8 similar lines in 2 files
+==%s:6
+==%s:6
+ seven
+ eight
+ nine
+ ''' ten
+ ELEVEN
+ twelve '''
+ thirteen
+ fourteen
+
+5 similar lines in 2 files
+==%s:0
+==%s:0
+ import one
+ from two import two
+ three
+ four
+ five
+TOTAL lines=44 duplicates=13 percent=29.55
+""" % ((SIMILAR1, SIMILAR2) * 2)).strip())
+
+
+ def test_ignore_imports(self):
+ sys.stdout = StringIO()
+ try:
+ similar.Run(['--ignore-imports', SIMILAR1, SIMILAR2])
except SystemExit, ex:
self.assertEqual(ex.code, 0)
output = sys.stdout.getvalue()
@@ -50,8 +89,32 @@ TOTAL lines=38 duplicates=7 percent=18.42
finally:
sys.stdout = sys.__stdout__
self.assertMultiLineEqual(output.strip(), """
-TOTAL lines=38 duplicates=0 percent=0.00
- """.strip())
+TOTAL lines=44 duplicates=0 percent=0.00
+""".strip())
+
+
+ def test_ignore_nothing(self):
+ sys.stdout = StringIO()
+ try:
+ similar.Run([SIMILAR1, SIMILAR2])
+ except SystemExit, ex:
+ self.assertEqual(ex.code, 0)
+ output = sys.stdout.getvalue()
+ else:
+ self.fail('not system exit')
+ finally:
+ sys.stdout = sys.__stdout__
+ self.assertMultiLineEqual(output.strip(), ("""
+5 similar lines in 2 files
+==%s:0
+==%s:0
+ import one
+ from two import two
+ three
+ four
+ five
+TOTAL lines=44 duplicates=5 percent=11.36
+""" % (SIMILAR1, SIMILAR2)).strip())
def test_help(self):
sys.stdout = StringIO()