Add a new warning 'superfluous-parens' for unnecessary parentheses

after certain keywords.
author: Torsten Marek <tmarek@google.com> 2013-11-06 09:51:37 -0800
committer: Torsten Marek <tmarek@google.com> 2013-11-06 09:51:37 -0800
commit: d917d28ea80325d4190f943cfc439c678d32d5e0 (patch)
tree: cf9c5ec91721947446a26228f8f51af9120ab008
parent: 126b33ad7325626ead8d4cf614883adebdc8c66f (diff)
download: pylint-git-d917d28ea80325d4190f943cfc439c678d32d5e0.tar.gz
8 files changed, 202 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index ece2d4041..b9bbba9fc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -5,6 +5,9 @@ ChangeLog for Pylint
     * Avoid false used-before-assignment for except handler defined
      identifier used on the same line (#111)
 
+    * Add a new warning 'superfluous-parens' for unnecessary
+      parentheses after certain keywords.
+
     * Fix a potential crash in the redefine-in-handler warning
       if the redefined name is a nested getattr node.
 
diff --git a/checkers/format.py b/checkers/format.py
index d92813f69..0209addd5 100644
--- a/checkers/format.py
+++ b/checkers/format.py
@@ -29,11 +29,14 @@ if not hasattr(tokenize, 'NL'):
 from logilab.common.textutils import pretty_match
 from astroid import nodes
 
-from pylint.interfaces import ITokenChecker, IAstroidChecker
+from pylint.interfaces import ITokenChecker, IAstroidChecker, IRawChecker
 from pylint.checkers import BaseTokenChecker
 from pylint.checkers.utils import check_messages
 from pylint.utils import WarningScope, OPTION_RGX
 
+_KEYWORD_TOKENS = ['assert', 'del', 'elif', 'except', 'for', 'if', 'in', 'not',
+                   'print', 'raise', 'return', 'while', 'yield']
+
 MSGS = {
     'C0301': ('Line too long (%s/%s)',
               'line-too-long',
@@ -78,6 +81,10 @@ MSGS = {
               'no-space-after-comma',
               'Used when a comma (",") is not followed by a space.',
               {'scope': WarningScope.NODE}),
+    'C0325' : ('Unnecessary parens after %r keyword',
+               'superfluous-parens',
+               'Used when a single item in parentheses follows an if, for, or '
+               'other keyword.'),
     }
 
 if sys.version_info < (3, 0):
@@ -177,7 +184,7 @@ class FormatChecker(BaseTokenChecker):
     * use of <> instead of !=
     """
 
-    __implements__ = (ITokenChecker, IAstroidChecker)
+    __implements__ = (ITokenChecker, IAstroidChecker, IRawChecker)
 
     # configuration section name
     name = 'format'
@@ -217,6 +224,96 @@ class FormatChecker(BaseTokenChecker):
             self._lines[line_num] = line.split('\n')[0]
         self.check_lines(line, line_num)
 
+    def process_module(self, module):
+        self._keywords_with_parens = set()
+        for node in module.body:
+            if (isinstance(node, nodes.From) and node.modname == '__future__'
+                and any(name == 'print_function' for name, _ in node.names)):
+                self._keywords_with_parens.add('print')
+
+    def _check_keyword_parentheses(self, tokens, start):
+      """Check that there are not unnecessary parens after a keyword.
+
+      Parens are unnecessary if there is exactly one balanced outer pair on a
+      line, and it is followed by a colon, and contains no commas (i.e. is not a
+      tuple).
+
+      Args:
+      tokens: list of Tokens; the entire list of Tokens.
+      start: int; the position of the keyword in the token list.
+      """
+      # If the next token is not a paren, we're fine.
+      if tokens[start+1][1] != '(':
+          return
+
+      found_comma = False
+      found_and_or = False
+      depth = 0
+      keyword_token = tokens[start][1]
+      line_num = tokens[start][2][0]
+
+      for i in xrange(start, len(tokens) - 1):
+          token = tokens[i]
+
+          # If we hit a newline, then assume any parens were for continuation.
+          if token[0] == tokenize.NL:
+              return
+
+          if token[1] == '(':
+              depth += 1
+          elif token[1] == ')':
+              depth -= 1
+              if not depth:
+                  # ')' can't happen after if (foo), since it would be a syntax error.
+                  if (tokens[i+1][1] in (':', ')', ']', '}', 'in') or
+                      tokens[i+1][0] in (tokenize.NEWLINE, tokenize.ENDMARKER,
+                                           tokenize.COMMENT)):
+                      # The empty tuple () is always accepted.
+                      if i == start + 2:
+                          return
+                      if keyword_token == 'not':
+                          if not found_and_or:
+                              self.add_message('C0325', line=line_num,
+                                               args=keyword_token)
+                      elif keyword_token in ('return', 'yield'):
+                          self.add_message('C0325', line=line_num,
+                                           args=keyword_token)
+                      elif keyword_token not in self._keywords_with_parens:
+                          if not (tokens[i+1][1] == 'in' and found_and_or):
+                              self.add_message('C0325', line=line_num,
+                                               args=keyword_token)
+                  return
+          elif depth == 1:
+              # This is a tuple, which is always acceptable.
+              if token[1] == ',':
+                  return
+              # 'and' and 'or' are the only boolean operators with lower precedence
+              # than 'not', so parens are only required when they are found.
+              elif token[1] in ('and', 'or'):
+                  found_and_or = True
+              # A yield inside an expression must always be in parentheses,
+              # quit early without error.
+              elif token[1] == 'yield':
+                  return
+              # A generator expression always has a 'for' token in it, and
+              # the 'for' token is only legal inside parens when it is in a
+              # generator expression.  The parens are necessary here, so bail
+              # without an error.
+              elif token[1] == 'for':
+                  return
+
+    def _prepare_token_dispatcher(self):
+      raw = [
+          (_KEYWORD_TOKENS,
+           self._check_keyword_parentheses),
+          ]
+
+      dispatch = {}
+      for tokens, handler in raw:
+        for token in tokens:
+          dispatch[token] = handler
+      return dispatch
+
     def process_tokens(self, tokens):
         """process tokens and search for :
 
@@ -237,7 +334,8 @@ class FormatChecker(BaseTokenChecker):
         self._lines = {}
         self._visited_lines = {}
         new_line_delay = False
-        for (tok_type, token, start, _, line) in tokens:
+        token_handlers = self._prepare_token_dispatcher()
+        for idx, (tok_type, token, start, _, line) in enumerate(tokens):
             if new_line_delay:
                 new_line_delay = False
                 self.new_line(tok_type, line, line_num, junk)
@@ -296,6 +394,13 @@ class FormatChecker(BaseTokenChecker):
                 check_equal = 0
                 self.check_indent_level(line, indents[-1], line_num)
 
+            try:
+                handler = token_handlers[token]
+            except KeyError:
+                pass
+            else:
+                handler(tokens, idx)
+
         line_num -= 1 # to be ok with "wc -l"
         if line_num > self.config.max_module_lines:
             self.add_message('C0302', args=line_num, line=1)
diff --git a/test/input/func_block_disable_msg.py b/test/input/func_block_disable_msg.py
index cce04a454..2551823c7 100644
--- a/test/input/func_block_disable_msg.py
+++ b/test/input/func_block_disable_msg.py
@@ -91,7 +91,7 @@ class Foo(object):
         eris = 5
 
         if eris:
-            print ("In block")
+            print "In block"
 
         # pylint: disable=E1101
         # no error
diff --git a/test/input/func_noerror_defined_and_used_on_same_line.py b/test/input/func_noerror_defined_and_used_on_same_line.py
index 71e67b9da..db700cf8b 100644
--- a/test/input/func_noerror_defined_and_used_on_same_line.py
+++ b/test/input/func_noerror_defined_and_used_on_same_line.py
@@ -27,4 +27,4 @@ FUNC3 = lambda (a, b) : a != b
 
 # test http://www.logilab.org/ticket/6954:
 
-with open('f') as f: print(f.read())
+with open('f') as f: print f.read()
diff --git a/test/input/func_superfluous_parens.py b/test/input/func_superfluous_parens.py
new file mode 100644
index 000000000..112ab2bcf
--- /dev/null
+++ b/test/input/func_superfluous_parens.py
@@ -0,0 +1,19 @@
+"""Test the superfluous-parens warning."""
+
+__revision__ = 1
+
+if (3 == 5):
+    pass
+if not (3 == 5):
+    pass
+if not (3 or 5):
+    pass
+for (x) in (1, 2, 3):
+    print x
+if (1) in (1, 2, 3):
+    pass
+if (1, 2) in (1, 2, 3):
+    pass
+DICT = {'a': 1, 'b': 2}
+del(DICT['b'])
+del DICT['a']
diff --git a/test/input/func_w0711.py b/test/input/func_w0711.py
index 9cc791ebc..97c369721 100644
--- a/test/input/func_w0711.py
+++ b/test/input/func_w0711.py
@@ -9,7 +9,7 @@ except Exception or StandardError:
     print "caught1"
 except Exception and StandardError:
     print "caught2"
-except (Exception or StandardError):
+except Exception or StandardError:
     print "caught3"
 except (Exception or StandardError), exc:
     print "caught4"
diff --git a/test/messages/func_superfluous_parens.txt b/test/messages/func_superfluous_parens.txt
new file mode 100644
index 000000000..77ff1866c
--- /dev/null
+++ b/test/messages/func_superfluous_parens.txt
@@ -0,0 +1,5 @@
+C:  5: Unnecessary parens after 'if' keyword
+C:  7: Unnecessary parens after 'not' keyword
+C: 11: Unnecessary parens after 'for' keyword
+C: 13: Unnecessary parens after 'if' keyword
+C: 18: Unnecessary parens after 'del' keyword
diff --git a/test/test_format.py b/test/test_format.py
index f74a65b07..3be2cffb1 100644
--- a/test/test_format.py
+++ b/test/test_format.py
@@ -19,6 +19,8 @@ Check format checker helper functions
 import sys
 import re
 from os import linesep
+import tokenize
+import StringIO
 
 from logilab.common.testlib import TestCase, unittest_main
 from astroid import test_utils
@@ -29,6 +31,10 @@ from pylint.testutils import TestReporter, CheckerTestCase, Message
 
 REPORTER = TestReporter()
 
+def tokenize_str(code):
+    return list(tokenize.generate_tokens(StringIO.StringIO(code).readline))
+
+
 class StringRgxTest(TestCase):
     """test the STRING_RGX regular expression"""
 
@@ -202,5 +208,63 @@ class MultiStatementLineTest(CheckerTestCase):
           self.checker.visit_default(tree.body[0])
 
 
+
+class SuperfluousParenthesesTest(CheckerTestCase):
+    CHECKER_CLASS = FormatChecker
+
+    def testCheckKeywordParensHandlesValidCases(self):
+        self.checker._keywords_with_parens = set()
+        cases = [
+            'if foo:',
+            'if foo():',
+            'if (x and y) or z:',
+            'assert foo()',
+            'assert ()',
+            'if (1, 2) in (3, 4):',
+            'if (a or b) in c:',
+            'return (x for x in x)',
+            'if (x for x in x):',
+            'for x in (x for x in x):',
+            'not (foo or bar)',
+            'not (foo or bar) and baz',
+            ]
+        with self.assertNoMessages():
+            for code in cases:
+                self.checker._check_keyword_parentheses(tokenize_str(code), 0)
+
+    def testCheckKeywordParensHandlesUnnecessaryParens(self):
+        self.checker._keywords_with_parens = set()
+        cases = [
+            (Message('C0325', line=1, args='if'),
+             'if (foo):', 0),
+            (Message('C0325', line=1, args='if'),
+             'if ((foo, bar)):', 0),
+            (Message('C0325', line=1, args='if'),
+             'if (foo(bar)):', 0),
+            (Message('C0325', line=1, args='return'),
+             'return ((x for x in x))', 0),
+            (Message('C0325', line=1, args='not'),
+             'not (foo)', 0),
+            (Message('C0325', line=1, args='not'),
+             'if not (foo):', 1),
+            (Message('C0325', line=1, args='if'),
+             'if (not (foo)):', 0),
+            (Message('C0325', line=1, args='not'),
+             'if (not (foo)):', 2),
+            ]
+        for msg, code, offset in cases:
+            with self.assertAddsMessages(msg):
+                self.checker._check_keyword_parentheses(tokenize_str(code), offset)
+
+    def testFuturePrintStatementWithoutParensWarning(self):
+        code = """from __future__ import print_function
+print('Hello world!')
+"""
+        tree = test_utils.build_module(code)
+        with self.assertNoMessages():
+            self.checker.process_module(tree)
+            self.checker.process_tokens(tokenize_str(code))
+
+
 if __name__ == '__main__':
     unittest_main()
author	Torsten Marek <tmarek@google.com>	2013-11-06 09:51:37 -0800
committer	Torsten Marek <tmarek@google.com>	2013-11-06 09:51:37 -0800
commit	d917d28ea80325d4190f943cfc439c678d32d5e0 (patch)
tree	cf9c5ec91721947446a26228f8f51af9120ab008
parent	126b33ad7325626ead8d4cf614883adebdc8c66f (diff)
download	pylint-git-d917d28ea80325d4190f943cfc439c678d32d5e0.tar.gz