summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharles Hebert <charles.hebert@logilab.fr>2009-11-27 15:57:49 +0100
committerCharles Hebert <charles.hebert@logilab.fr>2009-11-27 15:57:49 +0100
commit71ec3b15799abe69963151b08ad38d7f185b0121 (patch)
treeaf44e7b1131796e1b83153311364b8eac8eeb150
parent38038da6b50cff9779d80575a7d2ce7124a31497 (diff)
downloadpylint-git-71ec3b15799abe69963151b08ad38d7f185b0121.tar.gz
Add a checker verifying string formatting (James Lingard's patch) + tests
-rw-r--r--checkers/string_format.py239
-rw-r--r--test/input/func_e99xx.py21
-rw-r--r--test/messages/func_e99xx.txt13
-rw-r--r--test/unittest_lint.py2
4 files changed, 274 insertions, 1 deletions
diff --git a/checkers/string_format.py b/checkers/string_format.py
new file mode 100644
index 000000000..a6f79cad9
--- /dev/null
+++ b/checkers/string_format.py
@@ -0,0 +1,239 @@
+# Copyright (c) 2009 Arista Networks, Inc. - James Lingard
+# Copyright (c) 2004-2009 LOGILAB S.A. (Paris, FRANCE).
+# http://www.logilab.fr/ -- mailto:contact@logilab.fr
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+"""Checker for string formatting operations.
+"""
+
+import string
+from logilab import astng
+from pylint.interfaces import IASTNGChecker
+from pylint.checkers import BaseChecker
+
# Message table for the string format checker.  Each entry maps a message id
# to a (message template, description) pair; the templates take the values
# passed as ``args=`` in the add_message() calls of StringFormatChecker.
# Descriptions are built from adjacent string literals (not backslash
# continuations) so that no source indentation leaks into the help text.
MSGS = {
    'E9900': ("Unsupported format character %r (%#02x) at index %d",
              "Used when an unsupported format character is used in a "
              "format string."),
    'E9901': ("Format string ends in middle of conversion specifier",
              "Used when a format string terminates before the end of a "
              "conversion specifier."),
    'E9902': ("Mixing named and unnamed conversion specifiers in format string",
              "Used when a format string contains both named (e.g. "
              "'%(foo)d') and unnamed (e.g. '%d') conversion specifiers. "
              "This is also used when a named conversion specifier contains "
              "* for the minimum field width and/or precision."),
    'E9903': ("Expected mapping for format string, not %s",
              "Used when a format string that uses named conversion "
              "specifiers is used with an argument that is not a mapping."),
    'W9900': ("Format string dictionary key should be a string, not %s",
              "Used when a format string that uses named conversion "
              "specifiers is used with a dictionary whose keys are not all "
              "strings."),
    'W9901': ("Unused key %r in format string dictionary",
              "Used when a format string that uses named conversion "
              "specifiers is used with a dictionary that contains keys not "
              "required by the format string."),
    'E9904': ("Missing key %r in format string dictionary",
              "Used when a format string that uses named conversion "
              "specifiers is used with a dictionary that doesn't contain "
              "all the keys required by the format string."),
    'E9905': ("Too many arguments for format string",
              "Used when a format string that uses unnamed conversion "
              "specifiers is given too many arguments."),
    'E9906': ("Not enough arguments for format string",
              "Used when a format string that uses unnamed conversion "
              "specifiers is given too few arguments."),
    }
+
class IncompleteFormatStringException(Exception):
    """Signals that a format string stopped partway through a conversion
    specifier (for example a trailing '%' or '%2')."""
+
class UnsupportedFormatCharacterException(Exception):
    """Signals a conversion type character that is not supported.

    The offset of the offending character inside the format string is kept
    in the ``index`` attribute (and is also the sole exception argument).
    """

    def __init__(self, index):
        self.index = index
        Exception.__init__(self, index)
+
def parse_format_string(format_string):
    """Parse a %-style format string.

    Returns a tuple (keys, num_args): keys is the set of mapping keys used
    by named specifiers such as '%(foo)d', and num_args is the number of
    positional arguments required.  Each unnamed specifier needs one
    argument, each '*' width or precision needs one more, and a literal
    '%%' needs none.

    Raises IncompleteFormatStringException if the string ends inside a
    conversion specifier, and UnsupportedFormatCharacterException (whose
    ``index`` is the offset of the bad character) if the conversion type is
    not supported.
    """
    keys = set()
    num_args = 0

    def next_char(i):
        """Step to the next position; return (new index, character there)."""
        i += 1
        if i == len(format_string):
            raise IncompleteFormatStringException
        return (i, format_string[i])

    i = 0
    while i < len(format_string):
        if format_string[i] != '%':
            # Ordinary text: nothing to parse.
            i += 1
            continue
        i, c = next_char(i)
        # Parse the mapping key (optional).  Parentheses may nest, so track
        # the depth until the opening one is balanced.
        key = None
        if c == '(':
            depth = 1
            i, c = next_char(i)
            key_start = i
            while depth != 0:
                if c == '(':
                    depth += 1
                elif c == ')':
                    depth -= 1
                i, c = next_char(i)
            # i is now one past the closing parenthesis.
            key = format_string[key_start:i - 1]
        # Parse the conversion flags (optional).
        while c in '#0- +':
            i, c = next_char(i)
        # Parse the minimum field width (optional).
        if c == '*':
            num_args += 1
            i, c = next_char(i)
        else:
            while c in string.digits:
                i, c = next_char(i)
        # Parse the precision (optional).
        if c == '.':
            i, c = next_char(i)
            if c == '*':
                num_args += 1
                i, c = next_char(i)
            else:
                while c in string.digits:
                    i, c = next_char(i)
        # Parse the length modifier (optional).
        if c in 'hlL':
            i, c = next_char(i)
        # Parse the conversion type (mandatory).
        if c not in 'diouxXeEfFgGcrs%':
            raise UnsupportedFormatCharacterException(i)
        # Compare against None rather than truthiness: '%()s' has the empty
        # string as a perfectly valid mapping key and must not be counted
        # as a positional argument.
        if key is not None:
            keys.add(key)
        elif c != '%':
            num_args += 1
        i += 1
    return keys, num_args
+
class StringFormatChecker(BaseChecker):
    """Checks string formatting operations to ensure that the format string
    is valid and the arguments match the format string.
    """

    __implements__ = (IASTNGChecker,)
    name = 'string_format'
    msgs = MSGS

    def visit_binop(self, node):
        """Check a ``<string constant> % <args>`` binary operation.

        Nodes whose operator is not '%' or whose left operand is not a
        string constant are ignored.  Parse errors in the format string are
        reported as E9900/E9901; otherwise the right operand is compared
        with what the format string requires.
        """
        if node.op != '%':
            return
        f = node.left
        args = node.right
        if not (isinstance(f, astng.Const) and isinstance(f.value, basestring)):
            return
        format_string = f.value
        try:
            required_keys, required_num_args = \
                parse_format_string(format_string)
        except UnsupportedFormatCharacterException as e:
            c = format_string[e.index]
            self.add_message('E9900', node=node, args=(c, ord(c), e.index))
            return
        except IncompleteFormatStringException:
            self.add_message('E9901', node=node)
            return
        if required_keys and required_num_args:
            # The format string uses both named and unnamed format
            # specifiers.
            self.add_message('E9902', node=node)
        elif required_keys:
            self._check_named_args(node, args, required_keys)
        else:
            self._check_unnamed_args(node, args, required_num_args)

    def _check_named_args(self, node, args, required_keys):
        """Check the right operand of a format string using only named
        specifiers: it should be a mapping providing `required_keys`."""
        if isinstance(args, astng.Dict):
            keys = set()
            unknown_keys = False
            for k, _ in args.items:
                if isinstance(k, astng.Const):
                    key = k.value
                    if isinstance(key, basestring):
                        keys.add(key)
                    else:
                        self.add_message('W9900', node=node, args=key)
                else:
                    # One of the keys was something other than a constant.
                    # Since we can't tell what it is, suppress checks for
                    # missing keys in the dictionary.
                    unknown_keys = True
            if not unknown_keys:
                for key in required_keys:
                    if key not in keys:
                        self.add_message('E9904', node=node, args=key)
            # Unused-key reporting only involves the constant string keys
            # that were collected, so it is not guarded by unknown_keys.
            for key in keys:
                if key not in required_keys:
                    self.add_message('W9901', node=node, args=key)
        elif isinstance(args, (astng.Const, astng.Tuple, astng.List,
                               astng.ListComp, astng.GenExpr,
                               astng.Backquote, astng.Lambda)):
            # These literal/expression kinds can never be a mapping.
            self.add_message('E9903', node=node, args=type(args).__name__)
        # Otherwise the RHS is a name or expression.  It may be a mapping
        # object, so there's nothing we can check.

    def _check_unnamed_args(self, node, args, required_num_args):
        """Check the right operand of a format string using only unnamed
        specifiers: it should supply exactly `required_num_args` values."""
        if isinstance(args, astng.Tuple):
            num_args = len(args.elts)
        elif isinstance(args, (astng.Const, astng.Dict, astng.List,
                               astng.ListComp, astng.GenExpr,
                               astng.Backquote, astng.Lambda,
                               astng.Function)):
            # Any single non-tuple value counts as exactly one argument.
            num_args = 1
        else:
            # The RHS is a name or expression.  It could be a tuple of
            # unknown size, so there's nothing we can check.
            return
        if num_args > required_num_args:
            self.add_message('E9905', node=node)
        elif num_args < required_num_args:
            self.add_message('E9906', node=node)
+
+
def register(linter):
    """Required hook for auto-registration: build a StringFormatChecker
    for `linter` and register it with that linter."""
    checker = StringFormatChecker(linter)
    linter.register_checker(checker)
diff --git a/test/input/func_e99xx.py b/test/input/func_e99xx.py
new file mode 100644
index 000000000..427eb71e9
--- /dev/null
+++ b/test/input/func_e99xx.py
@@ -0,0 +1,21 @@
+"""test string format error
+"""
+
+__revision__ = 1
+
+PARG_1 = PARG_2 = PARG_3 = 1
+
+def pprint():
+ """Test string format
+ """
+ print "%s %s" % {'PARG_1': 1, 'PARG_2': 2} # E9906
+ print "%s" % (PARG_1, PARG_2) # E9905
+ print "%(PARG_1)d %d" % {'PARG_1': 1, 'PARG_2': 2} # E9902
+ print "%(PARG_1)d %(PARG_2)d" % {'PARG_1': 1} # E9904
+ print "%(PARG_1)d %(PARG_2)d" % {'PARG_1': 1, 'PARG_2':2, 'PARG_3':3} # W9901
+ print "%(PARG_1)d %(PARG_2)d" % {'PARG_1': 1, 2:3} # W9900 E9904
+ print "%(PARG_1)d %(PARG_2)d" % (2, 3) # E9903
+ print "%(PARG_1)d %(PARG_2)d" % [2, 3] # E9903
+ print "%2z" % PARG_1 # E9900
+ print "strange format %2" % PARG_2 # E9901
+
diff --git a/test/messages/func_e99xx.txt b/test/messages/func_e99xx.txt
new file mode 100644
index 000000000..b0754b9f7
--- /dev/null
+++ b/test/messages/func_e99xx.txt
@@ -0,0 +1,13 @@
+C: 15: Line too long (81/80)
+E: 11:pprint: Not enough arguments for format string
+E: 12:pprint: Too many arguments for format string
+E: 13:pprint: Mixing named and unnamed conversion specifiers in format string
+E: 14:pprint: Missing key 'PARG_2' in format string dictionary
+E: 16:pprint: Missing key 'PARG_2' in format string dictionary
+E: 17:pprint: Expected mapping for format string, not Tuple
+E: 18:pprint: Expected mapping for format string, not List
+E: 19:pprint: Unsupported format character 'z' (0x7a) at index 2
+E: 20:pprint: Format string ends in middle of conversion specifier
+W: 15:pprint: Unused key 'PARG_3' in format string dictionary
+W: 16:pprint: Format string dictionary key should be a string, not 2
+
diff --git a/test/unittest_lint.py b/test/unittest_lint.py
index 9b50c36ac..1820a7cca 100644
--- a/test/unittest_lint.py
+++ b/test/unittest_lint.py
@@ -220,7 +220,7 @@ class PyLinterTC(TestCase):
if c.is_enabled()]),
['basic', 'classes', 'exceptions', 'format', 'imports',
'logging', 'master', 'metrics', 'miscellaneous', 'newstyle',
- 'similarities', 'typecheck', 'variables'])
+ 'similarities', 'string_format', 'typecheck', 'variables'])
def test_enable_checkers2(self):
self.linter.enable_checkers(['design'], True)