diff options
author | Charles Hebert <charles.hebert@logilab.fr> | 2009-11-27 15:57:49 +0100 |
---|---|---|
committer | Charles Hebert <charles.hebert@logilab.fr> | 2009-11-27 15:57:49 +0100 |
commit | 71ec3b15799abe69963151b08ad38d7f185b0121 (patch) | |
tree | af44e7b1131796e1b83153311364b8eac8eeb150 | |
parent | 38038da6b50cff9779d80575a7d2ce7124a31497 (diff) | |
download | pylint-git-71ec3b15799abe69963151b08ad38d7f185b0121.tar.gz |
Add a checker verifying string formatting (James Lingard's patch) + tests
-rw-r--r-- | checkers/string_format.py | 239 | ||||
-rw-r--r-- | test/input/func_e99xx.py | 21 | ||||
-rw-r--r-- | test/messages/func_e99xx.txt | 13 | ||||
-rw-r--r-- | test/unittest_lint.py | 2 |
4 files changed, 274 insertions, 1 deletions
diff --git a/checkers/string_format.py b/checkers/string_format.py
new file mode 100644
index 000000000..a6f79cad9
--- /dev/null
+++ b/checkers/string_format.py
@@ -0,0 +1,239 @@
+# Copyright (c) 2009 Arista Networks, Inc. - James Lingard
+# Copyright (c) 2004-2009 LOGILAB S.A. (Paris, FRANCE).
+# http://www.logilab.fr/ -- mailto:contact@logilab.fr
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+"""Checker for string formatting operations.
+"""
+
+import string
+from logilab import astng
+from pylint.interfaces import IASTNGChecker
+from pylint.checkers import BaseChecker
+
+MSGS = {
+    'E9900': ("Unsupported format character %r (%#02x) at index %d",
+              "Used when an unsupported format character is used in a format\
+              string."),
+    'E9901': ("Format string ends in middle of conversion specifier",
+              "Used when a format string terminates before the end of a \
+              conversion specifier."),
+    'E9902': ("Mixing named and unnamed conversion specifiers in format string",
+              "Used when a format string contains both named (e.g. '%(foo)d') \
+              and unnamed (e.g. '%d') conversion specifiers. This is also \
+              used when a named conversion specifier contains * for the \
+              minimum field width and/or precision."),
+    'E9903': ("Expected mapping for format string, not %s",
+              "Used when a format string that uses named conversion specifiers \
+              is used with an argument that is not a mapping."),
+    'W9900': ("Format string dictionary key should be a string, not %s",
+              "Used when a format string that uses named conversion specifiers \
+              is used with a dictionary whose keys are not all strings."),
+    'W9901': ("Unused key %r in format string dictionary",
+              "Used when a format string that uses named conversion specifiers \
+              is used with a dictionary that contains keys not required by the \
+              format string."),
+    'E9904': ("Missing key %r in format string dictionary",
+              "Used when a format string that uses named conversion specifiers \
+              is used with a dictionary that doesn't contain all the keys \
+              required by the format string."),
+    'E9905': ("Too many arguments for format string",
+              "Used when a format string that uses unnamed conversion \
+              specifiers is given too many arguments."),
+    'E9906': ("Not enough arguments for format string",
+              "Used when a format string that uses unnamed conversion \
+              specifiers is given too few arguments."),
+    }
+
+class IncompleteFormatStringException(Exception):
+    """A format string ended in the middle of a format specifier."""
+
+class UnsupportedFormatCharacterException(Exception):
+    """A format character in a format string is not one of the supported
+    format characters."""
+    def __init__(self, index):
+        Exception.__init__(self, index)
+        self.index = index
+
+def parse_format_string(format_string):
+    """Parses a format string, returning a tuple of (keys, num_args), where keys
+    is the set of mapping keys in the format string, and num_args is the number
+    of positional arguments it requires ('*' widths/precisions included).
+    Raises IncompleteFormatStringException or
+    UnsupportedFormatCharacterException (with .index set) on a parse error."""
+    keys = set()
+    num_args = 0
+    def next_char(i):
+        i += 1
+        if i == len(format_string):
+            raise IncompleteFormatStringException
+        return (i, format_string[i])
+    i = 0
+    while i < len(format_string):
+        c = format_string[i]
+        if c == '%':
+            i, c = next_char(i)
+            # Parse the mapping key (optional).
+            key = None
+            if c == '(':
+                depth = 1
+                i, c = next_char(i)
+                key_start = i
+                while depth != 0:
+                    if c == '(':
+                        depth += 1
+                    elif c == ')':
+                        depth -= 1
+                    i, c = next_char(i)
+                key_end = i - 1
+                key = format_string[key_start:key_end]
+
+            # Parse the conversion flags (optional).
+            while c in '#0- +':
+                i, c = next_char(i)
+            # Parse the minimum field width (optional).
+            if c == '*':
+                num_args += 1
+                i, c = next_char(i)
+            else:
+                while c in string.digits:
+                    i, c = next_char(i)
+            # Parse the precision (optional).
+            if c == '.':
+                i, c = next_char(i)
+                if c == '*':
+                    num_args += 1
+                    i, c = next_char(i)
+                else:
+                    while c in string.digits:
+                        i, c = next_char(i)
+            # Parse the length modifier (optional).
+            if c in 'hlL':
+                i, c = next_char(i)
+            # Parse the conversion type (mandatory).
+            if c not in 'diouxXeEfFgGcrs%':
+                raise UnsupportedFormatCharacterException(i)
+            if key is not None:  # '%()d' names the empty-string key
+                keys.add(key)
+            elif c != '%':
+                num_args += 1
+        i += 1
+    return keys, num_args
+
+class StringFormatChecker(BaseChecker):
+    """Checks string formatting operations to ensure that the format string
+    is valid and the arguments match the format string.
+    """
+    __implements__ = (IASTNGChecker,)
+    name = 'string_format'
+    msgs = MSGS
+    def visit_binop(self, node):
+        """Check a `fmt % args` node whose left operand is a string constant."""
+        if node.op != '%':
+            return
+        f = node.left
+        args = node.right
+        if isinstance(f, astng.Const) and isinstance(f.value, basestring):
+            format_string = f.value
+            try:
+                required_keys, required_num_args = \
+                    parse_format_string(format_string)
+            except UnsupportedFormatCharacterException, e:
+                c = format_string[e.index]
+                self.add_message('E9900', node=node, args=(c, ord(c), e.index))
+            except IncompleteFormatStringException:
+                self.add_message('E9901', node=node)
+            else:
+                if required_keys and required_num_args:
+                    # The format string uses both named and unnamed format
+                    # specifiers.
+                    self.add_message('E9902', node=node)
+                elif required_keys:
+                    # The format string uses only named format specifiers.
+                    # Check that the RHS of the % operator is a mapping object
+                    # that contains precisely the set of keys required by the
+                    # format string.
+                    if isinstance(args, astng.Dict):
+                        keys = set()
+                        unknown_keys = False
+                        for k, v in args.items:
+                            if isinstance(k, astng.Const):
+                                key = k.value
+                                if isinstance(key, basestring):
+                                    keys.add(key)
+                                else:
+                                    self.add_message('W9900',
+                                                     node=node,
+                                                     args=key)
+                            else:
+                                # One of the keys was something other than a
+                                # constant. Since we can't tell what it is,
+                                # suppress checks for missing keys in the
+                                # dictionary.
+                                unknown_keys = True
+                        if not unknown_keys:
+                            for key in required_keys:
+                                if key not in keys:
+                                    self.add_message('E9904',
+                                                     node=node,
+                                                     args=key)
+                        for key in keys:
+                            if key not in required_keys:
+                                self.add_message('W9901', node=node, args=key)
+                    elif (isinstance(args, astng.Const) or
+                          isinstance(args, astng.Tuple) or
+                          isinstance(args, astng.List) or
+                          isinstance(args, astng.ListComp) or
+                          isinstance(args, astng.GenExpr) or
+                          isinstance(args, astng.Backquote) or
+                          isinstance(args, astng.Lambda)):
+                        type_name = type(args).__name__
+                        self.add_message('E9903', node=node, args=type_name)
+                    else:
+                        # The RHS of the format specifier is a name or
+                        # expression. It may be a mapping object, so
+                        # there's nothing we can check.
+                        pass
+                else:
+                    # The format string uses only unnamed format specifiers.
+                    # Check that the number of arguments passed to the RHS of
+                    # the % operator matches the number required by the format
+                    # string.
+                    if isinstance(args, astng.Tuple):
+                        num_args = len(args.elts)
+                    elif (isinstance(args, astng.Const) or
+                          isinstance(args, astng.Dict) or
+                          isinstance(args, astng.List) or
+                          isinstance(args, astng.ListComp) or
+                          isinstance(args, astng.GenExpr) or
+                          isinstance(args, astng.Backquote) or
+                          isinstance(args, astng.Lambda) or
+                          isinstance(args, astng.Function)):
+                        num_args = 1
+                    else:
+                        # The RHS of the format specifier is a name or
+                        # expression. It could be a tuple of unknown size, so
+                        # there's nothing we can check.
+                        num_args = None
+                    if num_args is not None:
+                        if num_args > required_num_args:
+                            self.add_message('E9905', node=node)
+                        elif num_args < required_num_args:
+                            self.add_message('E9906', node=node)
+
+
+def register(linter):
+    """required method to auto register this checker """
+    linter.register_checker(StringFormatChecker(linter))
diff --git a/test/input/func_e99xx.py b/test/input/func_e99xx.py
new file mode 100644
index 000000000..427eb71e9
--- /dev/null
+++ b/test/input/func_e99xx.py
@@ -0,0 +1,21 @@
+"""test string format error
+"""
+
+__revision__ = 1
+
+PARG_1 = PARG_2 = PARG_3 = 1
+
+def pprint():
+    """Test string format
+    """
+    print "%s %s" % {'PARG_1': 1, 'PARG_2': 2} # E9906
+    print "%s" % (PARG_1, PARG_2) # E9905
+    print "%(PARG_1)d %d" % {'PARG_1': 1, 'PARG_2': 2} # E9902
+    print "%(PARG_1)d %(PARG_2)d" % {'PARG_1': 1} # E9904
+    print "%(PARG_1)d %(PARG_2)d" % {'PARG_1': 1, 'PARG_2':2, 'PARG_3':3} # W9901
+    print "%(PARG_1)d %(PARG_2)d" % {'PARG_1': 1, 2:3} # W9900 E9904
+    print "%(PARG_1)d %(PARG_2)d" % (2, 3) # E9903
+    print "%(PARG_1)d %(PARG_2)d" % [2, 3] # E9903
+    print "%2z" % PARG_1
+    print "strange format %2" % PARG_2
+
diff --git a/test/messages/func_e99xx.txt b/test/messages/func_e99xx.txt
new file mode 100644
index 000000000..b0754b9f7
--- /dev/null
+++ b/test/messages/func_e99xx.txt
@@ -0,0 +1,13 @@
+C: 15: Line too long (81/80)
+E: 11:pprint: Not enough arguments for format string
+E: 12:pprint: Too many arguments for format string
+E: 13:pprint: Mixing named and unnamed conversion specifiers in format string
+E: 14:pprint: Missing key 'PARG_2' in format string dictionary
+E: 16:pprint: Missing key 'PARG_2' in format string dictionary
+E: 17:pprint: Expected mapping for format string, not Tuple
+E: 18:pprint: Expected mapping for format string, not List
+E: 19:pprint: Unsupported format character 'z' (0x7a) at index 2
+E: 20:pprint: Format string ends in middle of conversion specifier
+W: 15:pprint: Unused key 'PARG_3' in format string dictionary
+W: 16:pprint: Format string dictionary key should be a string, not 2
+
diff --git a/test/unittest_lint.py b/test/unittest_lint.py
index 9b50c36ac..1820a7cca 100644
--- a/test/unittest_lint.py
+++ b/test/unittest_lint.py
@@ -220,7 +220,7 @@ class PyLinterTC(TestCase):
                                   if c.is_enabled()]),
                          ['basic', 'classes', 'exceptions', 'format', 'imports',
                           'logging', 'master', 'metrics', 'miscellaneous', 'newstyle',
-                          'similarities', 'typecheck', 'variables'])
+                          'similarities', 'string_format', 'typecheck', 'variables'])
 
     def test_enable_checkers2(self):
         self.linter.enable_checkers(['design'], True)