From 152ca182a603d7327925873db9a795797812968d Mon Sep 17 00:00:00 2001 From: geokala Date: Mon, 25 Jul 2016 02:15:29 +0300 Subject: Check for duplicate dictionary keys (#72) --- pyflakes/checker.py | 101 ++++++++++++++++++++- pyflakes/messages.py | 16 ++++ pyflakes/test/test_dict.py | 217 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 333 insertions(+), 1 deletion(-) create mode 100644 pyflakes/test/test_dict.py diff --git a/pyflakes/checker.py b/pyflakes/checker.py index 9f354fd..68b4e8f 100644 --- a/pyflakes/checker.py +++ b/pyflakes/checker.py @@ -81,6 +81,17 @@ class _FieldsOrder(dict): return fields +def counter(items): + """ + Simplest required implementation of collections.Counter. Required as 2.6 + does not have Counter in collections. + """ + results = {} + for item in items: + results[item] = results.get(item, 0) + 1 + return results + + def iter_child_nodes(node, omit=None, _fields_order=_FieldsOrder()): """ Yield all direct child nodes of *node*, that is, all fields that @@ -97,6 +108,33 @@ def iter_child_nodes(node, omit=None, _fields_order=_FieldsOrder()): yield item +def convert_to_value(item): + if isinstance(item, ast.Str): + return item.s + elif hasattr(ast, 'Bytes') and isinstance(item, ast.Bytes): + return item.s + elif isinstance(item, ast.Tuple): + return tuple(convert_to_value(i) for i in item.elts) + elif isinstance(item, ast.Num): + return item.n + elif isinstance(item, ast.Name): + result = VariableKey(item=item) + constants_lookup = { + 'True': True, + 'False': False, + 'None': None, + } + return constants_lookup.get( + result.name, + result, + ) + elif (not PY33) and isinstance(item, ast.NameConstant): + # None, True, False are nameconstants in python3, but names in 2 + return item.value + else: + return UnhandledKeyType() + + class Binding(object): """ Represents the binding of a value to a name. @@ -133,6 +171,31 @@ class Definition(Binding): """ +class UnhandledKeyType(object): + """ + A dictionary key of a type that we cannot or do not check for duplicates. + """ + + +class VariableKey(object): + """ + A dictionary key which is a variable. + + @ivar item: The variable AST object. + """ + def __init__(self, item): + self.name = item.id + + def __eq__(self, compare): + return ( + compare.__class__ == self.__class__ + and compare.name == self.name + ) + + def __hash__(self): + return hash(self.name) + + class Importation(Definition): """ A binding created by an import statement. @@ -855,7 +918,7 @@ class Checker(object): PASS = ignore # "expr" type nodes - BOOLOP = BINOP = UNARYOP = IFEXP = DICT = SET = \ + BOOLOP = BINOP = UNARYOP = IFEXP = SET = \ COMPARE = CALL = REPR = ATTRIBUTE = SUBSCRIPT = \ STARRED = NAMECONSTANT = handleChildren @@ -876,6 +939,42 @@ class Checker(object): # additional node types COMPREHENSION = KEYWORD = FORMATTEDVALUE = handleChildren + def DICT(self, node): + # Complain if there are duplicate keys with different values + # If they have the same value it's not going to cause potentially + # unexpected behaviour so we'll not complain. + keys = [ + convert_to_value(key) for key in node.keys + ] + + key_counts = counter(keys) + duplicate_keys = [ + key for key, count in key_counts.items() + if count > 1 + ] + + for key in duplicate_keys: + key_indices = [i for i, i_key in enumerate(keys) if i_key == key] + + values = counter( + convert_to_value(node.values[index]) + for index in key_indices + ) + if any(count == 1 for value, count in values.items()): + for key_index in key_indices: + key_node = node.keys[key_index] + if isinstance(key, VariableKey): + self.report(messages.MultiValueRepeatedKeyVariable, + key_node, + key.name) + else: + self.report( + messages.MultiValueRepeatedKeyLiteral, + key_node, + key, + ) + self.handleChildren(node) + def ASSERT(self, node): if isinstance(node.test, ast.Tuple) and node.test.elts != []: self.report(messages.AssertTuple, node) diff --git a/pyflakes/messages.py b/pyflakes/messages.py index 05db5bf..58bb6a2 100644 --- a/pyflakes/messages.py +++ b/pyflakes/messages.py @@ -116,6 +116,22 @@ class DuplicateArgument(Message): self.message_args = (name,) +class MultiValueRepeatedKeyLiteral(Message): + message = 'dictionary key %r repeated with different values' + + def __init__(self, filename, loc, key): + Message.__init__(self, filename, loc) + self.message_args = (key,) + + +class MultiValueRepeatedKeyVariable(Message): + message = 'dictionary key variable %s repeated with different values' + + def __init__(self, filename, loc, key): + Message.__init__(self, filename, loc) + self.message_args = (key,) + + class LateFutureImport(Message): message = 'from __future__ imports must occur at the beginning of the file' diff --git a/pyflakes/test/test_dict.py b/pyflakes/test/test_dict.py new file mode 100644 index 0000000..628ec0c --- /dev/null +++ b/pyflakes/test/test_dict.py @@ -0,0 +1,217 @@ +""" +Tests for dict duplicate keys Pyflakes behavior. +""" + +from sys import version_info + +from pyflakes import messages as m +from pyflakes.test.harness import TestCase, skipIf + + +class Test(TestCase): + + def test_duplicate_keys(self): + self.flakes( + "{'yes': 1, 'yes': 2}", + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + @skipIf(version_info < (3,), + "bytes and strings with same 'value' are not equal in python3") + @skipIf(version_info[0:2] == (3, 2), + "python3.2 does not allow u"" literal string definition") + def test_duplicate_keys_bytes_vs_unicode_py3(self): + self.flakes("{b'a': 1, u'a': 2}") + + @skipIf(version_info < (3,), + "bytes and strings with same 'value' are not equal in python3") + @skipIf(version_info[0:2] == (3, 2), + "python3.2 does not allow u"" literal string definition") + def test_duplicate_values_bytes_vs_unicode_py3(self): + self.flakes( + "{1: b'a', 1: u'a'}", + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + @skipIf(version_info >= (3,), + "bytes and strings with same 'value' are equal in python2") + def test_duplicate_keys_bytes_vs_unicode_py2(self): + self.flakes( + "{b'a': 1, u'a': 2}", + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + @skipIf(version_info >= (3,), + "bytes and strings with same 'value' are equal in python2") + def test_duplicate_values_bytes_vs_unicode_py2(self): + self.flakes("{1: b'a', 1: u'a'}") + + def test_multiple_duplicate_keys(self): + self.flakes( + "{'yes': 1, 'yes': 2, 'no': 2, 'no': 3}", + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + def test_duplicate_keys_in_function(self): + self.flakes( + ''' + def f(thing): + pass + f({'yes': 1, 'yes': 2}) + ''', + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + def test_duplicate_keys_in_lambda(self): + self.flakes( + "lambda x: {(0,1): 1, (0,1): 2}", + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + def test_duplicate_keys_tuples(self): + self.flakes( + "{(0,1): 1, (0,1): 2}", + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + def test_duplicate_keys_tuples_int_and_float(self): + self.flakes( + "{(0,1): 1, (0,1.0): 2}", + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + def test_duplicate_keys_ints(self): + self.flakes( + "{1: 1, 1: 2}", + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + def test_duplicate_keys_bools(self): + self.flakes( + "{True: 1, True: 2}", + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + def test_duplicate_keys_bools_false(self): + # Needed to ensure 2.x correctly coerces these from variables + self.flakes( + "{False: 1, False: 2}", + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + def test_duplicate_keys_none(self): + self.flakes( + "{None: 1, None: 2}", + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + def test_duplicate_variable_keys(self): + self.flakes( + ''' + a = 1 + {a: 1, a: 2} + ''', + m.MultiValueRepeatedKeyVariable, + m.MultiValueRepeatedKeyVariable, + ) + + def test_duplicate_variable_values(self): + self.flakes( + ''' + a = 1 + b = 2 + {1: a, 1: b} + ''', + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + def test_duplicate_variable_values_same_value(self): + # Current behaviour is not to look up variable values. This is to + # confirm that. + self.flakes( + ''' + a = 1 + b = 1 + {1: a, 1: b} + ''', + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + def test_duplicate_key_float_and_int(self): + """ + These do look like different values, but when it comes to their use as + keys, they compare as equal and so are actually duplicates. + The literal dict {1: 1, 1.0: 1} actually becomes {1.0: 1}. + """ + self.flakes( + ''' + {1: 1, 1.0: 2} + ''', + m.MultiValueRepeatedKeyLiteral, + m.MultiValueRepeatedKeyLiteral, + ) + + def test_no_duplicate_key_error_same_value(self): + self.flakes(''' + {'yes': 1, 'yes': 1} + ''') + + def test_no_duplicate_key_errors(self): + self.flakes(''' + {'yes': 1, 'no': 2} + ''') + + def test_no_duplicate_keys_tuples_same_first_element(self): + self.flakes("{(0,1): 1, (0,2): 1}") + + def test_no_duplicate_key_errors_func_call(self): + self.flakes(''' + def test(thing): + pass + test({True: 1, None: 2, False: 1}) + ''') + + def test_no_duplicate_key_errors_bool_or_none(self): + self.flakes("{True: 1, None: 2, False: 1}") + + def test_no_duplicate_key_errors_ints(self): + self.flakes(''' + {1: 1, 2: 1} + ''') + + def test_no_duplicate_key_errors_vars(self): + self.flakes(''' + test = 'yes' + rest = 'yes' + {test: 1, rest: 2} + ''') + + def test_no_duplicate_key_errors_tuples(self): + self.flakes(''' + {(0,1): 1, (0,2): 1} + ''') + + def test_no_duplicate_key_errors_instance_attributes(self): + self.flakes(''' + class Test(): + pass + f = Test() + f.a = 1 + {f.a: 1, f.a: 1} + ''') -- cgit v1.2.1