# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html # For details: https://github.com/pylint-dev/pylint/blob/main/LICENSE # Copyright (c) https://github.com/pylint-dev/pylint/blob/main/CONTRIBUTORS.txt from __future__ import annotations import astroid from astroid import nodes from pylint import checkers from pylint.checkers import utils from pylint.interfaces import HIGH, INFERENCE class RecommendationChecker(checkers.BaseChecker): name = "refactoring" msgs = { "C0200": ( "Consider using enumerate instead of iterating with range and len", "consider-using-enumerate", "Emitted when code that iterates with range and len is " "encountered. Such code can be simplified by using the " "enumerate builtin.", ), "C0201": ( "Consider iterating the dictionary directly instead of calling .keys()", "consider-iterating-dictionary", "Emitted when the keys of a dictionary are iterated through the ``.keys()`` " "method or when ``.keys()`` is used for a membership check. " "It is enough to iterate through the dictionary itself, " "``for key in dictionary``. For membership checks, " "``if key in dictionary`` is faster.", ), "C0206": ( "Consider iterating with .items()", "consider-using-dict-items", "Emitted when iterating over the keys of a dictionary and accessing the " "value by index lookup. " "Both the key and value can be accessed by iterating using the .items() " "method of the dictionary instead.", ), "C0207": ( "Use %s instead", "use-maxsplit-arg", "Emitted when accessing only the first or last element of str.split(). " "The first and last element can be accessed by using " "str.split(sep, maxsplit=1)[0] or str.rsplit(sep, maxsplit=1)[-1] " "instead.", ), "C0208": ( "Use a sequence type when iterating over values", "use-sequence-for-iteration", "When iterating over values, sequence types (e.g., ``lists``, ``tuples``, ``ranges``) " "are more efficient than ``sets``.", ), "C0209": ( "Formatting a regular string which could be an f-string", "consider-using-f-string", "Used when we detect a string that is being formatted with format() or % " "which could potentially be an f-string. The use of f-strings is preferred. " "Requires Python 3.6 and ``py-version >= 3.6``.", ), } def open(self) -> None: py_version = self.linter.config.py_version self._py36_plus = py_version >= (3, 6) @staticmethod def _is_builtin(node: nodes.NodeNG, function: str) -> bool: inferred = utils.safe_infer(node) if not inferred: return False return utils.is_builtin_object(inferred) and inferred.name == function @utils.only_required_for_messages( "consider-iterating-dictionary", "use-maxsplit-arg" ) def visit_call(self, node: nodes.Call) -> None: self._check_consider_iterating_dictionary(node) self._check_use_maxsplit_arg(node) def _check_consider_iterating_dictionary(self, node: nodes.Call) -> None: if not isinstance(node.func, nodes.Attribute): return if node.func.attrname != "keys": return if isinstance(node.parent, nodes.BinOp) and node.parent.op in {"&", "|", "^"}: return comp_ancestor = utils.get_node_first_ancestor_of_type(node, nodes.Compare) if ( isinstance(node.parent, (nodes.For, nodes.Comprehension)) or comp_ancestor and any( op for op, comparator in comp_ancestor.ops if op in {"in", "not in"} and (comparator in node.node_ancestors() or comparator is node) ) ): inferred = utils.safe_infer(node.func) if not isinstance(inferred, astroid.BoundMethod) or not isinstance( inferred.bound, nodes.Dict ): return self.add_message( "consider-iterating-dictionary", node=node, confidence=INFERENCE ) def _check_use_maxsplit_arg(self, node: nodes.Call) -> None: """Add message when accessing first or last elements of a str.split() or str.rsplit(). """ # Check if call is split() or rsplit() if not ( isinstance(node.func, nodes.Attribute) and node.func.attrname in {"split", "rsplit"} and isinstance(utils.safe_infer(node.func), astroid.BoundMethod) ): return inferred_expr = utils.safe_infer(node.func.expr) if isinstance(inferred_expr, astroid.Instance) and any( inferred_expr.nodes_of_class(nodes.ClassDef) ): return try: sep = utils.get_argument_from_call(node, 0, "sep") except utils.NoSuchArgumentError: return try: # Ignore if maxsplit arg has been set utils.get_argument_from_call(node, 1, "maxsplit") return except utils.NoSuchArgumentError: pass if isinstance(node.parent, nodes.Subscript): try: subscript_value = utils.get_subscript_const_value(node.parent).value except utils.InferredTypeError: return # Check for cases where variable (Name) subscripts may be mutated within a loop if isinstance(node.parent.slice, nodes.Name): # Check if loop present within the scope of the node scope = node.scope() for loop_node in scope.nodes_of_class((nodes.For, nodes.While)): if not loop_node.parent_of(node): continue # Check if var is mutated within loop (Assign/AugAssign) for assignment_node in loop_node.nodes_of_class(nodes.AugAssign): if node.parent.slice.name == assignment_node.target.name: return for assignment_node in loop_node.nodes_of_class(nodes.Assign): if node.parent.slice.name in [ n.name for n in assignment_node.targets ]: return if subscript_value in (-1, 0): fn_name = node.func.attrname new_fn = "rsplit" if subscript_value == -1 else "split" new_name = ( node.func.as_string().rsplit(fn_name, maxsplit=1)[0] + new_fn + f"({sep.as_string()}, maxsplit=1)[{subscript_value}]" ) self.add_message("use-maxsplit-arg", node=node, args=(new_name,)) @utils.only_required_for_messages( "consider-using-enumerate", "consider-using-dict-items", "use-sequence-for-iteration", ) def visit_for(self, node: nodes.For) -> None: self._check_consider_using_enumerate(node) self._check_consider_using_dict_items(node) self._check_use_sequence_for_iteration(node) def _check_consider_using_enumerate(self, node: nodes.For) -> None: """Emit a convention whenever range and len are used for indexing.""" # Verify that we have a `range([start], len(...), [stop])` call and # that the object which is iterated is used as a subscript in the # body of the for. # Is it a proper range call? if not isinstance(node.iter, nodes.Call): return if not self._is_builtin(node.iter.func, "range"): return if not node.iter.args: return is_constant_zero = ( isinstance(node.iter.args[0], nodes.Const) and node.iter.args[0].value == 0 ) if len(node.iter.args) == 2 and not is_constant_zero: return if len(node.iter.args) > 2: return # Is it a proper len call? if not isinstance(node.iter.args[-1], nodes.Call): return second_func = node.iter.args[-1].func if not self._is_builtin(second_func, "len"): return len_args = node.iter.args[-1].args if not len_args or len(len_args) != 1: return iterating_object = len_args[0] if isinstance(iterating_object, nodes.Name): expected_subscript_val_type = nodes.Name elif isinstance(iterating_object, nodes.Attribute): expected_subscript_val_type = nodes.Attribute else: return # If we're defining __iter__ on self, enumerate won't work scope = node.scope() if ( isinstance(iterating_object, nodes.Name) and iterating_object.name == "self" and scope.name == "__iter__" ): return # Verify that the body of the for loop uses a subscript # with the object that was iterated. This uses some heuristics # in order to make sure that the same object is used in the # for body. for child in node.body: for subscript in child.nodes_of_class(nodes.Subscript): if not isinstance(subscript.value, expected_subscript_val_type): continue value = subscript.slice if not isinstance(value, nodes.Name): continue if subscript.value.scope() != node.scope(): # Ignore this subscript if it's not in the same # scope. This means that in the body of the for # loop, another scope was created, where the same # name for the iterating object was used. continue if value.name == node.target.name and ( isinstance(subscript.value, nodes.Name) and iterating_object.name == subscript.value.name or isinstance(subscript.value, nodes.Attribute) and iterating_object.attrname == subscript.value.attrname ): self.add_message("consider-using-enumerate", node=node) return def _check_consider_using_dict_items(self, node: nodes.For) -> None: """Add message when accessing dict values by index lookup.""" # Verify that we have a .keys() call and # that the object which is iterated is used as a subscript in the # body of the for. iterating_object_name = utils.get_iterating_dictionary_name(node) if iterating_object_name is None: return # Verify that the body of the for loop uses a subscript # with the object that was iterated. This uses some heuristics # in order to make sure that the same object is used in the # for body. for child in node.body: for subscript in child.nodes_of_class(nodes.Subscript): if not isinstance(subscript.value, (nodes.Name, nodes.Attribute)): continue value = subscript.slice if ( not isinstance(value, nodes.Name) or value.name != node.target.name or iterating_object_name != subscript.value.as_string() ): continue last_definition_lineno = value.lookup(value.name)[1][-1].lineno if last_definition_lineno > node.lineno: # Ignore this subscript if it has been redefined after # the for loop. This checks for the line number using .lookup() # to get the line number where the iterating object was last # defined and compare that to the for loop's line number continue if ( isinstance(subscript.parent, nodes.Assign) and subscript in subscript.parent.targets or isinstance(subscript.parent, nodes.AugAssign) and subscript == subscript.parent.target ): # Ignore this subscript if it is the target of an assignment # Early termination as dict index lookup is necessary return self.add_message("consider-using-dict-items", node=node) return @utils.only_required_for_messages( "consider-using-dict-items", "use-sequence-for-iteration", ) def visit_comprehension(self, node: nodes.Comprehension) -> None: self._check_consider_using_dict_items_comprehension(node) self._check_use_sequence_for_iteration(node) def _check_consider_using_dict_items_comprehension( self, node: nodes.Comprehension ) -> None: """Add message when accessing dict values by index lookup.""" iterating_object_name = utils.get_iterating_dictionary_name(node) if iterating_object_name is None: return for child in node.parent.get_children(): for subscript in child.nodes_of_class(nodes.Subscript): if not isinstance(subscript.value, (nodes.Name, nodes.Attribute)): continue value = subscript.slice if ( not isinstance(value, nodes.Name) or value.name != node.target.name or iterating_object_name != subscript.value.as_string() ): continue self.add_message("consider-using-dict-items", node=node) return def _check_use_sequence_for_iteration( self, node: nodes.For | nodes.Comprehension ) -> None: """Check if code iterates over an in-place defined set. Sets using `*` are not considered in-place. """ if isinstance(node.iter, nodes.Set) and not any( utils.has_starred_node_recursive(node) ): self.add_message( "use-sequence-for-iteration", node=node.iter, confidence=HIGH ) @utils.only_required_for_messages("consider-using-f-string") def visit_const(self, node: nodes.Const) -> None: if self._py36_plus: # f-strings require Python 3.6 if node.pytype() == "builtins.str" and not isinstance( node.parent, nodes.JoinedStr ): self._detect_replacable_format_call(node) def _detect_replacable_format_call(self, node: nodes.Const) -> None: """Check whether a string is used in a call to format() or '%' and whether it can be replaced by an f-string. """ if ( isinstance(node.parent, nodes.Attribute) and node.parent.attrname == "format" ): # Don't warn on referencing / assigning .format without calling it if not isinstance(node.parent.parent, nodes.Call): return if node.parent.parent.args: for arg in node.parent.parent.args: # If star expressions with more than 1 element are being used if isinstance(arg, nodes.Starred): inferred = utils.safe_infer(arg.value) if ( isinstance(inferred, astroid.List) and len(inferred.elts) > 1 ): return # Backslashes can't be in f-string expressions if "\\" in arg.as_string(): return elif node.parent.parent.keywords: keyword_args = [ i[0] for i in utils.parse_format_method_string(node.value)[0] ] for keyword in node.parent.parent.keywords: # If keyword is used multiple times if keyword_args.count(keyword.arg) > 1: return keyword = utils.safe_infer(keyword.value) # If lists of more than one element are being unpacked if isinstance(keyword, nodes.Dict): if len(keyword.items) > 1 and len(keyword_args) > 1: return # If all tests pass, then raise message self.add_message( "consider-using-f-string", node=node, line=node.lineno, col_offset=node.col_offset, ) elif isinstance(node.parent, nodes.BinOp) and node.parent.op == "%": # Backslashes can't be in f-string expressions if "\\" in node.parent.right.as_string(): return # If % applied to another type than str, it's modulo and can't be replaced by formatting if not hasattr(node.parent.left, "value") or not isinstance( node.parent.left.value, str ): return inferred_right = utils.safe_infer(node.parent.right) # If dicts or lists of length > 1 are used if isinstance(inferred_right, nodes.Dict): if len(inferred_right.items) > 1: return elif isinstance(inferred_right, nodes.List): if len(inferred_right.elts) > 1: return # If all tests pass, then raise message self.add_message( "consider-using-f-string", node=node, line=node.lineno, col_offset=node.col_offset, )