summaryrefslogtreecommitdiff
path: root/pylint/checkers/non_ascii_names.py
blob: 455e575ad99836dfef33927b213085fdcc9c43e6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
# Copyright (c) https://github.com/PyCQA/pylint/blob/main/CONTRIBUTORS.txt

"""All alphanumeric unicode character are allowed in Python but due
to similarities in how they look they can be confused.

See: https://peps.python.org/pep-0672/#confusing-features

The following checkers are intended to make users are aware of these issues.
"""

from __future__ import annotations

from astroid import nodes

from pylint import constants, interfaces, lint
from pylint.checkers import base_checker, utils

NON_ASCII_HELP = (
    "Used when the name contains at least one non-ASCII unicode character. "
    "See https://peps.python.org/pep-0672/#confusing-features"
    " for a background why this could be bad. \n"
    "If your programming guideline defines that you are programming in "
    "English, then there should be no need for non ASCII characters in "
    "Python Names. If not you can simply disable this check."
)


class NonAsciiNameChecker(base_checker.BaseChecker):
    """A strict name checker only allowing ASCII.

    Note: This check only checks Names, so it ignores the content of
          docstrings and comments!
    """

    msgs = {
        "C2401": (
            '%s name "%s" contains a non-ASCII character, consider renaming it.',
            "non-ascii-name",
            NON_ASCII_HELP,
            {"old_names": [("C0144", "old-non-ascii-name")]},
        ),
        # First %s will always be "file"
        "W2402": (
            '%s name "%s" contains a non-ASCII character.',
            "non-ascii-file-name",
            (
                # Some = PyCharm at the time of writing didn't display the non_ascii_name_loł
                # files. That's also why this is a warning and not only a convention!
                "Under python 3.5, PEP 3131 allows non-ascii identifiers, but not non-ascii file names."
                "Since Python 3.5, even though Python supports UTF-8 files, some editors or tools "
                "don't."
            ),
        ),
        # First %s will always be "module"
        "C2403": (
            '%s name "%s" contains a non-ASCII character, use an ASCII-only alias for import.',
            "non-ascii-module-import",
            NON_ASCII_HELP,
        ),
    }

    name = "NonASCII-Checker"

    def _check_name(self, node_type: str, name: str | None, node: nodes.NodeNG) -> None:
        """Check whether a name is using non-ASCII characters."""

        if name is None:
            # For some nodes i.e. *kwargs from a dict, the name will be empty
            return

        if not str(name).isascii():
            type_label = constants.HUMAN_READABLE_TYPES[node_type]
            args = (type_label.capitalize(), name)

            msg = "non-ascii-name"

            # Some node types have customized messages
            if node_type == "file":
                msg = "non-ascii-file-name"
            elif node_type == "module":
                msg = "non-ascii-module-import"

            self.add_message(msg, node=node, args=args, confidence=interfaces.HIGH)

    @utils.only_required_for_messages("non-ascii-name", "non-ascii-file-name")
    def visit_module(self, node: nodes.Module) -> None:
        self._check_name("file", node.name.split(".")[-1], node)

    @utils.only_required_for_messages("non-ascii-name")
    def visit_functiondef(
        self, node: nodes.FunctionDef | nodes.AsyncFunctionDef
    ) -> None:
        self._check_name("function", node.name, node)

        # Check argument names
        arguments = node.args

        # Check position only arguments
        if arguments.posonlyargs:
            for pos_only_arg in arguments.posonlyargs:
                self._check_name("argument", pos_only_arg.name, pos_only_arg)

        # Check "normal" arguments
        if arguments.args:
            for arg in arguments.args:
                self._check_name("argument", arg.name, arg)

        # Check key word only arguments
        if arguments.kwonlyargs:
            for kwarg in arguments.kwonlyargs:
                self._check_name("argument", kwarg.name, kwarg)

    visit_asyncfunctiondef = visit_functiondef

    @utils.only_required_for_messages("non-ascii-name")
    def visit_global(self, node: nodes.Global) -> None:
        for name in node.names:
            self._check_name("const", name, node)

    @utils.only_required_for_messages("non-ascii-name")
    def visit_assignname(self, node: nodes.AssignName) -> None:
        """Check module level assigned names."""
        # The NameChecker from which this Checker originates knows a lot of different
        # versions of variables, i.e. constants, inline variables etc.
        # To simplify we use only `variable` here, as we don't need to apply different
        # rules to different types of variables.
        frame = node.frame()

        if isinstance(frame, nodes.FunctionDef):
            if node.parent in frame.body:
                # Only perform the check if the assignment was done in within the body
                # of the function (and not the function parameter definition
                # (will be handled in visit_functiondef)
                # or within a decorator (handled in visit_call)
                self._check_name("variable", node.name, node)
        elif isinstance(frame, nodes.ClassDef):
            self._check_name("attr", node.name, node)
        else:
            # Possibilities here:
            # - isinstance(node.assign_type(), nodes.Comprehension) == inlinevar
            # - isinstance(frame, nodes.Module) == variable (constant?)
            # - some other kind of assignment missed but still most likely a variable
            self._check_name("variable", node.name, node)

    @utils.only_required_for_messages("non-ascii-name")
    def visit_classdef(self, node: nodes.ClassDef) -> None:
        self._check_name("class", node.name, node)
        for attr, anodes in node.instance_attrs.items():
            if not any(node.instance_attr_ancestors(attr)):
                self._check_name("attr", attr, anodes[0])

    def _check_module_import(self, node: nodes.ImportFrom | nodes.Import) -> None:
        for module_name, alias in node.names:
            name = alias or module_name
            self._check_name("module", name, node)

    @utils.only_required_for_messages("non-ascii-name", "non-ascii-module-import")
    def visit_import(self, node: nodes.Import) -> None:
        self._check_module_import(node)

    @utils.only_required_for_messages("non-ascii-name", "non-ascii-module-import")
    def visit_importfrom(self, node: nodes.ImportFrom) -> None:
        self._check_module_import(node)

    @utils.only_required_for_messages("non-ascii-name")
    def visit_call(self, node: nodes.Call) -> None:
        """Check if the used keyword args are correct."""
        for keyword in node.keywords:
            self._check_name("argument", keyword.arg, keyword)


def register(linter: lint.PyLinter) -> None:
    linter.register_checker(NonAsciiNameChecker(linter))