non_ascii_names.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  2. # For details: https://github.com/pylint-dev/pylint/blob/main/LICENSE
  3. # Copyright (c) https://github.com/pylint-dev/pylint/blob/main/CONTRIBUTORS.txt
"""All alphanumeric unicode characters are allowed in Python but due
to similarities in how they look they can be confused.
See: https://peps.python.org/pep-0672/#confusing-features
The following checkers are intended to make users aware of these issues.
"""
  9. from __future__ import annotations
  10. from astroid import nodes
  11. from pylint import constants, interfaces, lint
  12. from pylint.checkers import base_checker, utils
  13. NON_ASCII_HELP = (
  14. "Used when the name contains at least one non-ASCII unicode character. "
  15. "See https://peps.python.org/pep-0672/#confusing-features"
  16. " for a background why this could be bad. \n"
  17. "If your programming guideline defines that you are programming in "
  18. "English, then there should be no need for non ASCII characters in "
  19. "Python Names. If not you can simply disable this check."
  20. )
  21. class NonAsciiNameChecker(base_checker.BaseChecker):
  22. """A strict name checker only allowing ASCII.
  23. Note: This check only checks Names, so it ignores the content of
  24. docstrings and comments!
  25. """
  26. msgs = {
  27. "C2401": (
  28. '%s name "%s" contains a non-ASCII character, consider renaming it.',
  29. "non-ascii-name",
  30. NON_ASCII_HELP,
  31. {"old_names": [("C0144", "old-non-ascii-name")]},
  32. ),
  33. # First %s will always be "file"
  34. "W2402": (
  35. '%s name "%s" contains a non-ASCII character.',
  36. "non-ascii-file-name",
  37. (
  38. # Some = PyCharm at the time of writing didn't display the non_ascii_name_loł
  39. # files. That's also why this is a warning and not only a convention!
  40. "Under python 3.5, PEP 3131 allows non-ascii identifiers, but not non-ascii file names."
  41. "Since Python 3.5, even though Python supports UTF-8 files, some editors or tools "
  42. "don't."
  43. ),
  44. ),
  45. # First %s will always be "module"
  46. "C2403": (
  47. '%s name "%s" contains a non-ASCII character, use an ASCII-only alias for import.',
  48. "non-ascii-module-import",
  49. NON_ASCII_HELP,
  50. ),
  51. }
  52. name = "NonASCII-Checker"
  53. def _check_name(self, node_type: str, name: str | None, node: nodes.NodeNG) -> None:
  54. """Check whether a name is using non-ASCII characters."""
  55. if name is None:
  56. # For some nodes i.e. *kwargs from a dict, the name will be empty
  57. return
  58. if not str(name).isascii():
  59. type_label = constants.HUMAN_READABLE_TYPES[node_type]
  60. args = (type_label.capitalize(), name)
  61. msg = "non-ascii-name"
  62. # Some node types have customized messages
  63. if node_type == "file":
  64. msg = "non-ascii-file-name"
  65. elif node_type == "module":
  66. msg = "non-ascii-module-import"
  67. self.add_message(msg, node=node, args=args, confidence=interfaces.HIGH)
  68. @utils.only_required_for_messages("non-ascii-name", "non-ascii-file-name")
  69. def visit_module(self, node: nodes.Module) -> None:
  70. self._check_name("file", node.name.split(".")[-1], node)
  71. @utils.only_required_for_messages("non-ascii-name")
  72. def visit_functiondef(
  73. self, node: nodes.FunctionDef | nodes.AsyncFunctionDef
  74. ) -> None:
  75. self._check_name("function", node.name, node)
  76. # Check argument names
  77. arguments = node.args
  78. # Check position only arguments
  79. if arguments.posonlyargs:
  80. for pos_only_arg in arguments.posonlyargs:
  81. self._check_name("argument", pos_only_arg.name, pos_only_arg)
  82. # Check "normal" arguments
  83. if arguments.args:
  84. for arg in arguments.args:
  85. self._check_name("argument", arg.name, arg)
  86. # Check key word only arguments
  87. if arguments.kwonlyargs:
  88. for kwarg in arguments.kwonlyargs:
  89. self._check_name("argument", kwarg.name, kwarg)
  90. visit_asyncfunctiondef = visit_functiondef
  91. @utils.only_required_for_messages("non-ascii-name")
  92. def visit_global(self, node: nodes.Global) -> None:
  93. for name in node.names:
  94. self._check_name("const", name, node)
  95. @utils.only_required_for_messages("non-ascii-name")
  96. def visit_assignname(self, node: nodes.AssignName) -> None:
  97. """Check module level assigned names."""
  98. # The NameChecker from which this Checker originates knows a lot of different
  99. # versions of variables, i.e. constants, inline variables etc.
  100. # To simplify we use only `variable` here, as we don't need to apply different
  101. # rules to different types of variables.
  102. frame = node.frame()
  103. if isinstance(frame, nodes.FunctionDef):
  104. if node.parent in frame.body:
  105. # Only perform the check if the assignment was done in within the body
  106. # of the function (and not the function parameter definition
  107. # (will be handled in visit_functiondef)
  108. # or within a decorator (handled in visit_call)
  109. self._check_name("variable", node.name, node)
  110. elif isinstance(frame, nodes.ClassDef):
  111. self._check_name("attr", node.name, node)
  112. else:
  113. # Possibilities here:
  114. # - isinstance(node.assign_type(), nodes.Comprehension) == inlinevar
  115. # - isinstance(frame, nodes.Module) == variable (constant?)
  116. # - some other kind of assignment missed but still most likely a variable
  117. self._check_name("variable", node.name, node)
  118. @utils.only_required_for_messages("non-ascii-name")
  119. def visit_classdef(self, node: nodes.ClassDef) -> None:
  120. self._check_name("class", node.name, node)
  121. for attr, anodes in node.instance_attrs.items():
  122. if not any(node.instance_attr_ancestors(attr)):
  123. self._check_name("attr", attr, anodes[0])
  124. def _check_module_import(self, node: nodes.ImportFrom | nodes.Import) -> None:
  125. for module_name, alias in node.names:
  126. name = alias or module_name
  127. self._check_name("module", name, node)
  128. @utils.only_required_for_messages("non-ascii-name", "non-ascii-module-import")
  129. def visit_import(self, node: nodes.Import) -> None:
  130. self._check_module_import(node)
  131. @utils.only_required_for_messages("non-ascii-name", "non-ascii-module-import")
  132. def visit_importfrom(self, node: nodes.ImportFrom) -> None:
  133. self._check_module_import(node)
  134. @utils.only_required_for_messages("non-ascii-name")
  135. def visit_call(self, node: nodes.Call) -> None:
  136. """Check if the used keyword args are correct."""
  137. for keyword in node.keywords:
  138. self._check_name("argument", keyword.arg, keyword)
  139. def register(linter: lint.PyLinter) -> None:
  140. linter.register_checker(NonAsciiNameChecker(linter))