recommendation_checker.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  2. # For details: https://github.com/pylint-dev/pylint/blob/main/LICENSE
  3. # Copyright (c) https://github.com/pylint-dev/pylint/blob/main/CONTRIBUTORS.txt
  4. from __future__ import annotations
  5. import astroid
  6. from astroid import nodes
  7. from pylint import checkers
  8. from pylint.checkers import utils
  9. from pylint.interfaces import HIGH, INFERENCE
class RecommendationChecker(checkers.BaseChecker):
    # Checker name; shared by every message declared in ``msgs`` below.
    name = "refactoring"
    # Message table keyed by message id. Each value is a 3-tuple of
    # (message text, symbolic name, long description). The symbolic name is
    # the string the methods of this class pass to ``self.add_message``; a
    # ``%s`` in the message text (see C0207) is filled from the ``args``
    # given at emission time.
    msgs = {
        "C0200": (
            "Consider using enumerate instead of iterating with range and len",
            "consider-using-enumerate",
            "Emitted when code that iterates with range and len is "
            "encountered. Such code can be simplified by using the "
            "enumerate builtin.",
        ),
        "C0201": (
            "Consider iterating the dictionary directly instead of calling .keys()",
            "consider-iterating-dictionary",
            "Emitted when the keys of a dictionary are iterated through the ``.keys()`` "
            "method or when ``.keys()`` is used for a membership check. "
            "It is enough to iterate through the dictionary itself, "
            "``for key in dictionary``. For membership checks, "
            "``if key in dictionary`` is faster.",
        ),
        "C0206": (
            "Consider iterating with .items()",
            "consider-using-dict-items",
            "Emitted when iterating over the keys of a dictionary and accessing the "
            "value by index lookup. "
            "Both the key and value can be accessed by iterating using the .items() "
            "method of the dictionary instead.",
        ),
        "C0207": (
            "Use %s instead",
            "use-maxsplit-arg",
            "Emitted when accessing only the first or last element of str.split(). "
            "The first and last element can be accessed by using "
            "str.split(sep, maxsplit=1)[0] or str.rsplit(sep, maxsplit=1)[-1] "
            "instead.",
        ),
        "C0208": (
            "Use a sequence type when iterating over values",
            "use-sequence-for-iteration",
            "When iterating over values, sequence types (e.g., ``lists``, ``tuples``, ``ranges``) "
            "are more efficient than ``sets``.",
        ),
        # Only emitted when ``open`` has recorded ``py-version >= 3.6``.
        "C0209": (
            "Formatting a regular string which could be a f-string",
            "consider-using-f-string",
            "Used when we detect a string that is being formatted with format() or % "
            "which could potentially be a f-string. The use of f-strings is preferred. "
            "Requires Python 3.6 and ``py-version >= 3.6``.",
        ),
    }
  59. def open(self) -> None:
  60. py_version = self.linter.config.py_version
  61. self._py36_plus = py_version >= (3, 6)
  62. @staticmethod
  63. def _is_builtin(node: nodes.NodeNG, function: str) -> bool:
  64. inferred = utils.safe_infer(node)
  65. if not inferred:
  66. return False
  67. return utils.is_builtin_object(inferred) and inferred.name == function
  68. @utils.only_required_for_messages(
  69. "consider-iterating-dictionary", "use-maxsplit-arg"
  70. )
  71. def visit_call(self, node: nodes.Call) -> None:
  72. self._check_consider_iterating_dictionary(node)
  73. self._check_use_maxsplit_arg(node)
  74. def _check_consider_iterating_dictionary(self, node: nodes.Call) -> None:
  75. if not isinstance(node.func, nodes.Attribute):
  76. return
  77. if node.func.attrname != "keys":
  78. return
  79. if isinstance(node.parent, nodes.BinOp) and node.parent.op in {"&", "|", "^"}:
  80. return
  81. comp_ancestor = utils.get_node_first_ancestor_of_type(node, nodes.Compare)
  82. if (
  83. isinstance(node.parent, (nodes.For, nodes.Comprehension))
  84. or comp_ancestor
  85. and any(
  86. op
  87. for op, comparator in comp_ancestor.ops
  88. if op in {"in", "not in"}
  89. and (comparator in node.node_ancestors() or comparator is node)
  90. )
  91. ):
  92. inferred = utils.safe_infer(node.func)
  93. if not isinstance(inferred, astroid.BoundMethod) or not isinstance(
  94. inferred.bound, nodes.Dict
  95. ):
  96. return
  97. self.add_message(
  98. "consider-iterating-dictionary", node=node, confidence=INFERENCE
  99. )
  100. def _check_use_maxsplit_arg(self, node: nodes.Call) -> None:
  101. """Add message when accessing first or last elements of a str.split() or
  102. str.rsplit().
  103. """
  104. # Check if call is split() or rsplit()
  105. if not (
  106. isinstance(node.func, nodes.Attribute)
  107. and node.func.attrname in {"split", "rsplit"}
  108. and isinstance(utils.safe_infer(node.func), astroid.BoundMethod)
  109. ):
  110. return
  111. inferred_expr = utils.safe_infer(node.func.expr)
  112. if isinstance(inferred_expr, astroid.Instance) and any(
  113. inferred_expr.nodes_of_class(nodes.ClassDef)
  114. ):
  115. return
  116. try:
  117. sep = utils.get_argument_from_call(node, 0, "sep")
  118. except utils.NoSuchArgumentError:
  119. return
  120. try:
  121. # Ignore if maxsplit arg has been set
  122. utils.get_argument_from_call(node, 1, "maxsplit")
  123. return
  124. except utils.NoSuchArgumentError:
  125. pass
  126. if isinstance(node.parent, nodes.Subscript):
  127. try:
  128. subscript_value = utils.get_subscript_const_value(node.parent).value
  129. except utils.InferredTypeError:
  130. return
  131. # Check for cases where variable (Name) subscripts may be mutated within a loop
  132. if isinstance(node.parent.slice, nodes.Name):
  133. # Check if loop present within the scope of the node
  134. scope = node.scope()
  135. for loop_node in scope.nodes_of_class((nodes.For, nodes.While)):
  136. if not loop_node.parent_of(node):
  137. continue
  138. # Check if var is mutated within loop (Assign/AugAssign)
  139. for assignment_node in loop_node.nodes_of_class(nodes.AugAssign):
  140. if node.parent.slice.name == assignment_node.target.name:
  141. return
  142. for assignment_node in loop_node.nodes_of_class(nodes.Assign):
  143. if node.parent.slice.name in [
  144. n.name for n in assignment_node.targets
  145. ]:
  146. return
  147. if subscript_value in (-1, 0):
  148. fn_name = node.func.attrname
  149. new_fn = "rsplit" if subscript_value == -1 else "split"
  150. new_name = (
  151. node.func.as_string().rsplit(fn_name, maxsplit=1)[0]
  152. + new_fn
  153. + f"({sep.as_string()}, maxsplit=1)[{subscript_value}]"
  154. )
  155. self.add_message("use-maxsplit-arg", node=node, args=(new_name,))
  156. @utils.only_required_for_messages(
  157. "consider-using-enumerate",
  158. "consider-using-dict-items",
  159. "use-sequence-for-iteration",
  160. )
  161. def visit_for(self, node: nodes.For) -> None:
  162. self._check_consider_using_enumerate(node)
  163. self._check_consider_using_dict_items(node)
  164. self._check_use_sequence_for_iteration(node)
  165. def _check_consider_using_enumerate(self, node: nodes.For) -> None:
  166. """Emit a convention whenever range and len are used for indexing."""
  167. # Verify that we have a `range([start], len(...), [stop])` call and
  168. # that the object which is iterated is used as a subscript in the
  169. # body of the for.
  170. # Is it a proper range call?
  171. if not isinstance(node.iter, nodes.Call):
  172. return
  173. if not self._is_builtin(node.iter.func, "range"):
  174. return
  175. if not node.iter.args:
  176. return
  177. is_constant_zero = (
  178. isinstance(node.iter.args[0], nodes.Const) and node.iter.args[0].value == 0
  179. )
  180. if len(node.iter.args) == 2 and not is_constant_zero:
  181. return
  182. if len(node.iter.args) > 2:
  183. return
  184. # Is it a proper len call?
  185. if not isinstance(node.iter.args[-1], nodes.Call):
  186. return
  187. second_func = node.iter.args[-1].func
  188. if not self._is_builtin(second_func, "len"):
  189. return
  190. len_args = node.iter.args[-1].args
  191. if not len_args or len(len_args) != 1:
  192. return
  193. iterating_object = len_args[0]
  194. if isinstance(iterating_object, nodes.Name):
  195. expected_subscript_val_type = nodes.Name
  196. elif isinstance(iterating_object, nodes.Attribute):
  197. expected_subscript_val_type = nodes.Attribute
  198. else:
  199. return
  200. # If we're defining __iter__ on self, enumerate won't work
  201. scope = node.scope()
  202. if (
  203. isinstance(iterating_object, nodes.Name)
  204. and iterating_object.name == "self"
  205. and scope.name == "__iter__"
  206. ):
  207. return
  208. # Verify that the body of the for loop uses a subscript
  209. # with the object that was iterated. This uses some heuristics
  210. # in order to make sure that the same object is used in the
  211. # for body.
  212. for child in node.body:
  213. for subscript in child.nodes_of_class(nodes.Subscript):
  214. if not isinstance(subscript.value, expected_subscript_val_type):
  215. continue
  216. value = subscript.slice
  217. if not isinstance(value, nodes.Name):
  218. continue
  219. if subscript.value.scope() != node.scope():
  220. # Ignore this subscript if it's not in the same
  221. # scope. This means that in the body of the for
  222. # loop, another scope was created, where the same
  223. # name for the iterating object was used.
  224. continue
  225. if value.name == node.target.name and (
  226. isinstance(subscript.value, nodes.Name)
  227. and iterating_object.name == subscript.value.name
  228. or isinstance(subscript.value, nodes.Attribute)
  229. and iterating_object.attrname == subscript.value.attrname
  230. ):
  231. self.add_message("consider-using-enumerate", node=node)
  232. return
  233. def _check_consider_using_dict_items(self, node: nodes.For) -> None:
  234. """Add message when accessing dict values by index lookup."""
  235. # Verify that we have a .keys() call and
  236. # that the object which is iterated is used as a subscript in the
  237. # body of the for.
  238. iterating_object_name = utils.get_iterating_dictionary_name(node)
  239. if iterating_object_name is None:
  240. return
  241. # Verify that the body of the for loop uses a subscript
  242. # with the object that was iterated. This uses some heuristics
  243. # in order to make sure that the same object is used in the
  244. # for body.
  245. for child in node.body:
  246. for subscript in child.nodes_of_class(nodes.Subscript):
  247. if not isinstance(subscript.value, (nodes.Name, nodes.Attribute)):
  248. continue
  249. value = subscript.slice
  250. if (
  251. not isinstance(value, nodes.Name)
  252. or value.name != node.target.name
  253. or iterating_object_name != subscript.value.as_string()
  254. ):
  255. continue
  256. last_definition_lineno = value.lookup(value.name)[1][-1].lineno
  257. if last_definition_lineno > node.lineno:
  258. # Ignore this subscript if it has been redefined after
  259. # the for loop. This checks for the line number using .lookup()
  260. # to get the line number where the iterating object was last
  261. # defined and compare that to the for loop's line number
  262. continue
  263. if (
  264. isinstance(subscript.parent, nodes.Assign)
  265. and subscript in subscript.parent.targets
  266. or isinstance(subscript.parent, nodes.AugAssign)
  267. and subscript == subscript.parent.target
  268. ):
  269. # Ignore this subscript if it is the target of an assignment
  270. # Early termination as dict index lookup is necessary
  271. return
  272. self.add_message("consider-using-dict-items", node=node)
  273. return
  274. @utils.only_required_for_messages(
  275. "consider-using-dict-items",
  276. "use-sequence-for-iteration",
  277. )
  278. def visit_comprehension(self, node: nodes.Comprehension) -> None:
  279. self._check_consider_using_dict_items_comprehension(node)
  280. self._check_use_sequence_for_iteration(node)
  281. def _check_consider_using_dict_items_comprehension(
  282. self, node: nodes.Comprehension
  283. ) -> None:
  284. """Add message when accessing dict values by index lookup."""
  285. iterating_object_name = utils.get_iterating_dictionary_name(node)
  286. if iterating_object_name is None:
  287. return
  288. for child in node.parent.get_children():
  289. for subscript in child.nodes_of_class(nodes.Subscript):
  290. if not isinstance(subscript.value, (nodes.Name, nodes.Attribute)):
  291. continue
  292. value = subscript.slice
  293. if (
  294. not isinstance(value, nodes.Name)
  295. or value.name != node.target.name
  296. or iterating_object_name != subscript.value.as_string()
  297. ):
  298. continue
  299. self.add_message("consider-using-dict-items", node=node)
  300. return
  301. def _check_use_sequence_for_iteration(
  302. self, node: nodes.For | nodes.Comprehension
  303. ) -> None:
  304. """Check if code iterates over an in-place defined set.
  305. Sets using `*` are not considered in-place.
  306. """
  307. if isinstance(node.iter, nodes.Set) and not any(
  308. utils.has_starred_node_recursive(node)
  309. ):
  310. self.add_message(
  311. "use-sequence-for-iteration", node=node.iter, confidence=HIGH
  312. )
  313. @utils.only_required_for_messages("consider-using-f-string")
  314. def visit_const(self, node: nodes.Const) -> None:
  315. if self._py36_plus:
  316. # f-strings require Python 3.6
  317. if node.pytype() == "builtins.str" and not isinstance(
  318. node.parent, nodes.JoinedStr
  319. ):
  320. self._detect_replacable_format_call(node)
  321. def _detect_replacable_format_call(self, node: nodes.Const) -> None:
  322. """Check whether a string is used in a call to format() or '%' and whether it
  323. can be replaced by an f-string.
  324. """
  325. if (
  326. isinstance(node.parent, nodes.Attribute)
  327. and node.parent.attrname == "format"
  328. ):
  329. # Don't warn on referencing / assigning .format without calling it
  330. if not isinstance(node.parent.parent, nodes.Call):
  331. return
  332. if node.parent.parent.args:
  333. for arg in node.parent.parent.args:
  334. # If star expressions with more than 1 element are being used
  335. if isinstance(arg, nodes.Starred):
  336. inferred = utils.safe_infer(arg.value)
  337. if (
  338. isinstance(inferred, astroid.List)
  339. and len(inferred.elts) > 1
  340. ):
  341. return
  342. # Backslashes can't be in f-string expressions
  343. if "\\" in arg.as_string():
  344. return
  345. elif node.parent.parent.keywords:
  346. keyword_args = [
  347. i[0] for i in utils.parse_format_method_string(node.value)[0]
  348. ]
  349. for keyword in node.parent.parent.keywords:
  350. # If keyword is used multiple times
  351. if keyword_args.count(keyword.arg) > 1:
  352. return
  353. keyword = utils.safe_infer(keyword.value)
  354. # If lists of more than one element are being unpacked
  355. if isinstance(keyword, nodes.Dict):
  356. if len(keyword.items) > 1 and len(keyword_args) > 1:
  357. return
  358. # If all tests pass, then raise message
  359. self.add_message(
  360. "consider-using-f-string",
  361. node=node,
  362. line=node.lineno,
  363. col_offset=node.col_offset,
  364. )
  365. elif isinstance(node.parent, nodes.BinOp) and node.parent.op == "%":
  366. # Backslashes can't be in f-string expressions
  367. if "\\" in node.parent.right.as_string():
  368. return
  369. # If % applied to another type than str, it's modulo and can't be replaced by formatting
  370. if not hasattr(node.parent.left, "value") or not isinstance(
  371. node.parent.left.value, str
  372. ):
  373. return
  374. # Brackets can be inconvenient in f-string expressions
  375. if "{" in node.parent.left.value or "}" in node.parent.left.value:
  376. return
  377. inferred_right = utils.safe_infer(node.parent.right)
  378. # If dicts or lists of length > 1 are used
  379. if isinstance(inferred_right, nodes.Dict):
  380. if len(inferred_right.items) > 1:
  381. return
  382. elif isinstance(inferred_right, nodes.List):
  383. if len(inferred_right.elts) > 1:
  384. return
  385. # If all tests pass, then raise message
  386. self.add_message(
  387. "consider-using-f-string",
  388. node=node,
  389. line=node.lineno,
  390. col_offset=node.col_offset,
  391. )