renaming.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. from __future__ import annotations
  2. from contextlib import contextmanager
  3. from typing import Final, Iterator
  4. from mypy.nodes import (
  5. AssignmentStmt,
  6. Block,
  7. BreakStmt,
  8. ClassDef,
  9. ContinueStmt,
  10. ForStmt,
  11. FuncDef,
  12. Import,
  13. ImportAll,
  14. ImportFrom,
  15. IndexExpr,
  16. ListExpr,
  17. Lvalue,
  18. MatchStmt,
  19. MemberExpr,
  20. MypyFile,
  21. NameExpr,
  22. StarExpr,
  23. TryStmt,
  24. TupleExpr,
  25. WhileStmt,
  26. WithStmt,
  27. )
  28. from mypy.patterns import AsPattern
  29. from mypy.traverser import TraverserVisitor
# Scope kinds
#
# Integer tags identifying what kind of scope the renamer is currently inside.
# VariableRenameVisitor.enter_scope() pushes one of these onto `scope_kinds`,
# and flush_refs() compares the innermost tag against FUNCTION to decide which
# definition of a name keeps its original spelling.
FILE: Final = 0  # Module top level
FUNCTION: Final = 1  # Body of a function definition
CLASS: Final = 2  # Body of a class definition
  34. class VariableRenameVisitor(TraverserVisitor):
  35. """Rename variables to allow redefinition of variables.
  36. For example, consider this code:
  37. x = 0
  38. f(x)
  39. x = "a"
  40. g(x)
  41. It will be transformed like this:
  42. x' = 0
  43. f(x')
  44. x = "a"
  45. g(x)
  46. There will be two independent variables (x' and x) that will have separate
  47. inferred types. The publicly exposed variant will get the non-suffixed name.
  48. This is the last definition at module top level and the first definition
  49. (argument) within a function.
  50. Renaming only happens for assignments within the same block. Renaming is
  51. performed before semantic analysis, immediately after parsing.
  52. The implementation performs a rudimentary static analysis. The analysis is
  53. overly conservative to keep things simple.
  54. """
  55. def __init__(self) -> None:
  56. # Counter for labeling new blocks
  57. self.block_id = 0
  58. # Number of surrounding try statements that disallow variable redefinition
  59. self.disallow_redef_depth = 0
  60. # Number of surrounding loop statements
  61. self.loop_depth = 0
  62. # Map block id to loop depth.
  63. self.block_loop_depth: dict[int, int] = {}
  64. # Stack of block ids being processed.
  65. self.blocks: list[int] = []
  66. # List of scopes; each scope maps short (unqualified) name to block id.
  67. self.var_blocks: list[dict[str, int]] = []
  68. # References to variables that we may need to rename. List of
  69. # scopes; each scope is a mapping from name to list of collections
  70. # of names that refer to the same logical variable.
  71. self.refs: list[dict[str, list[list[NameExpr]]]] = []
  72. # Number of reads of the most recent definition of a variable (per scope)
  73. self.num_reads: list[dict[str, int]] = []
  74. # Kinds of nested scopes (FILE, FUNCTION or CLASS)
  75. self.scope_kinds: list[int] = []
  76. def visit_mypy_file(self, file_node: MypyFile) -> None:
  77. """Rename variables within a file.
  78. This is the main entry point to this class.
  79. """
  80. self.clear()
  81. with self.enter_scope(FILE), self.enter_block():
  82. for d in file_node.defs:
  83. d.accept(self)
  84. def visit_func_def(self, fdef: FuncDef) -> None:
  85. # Conservatively do not allow variable defined before a function to
  86. # be redefined later, since function could refer to either definition.
  87. self.reject_redefinition_of_vars_in_scope()
  88. with self.enter_scope(FUNCTION), self.enter_block():
  89. for arg in fdef.arguments:
  90. name = arg.variable.name
  91. # 'self' can't be redefined since it's special as it allows definition of
  92. # attributes. 'cls' can't be used to define attributes so we can ignore it.
  93. can_be_redefined = name != "self" # TODO: Proper check
  94. self.record_assignment(arg.variable.name, can_be_redefined)
  95. self.handle_arg(name)
  96. for stmt in fdef.body.body:
  97. stmt.accept(self)
  98. def visit_class_def(self, cdef: ClassDef) -> None:
  99. self.reject_redefinition_of_vars_in_scope()
  100. with self.enter_scope(CLASS):
  101. super().visit_class_def(cdef)
  102. def visit_block(self, block: Block) -> None:
  103. with self.enter_block():
  104. super().visit_block(block)
  105. def visit_while_stmt(self, stmt: WhileStmt) -> None:
  106. with self.enter_loop():
  107. super().visit_while_stmt(stmt)
  108. def visit_for_stmt(self, stmt: ForStmt) -> None:
  109. stmt.expr.accept(self)
  110. self.analyze_lvalue(stmt.index, True)
  111. # Also analyze as non-lvalue so that every for loop index variable is assumed to be read.
  112. stmt.index.accept(self)
  113. with self.enter_loop():
  114. stmt.body.accept(self)
  115. if stmt.else_body:
  116. stmt.else_body.accept(self)
  117. def visit_break_stmt(self, stmt: BreakStmt) -> None:
  118. self.reject_redefinition_of_vars_in_loop()
  119. def visit_continue_stmt(self, stmt: ContinueStmt) -> None:
  120. self.reject_redefinition_of_vars_in_loop()
  121. def visit_try_stmt(self, stmt: TryStmt) -> None:
  122. # Variables defined by a try statement get special treatment in the
  123. # type checker which allows them to be always redefined, so no need to
  124. # do renaming here.
  125. with self.enter_try():
  126. super().visit_try_stmt(stmt)
  127. def visit_with_stmt(self, stmt: WithStmt) -> None:
  128. for expr in stmt.expr:
  129. expr.accept(self)
  130. for target in stmt.target:
  131. if target is not None:
  132. self.analyze_lvalue(target)
  133. # We allow redefinitions in the body of a with statement for
  134. # convenience. This is unsafe since with statements can affect control
  135. # flow by catching exceptions, but this is rare except for
  136. # assertRaises() and other similar functions, where the exception is
  137. # raised by the last statement in the body, which usually isn't a
  138. # problem.
  139. stmt.body.accept(self)
  140. def visit_import(self, imp: Import) -> None:
  141. for id, as_id in imp.ids:
  142. self.record_assignment(as_id or id, False)
  143. def visit_import_from(self, imp: ImportFrom) -> None:
  144. for id, as_id in imp.names:
  145. self.record_assignment(as_id or id, False)
  146. def visit_assignment_stmt(self, s: AssignmentStmt) -> None:
  147. s.rvalue.accept(self)
  148. for lvalue in s.lvalues:
  149. self.analyze_lvalue(lvalue)
  150. def visit_match_stmt(self, s: MatchStmt) -> None:
  151. s.subject.accept(self)
  152. for i in range(len(s.patterns)):
  153. with self.enter_block():
  154. s.patterns[i].accept(self)
  155. guard = s.guards[i]
  156. if guard is not None:
  157. guard.accept(self)
  158. # We already entered a block, so visit this block's statements directly
  159. for stmt in s.bodies[i].body:
  160. stmt.accept(self)
  161. def visit_capture_pattern(self, p: AsPattern) -> None:
  162. if p.name is not None:
  163. self.analyze_lvalue(p.name)
  164. def analyze_lvalue(self, lvalue: Lvalue, is_nested: bool = False) -> None:
  165. """Process assignment; in particular, keep track of (re)defined names.
  166. Args:
  167. is_nested: True for non-outermost Lvalue in a multiple assignment such as
  168. "x, y = ..."
  169. """
  170. if isinstance(lvalue, NameExpr):
  171. name = lvalue.name
  172. is_new = self.record_assignment(name, True)
  173. if is_new:
  174. self.handle_def(lvalue)
  175. else:
  176. self.handle_refine(lvalue)
  177. if is_nested:
  178. # This allows these to be redefined freely even if never read. Multiple
  179. # assignment like "x, _ _ = y" defines dummy variables that are never read.
  180. self.handle_ref(lvalue)
  181. elif isinstance(lvalue, (ListExpr, TupleExpr)):
  182. for item in lvalue.items:
  183. self.analyze_lvalue(item, is_nested=True)
  184. elif isinstance(lvalue, MemberExpr):
  185. lvalue.expr.accept(self)
  186. elif isinstance(lvalue, IndexExpr):
  187. lvalue.base.accept(self)
  188. lvalue.index.accept(self)
  189. elif isinstance(lvalue, StarExpr):
  190. # Propagate is_nested since in a typical use case like "x, *rest = ..." 'rest' may
  191. # be freely reused.
  192. self.analyze_lvalue(lvalue.expr, is_nested=is_nested)
  193. def visit_name_expr(self, expr: NameExpr) -> None:
  194. self.handle_ref(expr)
  195. # Helpers for renaming references
  196. def handle_arg(self, name: str) -> None:
  197. """Store function argument."""
  198. self.refs[-1][name] = [[]]
  199. self.num_reads[-1][name] = 0
  200. def handle_def(self, expr: NameExpr) -> None:
  201. """Store new name definition."""
  202. name = expr.name
  203. names = self.refs[-1].setdefault(name, [])
  204. names.append([expr])
  205. self.num_reads[-1][name] = 0
  206. def handle_refine(self, expr: NameExpr) -> None:
  207. """Store assignment to an existing name (that replaces previous value, if any)."""
  208. name = expr.name
  209. if name in self.refs[-1]:
  210. names = self.refs[-1][name]
  211. if not names:
  212. names.append([])
  213. names[-1].append(expr)
  214. def handle_ref(self, expr: NameExpr) -> None:
  215. """Store reference to defined name."""
  216. name = expr.name
  217. if name in self.refs[-1]:
  218. names = self.refs[-1][name]
  219. if not names:
  220. names.append([])
  221. names[-1].append(expr)
  222. num_reads = self.num_reads[-1]
  223. num_reads[name] = num_reads.get(name, 0) + 1
  224. def flush_refs(self) -> None:
  225. """Rename all references within the current scope.
  226. This will be called at the end of a scope.
  227. """
  228. is_func = self.scope_kinds[-1] == FUNCTION
  229. for name, refs in self.refs[-1].items():
  230. if len(refs) == 1:
  231. # Only one definition -- no renaming needed.
  232. continue
  233. if is_func:
  234. # In a function, don't rename the first definition, as it
  235. # may be an argument that must preserve the name.
  236. to_rename = refs[1:]
  237. else:
  238. # At module top level, don't rename the final definition,
  239. # as it will be publicly visible outside the module.
  240. to_rename = refs[:-1]
  241. for i, item in enumerate(to_rename):
  242. rename_refs(item, i)
  243. self.refs.pop()
  244. # Helpers for determining which assignments define new variables
  245. def clear(self) -> None:
  246. self.blocks = []
  247. self.var_blocks = []
  248. @contextmanager
  249. def enter_block(self) -> Iterator[None]:
  250. self.block_id += 1
  251. self.blocks.append(self.block_id)
  252. self.block_loop_depth[self.block_id] = self.loop_depth
  253. try:
  254. yield
  255. finally:
  256. self.blocks.pop()
  257. @contextmanager
  258. def enter_try(self) -> Iterator[None]:
  259. self.disallow_redef_depth += 1
  260. try:
  261. yield
  262. finally:
  263. self.disallow_redef_depth -= 1
  264. @contextmanager
  265. def enter_loop(self) -> Iterator[None]:
  266. self.loop_depth += 1
  267. try:
  268. yield
  269. finally:
  270. self.loop_depth -= 1
  271. def current_block(self) -> int:
  272. return self.blocks[-1]
  273. @contextmanager
  274. def enter_scope(self, kind: int) -> Iterator[None]:
  275. self.var_blocks.append({})
  276. self.refs.append({})
  277. self.num_reads.append({})
  278. self.scope_kinds.append(kind)
  279. try:
  280. yield
  281. finally:
  282. self.flush_refs()
  283. self.var_blocks.pop()
  284. self.num_reads.pop()
  285. self.scope_kinds.pop()
  286. def is_nested(self) -> int:
  287. return len(self.var_blocks) > 1
  288. def reject_redefinition_of_vars_in_scope(self) -> None:
  289. """Make it impossible to redefine defined variables in the current scope.
  290. This is used if we encounter a function definition that
  291. can make it ambiguous which definition is live. Example:
  292. x = 0
  293. def f() -> int:
  294. return x
  295. x = '' # Error -- cannot redefine x across function definition
  296. """
  297. var_blocks = self.var_blocks[-1]
  298. for key in var_blocks:
  299. var_blocks[key] = -1
  300. def reject_redefinition_of_vars_in_loop(self) -> None:
  301. """Reject redefinition of variables in the innermost loop.
  302. If there is an early exit from a loop, there may be ambiguity about which
  303. value may escape the loop. Example where this matters:
  304. while f():
  305. x = 0
  306. if g():
  307. break
  308. x = '' # Error -- not a redefinition
  309. reveal_type(x) # int
  310. This method ensures that the second assignment to 'x' doesn't introduce a new
  311. variable.
  312. """
  313. var_blocks = self.var_blocks[-1]
  314. for key, block in var_blocks.items():
  315. if self.block_loop_depth.get(block) == self.loop_depth:
  316. var_blocks[key] = -1
  317. def record_assignment(self, name: str, can_be_redefined: bool) -> bool:
  318. """Record assignment to given name and return True if it defines a new variable.
  319. Args:
  320. can_be_redefined: If True, allows assignment in the same block to redefine
  321. this name (if this is a new definition)
  322. """
  323. if self.num_reads[-1].get(name, -1) == 0:
  324. # Only set, not read, so no reason to redefine
  325. return False
  326. if self.disallow_redef_depth > 0:
  327. # Can't redefine within try/with a block.
  328. can_be_redefined = False
  329. block = self.current_block()
  330. var_blocks = self.var_blocks[-1]
  331. if name not in var_blocks:
  332. # New definition in this scope.
  333. if can_be_redefined:
  334. # Store the block where this was defined to allow redefinition in
  335. # the same block only.
  336. var_blocks[name] = block
  337. else:
  338. # This doesn't support arbitrary redefinition.
  339. var_blocks[name] = -1
  340. return True
  341. elif var_blocks[name] == block:
  342. # Redefinition -- defines a new variable with the same name.
  343. return True
  344. else:
  345. # Assigns to an existing variable.
  346. return False
  347. class LimitedVariableRenameVisitor(TraverserVisitor):
  348. """Perform some limited variable renaming in with statements.
  349. This allows reusing a variable in multiple with statements with
  350. different types. For example, the two instances of 'x' can have
  351. incompatible types:
  352. with C() as x:
  353. f(x)
  354. with D() as x:
  355. g(x)
  356. The above code gets renamed conceptually into this (not valid Python!):
  357. with C() as x':
  358. f(x')
  359. with D() as x:
  360. g(x)
  361. If there's a reference to a variable defined in 'with' outside the
  362. statement, or if there's any trickiness around variable visibility
  363. (e.g. function definitions), we give up and won't perform renaming.
  364. The main use case is to allow binding both readable and writable
  365. binary files into the same variable. These have different types:
  366. with open(fnam, 'rb') as f: ...
  367. with open(fnam, 'wb') as f: ...
  368. """
  369. def __init__(self) -> None:
  370. # Short names of variables bound in with statements using "as"
  371. # in a surrounding scope
  372. self.bound_vars: list[str] = []
  373. # Stack of names that can't be safely renamed, per scope ('*' means that
  374. # no names can be renamed)
  375. self.skipped: list[set[str]] = []
  376. # References to variables that we may need to rename. Stack of
  377. # scopes; each scope is a mapping from name to list of collections
  378. # of names that refer to the same logical variable.
  379. self.refs: list[dict[str, list[list[NameExpr]]]] = []
  380. def visit_mypy_file(self, file_node: MypyFile) -> None:
  381. """Rename variables within a file.
  382. This is the main entry point to this class.
  383. """
  384. with self.enter_scope():
  385. for d in file_node.defs:
  386. d.accept(self)
  387. def visit_func_def(self, fdef: FuncDef) -> None:
  388. self.reject_redefinition_of_vars_in_scope()
  389. with self.enter_scope():
  390. for arg in fdef.arguments:
  391. self.record_skipped(arg.variable.name)
  392. super().visit_func_def(fdef)
  393. def visit_class_def(self, cdef: ClassDef) -> None:
  394. self.reject_redefinition_of_vars_in_scope()
  395. with self.enter_scope():
  396. super().visit_class_def(cdef)
  397. def visit_with_stmt(self, stmt: WithStmt) -> None:
  398. for expr in stmt.expr:
  399. expr.accept(self)
  400. old_len = len(self.bound_vars)
  401. for target in stmt.target:
  402. if target is not None:
  403. self.analyze_lvalue(target)
  404. for target in stmt.target:
  405. if target:
  406. target.accept(self)
  407. stmt.body.accept(self)
  408. while len(self.bound_vars) > old_len:
  409. self.bound_vars.pop()
  410. def analyze_lvalue(self, lvalue: Lvalue) -> None:
  411. if isinstance(lvalue, NameExpr):
  412. name = lvalue.name
  413. if name in self.bound_vars:
  414. # Name bound in a surrounding with statement, so it can be renamed
  415. self.visit_name_expr(lvalue)
  416. else:
  417. var_info = self.refs[-1]
  418. if name not in var_info:
  419. var_info[name] = []
  420. var_info[name].append([])
  421. self.bound_vars.append(name)
  422. elif isinstance(lvalue, (ListExpr, TupleExpr)):
  423. for item in lvalue.items:
  424. self.analyze_lvalue(item)
  425. elif isinstance(lvalue, MemberExpr):
  426. lvalue.expr.accept(self)
  427. elif isinstance(lvalue, IndexExpr):
  428. lvalue.base.accept(self)
  429. lvalue.index.accept(self)
  430. elif isinstance(lvalue, StarExpr):
  431. self.analyze_lvalue(lvalue.expr)
  432. def visit_import(self, imp: Import) -> None:
  433. # We don't support renaming imports
  434. for id, as_id in imp.ids:
  435. self.record_skipped(as_id or id)
  436. def visit_import_from(self, imp: ImportFrom) -> None:
  437. # We don't support renaming imports
  438. for id, as_id in imp.names:
  439. self.record_skipped(as_id or id)
  440. def visit_import_all(self, imp: ImportAll) -> None:
  441. # Give up, since we don't know all imported names yet
  442. self.reject_redefinition_of_vars_in_scope()
  443. def visit_name_expr(self, expr: NameExpr) -> None:
  444. name = expr.name
  445. if name in self.bound_vars:
  446. # Record reference so that it can be renamed later
  447. for scope in reversed(self.refs):
  448. if name in scope:
  449. scope[name][-1].append(expr)
  450. else:
  451. self.record_skipped(name)
  452. @contextmanager
  453. def enter_scope(self) -> Iterator[None]:
  454. self.skipped.append(set())
  455. self.refs.append({})
  456. yield None
  457. self.flush_refs()
  458. def reject_redefinition_of_vars_in_scope(self) -> None:
  459. self.record_skipped("*")
  460. def record_skipped(self, name: str) -> None:
  461. self.skipped[-1].add(name)
  462. def flush_refs(self) -> None:
  463. ref_dict = self.refs.pop()
  464. skipped = self.skipped.pop()
  465. if "*" not in skipped:
  466. for name, refs in ref_dict.items():
  467. if len(refs) <= 1 or name in skipped:
  468. continue
  469. # At module top level we must not rename the final definition,
  470. # as it may be publicly visible
  471. to_rename = refs[:-1]
  472. for i, item in enumerate(to_rename):
  473. rename_refs(item, i)
  474. def rename_refs(names: list[NameExpr], index: int) -> None:
  475. name = names[0].name
  476. new_name = name + "'" * (index + 1)
  477. for expr in names:
  478. expr.name = new_name