# binder.py
  1. from __future__ import annotations
  2. from collections import defaultdict
  3. from contextlib import contextmanager
  4. from typing import DefaultDict, Iterator, List, Optional, Tuple, Union, cast
  5. from typing_extensions import TypeAlias as _TypeAlias
  6. from mypy.erasetype import remove_instance_last_known_values
  7. from mypy.join import join_simple
  8. from mypy.literals import Key, literal, literal_hash, subkeys
  9. from mypy.nodes import Expression, IndexExpr, MemberExpr, NameExpr, RefExpr, TypeInfo, Var
  10. from mypy.subtypes import is_same_type, is_subtype
  11. from mypy.types import (
  12. AnyType,
  13. NoneType,
  14. PartialType,
  15. Type,
  16. TypeOfAny,
  17. TypeType,
  18. UnionType,
  19. get_proper_type,
  20. )
  21. from mypy.typevars import fill_typevars_with_any
# Expression kinds the binder accepts for type binding; `put` and
# `assign_type` silently ignore any expression that is not one of these.
BindableExpression: _TypeAlias = Union[IndexExpr, MemberExpr, NameExpr]
  23. class Frame:
  24. """A Frame represents a specific point in the execution of a program.
  25. It carries information about the current types of expressions at
  26. that point, arising either from assignments to those expressions
  27. or the result of isinstance checks. It also records whether it is
  28. possible to reach that point at all.
  29. This information is not copied into a new Frame when it is pushed
  30. onto the stack, so a given Frame only has information about types
  31. that were assigned in that frame.
  32. """
  33. def __init__(self, id: int, conditional_frame: bool = False) -> None:
  34. self.id = id
  35. self.types: dict[Key, Type] = {}
  36. self.unreachable = False
  37. self.conditional_frame = conditional_frame
  38. # Should be set only if we're entering a frame where it's not
  39. # possible to accurately determine whether or not contained
  40. # statements will be unreachable or not.
  41. #
  42. # Long-term, we should improve mypy to the point where we no longer
  43. # need this field.
  44. self.suppress_unreachable_warnings = False
  45. def __repr__(self) -> str:
  46. return f"Frame({self.id}, {self.types}, {self.unreachable}, {self.conditional_frame})"
# Per-expression list of (bound type, declared type) pairs, as appended by
# ConditionalTypeBinder.assign_type while binding is being deferred.
Assigns = DefaultDict[Expression, List[Tuple[Type, Optional[Type]]]]
  48. class ConditionalTypeBinder:
  49. """Keep track of conditional types of variables.
  50. NB: Variables are tracked by literal expression, so it is possible
  51. to confuse the binder; for example,
  52. ```
  53. class A:
  54. a: Union[int, str] = None
  55. x = A()
  56. lst = [x]
  57. reveal_type(x.a) # Union[int, str]
  58. x.a = 1
  59. reveal_type(x.a) # int
  60. reveal_type(lst[0].a) # Union[int, str]
  61. lst[0].a = 'a'
  62. reveal_type(x.a) # int
  63. reveal_type(lst[0].a) # str
  64. ```
  65. """
  66. # Stored assignments for situations with tuple/list lvalue and rvalue of union type.
  67. # This maps an expression to a list of bound types for every item in the union type.
  68. type_assignments: Assigns | None = None
  69. def __init__(self) -> None:
  70. self.next_id = 1
  71. # The stack of frames currently used. These map
  72. # literal_hash(expr) -- literals like 'foo.bar' --
  73. # to types. The last element of this list is the
  74. # top-most, current frame. Each earlier element
  75. # records the state as of when that frame was last
  76. # on top of the stack.
  77. self.frames = [Frame(self._get_id())]
  78. # For frames higher in the stack, we record the set of
  79. # Frames that can escape there, either by falling off
  80. # the end of the frame or by a loop control construct
  81. # or raised exception. The last element of self.frames
  82. # has no corresponding element in this list.
  83. self.options_on_return: list[list[Frame]] = []
  84. # Maps literal_hash(expr) to get_declaration(expr)
  85. # for every expr stored in the binder
  86. self.declarations: dict[Key, Type | None] = {}
  87. # Set of other keys to invalidate if a key is changed, e.g. x -> {x.a, x[0]}
  88. # Whenever a new key (e.g. x.a.b) is added, we update this
  89. self.dependencies: dict[Key, set[Key]] = {}
  90. # Whether the last pop changed the newly top frame on exit
  91. self.last_pop_changed = False
  92. self.try_frames: set[int] = set()
  93. self.break_frames: list[int] = []
  94. self.continue_frames: list[int] = []
  95. def _get_id(self) -> int:
  96. self.next_id += 1
  97. return self.next_id
  98. def _add_dependencies(self, key: Key, value: Key | None = None) -> None:
  99. if value is None:
  100. value = key
  101. else:
  102. self.dependencies.setdefault(key, set()).add(value)
  103. for elt in subkeys(key):
  104. self._add_dependencies(elt, value)
  105. def push_frame(self, conditional_frame: bool = False) -> Frame:
  106. """Push a new frame into the binder."""
  107. f = Frame(self._get_id(), conditional_frame)
  108. self.frames.append(f)
  109. self.options_on_return.append([])
  110. return f
  111. def _put(self, key: Key, type: Type, index: int = -1) -> None:
  112. self.frames[index].types[key] = type
  113. def _get(self, key: Key, index: int = -1) -> Type | None:
  114. if index < 0:
  115. index += len(self.frames)
  116. for i in range(index, -1, -1):
  117. if key in self.frames[i].types:
  118. return self.frames[i].types[key]
  119. return None
  120. def put(self, expr: Expression, typ: Type) -> None:
  121. if not isinstance(expr, (IndexExpr, MemberExpr, NameExpr)):
  122. return
  123. if not literal(expr):
  124. return
  125. key = literal_hash(expr)
  126. assert key is not None, "Internal error: binder tried to put non-literal"
  127. if key not in self.declarations:
  128. self.declarations[key] = get_declaration(expr)
  129. self._add_dependencies(key)
  130. self._put(key, typ)
  131. def unreachable(self) -> None:
  132. self.frames[-1].unreachable = True
  133. def suppress_unreachable_warnings(self) -> None:
  134. self.frames[-1].suppress_unreachable_warnings = True
  135. def get(self, expr: Expression) -> Type | None:
  136. key = literal_hash(expr)
  137. assert key is not None, "Internal error: binder tried to get non-literal"
  138. return self._get(key)
  139. def is_unreachable(self) -> bool:
  140. # TODO: Copy the value of unreachable into new frames to avoid
  141. # this traversal on every statement?
  142. return any(f.unreachable for f in self.frames)
  143. def is_unreachable_warning_suppressed(self) -> bool:
  144. # TODO: See todo in 'is_unreachable'
  145. return any(f.suppress_unreachable_warnings for f in self.frames)
  146. def cleanse(self, expr: Expression) -> None:
  147. """Remove all references to a Node from the binder."""
  148. key = literal_hash(expr)
  149. assert key is not None, "Internal error: binder tried cleanse non-literal"
  150. self._cleanse_key(key)
  151. def _cleanse_key(self, key: Key) -> None:
  152. """Remove all references to a key from the binder."""
  153. for frame in self.frames:
  154. if key in frame.types:
  155. del frame.types[key]
  156. def update_from_options(self, frames: list[Frame]) -> bool:
  157. """Update the frame to reflect that each key will be updated
  158. as in one of the frames. Return whether any item changes.
  159. If a key is declared as AnyType, only update it if all the
  160. options are the same.
  161. """
  162. frames = [f for f in frames if not f.unreachable]
  163. changed = False
  164. keys = {key for f in frames for key in f.types}
  165. for key in keys:
  166. current_value = self._get(key)
  167. resulting_values = [f.types.get(key, current_value) for f in frames]
  168. if any(x is None for x in resulting_values):
  169. # We didn't know anything about key before
  170. # (current_value must be None), and we still don't
  171. # know anything about key in at least one possible frame.
  172. continue
  173. type = resulting_values[0]
  174. assert type is not None
  175. declaration_type = get_proper_type(self.declarations.get(key))
  176. if isinstance(declaration_type, AnyType):
  177. # At this point resulting values can't contain None, see continue above
  178. if not all(is_same_type(type, cast(Type, t)) for t in resulting_values[1:]):
  179. type = AnyType(TypeOfAny.from_another_any, source_any=declaration_type)
  180. else:
  181. for other in resulting_values[1:]:
  182. assert other is not None
  183. type = join_simple(self.declarations[key], type, other)
  184. if current_value is None or not is_same_type(type, current_value):
  185. self._put(key, type)
  186. changed = True
  187. self.frames[-1].unreachable = not frames
  188. return changed
  189. def pop_frame(self, can_skip: bool, fall_through: int) -> Frame:
  190. """Pop a frame and return it.
  191. See frame_context() for documentation of fall_through.
  192. """
  193. if fall_through > 0:
  194. self.allow_jump(-fall_through)
  195. result = self.frames.pop()
  196. options = self.options_on_return.pop()
  197. if can_skip:
  198. options.insert(0, self.frames[-1])
  199. self.last_pop_changed = self.update_from_options(options)
  200. return result
  201. @contextmanager
  202. def accumulate_type_assignments(self) -> Iterator[Assigns]:
  203. """Push a new map to collect assigned types in multiassign from union.
  204. If this map is not None, actual binding is deferred until all items in
  205. the union are processed (a union of collected items is later bound
  206. manually by the caller).
  207. """
  208. old_assignments = None
  209. if self.type_assignments is not None:
  210. old_assignments = self.type_assignments
  211. self.type_assignments = defaultdict(list)
  212. yield self.type_assignments
  213. self.type_assignments = old_assignments
  214. def assign_type(
  215. self, expr: Expression, type: Type, declared_type: Type | None, restrict_any: bool = False
  216. ) -> None:
  217. # We should erase last known value in binder, because if we are using it,
  218. # it means that the target is not final, and therefore can't hold a literal.
  219. type = remove_instance_last_known_values(type)
  220. if self.type_assignments is not None:
  221. # We are in a multiassign from union, defer the actual binding,
  222. # just collect the types.
  223. self.type_assignments[expr].append((type, declared_type))
  224. return
  225. if not isinstance(expr, (IndexExpr, MemberExpr, NameExpr)):
  226. return None
  227. if not literal(expr):
  228. return
  229. self.invalidate_dependencies(expr)
  230. if declared_type is None:
  231. # Not sure why this happens. It seems to mainly happen in
  232. # member initialization.
  233. return
  234. if not is_subtype(type, declared_type):
  235. # Pretty sure this is only happens when there's a type error.
  236. # Ideally this function wouldn't be called if the
  237. # expression has a type error, though -- do other kinds of
  238. # errors cause this function to get called at invalid
  239. # times?
  240. return
  241. p_declared = get_proper_type(declared_type)
  242. p_type = get_proper_type(type)
  243. enclosing_type = get_proper_type(self.most_recent_enclosing_type(expr, type))
  244. if isinstance(enclosing_type, AnyType) and not restrict_any:
  245. # If x is Any and y is int, after x = y we do not infer that x is int.
  246. # This could be changed.
  247. # Instead, since we narrowed type from Any in a recent frame (probably an
  248. # isinstance check), but now it is reassigned, we broaden back
  249. # to Any (which is the most recent enclosing type)
  250. self.put(expr, enclosing_type)
  251. # As a special case, when assigning Any to a variable with a
  252. # declared Optional type that has been narrowed to None,
  253. # replace all the Nones in the declared Union type with Any.
  254. # This overrides the normal behavior of ignoring Any assignments to variables
  255. # in order to prevent false positives.
  256. # (See discussion in #3526)
  257. elif (
  258. isinstance(p_type, AnyType)
  259. and isinstance(p_declared, UnionType)
  260. and any(isinstance(get_proper_type(item), NoneType) for item in p_declared.items)
  261. and isinstance(
  262. get_proper_type(self.most_recent_enclosing_type(expr, NoneType())), NoneType
  263. )
  264. ):
  265. # Replace any Nones in the union type with Any
  266. new_items = [
  267. type if isinstance(get_proper_type(item), NoneType) else item
  268. for item in p_declared.items
  269. ]
  270. self.put(expr, UnionType(new_items))
  271. elif isinstance(p_type, AnyType) and not (
  272. isinstance(p_declared, UnionType)
  273. and any(isinstance(get_proper_type(item), AnyType) for item in p_declared.items)
  274. ):
  275. # Assigning an Any value doesn't affect the type to avoid false negatives, unless
  276. # there is an Any item in a declared union type.
  277. self.put(expr, declared_type)
  278. else:
  279. self.put(expr, type)
  280. for i in self.try_frames:
  281. # XXX This should probably not copy the entire frame, but
  282. # just copy this variable into a single stored frame.
  283. self.allow_jump(i)
  284. def invalidate_dependencies(self, expr: BindableExpression) -> None:
  285. """Invalidate knowledge of types that include expr, but not expr itself.
  286. For example, when expr is foo.bar, invalidate foo.bar.baz.
  287. It is overly conservative: it invalidates globally, including
  288. in code paths unreachable from here.
  289. """
  290. key = literal_hash(expr)
  291. assert key is not None
  292. for dep in self.dependencies.get(key, set()):
  293. self._cleanse_key(dep)
  294. def most_recent_enclosing_type(self, expr: BindableExpression, type: Type) -> Type | None:
  295. type = get_proper_type(type)
  296. if isinstance(type, AnyType):
  297. return get_declaration(expr)
  298. key = literal_hash(expr)
  299. assert key is not None
  300. enclosers = [get_declaration(expr)] + [
  301. f.types[key] for f in self.frames if key in f.types and is_subtype(type, f.types[key])
  302. ]
  303. return enclosers[-1]
  304. def allow_jump(self, index: int) -> None:
  305. # self.frames and self.options_on_return have different lengths
  306. # so make sure the index is positive
  307. if index < 0:
  308. index += len(self.options_on_return)
  309. frame = Frame(self._get_id())
  310. for f in self.frames[index + 1 :]:
  311. frame.types.update(f.types)
  312. if f.unreachable:
  313. frame.unreachable = True
  314. self.options_on_return[index].append(frame)
  315. def handle_break(self) -> None:
  316. self.allow_jump(self.break_frames[-1])
  317. self.unreachable()
  318. def handle_continue(self) -> None:
  319. self.allow_jump(self.continue_frames[-1])
  320. self.unreachable()
  321. @contextmanager
  322. def frame_context(
  323. self,
  324. *,
  325. can_skip: bool,
  326. fall_through: int = 1,
  327. break_frame: int = 0,
  328. continue_frame: int = 0,
  329. conditional_frame: bool = False,
  330. try_frame: bool = False,
  331. ) -> Iterator[Frame]:
  332. """Return a context manager that pushes/pops frames on enter/exit.
  333. If can_skip is True, control flow is allowed to bypass the
  334. newly-created frame.
  335. If fall_through > 0, then it will allow control flow that
  336. falls off the end of the frame to escape to its ancestor
  337. `fall_through` levels higher. Otherwise control flow ends
  338. at the end of the frame.
  339. If break_frame > 0, then 'break' statements within this frame
  340. will jump out to the frame break_frame levels higher than the
  341. frame created by this call to frame_context. Similarly for
  342. continue_frame and 'continue' statements.
  343. If try_frame is true, then execution is allowed to jump at any
  344. point within the newly created frame (or its descendants) to
  345. its parent (i.e., to the frame that was on top before this
  346. call to frame_context).
  347. After the context manager exits, self.last_pop_changed indicates
  348. whether any types changed in the newly-topmost frame as a result
  349. of popping this frame.
  350. """
  351. assert len(self.frames) > 1
  352. if break_frame:
  353. self.break_frames.append(len(self.frames) - break_frame)
  354. if continue_frame:
  355. self.continue_frames.append(len(self.frames) - continue_frame)
  356. if try_frame:
  357. self.try_frames.add(len(self.frames) - 1)
  358. new_frame = self.push_frame(conditional_frame)
  359. if try_frame:
  360. # An exception may occur immediately
  361. self.allow_jump(-1)
  362. yield new_frame
  363. self.pop_frame(can_skip, fall_through)
  364. if break_frame:
  365. self.break_frames.pop()
  366. if continue_frame:
  367. self.continue_frames.pop()
  368. if try_frame:
  369. self.try_frames.remove(len(self.frames) - 1)
  370. @contextmanager
  371. def top_frame_context(self) -> Iterator[Frame]:
  372. """A variant of frame_context for use at the top level of
  373. a namespace (module, function, or class).
  374. """
  375. assert len(self.frames) == 1
  376. yield self.push_frame()
  377. self.pop_frame(True, 0)
  378. assert len(self.frames) == 1
  379. def get_declaration(expr: BindableExpression) -> Type | None:
  380. if isinstance(expr, RefExpr):
  381. if isinstance(expr.node, Var):
  382. type = expr.node.type
  383. if not isinstance(get_proper_type(type), PartialType):
  384. return type
  385. elif isinstance(expr.node, TypeInfo):
  386. return TypeType(fill_typevars_with_any(expr.node))
  387. return None