inspections.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. from __future__ import annotations
  2. import os
  3. from collections import defaultdict
  4. from functools import cmp_to_key
  5. from typing import Callable
  6. from mypy.build import State
  7. from mypy.find_sources import InvalidSourceList, SourceFinder
  8. from mypy.messages import format_type
  9. from mypy.modulefinder import PYTHON_EXTENSIONS
  10. from mypy.nodes import (
  11. LDEF,
  12. Decorator,
  13. Expression,
  14. FuncBase,
  15. MemberExpr,
  16. MypyFile,
  17. Node,
  18. OverloadedFuncDef,
  19. RefExpr,
  20. SymbolNode,
  21. TypeInfo,
  22. Var,
  23. )
  24. from mypy.server.update import FineGrainedBuildManager
  25. from mypy.traverser import ExtendedTraverserVisitor
  26. from mypy.typeops import tuple_fallback
  27. from mypy.types import (
  28. FunctionLike,
  29. Instance,
  30. LiteralType,
  31. ProperType,
  32. TupleType,
  33. TypedDictType,
  34. TypeVarType,
  35. UnionType,
  36. get_proper_type,
  37. )
  38. from mypy.typevars import fill_typevars_with_any
  39. def node_starts_after(o: Node, line: int, column: int) -> bool:
  40. return o.line > line or o.line == line and o.column > column
  41. def node_ends_before(o: Node, line: int, column: int) -> bool:
  42. # Unfortunately, end positions for some statements are a mess,
  43. # e.g. overloaded functions, so we return False when we don't know.
  44. if o.end_line is not None and o.end_column is not None:
  45. if o.end_line < line or o.end_line == line and o.end_column < column:
  46. return True
  47. return False
  48. def expr_span(expr: Expression) -> str:
  49. """Format expression span as in mypy error messages."""
  50. return f"{expr.line}:{expr.column + 1}:{expr.end_line}:{expr.end_column}"
  51. def get_instance_fallback(typ: ProperType) -> list[Instance]:
  52. """Returns the Instance fallback for this type if one exists or None."""
  53. if isinstance(typ, Instance):
  54. return [typ]
  55. elif isinstance(typ, TupleType):
  56. return [tuple_fallback(typ)]
  57. elif isinstance(typ, TypedDictType):
  58. return [typ.fallback]
  59. elif isinstance(typ, FunctionLike):
  60. return [typ.fallback]
  61. elif isinstance(typ, LiteralType):
  62. return [typ.fallback]
  63. elif isinstance(typ, TypeVarType):
  64. if typ.values:
  65. res = []
  66. for t in typ.values:
  67. res.extend(get_instance_fallback(get_proper_type(t)))
  68. return res
  69. return get_instance_fallback(get_proper_type(typ.upper_bound))
  70. elif isinstance(typ, UnionType):
  71. res = []
  72. for t in typ.items:
  73. res.extend(get_instance_fallback(get_proper_type(t)))
  74. return res
  75. return []
  76. def find_node(name: str, info: TypeInfo) -> Var | FuncBase | None:
  77. """Find the node defining member 'name' in given TypeInfo."""
  78. # TODO: this code shares some logic with checkmember.py
  79. method = info.get_method(name)
  80. if method:
  81. if isinstance(method, Decorator):
  82. return method.var
  83. if method.is_property:
  84. assert isinstance(method, OverloadedFuncDef)
  85. dec = method.items[0]
  86. assert isinstance(dec, Decorator)
  87. return dec.var
  88. return method
  89. else:
  90. # don't have such method, maybe variable?
  91. node = info.get(name)
  92. v = node.node if node else None
  93. if isinstance(v, Var):
  94. return v
  95. return None
  96. def find_module_by_fullname(fullname: str, modules: dict[str, State]) -> State | None:
  97. """Find module by a node fullname.
  98. This logic mimics the one we use in fixup, so should be good enough.
  99. """
  100. head = fullname
  101. # Special case: a module symbol is considered to be defined in itself, not in enclosing
  102. # package, since this is what users want when clicking go to definition on a module.
  103. if head in modules:
  104. return modules[head]
  105. while True:
  106. if "." not in head:
  107. return None
  108. head, tail = head.rsplit(".", maxsplit=1)
  109. mod = modules.get(head)
  110. if mod is not None:
  111. return mod
  112. class SearchVisitor(ExtendedTraverserVisitor):
  113. """Visitor looking for an expression whose span matches given one exactly."""
  114. def __init__(self, line: int, column: int, end_line: int, end_column: int) -> None:
  115. self.line = line
  116. self.column = column
  117. self.end_line = end_line
  118. self.end_column = end_column
  119. self.result: Expression | None = None
  120. def visit(self, o: Node) -> bool:
  121. if node_starts_after(o, self.line, self.column):
  122. return False
  123. if node_ends_before(o, self.end_line, self.end_column):
  124. return False
  125. if (
  126. o.line == self.line
  127. and o.end_line == self.end_line
  128. and o.column == self.column
  129. and o.end_column == self.end_column
  130. ):
  131. if isinstance(o, Expression):
  132. self.result = o
  133. return self.result is None
  134. def find_by_location(
  135. tree: MypyFile, line: int, column: int, end_line: int, end_column: int
  136. ) -> Expression | None:
  137. """Find an expression matching given span, or None if not found."""
  138. if end_line < line:
  139. raise ValueError('"end_line" must not be before "line"')
  140. if end_line == line and end_column <= column:
  141. raise ValueError('"end_column" must be after "column"')
  142. visitor = SearchVisitor(line, column, end_line, end_column)
  143. tree.accept(visitor)
  144. return visitor.result
  145. class SearchAllVisitor(ExtendedTraverserVisitor):
  146. """Visitor looking for all expressions whose spans enclose given position."""
  147. def __init__(self, line: int, column: int) -> None:
  148. self.line = line
  149. self.column = column
  150. self.result: list[Expression] = []
  151. def visit(self, o: Node) -> bool:
  152. if node_starts_after(o, self.line, self.column):
  153. return False
  154. if node_ends_before(o, self.line, self.column):
  155. return False
  156. if isinstance(o, Expression):
  157. self.result.append(o)
  158. return True
  159. def find_all_by_location(tree: MypyFile, line: int, column: int) -> list[Expression]:
  160. """Find all expressions enclosing given position starting from innermost."""
  161. visitor = SearchAllVisitor(line, column)
  162. tree.accept(visitor)
  163. return list(reversed(visitor.result))
  164. class InspectionEngine:
  165. """Engine for locating and statically inspecting expressions."""
  166. def __init__(
  167. self,
  168. fg_manager: FineGrainedBuildManager,
  169. *,
  170. verbosity: int = 0,
  171. limit: int = 0,
  172. include_span: bool = False,
  173. include_kind: bool = False,
  174. include_object_attrs: bool = False,
  175. union_attrs: bool = False,
  176. force_reload: bool = False,
  177. ) -> None:
  178. self.fg_manager = fg_manager
  179. self.finder = SourceFinder(
  180. self.fg_manager.manager.fscache, self.fg_manager.manager.options
  181. )
  182. self.verbosity = verbosity
  183. self.limit = limit
  184. self.include_span = include_span
  185. self.include_kind = include_kind
  186. self.include_object_attrs = include_object_attrs
  187. self.union_attrs = union_attrs
  188. self.force_reload = force_reload
  189. # Module for which inspection was requested.
  190. self.module: State | None = None
  191. def parse_location(self, location: str) -> tuple[str, list[int]]:
  192. if location.count(":") not in [2, 4]:
  193. raise ValueError("Format should be file:line:column[:end_line:end_column]")
  194. parts = location.split(":")
  195. module, *rest = parts
  196. return module, [int(p) for p in rest]
  197. def reload_module(self, state: State) -> None:
  198. """Reload given module while temporary exporting types."""
  199. old = self.fg_manager.manager.options.export_types
  200. self.fg_manager.manager.options.export_types = True
  201. try:
  202. self.fg_manager.flush_cache()
  203. assert state.path is not None
  204. self.fg_manager.update([(state.id, state.path)], [])
  205. finally:
  206. self.fg_manager.manager.options.export_types = old
  207. def expr_type(self, expression: Expression) -> tuple[str, bool]:
  208. """Format type for an expression using current options.
  209. If type is known, second item returned is True. If type is not known, an error
  210. message is returned instead, and second item returned is False.
  211. """
  212. expr_type = self.fg_manager.manager.all_types.get(expression)
  213. if expr_type is None:
  214. return self.missing_type(expression), False
  215. type_str = format_type(
  216. expr_type, self.fg_manager.manager.options, verbosity=self.verbosity
  217. )
  218. return self.add_prefixes(type_str, expression), True
  219. def object_type(self) -> Instance:
  220. builtins = self.fg_manager.graph["builtins"].tree
  221. assert builtins is not None
  222. object_node = builtins.names["object"].node
  223. assert isinstance(object_node, TypeInfo)
  224. return Instance(object_node, [])
  225. def collect_attrs(self, instances: list[Instance]) -> dict[TypeInfo, list[str]]:
  226. """Collect attributes from all union/typevar variants."""
  227. def item_attrs(attr_dict: dict[TypeInfo, list[str]]) -> set[str]:
  228. attrs = set()
  229. for base in attr_dict:
  230. attrs |= set(attr_dict[base])
  231. return attrs
  232. def cmp_types(x: TypeInfo, y: TypeInfo) -> int:
  233. if x in y.mro:
  234. return 1
  235. if y in x.mro:
  236. return -1
  237. return 0
  238. # First gather all attributes for every union variant.
  239. assert instances
  240. all_attrs = []
  241. for instance in instances:
  242. attrs = {}
  243. mro = instance.type.mro
  244. if not self.include_object_attrs:
  245. mro = mro[:-1]
  246. for base in mro:
  247. attrs[base] = sorted(base.names)
  248. all_attrs.append(attrs)
  249. # Find attributes valid for all variants in a union or type variable.
  250. intersection = item_attrs(all_attrs[0])
  251. for item in all_attrs[1:]:
  252. intersection &= item_attrs(item)
  253. # Combine attributes from all variants into a single dict while
  254. # also removing invalid attributes (unless using --union-attrs).
  255. combined_attrs = defaultdict(list)
  256. for item in all_attrs:
  257. for base in item:
  258. if base in combined_attrs:
  259. continue
  260. for name in item[base]:
  261. if self.union_attrs or name in intersection:
  262. combined_attrs[base].append(name)
  263. # Sort bases by MRO, unrelated will appear in the order they appeared as union variants.
  264. sorted_bases = sorted(combined_attrs.keys(), key=cmp_to_key(cmp_types))
  265. result = {}
  266. for base in sorted_bases:
  267. if not combined_attrs[base]:
  268. # Skip bases where everytihng was filtered out.
  269. continue
  270. result[base] = combined_attrs[base]
  271. return result
  272. def _fill_from_dict(
  273. self, attrs_strs: list[str], attrs_dict: dict[TypeInfo, list[str]]
  274. ) -> None:
  275. for base in attrs_dict:
  276. cls_name = base.name if self.verbosity < 1 else base.fullname
  277. attrs = [f'"{attr}"' for attr in attrs_dict[base]]
  278. attrs_strs.append(f'"{cls_name}": [{", ".join(attrs)}]')
  279. def expr_attrs(self, expression: Expression) -> tuple[str, bool]:
  280. """Format attributes that are valid for a given expression.
  281. If expression type is not an Instance, try using fallback. Attributes are
  282. returned as a JSON (ordered by MRO) that maps base class name to list of
  283. attributes. Attributes may appear in multiple bases if overridden (we simply
  284. follow usual mypy logic for creating new Vars etc).
  285. """
  286. expr_type = self.fg_manager.manager.all_types.get(expression)
  287. if expr_type is None:
  288. return self.missing_type(expression), False
  289. expr_type = get_proper_type(expr_type)
  290. instances = get_instance_fallback(expr_type)
  291. if not instances:
  292. # Everything is an object in Python.
  293. instances = [self.object_type()]
  294. attrs_dict = self.collect_attrs(instances)
  295. # Special case: modules have names apart from those from ModuleType.
  296. if isinstance(expression, RefExpr) and isinstance(expression.node, MypyFile):
  297. node = expression.node
  298. names = sorted(node.names)
  299. if "__builtins__" in names:
  300. # This is just to make tests stable. No one will really need ths name.
  301. names.remove("__builtins__")
  302. mod_dict = {f'"<{node.fullname}>"': [f'"{name}"' for name in names]}
  303. else:
  304. mod_dict = {}
  305. # Special case: for class callables, prepend with the class attributes.
  306. # TODO: also handle cases when such callable appears in a union.
  307. if isinstance(expr_type, FunctionLike) and expr_type.is_type_obj():
  308. template = fill_typevars_with_any(expr_type.type_object())
  309. class_dict = self.collect_attrs(get_instance_fallback(template))
  310. else:
  311. class_dict = {}
  312. # We don't use JSON dump to be sure keys order is always preserved.
  313. base_attrs = []
  314. if mod_dict:
  315. for mod in mod_dict:
  316. base_attrs.append(f'{mod}: [{", ".join(mod_dict[mod])}]')
  317. self._fill_from_dict(base_attrs, class_dict)
  318. self._fill_from_dict(base_attrs, attrs_dict)
  319. return self.add_prefixes(f'{{{", ".join(base_attrs)}}}', expression), True
  320. def format_node(self, module: State, node: FuncBase | SymbolNode) -> str:
  321. return f"{module.path}:{node.line}:{node.column + 1}:{node.name}"
  322. def collect_nodes(self, expression: RefExpr) -> list[FuncBase | SymbolNode]:
  323. """Collect nodes that can be referred to by an expression.
  324. Note: it can be more than one for example in case of a union attribute.
  325. """
  326. node: FuncBase | SymbolNode | None = expression.node
  327. nodes: list[FuncBase | SymbolNode]
  328. if node is None:
  329. # Tricky case: instance attribute
  330. if isinstance(expression, MemberExpr) and expression.kind is None:
  331. base_type = self.fg_manager.manager.all_types.get(expression.expr)
  332. if base_type is None:
  333. return []
  334. # Now we use the base type to figure out where the attribute is defined.
  335. base_type = get_proper_type(base_type)
  336. instances = get_instance_fallback(base_type)
  337. nodes = []
  338. for instance in instances:
  339. node = find_node(expression.name, instance.type)
  340. if node:
  341. nodes.append(node)
  342. if not nodes:
  343. # Try checking class namespace if attribute is on a class object.
  344. if isinstance(base_type, FunctionLike) and base_type.is_type_obj():
  345. instances = get_instance_fallback(
  346. fill_typevars_with_any(base_type.type_object())
  347. )
  348. for instance in instances:
  349. node = find_node(expression.name, instance.type)
  350. if node:
  351. nodes.append(node)
  352. else:
  353. # Still no luck, give up.
  354. return []
  355. else:
  356. return []
  357. else:
  358. # Easy case: a module-level definition
  359. nodes = [node]
  360. return nodes
  361. def modules_for_nodes(
  362. self, nodes: list[FuncBase | SymbolNode], expression: RefExpr
  363. ) -> tuple[dict[FuncBase | SymbolNode, State], bool]:
  364. """Gather modules where given nodes where defined.
  365. Also check if they need to be refreshed (cached nodes may have
  366. lines/columns missing).
  367. """
  368. modules = {}
  369. reload_needed = False
  370. for node in nodes:
  371. module = find_module_by_fullname(node.fullname, self.fg_manager.graph)
  372. if not module:
  373. if expression.kind == LDEF and self.module:
  374. module = self.module
  375. else:
  376. continue
  377. modules[node] = module
  378. if not module.tree or module.tree.is_cache_skeleton or self.force_reload:
  379. reload_needed |= not module.tree or module.tree.is_cache_skeleton
  380. self.reload_module(module)
  381. return modules, reload_needed
  382. def expression_def(self, expression: Expression) -> tuple[str, bool]:
  383. """Find and format definition location for an expression.
  384. If it is not a RefExpr, it is effectively skipped by returning an
  385. empty result.
  386. """
  387. if not isinstance(expression, RefExpr):
  388. # If there are no suitable matches at all, we return error later.
  389. return "", True
  390. nodes = self.collect_nodes(expression)
  391. if not nodes:
  392. return self.missing_node(expression), False
  393. modules, reload_needed = self.modules_for_nodes(nodes, expression)
  394. if reload_needed:
  395. # TODO: line/column are not stored in cache for vast majority of symbol nodes.
  396. # Adding them will make thing faster, but will have visible memory impact.
  397. nodes = self.collect_nodes(expression)
  398. modules, reload_needed = self.modules_for_nodes(nodes, expression)
  399. assert not reload_needed
  400. result = []
  401. for node in modules:
  402. result.append(self.format_node(modules[node], node))
  403. if not result:
  404. return self.missing_node(expression), False
  405. return self.add_prefixes(", ".join(result), expression), True
  406. def missing_type(self, expression: Expression) -> str:
  407. alt_suggestion = ""
  408. if not self.force_reload:
  409. alt_suggestion = " or try --force-reload"
  410. return (
  411. f'No known type available for "{type(expression).__name__}"'
  412. f" (maybe unreachable{alt_suggestion})"
  413. )
  414. def missing_node(self, expression: Expression) -> str:
  415. return (
  416. f'Cannot find definition for "{type(expression).__name__}"'
  417. f" at {expr_span(expression)}"
  418. )
  419. def add_prefixes(self, result: str, expression: Expression) -> str:
  420. prefixes = []
  421. if self.include_kind:
  422. prefixes.append(f"{type(expression).__name__}")
  423. if self.include_span:
  424. prefixes.append(expr_span(expression))
  425. if prefixes:
  426. prefix = ":".join(prefixes) + " -> "
  427. else:
  428. prefix = ""
  429. return prefix + result
  430. def run_inspection_by_exact_location(
  431. self,
  432. tree: MypyFile,
  433. line: int,
  434. column: int,
  435. end_line: int,
  436. end_column: int,
  437. method: Callable[[Expression], tuple[str, bool]],
  438. ) -> dict[str, object]:
  439. """Get type of an expression matching a span.
  440. Type or error is returned as a standard daemon response dict.
  441. """
  442. try:
  443. expression = find_by_location(tree, line, column - 1, end_line, end_column)
  444. except ValueError as err:
  445. return {"error": str(err)}
  446. if expression is None:
  447. span = f"{line}:{column}:{end_line}:{end_column}"
  448. return {"out": f"Can't find expression at span {span}", "err": "", "status": 1}
  449. inspection_str, success = method(expression)
  450. return {"out": inspection_str, "err": "", "status": 0 if success else 1}
  451. def run_inspection_by_position(
  452. self,
  453. tree: MypyFile,
  454. line: int,
  455. column: int,
  456. method: Callable[[Expression], tuple[str, bool]],
  457. ) -> dict[str, object]:
  458. """Get types of all expressions enclosing a position.
  459. Types and/or errors are returned as a standard daemon response dict.
  460. """
  461. expressions = find_all_by_location(tree, line, column - 1)
  462. if not expressions:
  463. position = f"{line}:{column}"
  464. return {
  465. "out": f"Can't find any expressions at position {position}",
  466. "err": "",
  467. "status": 1,
  468. }
  469. inspection_strs = []
  470. status = 0
  471. for expression in expressions:
  472. inspection_str, success = method(expression)
  473. if not success:
  474. status = 1
  475. if inspection_str:
  476. inspection_strs.append(inspection_str)
  477. if self.limit:
  478. inspection_strs = inspection_strs[: self.limit]
  479. return {"out": "\n".join(inspection_strs), "err": "", "status": status}
  480. def find_module(self, file: str) -> tuple[State | None, dict[str, object]]:
  481. """Find module by path, or return a suitable error message.
  482. Note we don't use exceptions to simplify handling 1 vs 2 statuses.
  483. """
  484. if not any(file.endswith(ext) for ext in PYTHON_EXTENSIONS):
  485. return None, {"error": "Source file is not a Python file"}
  486. try:
  487. module, _ = self.finder.crawl_up(os.path.normpath(file))
  488. except InvalidSourceList:
  489. return None, {"error": "Invalid source file name: " + file}
  490. state = self.fg_manager.graph.get(module)
  491. self.module = state
  492. return (
  493. state,
  494. {"out": f"Unknown module: {module}", "err": "", "status": 1} if state is None else {},
  495. )
  496. def run_inspection(
  497. self, location: str, method: Callable[[Expression], tuple[str, bool]]
  498. ) -> dict[str, object]:
  499. """Top-level logic to inspect expression(s) at a location.
  500. This can be re-used by various simple inspections.
  501. """
  502. try:
  503. file, pos = self.parse_location(location)
  504. except ValueError as err:
  505. return {"error": str(err)}
  506. state, err_dict = self.find_module(file)
  507. if state is None:
  508. assert err_dict
  509. return err_dict
  510. # Force reloading to load from cache, account for any edits, etc.
  511. if not state.tree or state.tree.is_cache_skeleton or self.force_reload:
  512. self.reload_module(state)
  513. assert state.tree is not None
  514. if len(pos) == 4:
  515. # Full span, return an exact match only.
  516. line, column, end_line, end_column = pos
  517. return self.run_inspection_by_exact_location(
  518. state.tree, line, column, end_line, end_column, method
  519. )
  520. assert len(pos) == 2
  521. # Inexact location, return all expressions.
  522. line, column = pos
  523. return self.run_inspection_by_position(state.tree, line, column, method)
  524. def get_type(self, location: str) -> dict[str, object]:
  525. """Get types of expression(s) at a location."""
  526. return self.run_inspection(location, self.expr_type)
  527. def get_attrs(self, location: str) -> dict[str, object]:
  528. """Get attributes of expression(s) at a location."""
  529. return self.run_inspection(location, self.expr_attrs)
  530. def get_definition(self, location: str) -> dict[str, object]:
  531. """Get symbol definitions of expression(s) at a location."""
  532. result = self.run_inspection(location, self.expression_def)
  533. if "out" in result and not result["out"]:
  534. # None of the expressions found turns out to be a RefExpr.
  535. _, location = location.split(":", maxsplit=1)
  536. result["out"] = f"No name or member expressions at {location}"
  537. result["status"] = 1
  538. return result