# stats.py (16 KB)
  1. """Utilities for calculating and reporting statistics about types."""
  2. from __future__ import annotations
  3. import os
  4. from collections import Counter
  5. from contextlib import contextmanager
  6. from typing import Iterator
  7. from typing_extensions import Final
  8. from mypy import nodes
  9. from mypy.argmap import map_formals_to_actuals
  10. from mypy.nodes import (
  11. AssignmentExpr,
  12. AssignmentStmt,
  13. BreakStmt,
  14. BytesExpr,
  15. CallExpr,
  16. ClassDef,
  17. ComparisonExpr,
  18. ComplexExpr,
  19. ContinueStmt,
  20. EllipsisExpr,
  21. Expression,
  22. ExpressionStmt,
  23. FloatExpr,
  24. FuncDef,
  25. Import,
  26. ImportAll,
  27. ImportFrom,
  28. IndexExpr,
  29. IntExpr,
  30. MemberExpr,
  31. MypyFile,
  32. NameExpr,
  33. Node,
  34. OpExpr,
  35. PassStmt,
  36. RefExpr,
  37. StrExpr,
  38. TypeApplication,
  39. UnaryExpr,
  40. YieldFromExpr,
  41. )
  42. from mypy.traverser import TraverserVisitor
  43. from mypy.typeanal import collect_all_inner_types
  44. from mypy.types import (
  45. AnyType,
  46. CallableType,
  47. FunctionLike,
  48. Instance,
  49. TupleType,
  50. Type,
  51. TypeOfAny,
  52. TypeQuery,
  53. TypeVarType,
  54. get_proper_type,
  55. get_proper_types,
  56. )
  57. from mypy.util import correct_relative_import
# Precision levels assigned to a source line. The numeric order is significant:
# StatisticsVisitor.record_line keeps the maximum level recorded for a line.
TYPE_EMPTY: Final = 0
TYPE_UNANALYZED: Final = 1  # type of non-typechecked code
TYPE_PRECISE: Final = 2
TYPE_IMPRECISE: Final = 3
TYPE_ANY: Final = 4

# Human-readable names, listed in the same order as the TYPE_* values above.
precision_names: Final = ["empty", "unanalyzed", "precise", "imprecise", "any"]
  64. class StatisticsVisitor(TraverserVisitor):
  65. def __init__(
  66. self,
  67. inferred: bool,
  68. filename: str,
  69. modules: dict[str, MypyFile],
  70. typemap: dict[Expression, Type] | None = None,
  71. all_nodes: bool = False,
  72. visit_untyped_defs: bool = True,
  73. ) -> None:
  74. self.inferred = inferred
  75. self.filename = filename
  76. self.modules = modules
  77. self.typemap = typemap
  78. self.all_nodes = all_nodes
  79. self.visit_untyped_defs = visit_untyped_defs
  80. self.num_precise_exprs = 0
  81. self.num_imprecise_exprs = 0
  82. self.num_any_exprs = 0
  83. self.num_simple_types = 0
  84. self.num_generic_types = 0
  85. self.num_tuple_types = 0
  86. self.num_function_types = 0
  87. self.num_typevar_types = 0
  88. self.num_complex_types = 0
  89. self.num_any_types = 0
  90. self.line = -1
  91. self.line_map: dict[int, int] = {}
  92. self.type_of_any_counter: Counter[int] = Counter()
  93. self.any_line_map: dict[int, list[AnyType]] = {}
  94. # For each scope (top level/function), whether the scope was type checked
  95. # (annotated function).
  96. #
  97. # TODO: Handle --check-untyped-defs
  98. self.checked_scopes = [True]
  99. self.output: list[str] = []
  100. TraverserVisitor.__init__(self)
  101. def visit_mypy_file(self, o: MypyFile) -> None:
  102. self.cur_mod_node = o
  103. self.cur_mod_id = o.fullname
  104. super().visit_mypy_file(o)
  105. def visit_import_from(self, imp: ImportFrom) -> None:
  106. self.process_import(imp)
  107. def visit_import_all(self, imp: ImportAll) -> None:
  108. self.process_import(imp)
  109. def process_import(self, imp: ImportFrom | ImportAll) -> None:
  110. import_id, ok = correct_relative_import(
  111. self.cur_mod_id, imp.relative, imp.id, self.cur_mod_node.is_package_init_file()
  112. )
  113. if ok and import_id in self.modules:
  114. kind = TYPE_PRECISE
  115. else:
  116. kind = TYPE_ANY
  117. self.record_line(imp.line, kind)
  118. def visit_import(self, imp: Import) -> None:
  119. if all(id in self.modules for id, _ in imp.ids):
  120. kind = TYPE_PRECISE
  121. else:
  122. kind = TYPE_ANY
  123. self.record_line(imp.line, kind)
  124. def visit_func_def(self, o: FuncDef) -> None:
  125. with self.enter_scope(o):
  126. self.line = o.line
  127. if len(o.expanded) > 1 and o.expanded != [o] * len(o.expanded):
  128. if o in o.expanded:
  129. print(
  130. "{}:{}: ERROR: cycle in function expansion; skipping".format(
  131. self.filename, o.line
  132. )
  133. )
  134. return
  135. for defn in o.expanded:
  136. assert isinstance(defn, FuncDef)
  137. self.visit_func_def(defn)
  138. else:
  139. if o.type:
  140. assert isinstance(o.type, CallableType)
  141. sig = o.type
  142. arg_types = sig.arg_types
  143. if sig.arg_names and sig.arg_names[0] == "self" and not self.inferred:
  144. arg_types = arg_types[1:]
  145. for arg in arg_types:
  146. self.type(arg)
  147. self.type(sig.ret_type)
  148. elif self.all_nodes:
  149. self.record_line(self.line, TYPE_ANY)
  150. if not o.is_dynamic() or self.visit_untyped_defs:
  151. super().visit_func_def(o)
  152. @contextmanager
  153. def enter_scope(self, o: FuncDef) -> Iterator[None]:
  154. self.checked_scopes.append(o.type is not None and self.checked_scopes[-1])
  155. yield None
  156. self.checked_scopes.pop()
  157. def is_checked_scope(self) -> bool:
  158. return self.checked_scopes[-1]
  159. def visit_class_def(self, o: ClassDef) -> None:
  160. self.record_line(o.line, TYPE_PRECISE) # TODO: Look at base classes
  161. # Override this method because we don't want to analyze base_type_exprs (base_type_exprs
  162. # are base classes in a class declaration).
  163. # While base_type_exprs are technically expressions, type analyzer does not visit them and
  164. # they are not in the typemap.
  165. for d in o.decorators:
  166. d.accept(self)
  167. o.defs.accept(self)
  168. def visit_type_application(self, o: TypeApplication) -> None:
  169. self.line = o.line
  170. for t in o.types:
  171. self.type(t)
  172. super().visit_type_application(o)
  173. def visit_assignment_stmt(self, o: AssignmentStmt) -> None:
  174. self.line = o.line
  175. if isinstance(o.rvalue, nodes.CallExpr) and isinstance(
  176. o.rvalue.analyzed, nodes.TypeVarExpr
  177. ):
  178. # Type variable definition -- not a real assignment.
  179. return
  180. if o.type:
  181. self.type(o.type)
  182. elif self.inferred and not self.all_nodes:
  183. # if self.all_nodes is set, lvalues will be visited later
  184. for lvalue in o.lvalues:
  185. if isinstance(lvalue, nodes.TupleExpr):
  186. items = lvalue.items
  187. else:
  188. items = [lvalue]
  189. for item in items:
  190. if isinstance(item, RefExpr) and item.is_inferred_def:
  191. if self.typemap is not None:
  192. self.type(self.typemap.get(item))
  193. super().visit_assignment_stmt(o)
  194. def visit_expression_stmt(self, o: ExpressionStmt) -> None:
  195. if isinstance(o.expr, (StrExpr, BytesExpr)):
  196. # Docstring
  197. self.record_line(o.line, TYPE_EMPTY)
  198. else:
  199. super().visit_expression_stmt(o)
  200. def visit_pass_stmt(self, o: PassStmt) -> None:
  201. self.record_precise_if_checked_scope(o)
  202. def visit_break_stmt(self, o: BreakStmt) -> None:
  203. self.record_precise_if_checked_scope(o)
  204. def visit_continue_stmt(self, o: ContinueStmt) -> None:
  205. self.record_precise_if_checked_scope(o)
  206. def visit_name_expr(self, o: NameExpr) -> None:
  207. if o.fullname in ("builtins.None", "builtins.True", "builtins.False", "builtins.Ellipsis"):
  208. self.record_precise_if_checked_scope(o)
  209. else:
  210. self.process_node(o)
  211. super().visit_name_expr(o)
  212. def visit_yield_from_expr(self, o: YieldFromExpr) -> None:
  213. if o.expr:
  214. o.expr.accept(self)
  215. def visit_call_expr(self, o: CallExpr) -> None:
  216. self.process_node(o)
  217. if o.analyzed:
  218. o.analyzed.accept(self)
  219. else:
  220. o.callee.accept(self)
  221. for a in o.args:
  222. a.accept(self)
  223. self.record_call_target_precision(o)
  224. def record_call_target_precision(self, o: CallExpr) -> None:
  225. """Record precision of formal argument types used in a call."""
  226. if not self.typemap or o.callee not in self.typemap:
  227. # Type not available.
  228. return
  229. callee_type = get_proper_type(self.typemap[o.callee])
  230. if isinstance(callee_type, CallableType):
  231. self.record_callable_target_precision(o, callee_type)
  232. else:
  233. pass # TODO: Handle overloaded functions, etc.
  234. def record_callable_target_precision(self, o: CallExpr, callee: CallableType) -> None:
  235. """Record imprecision caused by callee argument types.
  236. This only considers arguments passed in a call expression. Arguments
  237. with default values that aren't provided in a call arguably don't
  238. contribute to typing imprecision at the *call site* (but they
  239. contribute at the function definition).
  240. """
  241. assert self.typemap
  242. typemap = self.typemap
  243. actual_to_formal = map_formals_to_actuals(
  244. o.arg_kinds,
  245. o.arg_names,
  246. callee.arg_kinds,
  247. callee.arg_names,
  248. lambda n: typemap[o.args[n]],
  249. )
  250. for formals in actual_to_formal:
  251. for n in formals:
  252. formal = get_proper_type(callee.arg_types[n])
  253. if isinstance(formal, AnyType):
  254. self.record_line(o.line, TYPE_ANY)
  255. elif is_imprecise(formal):
  256. self.record_line(o.line, TYPE_IMPRECISE)
  257. def visit_member_expr(self, o: MemberExpr) -> None:
  258. self.process_node(o)
  259. super().visit_member_expr(o)
  260. def visit_op_expr(self, o: OpExpr) -> None:
  261. self.process_node(o)
  262. super().visit_op_expr(o)
  263. def visit_comparison_expr(self, o: ComparisonExpr) -> None:
  264. self.process_node(o)
  265. super().visit_comparison_expr(o)
  266. def visit_index_expr(self, o: IndexExpr) -> None:
  267. self.process_node(o)
  268. super().visit_index_expr(o)
  269. def visit_assignment_expr(self, o: AssignmentExpr) -> None:
  270. self.process_node(o)
  271. super().visit_assignment_expr(o)
  272. def visit_unary_expr(self, o: UnaryExpr) -> None:
  273. self.process_node(o)
  274. super().visit_unary_expr(o)
  275. def visit_str_expr(self, o: StrExpr) -> None:
  276. self.record_precise_if_checked_scope(o)
  277. def visit_bytes_expr(self, o: BytesExpr) -> None:
  278. self.record_precise_if_checked_scope(o)
  279. def visit_int_expr(self, o: IntExpr) -> None:
  280. self.record_precise_if_checked_scope(o)
  281. def visit_float_expr(self, o: FloatExpr) -> None:
  282. self.record_precise_if_checked_scope(o)
  283. def visit_complex_expr(self, o: ComplexExpr) -> None:
  284. self.record_precise_if_checked_scope(o)
  285. def visit_ellipsis(self, o: EllipsisExpr) -> None:
  286. self.record_precise_if_checked_scope(o)
  287. # Helpers
  288. def process_node(self, node: Expression) -> None:
  289. if self.all_nodes:
  290. if self.typemap is not None:
  291. self.line = node.line
  292. self.type(self.typemap.get(node))
  293. def record_precise_if_checked_scope(self, node: Node) -> None:
  294. if isinstance(node, Expression) and self.typemap and node not in self.typemap:
  295. kind = TYPE_UNANALYZED
  296. elif self.is_checked_scope():
  297. kind = TYPE_PRECISE
  298. else:
  299. kind = TYPE_ANY
  300. self.record_line(node.line, kind)
  301. def type(self, t: Type | None) -> None:
  302. t = get_proper_type(t)
  303. if not t:
  304. # If an expression does not have a type, it is often due to dead code.
  305. # Don't count these because there can be an unanalyzed value on a line with other
  306. # analyzed expressions, which overwrite the TYPE_UNANALYZED.
  307. self.record_line(self.line, TYPE_UNANALYZED)
  308. return
  309. if isinstance(t, AnyType) and is_special_form_any(t):
  310. # TODO: What if there is an error in special form definition?
  311. self.record_line(self.line, TYPE_PRECISE)
  312. return
  313. if isinstance(t, AnyType):
  314. self.log(" !! Any type around line %d" % self.line)
  315. self.num_any_exprs += 1
  316. self.record_line(self.line, TYPE_ANY)
  317. elif (not self.all_nodes and is_imprecise(t)) or (self.all_nodes and is_imprecise2(t)):
  318. self.log(" !! Imprecise type around line %d" % self.line)
  319. self.num_imprecise_exprs += 1
  320. self.record_line(self.line, TYPE_IMPRECISE)
  321. else:
  322. self.num_precise_exprs += 1
  323. self.record_line(self.line, TYPE_PRECISE)
  324. for typ in get_proper_types(collect_all_inner_types(t)) + [t]:
  325. if isinstance(typ, AnyType):
  326. typ = get_original_any(typ)
  327. if is_special_form_any(typ):
  328. continue
  329. self.type_of_any_counter[typ.type_of_any] += 1
  330. self.num_any_types += 1
  331. if self.line in self.any_line_map:
  332. self.any_line_map[self.line].append(typ)
  333. else:
  334. self.any_line_map[self.line] = [typ]
  335. elif isinstance(typ, Instance):
  336. if typ.args:
  337. if any(is_complex(arg) for arg in typ.args):
  338. self.num_complex_types += 1
  339. else:
  340. self.num_generic_types += 1
  341. else:
  342. self.num_simple_types += 1
  343. elif isinstance(typ, FunctionLike):
  344. self.num_function_types += 1
  345. elif isinstance(typ, TupleType):
  346. if any(is_complex(item) for item in typ.items):
  347. self.num_complex_types += 1
  348. else:
  349. self.num_tuple_types += 1
  350. elif isinstance(typ, TypeVarType):
  351. self.num_typevar_types += 1
  352. def log(self, string: str) -> None:
  353. self.output.append(string)
  354. def record_line(self, line: int, precision: int) -> None:
  355. self.line_map[line] = max(precision, self.line_map.get(line, TYPE_EMPTY))
  356. def dump_type_stats(
  357. tree: MypyFile,
  358. path: str,
  359. modules: dict[str, MypyFile],
  360. inferred: bool = False,
  361. typemap: dict[Expression, Type] | None = None,
  362. ) -> None:
  363. if is_special_module(path):
  364. return
  365. print(path)
  366. visitor = StatisticsVisitor(inferred, filename=tree.fullname, modules=modules, typemap=typemap)
  367. tree.accept(visitor)
  368. for line in visitor.output:
  369. print(line)
  370. print(" ** precision **")
  371. print(" precise ", visitor.num_precise_exprs)
  372. print(" imprecise", visitor.num_imprecise_exprs)
  373. print(" any ", visitor.num_any_exprs)
  374. print(" ** kinds **")
  375. print(" simple ", visitor.num_simple_types)
  376. print(" generic ", visitor.num_generic_types)
  377. print(" function ", visitor.num_function_types)
  378. print(" tuple ", visitor.num_tuple_types)
  379. print(" TypeVar ", visitor.num_typevar_types)
  380. print(" complex ", visitor.num_complex_types)
  381. print(" any ", visitor.num_any_types)
  382. def is_special_module(path: str) -> bool:
  383. return os.path.basename(path) in ("abc.pyi", "typing.pyi", "builtins.pyi")
  384. def is_imprecise(t: Type) -> bool:
  385. return t.accept(HasAnyQuery())
class HasAnyQuery(TypeQuery[bool]):
    """Boolean type query that answers true for Any types that are not special forms."""

    def __init__(self) -> None:
        # The builtin any() is handed to TypeQuery as its strategy
        # (presumably used to combine results of component types -- confirm
        # against TypeQuery).
        super().__init__(any)

    def visit_any(self, t: AnyType) -> bool:
        # A special-form Any does not count as a hit.
        return not is_special_form_any(t)
  391. def is_imprecise2(t: Type) -> bool:
  392. return t.accept(HasAnyQuery2())
class HasAnyQuery2(HasAnyQuery):
    """Variant of HasAnyQuery whose answer for any callable type is always false."""

    def visit_callable_type(self, t: CallableType) -> bool:
        # We don't want to flag references to functions with some Any
        # argument types (etc.) since they generally don't mean trouble.
        return False
  398. def is_generic(t: Type) -> bool:
  399. t = get_proper_type(t)
  400. return isinstance(t, Instance) and bool(t.args)
  401. def is_complex(t: Type) -> bool:
  402. t = get_proper_type(t)
  403. return is_generic(t) or isinstance(t, (FunctionLike, TupleType, TypeVarType))
  404. def ensure_dir_exists(dir: str) -> None:
  405. if not os.path.exists(dir):
  406. os.makedirs(dir)
  407. def is_special_form_any(t: AnyType) -> bool:
  408. return get_original_any(t).type_of_any == TypeOfAny.special_form
  409. def get_original_any(t: AnyType) -> AnyType:
  410. if t.type_of_any == TypeOfAny.from_another_any:
  411. assert t.source_any
  412. assert t.source_any.type_of_any != TypeOfAny.from_another_any
  413. t = t.source_any
  414. return t