stats.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. """Utilities for calculating and reporting statistics about types."""
  2. from __future__ import annotations
  3. import os
  4. from collections import Counter
  5. from contextlib import contextmanager
  6. from typing import Final, Iterator
  7. from mypy import nodes
  8. from mypy.argmap import map_formals_to_actuals
  9. from mypy.nodes import (
  10. AssignmentExpr,
  11. AssignmentStmt,
  12. BreakStmt,
  13. BytesExpr,
  14. CallExpr,
  15. ClassDef,
  16. ComparisonExpr,
  17. ComplexExpr,
  18. ContinueStmt,
  19. EllipsisExpr,
  20. Expression,
  21. ExpressionStmt,
  22. FloatExpr,
  23. FuncDef,
  24. Import,
  25. ImportAll,
  26. ImportFrom,
  27. IndexExpr,
  28. IntExpr,
  29. MemberExpr,
  30. MypyFile,
  31. NameExpr,
  32. Node,
  33. OpExpr,
  34. PassStmt,
  35. RefExpr,
  36. StrExpr,
  37. TypeApplication,
  38. UnaryExpr,
  39. YieldFromExpr,
  40. )
  41. from mypy.traverser import TraverserVisitor
  42. from mypy.typeanal import collect_all_inner_types
  43. from mypy.types import (
  44. AnyType,
  45. CallableType,
  46. FunctionLike,
  47. Instance,
  48. TupleType,
  49. Type,
  50. TypeOfAny,
  51. TypeQuery,
  52. TypeVarType,
  53. get_proper_type,
  54. get_proper_types,
  55. )
  56. from mypy.util import correct_relative_import
# Per-line precision levels. The numeric order matters: StatisticsVisitor.record_line()
# keeps the maximum level recorded for a line, so a larger value overrides a smaller one.
TYPE_EMPTY: Final = 0
TYPE_UNANALYZED: Final = 1  # type of non-typechecked code
TYPE_PRECISE: Final = 2
TYPE_IMPRECISE: Final = 3
TYPE_ANY: Final = 4

# Names listed in the same order as the TYPE_* values above, so index i is the
# name of precision level i.
precision_names: Final = ["empty", "unanalyzed", "precise", "imprecise", "any"]
  63. class StatisticsVisitor(TraverserVisitor):
  64. def __init__(
  65. self,
  66. inferred: bool,
  67. filename: str,
  68. modules: dict[str, MypyFile],
  69. typemap: dict[Expression, Type] | None = None,
  70. all_nodes: bool = False,
  71. visit_untyped_defs: bool = True,
  72. ) -> None:
  73. self.inferred = inferred
  74. self.filename = filename
  75. self.modules = modules
  76. self.typemap = typemap
  77. self.all_nodes = all_nodes
  78. self.visit_untyped_defs = visit_untyped_defs
  79. self.num_precise_exprs = 0
  80. self.num_imprecise_exprs = 0
  81. self.num_any_exprs = 0
  82. self.num_simple_types = 0
  83. self.num_generic_types = 0
  84. self.num_tuple_types = 0
  85. self.num_function_types = 0
  86. self.num_typevar_types = 0
  87. self.num_complex_types = 0
  88. self.num_any_types = 0
  89. self.line = -1
  90. self.line_map: dict[int, int] = {}
  91. self.type_of_any_counter: Counter[int] = Counter()
  92. self.any_line_map: dict[int, list[AnyType]] = {}
  93. # For each scope (top level/function), whether the scope was type checked
  94. # (annotated function).
  95. #
  96. # TODO: Handle --check-untyped-defs
  97. self.checked_scopes = [True]
  98. self.output: list[str] = []
  99. TraverserVisitor.__init__(self)
  100. def visit_mypy_file(self, o: MypyFile) -> None:
  101. self.cur_mod_node = o
  102. self.cur_mod_id = o.fullname
  103. super().visit_mypy_file(o)
  104. def visit_import_from(self, imp: ImportFrom) -> None:
  105. self.process_import(imp)
  106. def visit_import_all(self, imp: ImportAll) -> None:
  107. self.process_import(imp)
  108. def process_import(self, imp: ImportFrom | ImportAll) -> None:
  109. import_id, ok = correct_relative_import(
  110. self.cur_mod_id, imp.relative, imp.id, self.cur_mod_node.is_package_init_file()
  111. )
  112. if ok and import_id in self.modules:
  113. kind = TYPE_PRECISE
  114. else:
  115. kind = TYPE_ANY
  116. self.record_line(imp.line, kind)
  117. def visit_import(self, imp: Import) -> None:
  118. if all(id in self.modules for id, _ in imp.ids):
  119. kind = TYPE_PRECISE
  120. else:
  121. kind = TYPE_ANY
  122. self.record_line(imp.line, kind)
  123. def visit_func_def(self, o: FuncDef) -> None:
  124. with self.enter_scope(o):
  125. self.line = o.line
  126. if len(o.expanded) > 1 and o.expanded != [o] * len(o.expanded):
  127. if o in o.expanded:
  128. print(
  129. "{}:{}: ERROR: cycle in function expansion; skipping".format(
  130. self.filename, o.line
  131. )
  132. )
  133. return
  134. for defn in o.expanded:
  135. assert isinstance(defn, FuncDef)
  136. self.visit_func_def(defn)
  137. else:
  138. if o.type:
  139. assert isinstance(o.type, CallableType)
  140. sig = o.type
  141. arg_types = sig.arg_types
  142. if sig.arg_names and sig.arg_names[0] == "self" and not self.inferred:
  143. arg_types = arg_types[1:]
  144. for arg in arg_types:
  145. self.type(arg)
  146. self.type(sig.ret_type)
  147. elif self.all_nodes:
  148. self.record_line(self.line, TYPE_ANY)
  149. if not o.is_dynamic() or self.visit_untyped_defs:
  150. super().visit_func_def(o)
  151. @contextmanager
  152. def enter_scope(self, o: FuncDef) -> Iterator[None]:
  153. self.checked_scopes.append(o.type is not None and self.checked_scopes[-1])
  154. yield None
  155. self.checked_scopes.pop()
  156. def is_checked_scope(self) -> bool:
  157. return self.checked_scopes[-1]
  158. def visit_class_def(self, o: ClassDef) -> None:
  159. self.record_line(o.line, TYPE_PRECISE) # TODO: Look at base classes
  160. # Override this method because we don't want to analyze base_type_exprs (base_type_exprs
  161. # are base classes in a class declaration).
  162. # While base_type_exprs are technically expressions, type analyzer does not visit them and
  163. # they are not in the typemap.
  164. for d in o.decorators:
  165. d.accept(self)
  166. o.defs.accept(self)
  167. def visit_type_application(self, o: TypeApplication) -> None:
  168. self.line = o.line
  169. for t in o.types:
  170. self.type(t)
  171. super().visit_type_application(o)
  172. def visit_assignment_stmt(self, o: AssignmentStmt) -> None:
  173. self.line = o.line
  174. if isinstance(o.rvalue, nodes.CallExpr) and isinstance(
  175. o.rvalue.analyzed, nodes.TypeVarExpr
  176. ):
  177. # Type variable definition -- not a real assignment.
  178. return
  179. if o.type:
  180. self.type(o.type)
  181. elif self.inferred and not self.all_nodes:
  182. # if self.all_nodes is set, lvalues will be visited later
  183. for lvalue in o.lvalues:
  184. if isinstance(lvalue, nodes.TupleExpr):
  185. items = lvalue.items
  186. else:
  187. items = [lvalue]
  188. for item in items:
  189. if isinstance(item, RefExpr) and item.is_inferred_def:
  190. if self.typemap is not None:
  191. self.type(self.typemap.get(item))
  192. super().visit_assignment_stmt(o)
  193. def visit_expression_stmt(self, o: ExpressionStmt) -> None:
  194. if isinstance(o.expr, (StrExpr, BytesExpr)):
  195. # Docstring
  196. self.record_line(o.line, TYPE_EMPTY)
  197. else:
  198. super().visit_expression_stmt(o)
  199. def visit_pass_stmt(self, o: PassStmt) -> None:
  200. self.record_precise_if_checked_scope(o)
  201. def visit_break_stmt(self, o: BreakStmt) -> None:
  202. self.record_precise_if_checked_scope(o)
  203. def visit_continue_stmt(self, o: ContinueStmt) -> None:
  204. self.record_precise_if_checked_scope(o)
  205. def visit_name_expr(self, o: NameExpr) -> None:
  206. if o.fullname in ("builtins.None", "builtins.True", "builtins.False", "builtins.Ellipsis"):
  207. self.record_precise_if_checked_scope(o)
  208. else:
  209. self.process_node(o)
  210. super().visit_name_expr(o)
  211. def visit_yield_from_expr(self, o: YieldFromExpr) -> None:
  212. if o.expr:
  213. o.expr.accept(self)
  214. def visit_call_expr(self, o: CallExpr) -> None:
  215. self.process_node(o)
  216. if o.analyzed:
  217. o.analyzed.accept(self)
  218. else:
  219. o.callee.accept(self)
  220. for a in o.args:
  221. a.accept(self)
  222. self.record_call_target_precision(o)
  223. def record_call_target_precision(self, o: CallExpr) -> None:
  224. """Record precision of formal argument types used in a call."""
  225. if not self.typemap or o.callee not in self.typemap:
  226. # Type not available.
  227. return
  228. callee_type = get_proper_type(self.typemap[o.callee])
  229. if isinstance(callee_type, CallableType):
  230. self.record_callable_target_precision(o, callee_type)
  231. else:
  232. pass # TODO: Handle overloaded functions, etc.
  233. def record_callable_target_precision(self, o: CallExpr, callee: CallableType) -> None:
  234. """Record imprecision caused by callee argument types.
  235. This only considers arguments passed in a call expression. Arguments
  236. with default values that aren't provided in a call arguably don't
  237. contribute to typing imprecision at the *call site* (but they
  238. contribute at the function definition).
  239. """
  240. assert self.typemap
  241. typemap = self.typemap
  242. actual_to_formal = map_formals_to_actuals(
  243. o.arg_kinds,
  244. o.arg_names,
  245. callee.arg_kinds,
  246. callee.arg_names,
  247. lambda n: typemap[o.args[n]],
  248. )
  249. for formals in actual_to_formal:
  250. for n in formals:
  251. formal = get_proper_type(callee.arg_types[n])
  252. if isinstance(formal, AnyType):
  253. self.record_line(o.line, TYPE_ANY)
  254. elif is_imprecise(formal):
  255. self.record_line(o.line, TYPE_IMPRECISE)
  256. def visit_member_expr(self, o: MemberExpr) -> None:
  257. self.process_node(o)
  258. super().visit_member_expr(o)
  259. def visit_op_expr(self, o: OpExpr) -> None:
  260. self.process_node(o)
  261. super().visit_op_expr(o)
  262. def visit_comparison_expr(self, o: ComparisonExpr) -> None:
  263. self.process_node(o)
  264. super().visit_comparison_expr(o)
  265. def visit_index_expr(self, o: IndexExpr) -> None:
  266. self.process_node(o)
  267. super().visit_index_expr(o)
  268. def visit_assignment_expr(self, o: AssignmentExpr) -> None:
  269. self.process_node(o)
  270. super().visit_assignment_expr(o)
  271. def visit_unary_expr(self, o: UnaryExpr) -> None:
  272. self.process_node(o)
  273. super().visit_unary_expr(o)
  274. def visit_str_expr(self, o: StrExpr) -> None:
  275. self.record_precise_if_checked_scope(o)
  276. def visit_bytes_expr(self, o: BytesExpr) -> None:
  277. self.record_precise_if_checked_scope(o)
  278. def visit_int_expr(self, o: IntExpr) -> None:
  279. self.record_precise_if_checked_scope(o)
  280. def visit_float_expr(self, o: FloatExpr) -> None:
  281. self.record_precise_if_checked_scope(o)
  282. def visit_complex_expr(self, o: ComplexExpr) -> None:
  283. self.record_precise_if_checked_scope(o)
  284. def visit_ellipsis(self, o: EllipsisExpr) -> None:
  285. self.record_precise_if_checked_scope(o)
  286. # Helpers
  287. def process_node(self, node: Expression) -> None:
  288. if self.all_nodes:
  289. if self.typemap is not None:
  290. self.line = node.line
  291. self.type(self.typemap.get(node))
  292. def record_precise_if_checked_scope(self, node: Node) -> None:
  293. if isinstance(node, Expression) and self.typemap and node not in self.typemap:
  294. kind = TYPE_UNANALYZED
  295. elif self.is_checked_scope():
  296. kind = TYPE_PRECISE
  297. else:
  298. kind = TYPE_ANY
  299. self.record_line(node.line, kind)
  300. def type(self, t: Type | None) -> None:
  301. t = get_proper_type(t)
  302. if not t:
  303. # If an expression does not have a type, it is often due to dead code.
  304. # Don't count these because there can be an unanalyzed value on a line with other
  305. # analyzed expressions, which overwrite the TYPE_UNANALYZED.
  306. self.record_line(self.line, TYPE_UNANALYZED)
  307. return
  308. if isinstance(t, AnyType) and is_special_form_any(t):
  309. # TODO: What if there is an error in special form definition?
  310. self.record_line(self.line, TYPE_PRECISE)
  311. return
  312. if isinstance(t, AnyType):
  313. self.log(" !! Any type around line %d" % self.line)
  314. self.num_any_exprs += 1
  315. self.record_line(self.line, TYPE_ANY)
  316. elif (not self.all_nodes and is_imprecise(t)) or (self.all_nodes and is_imprecise2(t)):
  317. self.log(" !! Imprecise type around line %d" % self.line)
  318. self.num_imprecise_exprs += 1
  319. self.record_line(self.line, TYPE_IMPRECISE)
  320. else:
  321. self.num_precise_exprs += 1
  322. self.record_line(self.line, TYPE_PRECISE)
  323. for typ in get_proper_types(collect_all_inner_types(t)) + [t]:
  324. if isinstance(typ, AnyType):
  325. typ = get_original_any(typ)
  326. if is_special_form_any(typ):
  327. continue
  328. self.type_of_any_counter[typ.type_of_any] += 1
  329. self.num_any_types += 1
  330. if self.line in self.any_line_map:
  331. self.any_line_map[self.line].append(typ)
  332. else:
  333. self.any_line_map[self.line] = [typ]
  334. elif isinstance(typ, Instance):
  335. if typ.args:
  336. if any(is_complex(arg) for arg in typ.args):
  337. self.num_complex_types += 1
  338. else:
  339. self.num_generic_types += 1
  340. else:
  341. self.num_simple_types += 1
  342. elif isinstance(typ, FunctionLike):
  343. self.num_function_types += 1
  344. elif isinstance(typ, TupleType):
  345. if any(is_complex(item) for item in typ.items):
  346. self.num_complex_types += 1
  347. else:
  348. self.num_tuple_types += 1
  349. elif isinstance(typ, TypeVarType):
  350. self.num_typevar_types += 1
  351. def log(self, string: str) -> None:
  352. self.output.append(string)
  353. def record_line(self, line: int, precision: int) -> None:
  354. self.line_map[line] = max(precision, self.line_map.get(line, TYPE_EMPTY))
  355. def dump_type_stats(
  356. tree: MypyFile,
  357. path: str,
  358. modules: dict[str, MypyFile],
  359. inferred: bool = False,
  360. typemap: dict[Expression, Type] | None = None,
  361. ) -> None:
  362. if is_special_module(path):
  363. return
  364. print(path)
  365. visitor = StatisticsVisitor(inferred, filename=tree.fullname, modules=modules, typemap=typemap)
  366. tree.accept(visitor)
  367. for line in visitor.output:
  368. print(line)
  369. print(" ** precision **")
  370. print(" precise ", visitor.num_precise_exprs)
  371. print(" imprecise", visitor.num_imprecise_exprs)
  372. print(" any ", visitor.num_any_exprs)
  373. print(" ** kinds **")
  374. print(" simple ", visitor.num_simple_types)
  375. print(" generic ", visitor.num_generic_types)
  376. print(" function ", visitor.num_function_types)
  377. print(" tuple ", visitor.num_tuple_types)
  378. print(" TypeVar ", visitor.num_typevar_types)
  379. print(" complex ", visitor.num_complex_types)
  380. print(" any ", visitor.num_any_types)
  381. def is_special_module(path: str) -> bool:
  382. return os.path.basename(path) in ("abc.pyi", "typing.pyi", "builtins.pyi")
  383. def is_imprecise(t: Type) -> bool:
  384. return t.accept(HasAnyQuery())
  385. class HasAnyQuery(TypeQuery[bool]):
  386. def __init__(self) -> None:
  387. super().__init__(any)
  388. def visit_any(self, t: AnyType) -> bool:
  389. return not is_special_form_any(t)
  390. def is_imprecise2(t: Type) -> bool:
  391. return t.accept(HasAnyQuery2())
class HasAnyQuery2(HasAnyQuery):
    """Variant of HasAnyQuery that never reports callable types."""

    def visit_callable_type(self, t: CallableType) -> bool:
        """Always return False for a callable type."""
        # We don't want to flag references to functions with some Any
        # argument types (etc.) since they generally don't mean trouble.
        return False
  397. def is_generic(t: Type) -> bool:
  398. t = get_proper_type(t)
  399. return isinstance(t, Instance) and bool(t.args)
  400. def is_complex(t: Type) -> bool:
  401. t = get_proper_type(t)
  402. return is_generic(t) or isinstance(t, (FunctionLike, TupleType, TypeVarType))
  403. def ensure_dir_exists(dir: str) -> None:
  404. if not os.path.exists(dir):
  405. os.makedirs(dir)
  406. def is_special_form_any(t: AnyType) -> bool:
  407. return get_original_any(t).type_of_any == TypeOfAny.special_form
  408. def get_original_any(t: AnyType) -> AnyType:
  409. if t.type_of_any == TypeOfAny.from_another_any:
  410. assert t.source_any
  411. assert t.source_any.type_of_any != TypeOfAny.from_another_any
  412. t = t.source_any
  413. return t