class_ir.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. """Intermediate representation of classes."""
  2. from __future__ import annotations
  3. from typing import List, NamedTuple, Optional
  4. from mypyc.common import PROPSET_PREFIX, JsonDict
  5. from mypyc.ir.func_ir import FuncDecl, FuncIR, FuncSignature
  6. from mypyc.ir.ops import DeserMaps, Value
  7. from mypyc.ir.rtypes import RInstance, RType, deserialize_type
  8. from mypyc.namegen import NameGenerator, exported_name
  9. # Some notes on the vtable layout: Each concrete class has a vtable
  10. # that contains function pointers for its methods. So that subclasses
  11. # may be efficiently used when their parent class is expected, the
  12. # layout of child vtables must be an extension of their base class's
  13. # vtable.
  14. #
  15. # This makes multiple inheritance tricky, since obviously we cannot be
  16. # an extension of multiple parent classes. We solve this by requiring
  17. # all but one parent to be "traits", which we can operate on in a
  18. # somewhat less efficient way. For each trait implemented by a class,
  19. # we generate a separate vtable for the methods in that trait.
  20. # We then store an array of (trait type, trait vtable) pointers alongside
  21. # a class's main vtable. When we want to call a trait method, we
  22. # (at runtime!) search the array of trait vtables to find the correct one,
  23. # then call through it.
  24. # Trait vtables additionally need entries for attribute getters and setters,
  25. # since they can't always be in the same location.
  26. #
  27. # To keep down the number of indirections necessary, we store the
  28. # array of trait vtables in the memory *before* the class vtable, and
  29. # search it backwards. (This is a trick we can only do once---there
  30. # are only two directions to store data in---but I don't think we'll
  31. # need it again.)
  32. # There are some tricks we could try in the future to store the trait
  33. # vtables inline in the trait table (which would cut down one indirection),
  34. # but this seems good enough for now.
  35. #
  36. # As an example:
  37. # Imagine that we have a class B that inherits from a concrete class A
  38. # and traits T1 and T2, and that A has methods foo() and
  39. # bar() and B overrides bar() with a more specific type.
  40. # Then B's vtable will look something like:
  41. #
  42. # T1 type object
  43. # ptr to B's T1 trait vtable
  44. # T2 type object
  45. # ptr to B's T2 trait vtable
  46. # -> | A.foo
  47. # | Glue function that converts between A.bar's type and B.bar
  48. # B.bar
  49. # B.baz
  50. #
  51. # The arrow points to the "start" of the vtable (what vtable pointers
  52. # point to) and the bars indicate which parts correspond to the parent
  53. # class A's vtable layout.
  54. #
  55. # Classes that allow interpreted code to subclass them also have a
  56. # "shadow vtable" that contains implementations that delegate to
  57. # making a pycall, so that overridden methods in interpreted children
  58. # will be called. (A better strategy could dynamically generate these
  59. # vtables based on which methods are overridden in the children.)
  60. # Descriptions of method and attribute entries in class vtables.
  61. # The 'cls' field is the class that the method/attr was defined in,
  62. # which might be a parent class.
  63. # The 'shadow_method', if present, contains the method that should be
  64. # placed in the class's shadow vtable (if it has one).
class VTableMethod(NamedTuple):
    """Description of a single method entry in a class vtable."""

    # The class that the method was defined in, which might be a parent class.
    cls: "ClassIR"
    # Name of the method this entry describes.
    name: str
    # IR of the method stored in the regular vtable.
    method: FuncIR
    # If present, the method that should be placed in the class's shadow
    # vtable (if it has one).
    shadow_method: Optional[FuncIR]


# A vtable is represented as an ordered list of method entries.
VTableEntries = List[VTableMethod]
  71. class ClassIR:
  72. """Intermediate representation of a class.
  73. This also describes the runtime structure of native instances.
  74. """
  75. def __init__(
  76. self,
  77. name: str,
  78. module_name: str,
  79. is_trait: bool = False,
  80. is_generated: bool = False,
  81. is_abstract: bool = False,
  82. is_ext_class: bool = True,
  83. ) -> None:
  84. self.name = name
  85. self.module_name = module_name
  86. self.is_trait = is_trait
  87. self.is_generated = is_generated
  88. self.is_abstract = is_abstract
  89. self.is_ext_class = is_ext_class
  90. # An augmented class has additional methods separate from what mypyc generates.
  91. # Right now the only one is dataclasses.
  92. self.is_augmented = False
  93. # Does this inherit from a Python class?
  94. self.inherits_python = False
  95. # Do instances of this class have __dict__?
  96. self.has_dict = False
  97. # Do we allow interpreted subclasses? Derived from a mypyc_attr.
  98. self.allow_interpreted_subclasses = False
  99. # Does this class need getseters to be generated for its attributes? (getseters are also
  100. # added if is_generated is False)
  101. self.needs_getseters = False
  102. # Is this class declared as serializable (supports copy.copy
  103. # and pickle) using @mypyc_attr(serializable=True)?
  104. #
  105. # Additionally, any class with this attribute False but with
  106. # an __init__ that can be called without any arguments is
  107. # *implicitly serializable*. In this case __init__ will be
  108. # called during deserialization without arguments. If this is
  109. # True, we match Python semantics and __init__ won't be called
  110. # during deserialization.
  111. #
  112. # This impacts also all subclasses. Use is_serializable() to
  113. # also consider base classes.
  114. self._serializable = False
  115. # If this a subclass of some built-in python class, the name
  116. # of the object for that class. We currently only support this
  117. # in a few ad-hoc cases.
  118. self.builtin_base: str | None = None
  119. # Default empty constructor
  120. self.ctor = FuncDecl(name, None, module_name, FuncSignature([], RInstance(self)))
  121. # Attributes defined in the class (not inherited)
  122. self.attributes: dict[str, RType] = {}
  123. # Deletable attributes
  124. self.deletable: list[str] = []
  125. # We populate method_types with the signatures of every method before
  126. # we generate methods, and we rely on this information being present.
  127. self.method_decls: dict[str, FuncDecl] = {}
  128. # Map of methods that are actually present in an extension class
  129. self.methods: dict[str, FuncIR] = {}
  130. # Glue methods for boxing/unboxing when a class changes the type
  131. # while overriding a method. Maps from (parent class overridden, method)
  132. # to IR of glue method.
  133. self.glue_methods: dict[tuple[ClassIR, str], FuncIR] = {}
  134. # Properties are accessed like attributes, but have behavior like method calls.
  135. # They don't belong in the methods dictionary, since we don't want to expose them to
  136. # Python's method API. But we want to put them into our own vtable as methods, so that
  137. # they are properly handled and overridden. The property dictionary values are a tuple
  138. # containing a property getter and an optional property setter.
  139. self.properties: dict[str, tuple[FuncIR, FuncIR | None]] = {}
  140. # We generate these in prepare_class_def so that we have access to them when generating
  141. # other methods and properties that rely on these types.
  142. self.property_types: dict[str, RType] = {}
  143. self.vtable: dict[str, int] | None = None
  144. self.vtable_entries: VTableEntries = []
  145. self.trait_vtables: dict[ClassIR, VTableEntries] = {}
  146. # N.B: base might not actually quite be the direct base.
  147. # It is the nearest concrete base, but we allow a trait in between.
  148. self.base: ClassIR | None = None
  149. self.traits: list[ClassIR] = []
  150. # Supply a working mro for most generated classes. Real classes will need to
  151. # fix it up.
  152. self.mro: list[ClassIR] = [self]
  153. # base_mro is the chain of concrete (non-trait) ancestors
  154. self.base_mro: list[ClassIR] = [self]
  155. # Direct subclasses of this class (use subclasses() to also include non-direct ones)
  156. # None if separate compilation prevents this from working.
  157. #
  158. # Often it's better to use has_no_subclasses() or subclasses() instead.
  159. self.children: list[ClassIR] | None = []
  160. # Instance attributes that are initialized in the class body.
  161. self.attrs_with_defaults: set[str] = set()
  162. # Attributes that are always initialized in __init__ or class body
  163. # (inferred in mypyc.analysis.attrdefined using interprocedural analysis)
  164. self._always_initialized_attrs: set[str] = set()
  165. # Attributes that are sometimes initialized in __init__
  166. self._sometimes_initialized_attrs: set[str] = set()
  167. # If True, __init__ can make 'self' visible to unanalyzed/arbitrary code
  168. self.init_self_leak = False
  169. # Definedness of these attributes is backed by a bitmap. Index in the list
  170. # indicates the bit number. Includes inherited attributes. We need the
  171. # bitmap for types such as native ints that can't have a dedicated error
  172. # value that doesn't overlap a valid value. The bitmap is used if the
  173. # value of an attribute is the same as the error value.
  174. self.bitmap_attrs: List[str] = []
  175. def __repr__(self) -> str:
  176. return (
  177. "ClassIR("
  178. "name={self.name}, module_name={self.module_name}, "
  179. "is_trait={self.is_trait}, is_generated={self.is_generated}, "
  180. "is_abstract={self.is_abstract}, is_ext_class={self.is_ext_class}"
  181. ")".format(self=self)
  182. )
  183. @property
  184. def fullname(self) -> str:
  185. return f"{self.module_name}.{self.name}"
  186. def real_base(self) -> ClassIR | None:
  187. """Return the actual concrete base class, if there is one."""
  188. if len(self.mro) > 1 and not self.mro[1].is_trait:
  189. return self.mro[1]
  190. return None
  191. def vtable_entry(self, name: str) -> int:
  192. assert self.vtable is not None, "vtable not computed yet"
  193. assert name in self.vtable, f"{self.name!r} has no attribute {name!r}"
  194. return self.vtable[name]
  195. def attr_details(self, name: str) -> tuple[RType, ClassIR]:
  196. for ir in self.mro:
  197. if name in ir.attributes:
  198. return ir.attributes[name], ir
  199. if name in ir.property_types:
  200. return ir.property_types[name], ir
  201. raise KeyError(f"{self.name!r} has no attribute {name!r}")
  202. def attr_type(self, name: str) -> RType:
  203. return self.attr_details(name)[0]
  204. def method_decl(self, name: str) -> FuncDecl:
  205. for ir in self.mro:
  206. if name in ir.method_decls:
  207. return ir.method_decls[name]
  208. raise KeyError(f"{self.name!r} has no attribute {name!r}")
  209. def method_sig(self, name: str) -> FuncSignature:
  210. return self.method_decl(name).sig
  211. def has_method(self, name: str) -> bool:
  212. try:
  213. self.method_decl(name)
  214. except KeyError:
  215. return False
  216. return True
  217. def is_method_final(self, name: str) -> bool:
  218. subs = self.subclasses()
  219. if subs is None:
  220. # TODO: Look at the final attribute!
  221. return False
  222. if self.has_method(name):
  223. method_decl = self.method_decl(name)
  224. for subc in subs:
  225. if subc.method_decl(name) != method_decl:
  226. return False
  227. return True
  228. else:
  229. return not any(subc.has_method(name) for subc in subs)
  230. def has_attr(self, name: str) -> bool:
  231. try:
  232. self.attr_type(name)
  233. except KeyError:
  234. return False
  235. return True
  236. def is_deletable(self, name: str) -> bool:
  237. return any(name in ir.deletable for ir in self.mro)
  238. def is_always_defined(self, name: str) -> bool:
  239. if self.is_deletable(name):
  240. return False
  241. return name in self._always_initialized_attrs
  242. def name_prefix(self, names: NameGenerator) -> str:
  243. return names.private_name(self.module_name, self.name)
  244. def struct_name(self, names: NameGenerator) -> str:
  245. return f"{exported_name(self.fullname)}Object"
  246. def get_method_and_class(
  247. self, name: str, *, prefer_method: bool = False
  248. ) -> tuple[FuncIR, ClassIR] | None:
  249. for ir in self.mro:
  250. if name in ir.methods:
  251. func_ir = ir.methods[name]
  252. if not prefer_method and func_ir.decl.implicit:
  253. # This is an implicit accessor, so there is also an attribute definition
  254. # which the caller prefers. This happens if an attribute overrides a
  255. # property.
  256. return None
  257. return func_ir, ir
  258. return None
  259. def get_method(self, name: str, *, prefer_method: bool = False) -> FuncIR | None:
  260. res = self.get_method_and_class(name, prefer_method=prefer_method)
  261. return res[0] if res else None
  262. def has_method_decl(self, name: str) -> bool:
  263. return any(name in ir.method_decls for ir in self.mro)
  264. def has_no_subclasses(self) -> bool:
  265. return self.children == [] and not self.allow_interpreted_subclasses
  266. def subclasses(self) -> set[ClassIR] | None:
  267. """Return all subclasses of this class, both direct and indirect.
  268. Return None if it is impossible to identify all subclasses, for example
  269. because we are performing separate compilation.
  270. """
  271. if self.children is None or self.allow_interpreted_subclasses:
  272. return None
  273. result = set(self.children)
  274. for child in self.children:
  275. if child.children:
  276. child_subs = child.subclasses()
  277. if child_subs is None:
  278. return None
  279. result.update(child_subs)
  280. return result
  281. def concrete_subclasses(self) -> list[ClassIR] | None:
  282. """Return all concrete (i.e. non-trait and non-abstract) subclasses.
  283. Include both direct and indirect subclasses. Place classes with no children first.
  284. """
  285. subs = self.subclasses()
  286. if subs is None:
  287. return None
  288. concrete = {c for c in subs if not (c.is_trait or c.is_abstract)}
  289. # We place classes with no children first because they are more likely
  290. # to appear in various isinstance() checks. We then sort leaves by name
  291. # to get stable order.
  292. return sorted(concrete, key=lambda c: (len(c.children or []), c.name))
  293. def is_serializable(self) -> bool:
  294. return any(ci._serializable for ci in self.mro)
  295. def serialize(self) -> JsonDict:
  296. return {
  297. "name": self.name,
  298. "module_name": self.module_name,
  299. "is_trait": self.is_trait,
  300. "is_ext_class": self.is_ext_class,
  301. "is_abstract": self.is_abstract,
  302. "is_generated": self.is_generated,
  303. "is_augmented": self.is_augmented,
  304. "inherits_python": self.inherits_python,
  305. "has_dict": self.has_dict,
  306. "allow_interpreted_subclasses": self.allow_interpreted_subclasses,
  307. "needs_getseters": self.needs_getseters,
  308. "_serializable": self._serializable,
  309. "builtin_base": self.builtin_base,
  310. "ctor": self.ctor.serialize(),
  311. # We serialize dicts as lists to ensure order is preserved
  312. "attributes": [(k, t.serialize()) for k, t in self.attributes.items()],
  313. # We try to serialize a name reference, but if the decl isn't in methods
  314. # then we can't be sure that will work so we serialize the whole decl.
  315. "method_decls": [
  316. (k, d.id if k in self.methods else d.serialize())
  317. for k, d in self.method_decls.items()
  318. ],
  319. # We serialize method fullnames out and put methods in a separate dict
  320. "methods": [(k, m.id) for k, m in self.methods.items()],
  321. "glue_methods": [
  322. ((cir.fullname, k), m.id) for (cir, k), m in self.glue_methods.items()
  323. ],
  324. # We serialize properties and property_types separately out of an
  325. # abundance of caution about preserving dict ordering...
  326. "property_types": [(k, t.serialize()) for k, t in self.property_types.items()],
  327. "properties": list(self.properties),
  328. "vtable": self.vtable,
  329. "vtable_entries": serialize_vtable(self.vtable_entries),
  330. "trait_vtables": [
  331. (cir.fullname, serialize_vtable(v)) for cir, v in self.trait_vtables.items()
  332. ],
  333. # References to class IRs are all just names
  334. "base": self.base.fullname if self.base else None,
  335. "traits": [cir.fullname for cir in self.traits],
  336. "mro": [cir.fullname for cir in self.mro],
  337. "base_mro": [cir.fullname for cir in self.base_mro],
  338. "children": [cir.fullname for cir in self.children]
  339. if self.children is not None
  340. else None,
  341. "deletable": self.deletable,
  342. "attrs_with_defaults": sorted(self.attrs_with_defaults),
  343. "_always_initialized_attrs": sorted(self._always_initialized_attrs),
  344. "_sometimes_initialized_attrs": sorted(self._sometimes_initialized_attrs),
  345. "init_self_leak": self.init_self_leak,
  346. }
  347. @classmethod
  348. def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> ClassIR:
  349. fullname = data["module_name"] + "." + data["name"]
  350. assert fullname in ctx.classes, "Class %s not in deser class map" % fullname
  351. ir = ctx.classes[fullname]
  352. ir.is_trait = data["is_trait"]
  353. ir.is_generated = data["is_generated"]
  354. ir.is_abstract = data["is_abstract"]
  355. ir.is_ext_class = data["is_ext_class"]
  356. ir.is_augmented = data["is_augmented"]
  357. ir.inherits_python = data["inherits_python"]
  358. ir.has_dict = data["has_dict"]
  359. ir.allow_interpreted_subclasses = data["allow_interpreted_subclasses"]
  360. ir.needs_getseters = data["needs_getseters"]
  361. ir._serializable = data["_serializable"]
  362. ir.builtin_base = data["builtin_base"]
  363. ir.ctor = FuncDecl.deserialize(data["ctor"], ctx)
  364. ir.attributes = {k: deserialize_type(t, ctx) for k, t in data["attributes"]}
  365. ir.method_decls = {
  366. k: ctx.functions[v].decl if isinstance(v, str) else FuncDecl.deserialize(v, ctx)
  367. for k, v in data["method_decls"]
  368. }
  369. ir.methods = {k: ctx.functions[v] for k, v in data["methods"]}
  370. ir.glue_methods = {
  371. (ctx.classes[c], k): ctx.functions[v] for (c, k), v in data["glue_methods"]
  372. }
  373. ir.property_types = {k: deserialize_type(t, ctx) for k, t in data["property_types"]}
  374. ir.properties = {
  375. k: (ir.methods[k], ir.methods.get(PROPSET_PREFIX + k)) for k in data["properties"]
  376. }
  377. ir.vtable = data["vtable"]
  378. ir.vtable_entries = deserialize_vtable(data["vtable_entries"], ctx)
  379. ir.trait_vtables = {
  380. ctx.classes[k]: deserialize_vtable(v, ctx) for k, v in data["trait_vtables"]
  381. }
  382. base = data["base"]
  383. ir.base = ctx.classes[base] if base else None
  384. ir.traits = [ctx.classes[s] for s in data["traits"]]
  385. ir.mro = [ctx.classes[s] for s in data["mro"]]
  386. ir.base_mro = [ctx.classes[s] for s in data["base_mro"]]
  387. ir.children = data["children"] and [ctx.classes[s] for s in data["children"]]
  388. ir.deletable = data["deletable"]
  389. ir.attrs_with_defaults = set(data["attrs_with_defaults"])
  390. ir._always_initialized_attrs = set(data["_always_initialized_attrs"])
  391. ir._sometimes_initialized_attrs = set(data["_sometimes_initialized_attrs"])
  392. ir.init_self_leak = data["init_self_leak"]
  393. return ir
  394. class NonExtClassInfo:
  395. """Information needed to construct a non-extension class (Python class).
  396. Includes the class dictionary, a tuple of base classes,
  397. the class annotations dictionary, and the metaclass.
  398. """
  399. def __init__(self, dict: Value, bases: Value, anns: Value, metaclass: Value) -> None:
  400. self.dict = dict
  401. self.bases = bases
  402. self.anns = anns
  403. self.metaclass = metaclass
  404. def serialize_vtable_entry(entry: VTableMethod) -> JsonDict:
  405. return {
  406. ".class": "VTableMethod",
  407. "cls": entry.cls.fullname,
  408. "name": entry.name,
  409. "method": entry.method.decl.id,
  410. "shadow_method": entry.shadow_method.decl.id if entry.shadow_method else None,
  411. }
  412. def serialize_vtable(vtable: VTableEntries) -> list[JsonDict]:
  413. return [serialize_vtable_entry(v) for v in vtable]
  414. def deserialize_vtable_entry(data: JsonDict, ctx: DeserMaps) -> VTableMethod:
  415. if data[".class"] == "VTableMethod":
  416. return VTableMethod(
  417. ctx.classes[data["cls"]],
  418. data["name"],
  419. ctx.functions[data["method"]],
  420. ctx.functions[data["shadow_method"]] if data["shadow_method"] else None,
  421. )
  422. assert False, "Bogus vtable .class: %s" % data[".class"]
  423. def deserialize_vtable(data: list[JsonDict], ctx: DeserMaps) -> VTableEntries:
  424. return [deserialize_vtable_entry(x, ctx) for x in data]
  425. def all_concrete_classes(class_ir: ClassIR) -> list[ClassIR] | None:
  426. """Return all concrete classes among the class itself and its subclasses."""
  427. concrete = class_ir.concrete_subclasses()
  428. if concrete is None:
  429. return None
  430. if not (class_ir.is_abstract or class_ir.is_trait):
  431. concrete.append(class_ir)
  432. return concrete