| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516 |
- """Utilities for comparing two versions of a module symbol table.
- The goal is to find which AST nodes have externally visible changes, so
- that we can fire triggers and re-process other parts of the program
- that are stale because of the changes.
- Only look in detail at definitions in the current module -- don't
- recurse into other modules.
- A summary of the module contents:
- * snapshot_symbol_table(...) creates an opaque snapshot description of a
- module/class symbol table (recursing into nested class symbol tables).
- * compare_symbol_table_snapshots(...) compares two snapshots for the same
- module id and returns fully qualified names of differences (which act as
- triggers).
- To compare two versions of a module symbol table, take snapshots of both
- versions and compare the snapshots. The use of snapshots makes it easy to
- compare two versions of the *same* symbol table that is being mutated.
- Summary of how this works for certain kinds of differences:
- * If a symbol table node is deleted or added (only present in old/new version
- of the symbol table), it is considered different, of course.
- * If a symbol table node refers to a different sort of thing in the new version,
- it is considered different (for example, if a class is replaced with a
- function).
- * If the signature of a function has changed, it is considered different.
- * If the type of a variable changes, it is considered different.
- * If the MRO of a class changes, or a non-generic class is turned into a
- generic class, the class is considered different (there are other such "big"
- differences that cause a class to be considered changed). However, just changes
- to attributes or methods don't generally constitute a difference at the
- class level -- these are handled at attribute level (say, 'mod.Cls.method'
- is different rather than 'mod.Cls' being different).
- * If an imported name targets a different name (say, 'from x import y' is
- replaced with 'from z import y'), the name in the module is considered
- different. If the target of an import continues to have the same name,
- but its specifics change, this doesn't mean that the imported name is
- treated as changed. Say, there is 'from x import y' in 'm', and the
- type of 'x.y' has changed. This doesn't mean that 'm.y' is considered
- changed. Instead, processing the difference in 'm' will be handled through
- fine-grained dependencies.
- """
- from __future__ import annotations
- from typing import Sequence, Tuple, Union
- from typing_extensions import TypeAlias as _TypeAlias
- from mypy.expandtype import expand_type
- from mypy.nodes import (
- UNBOUND_IMPORTED,
- Decorator,
- FuncBase,
- FuncDef,
- FuncItem,
- MypyFile,
- OverloadedFuncDef,
- ParamSpecExpr,
- SymbolNode,
- SymbolTable,
- TypeAlias,
- TypeInfo,
- TypeVarExpr,
- TypeVarTupleExpr,
- Var,
- )
- from mypy.semanal_shared import find_dataclass_transform_spec
- from mypy.types import (
- AnyType,
- CallableType,
- DeletedType,
- ErasedType,
- Instance,
- LiteralType,
- NoneType,
- Overloaded,
- Parameters,
- ParamSpecType,
- PartialType,
- TupleType,
- Type,
- TypeAliasType,
- TypedDictType,
- TypeType,
- TypeVarId,
- TypeVarLikeType,
- TypeVarTupleType,
- TypeVarType,
- TypeVisitor,
- UnboundType,
- UninhabitedType,
- UnionType,
- UnpackType,
- )
- from mypy.util import get_prefix
# Snapshot representation of a symbol table node or type. The representation is
# opaque -- the only supported operations are comparing for equality and
# hashing (the latter for type snapshots only). Snapshots can contain primitive
# objects, nested tuples, lists and dictionaries (type snapshots are
# immutable).
#
# For example, the snapshot of the 'int' type is
# ('Instance', 'builtins.int', (), ('None',)): the kind tag, the full name of
# the class, the snapshots of the type arguments, and a placeholder for the
# missing last known value.
# Type snapshots are strict, they must be hashable and ordered (e.g. for Unions).
Primitive: _TypeAlias = Union[str, float, int, bool]  # float is for Literal[3.14] support.
SnapshotItem: _TypeAlias = Tuple[Union[Primitive, "SnapshotItem"], ...]

# Symbol snapshots can be more lenient: their elements may be arbitrary
# objects (including the lists and dicts produced by the snapshot functions).
SymbolSnapshot: _TypeAlias = Tuple[object, ...]
def compare_symbol_table_snapshots(
    name_prefix: str, snapshot1: dict[str, SymbolSnapshot], snapshot2: dict[str, SymbolSnapshot]
) -> set[str]:
    """Compute the names whose snapshots differ between two symbol table versions.

    The comparison is shallow (intra-module): anything defined outside the
    module is compared only by the name of its target. Entries tagged
    "TypeInfo" carry a nested symbol table snapshot as their last element,
    which is compared recursively.

    Args:
        name_prefix: Fully qualified name of the module or class that owns
            the symbol table; it is prepended to every reported name.
        snapshot1: Snapshot of one version of the symbol table.
        snapshot2: Snapshot of the other version of the symbol table.

    Returns:
        A set of fully qualified names (e.g., 'mod.func' or
        'mod.Class.method') that are different in the two snapshots.
    """

    def qualify(short_name: str) -> str:
        return f"{name_prefix}.{short_name}"

    # A name that appears in exactly one snapshot was added or deleted.
    triggers = {qualify(short_name) for short_name in snapshot1.keys() ^ snapshot2.keys()}

    # Names defined in both versions need an actual comparison.
    for short_name in snapshot1.keys() & snapshot2.keys():
        old_item, new_item = snapshot1[short_name], snapshot2[short_name]
        qualified = qualify(short_name)

        if old_item[0] != new_item[0]:
            # The node changed its kind -> trivially different.
            triggers.add(qualified)
            continue

        if old_item[0] == "TypeInfo":
            # Everything but the trailing nested symbol table describes the
            # class itself; a mismatch there is a class-level difference.
            if old_item[:-1] != new_item[:-1]:
                triggers.add(qualified)
            # Differences inside the class body are reported per attribute.
            old_members, new_members = old_item[-1], new_item[-1]
            assert isinstance(old_members, dict)
            assert isinstance(new_members, dict)
            triggers |= compare_symbol_table_snapshots(qualified, old_members, new_members)
        elif old_item != new_item:
            # Shallow node without interesting internal structure.
            triggers.add(qualified)

    return triggers
def snapshot_symbol_table(name_prefix: str, table: SymbolTable) -> dict[str, SymbolSnapshot]:
    """Build a snapshot that captures the current state of a symbol table.

    The result maps each name in the table to a snapshot made of nested
    tuples and dicts, a form that is cheap to compare for differences.

    Only "shallow" state is recorded: a reference to something whose
    fullname prefix differs from name_prefix is represented just by the
    common (fullname, kind, module_public) triple of the symbol.

    Args:
        name_prefix: Fully qualified name of the module or class that owns
            the table.
        table: The symbol table to describe.

    Returns:
        A dict from (unqualified) names to their snapshots.
    """
    snapshots: dict[str, SymbolSnapshot] = {}
    for name, symbol in table.items():
        node = symbol.node
        # TODO: cross_ref?
        fullname = node.fullname if node else None
        common = (fullname, symbol.kind, symbol.module_public)

        entry: SymbolSnapshot
        if isinstance(node, MypyFile):
            # This is a cross-reference to another module.
            # If the reference is busted because the other module is missing,
            # the node will be a "stale_info" TypeInfo produced by fixup,
            # but that doesn't really matter to us here.
            entry = ("Moduleref", common)
        elif isinstance(node, TypeVarExpr):
            value_snapshots = [snapshot_type(value) for value in node.values]
            entry = (
                "TypeVar",
                node.variance,
                value_snapshots,
                snapshot_type(node.upper_bound),
                snapshot_type(node.default),
            )
        elif isinstance(node, TypeAlias):
            entry = (
                "TypeAlias",
                snapshot_types(node.alias_tvars),
                node.normalized,
                node.no_args,
                snapshot_optional_type(node.target),
            )
        elif isinstance(node, ParamSpecExpr):
            entry = (
                "ParamSpec",
                node.variance,
                snapshot_type(node.upper_bound),
                snapshot_type(node.default),
            )
        elif isinstance(node, TypeVarTupleExpr):
            entry = (
                "TypeVarTuple",
                node.variance,
                snapshot_type(node.upper_bound),
                snapshot_type(node.default),
            )
        else:
            assert symbol.kind != UNBOUND_IMPORTED
            if node and get_prefix(node.fullname) != name_prefix:
                # This is a cross-reference to a node defined in another module.
                entry = ("CrossRef", common)
            else:
                entry = snapshot_definition(node, common)
        snapshots[name] = entry
    return snapshots
def snapshot_definition(node: SymbolNode | None, common: SymbolSnapshot) -> SymbolSnapshot:
    """Describe a single symbol table node as a snapshot.

    The snapshot is built from nested tuples and dicts and covers only the
    externally visible attributes of the node.

    Args:
        node: The definition to describe. Must be a FuncBase, Var, Decorator
            or TypeInfo; anything else (including None) trips the final
            assertion.
        common: The (fullname, kind, module_public) part shared by all
            symbol snapshots, as computed by the caller.

    Returns:
        A tuple whose first element is a kind tag: "Func", "Var",
        "Decorator" or "TypeInfo".
    """
    if isinstance(node, FuncBase):
        # TODO: info
        if node.type:
            signature = snapshot_type(node.type)
        else:
            signature = snapshot_untyped_signature(node)

        # Locate the function that carries the actual body, if there is one.
        body_owner: FuncDef | None = None
        if isinstance(node, FuncDef):
            body_owner = node
        elif isinstance(node, OverloadedFuncDef) and node.impl:
            overload_impl = node.impl
            if isinstance(overload_impl, Decorator):
                body_owner = overload_impl.func
            else:
                body_owner = overload_impl
        if body_owner:
            trivial_body = body_owner.is_trivial_body
        else:
            trivial_body = False

        func_spec = find_dataclass_transform_spec(node)
        serialized_func_spec = None if func_spec is None else func_spec.serialize()
        return (
            "Func",
            common,
            node.is_property,
            node.is_final,
            node.is_class,
            node.is_static,
            signature,
            trivial_body,
            serialized_func_spec,
        )

    if isinstance(node, Var):
        return ("Var", common, snapshot_optional_type(node.type), node.is_final)

    if isinstance(node, Decorator):
        # Note that decorated methods are represented by Decorator instances in
        # a symbol table since we need to preserve information about the
        # decorated function (whether it's a class function, for
        # example). Top-level decorated functions, however, are represented by
        # the corresponding Var node, since that happens to provide enough
        # context.
        return (
            "Decorator",
            node.is_overload,
            snapshot_optional_type(node.var.type),
            snapshot_definition(node.func, common),
        )

    if isinstance(node, TypeInfo):
        # Prefer the spec stored on the class; otherwise search for one.
        class_spec = node.dataclass_transform_spec
        if class_spec is None:
            class_spec = find_dataclass_transform_spec(node)

        attrs = (
            node.is_abstract,
            node.is_enum,
            node.is_protocol,
            node.fallback_to_any,
            node.meta_fallback_to_any,
            node.is_named_tuple,
            node.is_newtype,
            # We need this to e.g. trigger metaclass calculation in subclasses.
            snapshot_optional_type(node.metaclass_type),
            snapshot_optional_type(node.tuple_type),
            snapshot_optional_type(node.typeddict_type),
            [base.fullname for base in node.mro],
            # Note that the structure of type variables is a part of the external interface,
            # since creating instances might fail, for example:
            #     T = TypeVar('T', bound=int)
            #     class C(Generic[T]):
            #         ...
            #     x: C[str] <- this is invalid, and needs to be re-checked if `T` changes.
            # An alternative would be to create both deps: <...> -> C, and <...> -> <C>,
            # but this currently seems a bit ad hoc.
            tuple(snapshot_type(tdef) for tdef in node.defn.type_vars),
            [snapshot_type(base) for base in node.bases],
            [snapshot_type(p) for p in node._promote],
            None if class_spec is None else class_spec.serialize(),
        )
        members = snapshot_symbol_table(node.fullname, node.names)
        # Special dependency for abstract attribute handling.
        members["(abstract)"] = ("Abstract", tuple(sorted(node.abstract_attributes)))
        return ("TypeInfo", common, attrs, members)

    # Other node types are handled elsewhere.
    assert False, type(node)
def snapshot_type(typ: Type) -> SnapshotItem:
    """Return the nested-tuple snapshot of a type.

    The work is delegated to a fresh SnapshotTypeVisitor that the type
    accepts.
    """
    visitor = SnapshotTypeVisitor()
    return typ.accept(visitor)
def snapshot_optional_type(typ: Type | None) -> SnapshotItem:
    """Snapshot a type that may be missing.

    A falsy argument (such as None) yields the placeholder
    ("<not set>",); otherwise the regular type snapshot is returned.
    """
    return snapshot_type(typ) if typ else ("<not set>",)
def snapshot_types(types: Sequence[Type]) -> SnapshotItem:
    """Snapshot every type in a sequence, returning the results as a tuple.

    The order of the input is preserved.
    """
    collected = []
    for item in types:
        collected.append(snapshot_type(item))
    return tuple(collected)
def snapshot_simple_type(typ: Type) -> SnapshotItem:
    """Snapshot a type by nothing but the name of its class.

    Returns a one-element tuple holding that class name.
    """
    class_name = type(typ).__name__
    return (class_name,)
def encode_optional_str(s: str | None) -> str:
    """Map None to the marker string "<None>"; return any other value as is.

    This keeps snapshot tuples free of None values mixed in with strings.
    """
    return "<None>" if s is None else s
class SnapshotTypeVisitor(TypeVisitor[SnapshotItem]):
    """Type visitor that turns a type object into a read-only, self-contained snapshot.

    Invariants of the produced snapshots:

    - They consist solely of (nested) tuples and other immutable
      primitive objects.
    - AST nodes are never referenced directly; the full name of the
      target is stored instead.
    - No mutable or non-primitive objects are reachable from them.
    - Two snapshots are equal if and only if they represent the same
      object.
    - They must be sortable. Tuples therefore need consistent element
      types; for example, str and None values must not be mixed
      arbitrarily, since those can't be compared.
    """

    def visit_unbound_type(self, typ: UnboundType) -> SnapshotItem:
        arg_snapshots = snapshot_types(typ.args)
        return ("UnboundType", typ.name, typ.optional, typ.empty_tuple_index, arg_snapshots)

    # The following kinds of types carry no state worth recording, so the
    # name of the type's class alone identifies them.

    def visit_any(self, typ: AnyType) -> SnapshotItem:
        return (type(typ).__name__,)

    def visit_none_type(self, typ: NoneType) -> SnapshotItem:
        return (type(typ).__name__,)

    def visit_uninhabited_type(self, typ: UninhabitedType) -> SnapshotItem:
        return (type(typ).__name__,)

    def visit_erased_type(self, typ: ErasedType) -> SnapshotItem:
        return (type(typ).__name__,)

    def visit_deleted_type(self, typ: DeletedType) -> SnapshotItem:
        return (type(typ).__name__,)

    def visit_instance(self, typ: Instance) -> SnapshotItem:
        class_name = encode_optional_str(typ.type.fullname)
        arg_snapshots = snapshot_types(typ.args)
        last_known: SnapshotItem
        if typ.last_known_value is None:
            # Use a tuple placeholder so that this slot always holds a tuple.
            last_known = ("None",)
        else:
            last_known = snapshot_type(typ.last_known_value)
        return ("Instance", class_name, arg_snapshots, last_known)

    def visit_type_var(self, typ: TypeVarType) -> SnapshotItem:
        type_var_id = typ.id
        return (
            "TypeVar",
            typ.name,
            typ.fullname,
            type_var_id.raw_id,
            type_var_id.meta_level,
            snapshot_types(typ.values),
            snapshot_type(typ.upper_bound),
            snapshot_type(typ.default),
            typ.variance,
        )

    def visit_param_spec(self, typ: ParamSpecType) -> SnapshotItem:
        param_spec_id = typ.id
        return (
            "ParamSpec",
            param_spec_id.raw_id,
            param_spec_id.meta_level,
            typ.flavor,
            snapshot_type(typ.upper_bound),
            snapshot_type(typ.default),
        )

    def visit_type_var_tuple(self, typ: TypeVarTupleType) -> SnapshotItem:
        tuple_var_id = typ.id
        return (
            "TypeVarTupleType",
            tuple_var_id.raw_id,
            tuple_var_id.meta_level,
            snapshot_type(typ.upper_bound),
            snapshot_type(typ.default),
        )

    def visit_unpack_type(self, typ: UnpackType) -> SnapshotItem:
        unpacked = snapshot_type(typ.type)
        return ("UnpackType", unpacked)

    def visit_parameters(self, typ: Parameters) -> SnapshotItem:
        arg_type_snapshots = snapshot_types(typ.arg_types)
        # Argument names may be None; encode them so that only strings remain.
        encoded_names = tuple(encode_optional_str(name) for name in typ.arg_names)
        kind_values = tuple(kind.value for kind in typ.arg_kinds)
        return ("Parameters", arg_type_snapshots, encoded_names, kind_values)

    def visit_callable_type(self, typ: CallableType) -> SnapshotItem:
        if typ.is_generic():
            # Renumber the type variables first so that the snapshot does
            # not depend on the ids they happened to receive.
            typ = self.normalize_callable_variables(typ)
        arg_type_snapshots = snapshot_types(typ.arg_types)
        ret_snapshot = snapshot_type(typ.ret_type)
        encoded_names = tuple(encode_optional_str(name) for name in typ.arg_names)
        kind_values = tuple(kind.value for kind in typ.arg_kinds)
        return (
            "CallableType",
            arg_type_snapshots,
            ret_snapshot,
            encoded_names,
            kind_values,
            typ.is_type_obj(),
            typ.is_ellipsis_args,
            snapshot_types(typ.variables),
        )

    def normalize_callable_variables(self, typ: CallableType) -> CallableType:
        """Normalize all type variable ids to run from -1 to -len(variables).

        Returns a callable in which every type variable has been replaced
        by a copy carrying the new id, both within the callable (through
        expand_type) and in its list of variables.
        """
        renumbered = []
        substitutions: dict[TypeVarId, Type] = {}
        for index, variable in enumerate(typ.variables):
            new_id = TypeVarId(-1 - index)
            replacement: TypeVarLikeType
            if isinstance(variable, TypeVarType):
                replacement = variable.copy_modified(id=new_id)
            elif isinstance(variable, TypeVarTupleType):
                replacement = variable.copy_modified(id=new_id)
            else:
                assert isinstance(variable, ParamSpecType)
                replacement = variable.copy_modified(id=new_id)
            renumbered.append(replacement)
            substitutions[variable.id] = replacement
        expanded = expand_type(typ, substitutions)
        return expanded.copy_modified(variables=renumbered)

    def visit_tuple_type(self, typ: TupleType) -> SnapshotItem:
        item_snapshots = snapshot_types(typ.items)
        return ("TupleType", item_snapshots)

    def visit_typeddict_type(self, typ: TypedDictType) -> SnapshotItem:
        # Items keep the iteration order of the dict; required keys are sorted.
        entries = []
        for key, item_type in typ.items.items():
            entries.append((key, snapshot_type(item_type)))
        required_keys = tuple(sorted(typ.required_keys))
        return ("TypedDictType", tuple(entries), required_keys)

    def visit_literal_type(self, typ: LiteralType) -> SnapshotItem:
        fallback_snapshot = snapshot_type(typ.fallback)
        return ("LiteralType", fallback_snapshot, typ.value)

    def visit_union_type(self, typ: UnionType) -> SnapshotItem:
        # Drop duplicates and sort, so that equality of snapshots can be
        # used to test for equivalent union types.
        unique_items = set()
        for item in typ.items:
            unique_items.add(snapshot_type(item))
        ordered_items = sorted(unique_items)
        return ("UnionType", tuple(ordered_items))

    def visit_overloaded(self, typ: Overloaded) -> SnapshotItem:
        item_snapshots = snapshot_types(typ.items)
        return ("Overloaded", item_snapshots)

    def visit_partial_type(self, typ: PartialType) -> SnapshotItem:
        # A partial type is not fully defined, so the result is indeterminate. We shouldn't
        # get here.
        raise RuntimeError

    def visit_type_type(self, typ: TypeType) -> SnapshotItem:
        item_snapshot = snapshot_type(typ.item)
        return ("TypeType", item_snapshot)

    def visit_type_alias_type(self, typ: TypeAliasType) -> SnapshotItem:
        alias = typ.alias
        assert alias is not None
        return ("TypeAliasType", alias.fullname, snapshot_types(typ.args))
def snapshot_untyped_signature(func: OverloadedFuncDef | FuncItem) -> SymbolSnapshot:
    """Snapshot the signature of a function that has no explicit signature.

    A change to the arguments of such a function must make it count as
    different. The implicit signature isn't stored anywhere, and we'd
    rather not construct new Callable objects in this module (the idea is
    to only read properties of the AST here), hence this special case.

    For a FuncItem the result is the pair (argument names, argument kinds).
    For an overloaded function it is a tuple with one entry per item: the
    type snapshot of a decorated item's variable, the marker
    ("DecoratorWithoutType",) if that variable has no type, or the
    recursive result for an undecorated item.
    """
    if isinstance(func, FuncItem):
        return (tuple(func.arg_names), tuple(func.arg_kinds))

    item_snapshots: list[SymbolSnapshot] = []
    for overload_item in func.items:
        if not isinstance(overload_item, Decorator):
            item_snapshots.append(snapshot_untyped_signature(overload_item))
        elif overload_item.var.type:
            item_snapshots.append(snapshot_type(overload_item.var.type))
        else:
            item_snapshots.append(("DecoratorWithoutType",))
    return tuple(item_snapshots)
|