moduleinspect.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. """Basic introspection of modules."""
  2. from __future__ import annotations
  3. import importlib
  4. import inspect
  5. import os
  6. import pkgutil
  7. import queue
  8. import sys
  9. from multiprocessing import Process, Queue
  10. from types import ModuleType
  11. class ModuleProperties:
  12. # Note that all __init__ args must have default values
  13. def __init__(
  14. self,
  15. name: str = "",
  16. file: str | None = None,
  17. path: list[str] | None = None,
  18. all: list[str] | None = None,
  19. is_c_module: bool = False,
  20. subpackages: list[str] | None = None,
  21. ) -> None:
  22. self.name = name # __name__ attribute
  23. self.file = file # __file__ attribute
  24. self.path = path # __path__ attribute
  25. self.all = all # __all__ attribute
  26. self.is_c_module = is_c_module
  27. self.subpackages = subpackages or []
  28. def is_c_module(module: ModuleType) -> bool:
  29. if module.__dict__.get("__file__") is None:
  30. # Could be a namespace package. These must be handled through
  31. # introspection, since there is no source file.
  32. return True
  33. return os.path.splitext(module.__dict__["__file__"])[-1] in [".so", ".pyd", ".dll"]
  34. class InspectError(Exception):
  35. pass
  36. def get_package_properties(package_id: str) -> ModuleProperties:
  37. """Use runtime introspection to get information about a module/package."""
  38. try:
  39. package = importlib.import_module(package_id)
  40. except BaseException as e:
  41. raise InspectError(str(e)) from e
  42. name = getattr(package, "__name__", package_id)
  43. file = getattr(package, "__file__", None)
  44. path: list[str] | None = getattr(package, "__path__", None)
  45. if not isinstance(path, list):
  46. path = None
  47. pkg_all = getattr(package, "__all__", None)
  48. if pkg_all is not None:
  49. try:
  50. pkg_all = list(pkg_all)
  51. except Exception:
  52. pkg_all = None
  53. is_c = is_c_module(package)
  54. if path is None:
  55. # Object has no path; this means it's either a module inside a package
  56. # (and thus no sub-packages), or it could be a C extension package.
  57. if is_c:
  58. # This is a C extension module, now get the list of all sub-packages
  59. # using the inspect module
  60. subpackages = [
  61. package.__name__ + "." + name
  62. for name, val in inspect.getmembers(package)
  63. if inspect.ismodule(val) and val.__name__ == package.__name__ + "." + name
  64. ]
  65. else:
  66. # It's a module inside a package. There's nothing else to walk/yield.
  67. subpackages = []
  68. else:
  69. all_packages = pkgutil.walk_packages(
  70. path, prefix=package.__name__ + ".", onerror=lambda r: None
  71. )
  72. subpackages = [qualified_name for importer, qualified_name, ispkg in all_packages]
  73. return ModuleProperties(
  74. name=name, file=file, path=path, all=pkg_all, is_c_module=is_c, subpackages=subpackages
  75. )
  76. def worker(tasks: Queue[str], results: Queue[str | ModuleProperties], sys_path: list[str]) -> None:
  77. """The main loop of a worker introspection process."""
  78. sys.path = sys_path
  79. while True:
  80. mod = tasks.get()
  81. try:
  82. prop = get_package_properties(mod)
  83. except InspectError as e:
  84. results.put(str(e))
  85. continue
  86. results.put(prop)
  87. class ModuleInspect:
  88. """Perform runtime introspection of modules in a separate process.
  89. Reuse the process for multiple modules for efficiency. However, if there is an
  90. error, retry using a fresh process to avoid cross-contamination of state between
  91. modules.
  92. We use a separate process to isolate us from many side effects. For example, the
  93. import of a module may kill the current process, and we want to recover from that.
  94. Always use in a with statement for proper clean-up:
  95. with ModuleInspect() as m:
  96. p = m.get_package_properties('urllib.parse')
  97. """
  98. def __init__(self) -> None:
  99. self._start()
  100. def _start(self) -> None:
  101. self.tasks: Queue[str] = Queue()
  102. self.results: Queue[ModuleProperties | str] = Queue()
  103. self.proc = Process(target=worker, args=(self.tasks, self.results, sys.path))
  104. self.proc.start()
  105. self.counter = 0 # Number of successful roundtrips
  106. def close(self) -> None:
  107. """Free any resources used."""
  108. self.proc.terminate()
  109. def get_package_properties(self, package_id: str) -> ModuleProperties:
  110. """Return some properties of a module/package using runtime introspection.
  111. Raise InspectError if the target couldn't be imported.
  112. """
  113. self.tasks.put(package_id)
  114. res = self._get_from_queue()
  115. if res is None:
  116. # The process died; recover and report error.
  117. self._start()
  118. raise InspectError(f"Process died when importing {package_id!r}")
  119. if isinstance(res, str):
  120. # Error importing module
  121. if self.counter > 0:
  122. # Also try with a fresh process. Maybe one of the previous imports has
  123. # corrupted some global state.
  124. self.close()
  125. self._start()
  126. return self.get_package_properties(package_id)
  127. raise InspectError(res)
  128. self.counter += 1
  129. return res
  130. def _get_from_queue(self) -> ModuleProperties | str | None:
  131. """Get value from the queue.
  132. Return the value read from the queue, or None if the process unexpectedly died.
  133. """
  134. max_iter = 600
  135. n = 0
  136. while True:
  137. if n == max_iter:
  138. raise RuntimeError("Timeout waiting for subprocess")
  139. try:
  140. return self.results.get(timeout=0.05)
  141. except queue.Empty:
  142. if not self.proc.is_alive():
  143. return None
  144. n += 1
  145. def __enter__(self) -> ModuleInspect:
  146. return self
  147. def __exit__(self, *args: object) -> None:
  148. self.close()