# modulefinder.py
"""Low-level infrastructure to find modules.

This builds on fscache.py; find_sources.py builds on top of this.
"""
  4. from __future__ import annotations
  5. import ast
  6. import collections
  7. import functools
  8. import os
  9. import re
  10. import subprocess
  11. import sys
  12. from enum import Enum, unique
  13. from mypy.errors import CompileError
  14. if sys.version_info >= (3, 11):
  15. import tomllib
  16. else:
  17. import tomli as tomllib
  18. from typing import Dict, Final, List, NamedTuple, Optional, Tuple, Union
  19. from typing_extensions import TypeAlias as _TypeAlias
  20. from mypy import pyinfo
  21. from mypy.fscache import FileSystemCache
  22. from mypy.nodes import MypyFile
  23. from mypy.options import Options
  24. from mypy.stubinfo import approved_stub_package_exists
# Paths to be searched in find_module().
class SearchPaths(NamedTuple):
    """The four groups of directories consulted when looking up a module.

    Each field is a tuple of directory paths; the per-field comments say
    where the entries of that group come from.
    """

    python_path: tuple[str, ...]  # where user code is found
    mypy_path: tuple[str, ...]  # from $MYPYPATH or config variable
    package_path: tuple[str, ...]  # from get_site_packages_dirs()
    typeshed_path: tuple[str, ...]  # paths in typeshed
# Package dirs are a two-tuple of path to search and whether to verify the module
# (i.e. whether the enclosing packages must be checked for __init__ files).
OnePackageDir = Tuple[str, bool]
PackageDirs = List[OnePackageDir]

# Minimum and maximum Python versions for modules in stdlib as (major, minor).
# The maximum is None when no upper bound is recorded for the module.
StdlibVersions: _TypeAlias = Dict[str, Tuple[Tuple[int, int], Optional[Tuple[int, int]]]]

# File extensions recognised as Python sources. Code that loops over this list
# tries ".pyi" before ".py".
PYTHON_EXTENSIONS: Final = [".pyi", ".py"]
  37. # TODO: Consider adding more reasons here?
  38. # E.g. if we deduce a module would likely be found if the user were
  39. # to set the --namespace-packages flag.
  40. @unique
  41. class ModuleNotFoundReason(Enum):
  42. # The module was not found: we found neither stubs nor a plausible code
  43. # implementation (with or without a py.typed file).
  44. NOT_FOUND = 0
  45. # The implementation for this module plausibly exists (e.g. we
  46. # found a matching folder or *.py file), but either the parent package
  47. # did not contain a py.typed file or we were unable to find a
  48. # corresponding *-stubs package.
  49. FOUND_WITHOUT_TYPE_HINTS = 1
  50. # The module was not found in the current working directory, but
  51. # was able to be found in the parent directory.
  52. WRONG_WORKING_DIRECTORY = 2
  53. # Stub PyPI package (typically types-pkgname) known to exist but not installed.
  54. APPROVED_STUBS_NOT_INSTALLED = 3
  55. def error_message_templates(self, daemon: bool) -> tuple[str, list[str]]:
  56. doc_link = "See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports"
  57. if self is ModuleNotFoundReason.NOT_FOUND:
  58. msg = 'Cannot find implementation or library stub for module named "{module}"'
  59. notes = [doc_link]
  60. elif self is ModuleNotFoundReason.WRONG_WORKING_DIRECTORY:
  61. msg = 'Cannot find implementation or library stub for module named "{module}"'
  62. notes = [
  63. "You may be running mypy in a subpackage, "
  64. "mypy should be run on the package root"
  65. ]
  66. elif self is ModuleNotFoundReason.FOUND_WITHOUT_TYPE_HINTS:
  67. msg = (
  68. 'Skipping analyzing "{module}": module is installed, but missing library stubs '
  69. "or py.typed marker"
  70. )
  71. notes = [doc_link]
  72. elif self is ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED:
  73. msg = 'Library stubs not installed for "{module}"'
  74. notes = ['Hint: "python3 -m pip install {stub_dist}"']
  75. if not daemon:
  76. notes.append(
  77. '(or run "mypy --install-types" to install all missing stub packages)'
  78. )
  79. notes.append(doc_link)
  80. else:
  81. assert False
  82. return msg, notes
# Result of a module search: the path to the module as a str if it was found,
# otherwise the ModuleNotFoundReason explaining why it wasn't.
ModuleSearchResult = Union[str, ModuleNotFoundReason]
  86. class BuildSource:
  87. """A single source file."""
  88. def __init__(
  89. self,
  90. path: str | None,
  91. module: str | None,
  92. text: str | None = None,
  93. base_dir: str | None = None,
  94. followed: bool = False,
  95. ) -> None:
  96. self.path = path # File where it's found (e.g. 'xxx/yyy/foo/bar.py')
  97. self.module = module or "__main__" # Module name (e.g. 'foo.bar')
  98. self.text = text # Source code, if initially supplied, else None
  99. self.base_dir = base_dir # Directory where the package is rooted (e.g. 'xxx/yyy')
  100. self.followed = followed # Was this found by following imports?
  101. def __repr__(self) -> str:
  102. return (
  103. "BuildSource(path={!r}, module={!r}, has_text={}, base_dir={!r}, followed={})".format(
  104. self.path, self.module, self.text is not None, self.base_dir, self.followed
  105. )
  106. )
  107. class BuildSourceSet:
  108. """Helper to efficiently test a file's membership in a set of build sources."""
  109. def __init__(self, sources: list[BuildSource]) -> None:
  110. self.source_text_present = False
  111. self.source_modules: dict[str, str] = {}
  112. self.source_paths: set[str] = set()
  113. for source in sources:
  114. if source.text is not None:
  115. self.source_text_present = True
  116. if source.path:
  117. self.source_paths.add(source.path)
  118. if source.module:
  119. self.source_modules[source.module] = source.path or ""
  120. def is_source(self, file: MypyFile) -> bool:
  121. return (
  122. (file.path and file.path in self.source_paths)
  123. or file._fullname in self.source_modules
  124. or self.source_text_present
  125. )
  126. class FindModuleCache:
  127. """Module finder with integrated cache.
  128. Module locations and some intermediate results are cached internally
  129. and can be cleared with the clear() method.
  130. All file system accesses are performed through a FileSystemCache,
  131. which is not ever cleared by this class. If necessary it must be
  132. cleared by client code.
  133. """
  134. def __init__(
  135. self,
  136. search_paths: SearchPaths,
  137. fscache: FileSystemCache | None,
  138. options: Options | None,
  139. stdlib_py_versions: StdlibVersions | None = None,
  140. source_set: BuildSourceSet | None = None,
  141. ) -> None:
  142. self.search_paths = search_paths
  143. self.source_set = source_set
  144. self.fscache = fscache or FileSystemCache()
  145. # Cache for get_toplevel_possibilities:
  146. # search_paths -> (toplevel_id -> list(package_dirs))
  147. self.initial_components: dict[tuple[str, ...], dict[str, list[str]]] = {}
  148. # Cache find_module: id -> result
  149. self.results: dict[str, ModuleSearchResult] = {}
  150. self.ns_ancestors: dict[str, str] = {}
  151. self.options = options
  152. custom_typeshed_dir = None
  153. if options:
  154. custom_typeshed_dir = options.custom_typeshed_dir
  155. self.stdlib_py_versions = stdlib_py_versions or load_stdlib_py_versions(
  156. custom_typeshed_dir
  157. )
  158. def clear(self) -> None:
  159. self.results.clear()
  160. self.initial_components.clear()
  161. self.ns_ancestors.clear()
  162. def find_module_via_source_set(self, id: str) -> ModuleSearchResult | None:
  163. """Fast path to find modules by looking through the input sources
  164. This is only used when --fast-module-lookup is passed on the command line."""
  165. if not self.source_set:
  166. return None
  167. p = self.source_set.source_modules.get(id, None)
  168. if p and self.fscache.isfile(p):
  169. # We need to make sure we still have __init__.py all the way up
  170. # otherwise we might have false positives compared to slow path
  171. # in case of deletion of init files, which is covered by some tests.
  172. # TODO: are there some combination of flags in which this check should be skipped?
  173. d = os.path.dirname(p)
  174. for _ in range(id.count(".")):
  175. if not any(
  176. self.fscache.isfile(os.path.join(d, "__init__" + x)) for x in PYTHON_EXTENSIONS
  177. ):
  178. return None
  179. d = os.path.dirname(d)
  180. return p
  181. idx = id.rfind(".")
  182. if idx != -1:
  183. # When we're looking for foo.bar.baz and can't find a matching module
  184. # in the source set, look up for a foo.bar module.
  185. parent = self.find_module_via_source_set(id[:idx])
  186. if parent is None or not isinstance(parent, str):
  187. return None
  188. basename, ext = os.path.splitext(parent)
  189. if not any(parent.endswith("__init__" + x) for x in PYTHON_EXTENSIONS) and (
  190. ext in PYTHON_EXTENSIONS and not self.fscache.isdir(basename)
  191. ):
  192. # If we do find such a *module* (and crucially, we don't want a package,
  193. # hence the filtering out of __init__ files, and checking for the presence
  194. # of a folder with a matching name), then we can be pretty confident that
  195. # 'baz' will either be a top-level variable in foo.bar, or will not exist.
  196. #
  197. # Either way, spelunking in other search paths for another 'foo.bar.baz'
  198. # module should be avoided because:
  199. # 1. in the unlikely event that one were found, it's highly likely that
  200. # it would be unrelated to the source being typechecked and therefore
  201. # more likely to lead to erroneous results
  202. # 2. as described in _find_module, in some cases the search itself could
  203. # potentially waste significant amounts of time
  204. return ModuleNotFoundReason.NOT_FOUND
  205. return None
  206. def find_lib_path_dirs(self, id: str, lib_path: tuple[str, ...]) -> PackageDirs:
  207. """Find which elements of a lib_path have the directory a module needs to exist.
  208. This is run for the python_path, mypy_path, and typeshed_path search paths.
  209. """
  210. components = id.split(".")
  211. dir_chain = os.sep.join(components[:-1]) # e.g., 'foo/bar'
  212. dirs = []
  213. for pathitem in self.get_toplevel_possibilities(lib_path, components[0]):
  214. # e.g., '/usr/lib/python3.4/foo/bar'
  215. dir = os.path.normpath(os.path.join(pathitem, dir_chain))
  216. if self.fscache.isdir(dir):
  217. dirs.append((dir, True))
  218. return dirs
  219. def get_toplevel_possibilities(self, lib_path: tuple[str, ...], id: str) -> list[str]:
  220. """Find which elements of lib_path could contain a particular top-level module.
  221. In practice, almost all modules can be routed to the correct entry in
  222. lib_path by looking at just the first component of the module name.
  223. We take advantage of this by enumerating the contents of all of the
  224. directories on the lib_path and building a map of which entries in
  225. the lib_path could contain each potential top-level module that appears.
  226. """
  227. if lib_path in self.initial_components:
  228. return self.initial_components[lib_path].get(id, [])
  229. # Enumerate all the files in the directories on lib_path and produce the map
  230. components: dict[str, list[str]] = {}
  231. for dir in lib_path:
  232. try:
  233. contents = self.fscache.listdir(dir)
  234. except OSError:
  235. contents = []
  236. # False positives are fine for correctness here, since we will check
  237. # precisely later, so we only look at the root of every filename without
  238. # any concern for the exact details.
  239. for name in contents:
  240. name = os.path.splitext(name)[0]
  241. components.setdefault(name, []).append(dir)
  242. self.initial_components[lib_path] = components
  243. return components.get(id, [])
  244. def find_module(self, id: str, *, fast_path: bool = False) -> ModuleSearchResult:
  245. """Return the path of the module source file or why it wasn't found.
  246. If fast_path is True, prioritize performance over generating detailed
  247. error descriptions.
  248. """
  249. if id not in self.results:
  250. top_level = id.partition(".")[0]
  251. use_typeshed = True
  252. if id in self.stdlib_py_versions:
  253. use_typeshed = self._typeshed_has_version(id)
  254. elif top_level in self.stdlib_py_versions:
  255. use_typeshed = self._typeshed_has_version(top_level)
  256. self.results[id] = self._find_module(id, use_typeshed)
  257. if (
  258. not (fast_path or (self.options is not None and self.options.fast_module_lookup))
  259. and self.results[id] is ModuleNotFoundReason.NOT_FOUND
  260. and self._can_find_module_in_parent_dir(id)
  261. ):
  262. self.results[id] = ModuleNotFoundReason.WRONG_WORKING_DIRECTORY
  263. return self.results[id]
  264. def _typeshed_has_version(self, module: str) -> bool:
  265. if not self.options:
  266. return True
  267. version = typeshed_py_version(self.options)
  268. min_version, max_version = self.stdlib_py_versions[module]
  269. return version >= min_version and (max_version is None or version <= max_version)
  270. def _find_module_non_stub_helper(
  271. self, components: list[str], pkg_dir: str
  272. ) -> OnePackageDir | ModuleNotFoundReason:
  273. plausible_match = False
  274. dir_path = pkg_dir
  275. for index, component in enumerate(components):
  276. dir_path = os.path.join(dir_path, component)
  277. if self.fscache.isfile(os.path.join(dir_path, "py.typed")):
  278. return os.path.join(pkg_dir, *components[:-1]), index == 0
  279. elif not plausible_match and (
  280. self.fscache.isdir(dir_path) or self.fscache.isfile(dir_path + ".py")
  281. ):
  282. plausible_match = True
  283. # If this is not a directory then we can't traverse further into it
  284. if not self.fscache.isdir(dir_path):
  285. break
  286. if approved_stub_package_exists(components[0]):
  287. if len(components) == 1 or (
  288. self.find_module(components[0])
  289. is ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED
  290. ):
  291. return ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED
  292. if approved_stub_package_exists(".".join(components[:2])):
  293. return ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED
  294. if plausible_match:
  295. return ModuleNotFoundReason.FOUND_WITHOUT_TYPE_HINTS
  296. else:
  297. return ModuleNotFoundReason.NOT_FOUND
  298. def _update_ns_ancestors(self, components: list[str], match: tuple[str, bool]) -> None:
  299. path, verify = match
  300. for i in range(1, len(components)):
  301. pkg_id = ".".join(components[:-i])
  302. if pkg_id not in self.ns_ancestors and self.fscache.isdir(path):
  303. self.ns_ancestors[pkg_id] = path
  304. path = os.path.dirname(path)
  305. def _can_find_module_in_parent_dir(self, id: str) -> bool:
  306. """Test if a module can be found by checking the parent directories
  307. of the current working directory.
  308. """
  309. working_dir = os.getcwd()
  310. parent_search = FindModuleCache(
  311. SearchPaths((), (), (), ()),
  312. self.fscache,
  313. self.options,
  314. stdlib_py_versions=self.stdlib_py_versions,
  315. )
  316. while any(is_init_file(file) for file in os.listdir(working_dir)):
  317. working_dir = os.path.dirname(working_dir)
  318. parent_search.search_paths = SearchPaths((working_dir,), (), (), ())
  319. if not isinstance(parent_search._find_module(id, False), ModuleNotFoundReason):
  320. return True
  321. return False
  322. def _find_module(self, id: str, use_typeshed: bool) -> ModuleSearchResult:
  323. fscache = self.fscache
  324. # Fast path for any modules in the current source set.
  325. # This is particularly important when there are a large number of search
  326. # paths which share the first (few) component(s) due to the use of namespace
  327. # packages, for instance:
  328. # foo/
  329. # company/
  330. # __init__.py
  331. # foo/
  332. # bar/
  333. # company/
  334. # __init__.py
  335. # bar/
  336. # baz/
  337. # company/
  338. # __init__.py
  339. # baz/
  340. #
  341. # mypy gets [foo/company/foo, bar/company/bar, baz/company/baz, ...] as input
  342. # and computes [foo, bar, baz, ...] as the module search path.
  343. #
  344. # This would result in O(n) search for every import of company.*, leading to
  345. # O(n**2) behavior in load_graph as such imports are unsurprisingly present
  346. # at least once, and usually many more times than that, in each and every file
  347. # being parsed.
  348. #
  349. # Thankfully, such cases are efficiently handled by looking up the module path
  350. # via BuildSourceSet.
  351. p = (
  352. self.find_module_via_source_set(id)
  353. if (self.options is not None and self.options.fast_module_lookup)
  354. else None
  355. )
  356. if p:
  357. return p
  358. # If we're looking for a module like 'foo.bar.baz', it's likely that most of the
  359. # many elements of lib_path don't even have a subdirectory 'foo/bar'. Discover
  360. # that only once and cache it for when we look for modules like 'foo.bar.blah'
  361. # that will require the same subdirectory.
  362. components = id.split(".")
  363. dir_chain = os.sep.join(components[:-1]) # e.g., 'foo/bar'
  364. # We have two sets of folders so that we collect *all* stubs folders and
  365. # put them in the front of the search path
  366. third_party_inline_dirs: PackageDirs = []
  367. third_party_stubs_dirs: PackageDirs = []
  368. found_possible_third_party_missing_type_hints = False
  369. need_installed_stubs = False
  370. # Third-party stub/typed packages
  371. for pkg_dir in self.search_paths.package_path:
  372. stub_name = components[0] + "-stubs"
  373. stub_dir = os.path.join(pkg_dir, stub_name)
  374. if fscache.isdir(stub_dir) and self._is_compatible_stub_package(stub_dir):
  375. stub_typed_file = os.path.join(stub_dir, "py.typed")
  376. stub_components = [stub_name] + components[1:]
  377. path = os.path.join(pkg_dir, *stub_components[:-1])
  378. if fscache.isdir(path):
  379. if fscache.isfile(stub_typed_file):
  380. # Stub packages can have a py.typed file, which must include
  381. # 'partial\n' to make the package partial
  382. # Partial here means that mypy should look at the runtime
  383. # package if installed.
  384. if fscache.read(stub_typed_file).decode().strip() == "partial":
  385. runtime_path = os.path.join(pkg_dir, dir_chain)
  386. third_party_inline_dirs.append((runtime_path, True))
  387. # if the package is partial, we don't verify the module, as
  388. # the partial stub package may not have a __init__.pyi
  389. third_party_stubs_dirs.append((path, False))
  390. else:
  391. # handle the edge case where people put a py.typed file
  392. # in a stub package, but it isn't partial
  393. third_party_stubs_dirs.append((path, True))
  394. else:
  395. third_party_stubs_dirs.append((path, True))
  396. non_stub_match = self._find_module_non_stub_helper(components, pkg_dir)
  397. if isinstance(non_stub_match, ModuleNotFoundReason):
  398. if non_stub_match is ModuleNotFoundReason.FOUND_WITHOUT_TYPE_HINTS:
  399. found_possible_third_party_missing_type_hints = True
  400. elif non_stub_match is ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED:
  401. need_installed_stubs = True
  402. else:
  403. third_party_inline_dirs.append(non_stub_match)
  404. self._update_ns_ancestors(components, non_stub_match)
  405. if self.options and self.options.use_builtins_fixtures:
  406. # Everything should be in fixtures.
  407. third_party_inline_dirs.clear()
  408. third_party_stubs_dirs.clear()
  409. found_possible_third_party_missing_type_hints = False
  410. python_mypy_path = self.search_paths.mypy_path + self.search_paths.python_path
  411. candidate_base_dirs = self.find_lib_path_dirs(id, python_mypy_path)
  412. if use_typeshed:
  413. # Search for stdlib stubs in typeshed before installed
  414. # stubs to avoid picking up backports (dataclasses, for
  415. # example) when the library is included in stdlib.
  416. candidate_base_dirs += self.find_lib_path_dirs(id, self.search_paths.typeshed_path)
  417. candidate_base_dirs += third_party_stubs_dirs + third_party_inline_dirs
  418. # If we're looking for a module like 'foo.bar.baz', then candidate_base_dirs now
  419. # contains just the subdirectories 'foo/bar' that actually exist under the
  420. # elements of lib_path. This is probably much shorter than lib_path itself.
  421. # Now just look for 'baz.pyi', 'baz/__init__.py', etc., inside those directories.
  422. seplast = os.sep + components[-1] # so e.g. '/baz'
  423. sepinit = os.sep + "__init__"
  424. near_misses = [] # Collect near misses for namespace mode (see below).
  425. for base_dir, verify in candidate_base_dirs:
  426. base_path = base_dir + seplast # so e.g. '/usr/lib/python3.4/foo/bar/baz'
  427. has_init = False
  428. dir_prefix = base_dir
  429. for _ in range(len(components) - 1):
  430. dir_prefix = os.path.dirname(dir_prefix)
  431. # Prefer package over module, i.e. baz/__init__.py* over baz.py*.
  432. for extension in PYTHON_EXTENSIONS:
  433. path = base_path + sepinit + extension
  434. path_stubs = base_path + "-stubs" + sepinit + extension
  435. if fscache.isfile_case(path, dir_prefix):
  436. has_init = True
  437. if verify and not verify_module(fscache, id, path, dir_prefix):
  438. near_misses.append((path, dir_prefix))
  439. continue
  440. return path
  441. elif fscache.isfile_case(path_stubs, dir_prefix):
  442. if verify and not verify_module(fscache, id, path_stubs, dir_prefix):
  443. near_misses.append((path_stubs, dir_prefix))
  444. continue
  445. return path_stubs
  446. # In namespace mode, register a potential namespace package
  447. if self.options and self.options.namespace_packages:
  448. if (
  449. not has_init
  450. and fscache.exists_case(base_path, dir_prefix)
  451. and not fscache.isfile_case(base_path, dir_prefix)
  452. ):
  453. near_misses.append((base_path, dir_prefix))
  454. # No package, look for module.
  455. for extension in PYTHON_EXTENSIONS:
  456. path = base_path + extension
  457. if fscache.isfile_case(path, dir_prefix):
  458. if verify and not verify_module(fscache, id, path, dir_prefix):
  459. near_misses.append((path, dir_prefix))
  460. continue
  461. return path
  462. # In namespace mode, re-check those entries that had 'verify'.
  463. # Assume search path entries xxx, yyy and zzz, and we're
  464. # looking for foo.bar.baz. Suppose near_misses has:
  465. #
  466. # - xxx/foo/bar/baz.py
  467. # - yyy/foo/bar/baz/__init__.py
  468. # - zzz/foo/bar/baz.pyi
  469. #
  470. # If any of the foo directories has __init__.py[i], it wins.
  471. # Else, we look for foo/bar/__init__.py[i], etc. If there are
  472. # none, the first hit wins. Note that this does not take into
  473. # account whether the lowest-level module is a file (baz.py),
  474. # a package (baz/__init__.py), or a stub file (baz.pyi) -- for
  475. # these the first one encountered along the search path wins.
  476. #
  477. # The helper function highest_init_level() returns an int that
  478. # indicates the highest level at which a __init__.py[i] file
  479. # is found; if no __init__ was found it returns 0, if we find
  480. # only foo/bar/__init__.py it returns 1, and if we have
  481. # foo/__init__.py it returns 2 (regardless of what's in
  482. # foo/bar). It doesn't look higher than that.
  483. if self.options and self.options.namespace_packages and near_misses:
  484. levels = [
  485. highest_init_level(fscache, id, path, dir_prefix)
  486. for path, dir_prefix in near_misses
  487. ]
  488. index = levels.index(max(levels))
  489. return near_misses[index][0]
  490. # Finally, we may be asked to produce an ancestor for an
  491. # installed package with a py.typed marker that is a
  492. # subpackage of a namespace package. We only fess up to these
  493. # if we would otherwise return "not found".
  494. ancestor = self.ns_ancestors.get(id)
  495. if ancestor is not None:
  496. return ancestor
  497. if need_installed_stubs:
  498. return ModuleNotFoundReason.APPROVED_STUBS_NOT_INSTALLED
  499. elif found_possible_third_party_missing_type_hints:
  500. return ModuleNotFoundReason.FOUND_WITHOUT_TYPE_HINTS
  501. else:
  502. return ModuleNotFoundReason.NOT_FOUND
  503. def _is_compatible_stub_package(self, stub_dir: str) -> bool:
  504. """Does a stub package support the target Python version?
  505. Stub packages may contain a metadata file which specifies
  506. whether the stubs are compatible with Python 2 and 3.
  507. """
  508. metadata_fnam = os.path.join(stub_dir, "METADATA.toml")
  509. if not os.path.isfile(metadata_fnam):
  510. return True
  511. with open(metadata_fnam, "rb") as f:
  512. metadata = tomllib.load(f)
  513. return bool(metadata.get("python3", True))
  514. def find_modules_recursive(self, module: str) -> list[BuildSource]:
  515. module_path = self.find_module(module)
  516. if isinstance(module_path, ModuleNotFoundReason):
  517. return []
  518. sources = [BuildSource(module_path, module, None)]
  519. package_path = None
  520. if is_init_file(module_path):
  521. package_path = os.path.dirname(module_path)
  522. elif self.fscache.isdir(module_path):
  523. package_path = module_path
  524. if package_path is None:
  525. return sources
  526. # This logic closely mirrors that in find_sources. One small but important difference is
  527. # that we do not sort names with keyfunc. The recursive call to find_modules_recursive
  528. # calls find_module, which will handle the preference between packages, pyi and py.
  529. # Another difference is it doesn't handle nested search paths / package roots.
  530. seen: set[str] = set()
  531. names = sorted(self.fscache.listdir(package_path))
  532. for name in names:
  533. # Skip certain names altogether
  534. if name in ("__pycache__", "site-packages", "node_modules") or name.startswith("."):
  535. continue
  536. subpath = os.path.join(package_path, name)
  537. if self.options and matches_exclude(
  538. subpath, self.options.exclude, self.fscache, self.options.verbosity >= 2
  539. ):
  540. continue
  541. if self.fscache.isdir(subpath):
  542. # Only recurse into packages
  543. if (self.options and self.options.namespace_packages) or (
  544. self.fscache.isfile(os.path.join(subpath, "__init__.py"))
  545. or self.fscache.isfile(os.path.join(subpath, "__init__.pyi"))
  546. ):
  547. seen.add(name)
  548. sources.extend(self.find_modules_recursive(module + "." + name))
  549. else:
  550. stem, suffix = os.path.splitext(name)
  551. if stem == "__init__":
  552. continue
  553. if stem not in seen and "." not in stem and suffix in PYTHON_EXTENSIONS:
  554. # (If we sorted names by keyfunc) we could probably just make the BuildSource
  555. # ourselves, but this ensures compatibility with find_module / the cache
  556. seen.add(stem)
  557. sources.extend(self.find_modules_recursive(module + "." + stem))
  558. return sources
  559. def matches_exclude(
  560. subpath: str, excludes: list[str], fscache: FileSystemCache, verbose: bool
  561. ) -> bool:
  562. if not excludes:
  563. return False
  564. subpath_str = os.path.relpath(subpath).replace(os.sep, "/")
  565. if fscache.isdir(subpath):
  566. subpath_str += "/"
  567. for exclude in excludes:
  568. if re.search(exclude, subpath_str):
  569. if verbose:
  570. print(
  571. f"TRACE: Excluding {subpath_str} (matches pattern {exclude})", file=sys.stderr
  572. )
  573. return True
  574. return False
  575. def is_init_file(path: str) -> bool:
  576. return os.path.basename(path) in ("__init__.py", "__init__.pyi")
  577. def verify_module(fscache: FileSystemCache, id: str, path: str, prefix: str) -> bool:
  578. """Check that all packages containing id have a __init__ file."""
  579. if is_init_file(path):
  580. path = os.path.dirname(path)
  581. for i in range(id.count(".")):
  582. path = os.path.dirname(path)
  583. if not any(
  584. fscache.isfile_case(os.path.join(path, f"__init__{extension}"), prefix)
  585. for extension in PYTHON_EXTENSIONS
  586. ):
  587. return False
  588. return True
  589. def highest_init_level(fscache: FileSystemCache, id: str, path: str, prefix: str) -> int:
  590. """Compute the highest level where an __init__ file is found."""
  591. if is_init_file(path):
  592. path = os.path.dirname(path)
  593. level = 0
  594. for i in range(id.count(".")):
  595. path = os.path.dirname(path)
  596. if any(
  597. fscache.isfile_case(os.path.join(path, f"__init__{extension}"), prefix)
  598. for extension in PYTHON_EXTENSIONS
  599. ):
  600. level = i + 1
  601. return level
  602. def mypy_path() -> list[str]:
  603. path_env = os.getenv("MYPYPATH")
  604. if not path_env:
  605. return []
  606. return path_env.split(os.pathsep)
  607. def default_lib_path(
  608. data_dir: str, pyversion: tuple[int, int], custom_typeshed_dir: str | None
  609. ) -> list[str]:
  610. """Return default standard library search paths."""
  611. path: list[str] = []
  612. if custom_typeshed_dir:
  613. typeshed_dir = os.path.join(custom_typeshed_dir, "stdlib")
  614. mypy_extensions_dir = os.path.join(custom_typeshed_dir, "stubs", "mypy-extensions")
  615. versions_file = os.path.join(typeshed_dir, "VERSIONS")
  616. if not os.path.isdir(typeshed_dir) or not os.path.isfile(versions_file):
  617. print(
  618. "error: --custom-typeshed-dir does not point to a valid typeshed ({})".format(
  619. custom_typeshed_dir
  620. )
  621. )
  622. sys.exit(2)
  623. else:
  624. auto = os.path.join(data_dir, "stubs-auto")
  625. if os.path.isdir(auto):
  626. data_dir = auto
  627. typeshed_dir = os.path.join(data_dir, "typeshed", "stdlib")
  628. mypy_extensions_dir = os.path.join(data_dir, "typeshed", "stubs", "mypy-extensions")
  629. path.append(typeshed_dir)
  630. # Get mypy-extensions stubs from typeshed, since we treat it as an
  631. # "internal" library, similar to typing and typing-extensions.
  632. path.append(mypy_extensions_dir)
  633. # Add fallback path that can be used if we have a broken installation.
  634. if sys.platform != "win32":
  635. path.append("/usr/local/lib/mypy")
  636. if not path:
  637. print(
  638. "Could not resolve typeshed subdirectories. Your mypy install is broken.\n"
  639. "Python executable is located at {}.\nMypy located at {}".format(
  640. sys.executable, data_dir
  641. ),
  642. file=sys.stderr,
  643. )
  644. sys.exit(1)
  645. return path
  646. @functools.lru_cache(maxsize=None)
  647. def get_search_dirs(python_executable: str | None) -> tuple[list[str], list[str]]:
  648. """Find package directories for given python.
  649. This runs a subprocess call, which generates a list of the directories in sys.path.
  650. To avoid repeatedly calling a subprocess (which can be slow!) we
  651. lru_cache the results.
  652. """
  653. if python_executable is None:
  654. return ([], [])
  655. elif python_executable == sys.executable:
  656. # Use running Python's package dirs
  657. sys_path, site_packages = pyinfo.getsearchdirs()
  658. else:
  659. # Use subprocess to get the package directory of given Python
  660. # executable
  661. env = {**dict(os.environ), "PYTHONSAFEPATH": "1"}
  662. try:
  663. sys_path, site_packages = ast.literal_eval(
  664. subprocess.check_output(
  665. [python_executable, pyinfo.__file__, "getsearchdirs"],
  666. env=env,
  667. stderr=subprocess.PIPE,
  668. ).decode()
  669. )
  670. except subprocess.CalledProcessError as err:
  671. print(err.stderr)
  672. print(err.stdout)
  673. raise
  674. except OSError as err:
  675. reason = os.strerror(err.errno)
  676. raise CompileError(
  677. [f"mypy: Invalid python executable '{python_executable}': {reason}"]
  678. ) from err
  679. return sys_path, site_packages
  680. def compute_search_paths(
  681. sources: list[BuildSource], options: Options, data_dir: str, alt_lib_path: str | None = None
  682. ) -> SearchPaths:
  683. """Compute the search paths as specified in PEP 561.
  684. There are the following 4 members created:
  685. - User code (from `sources`)
  686. - MYPYPATH (set either via config or environment variable)
  687. - installed package directories (which will later be split into stub-only and inline)
  688. - typeshed
  689. """
  690. # Determine the default module search path.
  691. lib_path = collections.deque(
  692. default_lib_path(
  693. data_dir, options.python_version, custom_typeshed_dir=options.custom_typeshed_dir
  694. )
  695. )
  696. if options.use_builtins_fixtures:
  697. # Use stub builtins (to speed up test cases and to make them easier to
  698. # debug). This is a test-only feature, so assume our files are laid out
  699. # as in the source tree.
  700. # We also need to allow overriding where to look for it. Argh.
  701. root_dir = os.getenv("MYPY_TEST_PREFIX", None)
  702. if not root_dir:
  703. root_dir = os.path.dirname(os.path.dirname(__file__))
  704. lib_path.appendleft(os.path.join(root_dir, "test-data", "unit", "lib-stub"))
  705. # alt_lib_path is used by some tests to bypass the normal lib_path mechanics.
  706. # If we don't have one, grab directories of source files.
  707. python_path: list[str] = []
  708. if not alt_lib_path:
  709. for source in sources:
  710. # Include directory of the program file in the module search path.
  711. if source.base_dir:
  712. dir = source.base_dir
  713. if dir not in python_path:
  714. python_path.append(dir)
  715. # Do this even if running as a file, for sanity (mainly because with
  716. # multiple builds, there could be a mix of files/modules, so its easier
  717. # to just define the semantics that we always add the current director
  718. # to the lib_path
  719. # TODO: Don't do this in some cases; for motivation see see
  720. # https://github.com/python/mypy/issues/4195#issuecomment-341915031
  721. if options.bazel:
  722. dir = "."
  723. else:
  724. dir = os.getcwd()
  725. if dir not in lib_path:
  726. python_path.insert(0, dir)
  727. # Start with a MYPYPATH environment variable at the front of the mypy_path, if defined.
  728. mypypath = mypy_path()
  729. # Add a config-defined mypy path.
  730. mypypath.extend(options.mypy_path)
  731. # If provided, insert the caller-supplied extra module path to the
  732. # beginning (highest priority) of the search path.
  733. if alt_lib_path:
  734. mypypath.insert(0, alt_lib_path)
  735. sys_path, site_packages = get_search_dirs(options.python_executable)
  736. # We only use site packages for this check
  737. for site in site_packages:
  738. assert site not in lib_path
  739. if (
  740. site in mypypath
  741. or any(p.startswith(site + os.path.sep) for p in mypypath)
  742. or (os.path.altsep and any(p.startswith(site + os.path.altsep) for p in mypypath))
  743. ):
  744. print(f"{site} is in the MYPYPATH. Please remove it.", file=sys.stderr)
  745. print(
  746. "See https://mypy.readthedocs.io/en/stable/running_mypy.html"
  747. "#how-mypy-handles-imports for more info",
  748. file=sys.stderr,
  749. )
  750. sys.exit(1)
  751. return SearchPaths(
  752. python_path=tuple(reversed(python_path)),
  753. mypy_path=tuple(mypypath),
  754. package_path=tuple(sys_path + site_packages),
  755. typeshed_path=tuple(lib_path),
  756. )
  757. def load_stdlib_py_versions(custom_typeshed_dir: str | None) -> StdlibVersions:
  758. """Return dict with minimum and maximum Python versions of stdlib modules.
  759. The contents look like
  760. {..., 'secrets': ((3, 6), None), 'symbol': ((2, 7), (3, 9)), ...}
  761. None means there is no maximum version.
  762. """
  763. typeshed_dir = custom_typeshed_dir or os.path.join(os.path.dirname(__file__), "typeshed")
  764. stdlib_dir = os.path.join(typeshed_dir, "stdlib")
  765. result = {}
  766. versions_path = os.path.join(stdlib_dir, "VERSIONS")
  767. assert os.path.isfile(versions_path), (custom_typeshed_dir, versions_path, __file__)
  768. with open(versions_path) as f:
  769. for line in f:
  770. line = line.split("#")[0].strip()
  771. if line == "":
  772. continue
  773. module, version_range = line.split(":")
  774. versions = version_range.split("-")
  775. min_version = parse_version(versions[0])
  776. max_version = (
  777. parse_version(versions[1]) if len(versions) >= 2 and versions[1].strip() else None
  778. )
  779. result[module] = min_version, max_version
  780. return result
  781. def parse_version(version: str) -> tuple[int, int]:
  782. major, minor = version.strip().split(".")
  783. return int(major), int(minor)
  784. def typeshed_py_version(options: Options) -> tuple[int, int]:
  785. """Return Python version used for checking whether module supports typeshed."""
  786. # Typeshed no longer covers Python 3.x versions before 3.7, so 3.7 is
  787. # the earliest we can support.
  788. return max(options.python_version, (3, 7))