find_sources.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. """Routines for finding the sources that mypy will check"""
  2. from __future__ import annotations
  3. import functools
  4. import os
  5. from typing import Sequence
  6. from typing_extensions import Final
  7. from mypy.fscache import FileSystemCache
  8. from mypy.modulefinder import PYTHON_EXTENSIONS, BuildSource, matches_exclude, mypy_path
  9. from mypy.options import Options
# Python source extensions as a tuple, so the value can be handed straight to
# str.endswith() (see create_source_list and strip_py).  The element order is
# significant: keyfunc uses each extension's index as a sort key and
# SourceFinder.get_init_file tries the extensions in this order.
PY_EXTENSIONS: Final = tuple(PYTHON_EXTENSIONS)
  11. class InvalidSourceList(Exception):
  12. """Exception indicating a problem in the list of sources given to mypy."""
  13. def create_source_list(
  14. paths: Sequence[str],
  15. options: Options,
  16. fscache: FileSystemCache | None = None,
  17. allow_empty_dir: bool = False,
  18. ) -> list[BuildSource]:
  19. """From a list of source files/directories, makes a list of BuildSources.
  20. Raises InvalidSourceList on errors.
  21. """
  22. fscache = fscache or FileSystemCache()
  23. finder = SourceFinder(fscache, options)
  24. sources = []
  25. for path in paths:
  26. path = os.path.normpath(path)
  27. if path.endswith(PY_EXTENSIONS):
  28. # Can raise InvalidSourceList if a directory doesn't have a valid module name.
  29. name, base_dir = finder.crawl_up(path)
  30. sources.append(BuildSource(path, name, None, base_dir))
  31. elif fscache.isdir(path):
  32. sub_sources = finder.find_sources_in_dir(path)
  33. if not sub_sources and not allow_empty_dir:
  34. raise InvalidSourceList(f"There are no .py[i] files in directory '{path}'")
  35. sources.extend(sub_sources)
  36. else:
  37. mod = os.path.basename(path) if options.scripts_are_modules else None
  38. sources.append(BuildSource(path, mod, None))
  39. return sources
  40. def keyfunc(name: str) -> tuple[bool, int, str]:
  41. """Determines sort order for directory listing.
  42. The desirable properties are:
  43. 1) foo < foo.pyi < foo.py
  44. 2) __init__.py[i] < foo
  45. """
  46. base, suffix = os.path.splitext(name)
  47. for i, ext in enumerate(PY_EXTENSIONS):
  48. if suffix == ext:
  49. return (base != "__init__", i, base)
  50. return (base != "__init__", -1, name)
  51. def normalise_package_base(root: str) -> str:
  52. if not root:
  53. root = os.curdir
  54. root = os.path.abspath(root)
  55. if root.endswith(os.sep):
  56. root = root[:-1]
  57. return root
  58. def get_explicit_package_bases(options: Options) -> list[str] | None:
  59. """Returns explicit package bases to use if the option is enabled, or None if disabled.
  60. We currently use MYPYPATH and the current directory as the package bases. In the future,
  61. when --namespace-packages is the default could also use the values passed with the
  62. --package-root flag, see #9632.
  63. Values returned are normalised so we can use simple string comparisons in
  64. SourceFinder.is_explicit_package_base
  65. """
  66. if not options.explicit_package_bases:
  67. return None
  68. roots = mypy_path() + options.mypy_path + [os.getcwd()]
  69. return [normalise_package_base(root) for root in roots]
  70. class SourceFinder:
  71. def __init__(self, fscache: FileSystemCache, options: Options) -> None:
  72. self.fscache = fscache
  73. self.explicit_package_bases = get_explicit_package_bases(options)
  74. self.namespace_packages = options.namespace_packages
  75. self.exclude = options.exclude
  76. self.verbosity = options.verbosity
  77. def is_explicit_package_base(self, path: str) -> bool:
  78. assert self.explicit_package_bases
  79. return normalise_package_base(path) in self.explicit_package_bases
  80. def find_sources_in_dir(self, path: str) -> list[BuildSource]:
  81. sources = []
  82. seen: set[str] = set()
  83. names = sorted(self.fscache.listdir(path), key=keyfunc)
  84. for name in names:
  85. # Skip certain names altogether
  86. if name in ("__pycache__", "site-packages", "node_modules") or name.startswith("."):
  87. continue
  88. subpath = os.path.join(path, name)
  89. if matches_exclude(subpath, self.exclude, self.fscache, self.verbosity >= 2):
  90. continue
  91. if self.fscache.isdir(subpath):
  92. sub_sources = self.find_sources_in_dir(subpath)
  93. if sub_sources:
  94. seen.add(name)
  95. sources.extend(sub_sources)
  96. else:
  97. stem, suffix = os.path.splitext(name)
  98. if stem not in seen and suffix in PY_EXTENSIONS:
  99. seen.add(stem)
  100. module, base_dir = self.crawl_up(subpath)
  101. sources.append(BuildSource(subpath, module, None, base_dir))
  102. return sources
  103. def crawl_up(self, path: str) -> tuple[str, str]:
  104. """Given a .py[i] filename, return module and base directory.
  105. For example, given "xxx/yyy/foo/bar.py", we might return something like:
  106. ("foo.bar", "xxx/yyy")
  107. If namespace packages is off, we crawl upwards until we find a directory without
  108. an __init__.py
  109. If namespace packages is on, we crawl upwards until the nearest explicit base directory.
  110. Failing that, we return one past the highest directory containing an __init__.py
  111. We won't crawl past directories with invalid package names.
  112. The base directory returned is an absolute path.
  113. """
  114. path = os.path.abspath(path)
  115. parent, filename = os.path.split(path)
  116. module_name = strip_py(filename) or filename
  117. parent_module, base_dir = self.crawl_up_dir(parent)
  118. if module_name == "__init__":
  119. return parent_module, base_dir
  120. # Note that module_name might not actually be a valid identifier, but that's okay
  121. # Ignoring this possibility sidesteps some search path confusion
  122. module = module_join(parent_module, module_name)
  123. return module, base_dir
  124. def crawl_up_dir(self, dir: str) -> tuple[str, str]:
  125. return self._crawl_up_helper(dir) or ("", dir)
  126. @functools.lru_cache() # noqa: B019
  127. def _crawl_up_helper(self, dir: str) -> tuple[str, str] | None:
  128. """Given a directory, maybe returns module and base directory.
  129. We return a non-None value if we were able to find something clearly intended as a base
  130. directory (as adjudicated by being an explicit base directory or by containing a package
  131. with __init__.py).
  132. This distinction is necessary for namespace packages, so that we know when to treat
  133. ourselves as a subpackage.
  134. """
  135. # stop crawling if we're an explicit base directory
  136. if self.explicit_package_bases is not None and self.is_explicit_package_base(dir):
  137. return "", dir
  138. parent, name = os.path.split(dir)
  139. if name.endswith("-stubs"):
  140. name = name[:-6] # PEP-561 stub-only directory
  141. # recurse if there's an __init__.py
  142. init_file = self.get_init_file(dir)
  143. if init_file is not None:
  144. if not name.isidentifier():
  145. # in most cases the directory name is invalid, we'll just stop crawling upwards
  146. # but if there's an __init__.py in the directory, something is messed up
  147. raise InvalidSourceList(f"{name} is not a valid Python package name")
  148. # we're definitely a package, so we always return a non-None value
  149. mod_prefix, base_dir = self.crawl_up_dir(parent)
  150. return module_join(mod_prefix, name), base_dir
  151. # stop crawling if we're out of path components or our name is an invalid identifier
  152. if not name or not parent or not name.isidentifier():
  153. return None
  154. # stop crawling if namespace packages is off (since we don't have an __init__.py)
  155. if not self.namespace_packages:
  156. return None
  157. # at this point: namespace packages is on, we don't have an __init__.py and we're not an
  158. # explicit base directory
  159. result = self._crawl_up_helper(parent)
  160. if result is None:
  161. # we're not an explicit base directory and we don't have an __init__.py
  162. # and none of our parents are either, so return
  163. return None
  164. # one of our parents was an explicit base directory or had an __init__.py, so we're
  165. # definitely a subpackage! chain our name to the module.
  166. mod_prefix, base_dir = result
  167. return module_join(mod_prefix, name), base_dir
  168. def get_init_file(self, dir: str) -> str | None:
  169. """Check whether a directory contains a file named __init__.py[i].
  170. If so, return the file's name (with dir prefixed). If not, return None.
  171. This prefers .pyi over .py (because of the ordering of PY_EXTENSIONS).
  172. """
  173. for ext in PY_EXTENSIONS:
  174. f = os.path.join(dir, "__init__" + ext)
  175. if self.fscache.isfile(f):
  176. return f
  177. if ext == ".py" and self.fscache.init_under_package_root(f):
  178. return f
  179. return None
  180. def module_join(parent: str, child: str) -> str:
  181. """Join module ids, accounting for a possibly empty parent."""
  182. if parent:
  183. return parent + "." + child
  184. return child
  185. def strip_py(arg: str) -> str | None:
  186. """Strip a trailing .py or .pyi suffix.
  187. Return None if no such suffix is found.
  188. """
  189. for ext in PY_EXTENSIONS:
  190. if arg.endswith(ext):
  191. return arg[: -len(ext)]
  192. return None