fscache.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. """Interface for accessing the file system with automatic caching.
  2. The idea is to cache the results of any file system state reads during
  3. a single transaction. This has two main benefits:
  4. * This avoids redundant syscalls, as we won't perform the same OS
  5. operations multiple times.
  6. * This makes it easier to reason about concurrent FS updates, as different
  7. operations targeting the same paths can't report different state during
  8. a transaction.
  9. Note that this only deals with reading state, not writing.
  10. Properties maintained by the API:
  11. * The contents of the file are always from the same or later time compared
  12. to the reported mtime of the file, even if mtime is queried after reading
  13. a file.
  14. * Repeating an operation produces the same result as the first one during
  15. a transaction.
  16. * Call flush() to start a new transaction (flush the caches).
  17. The API is a bit limited. It's easy to add new cached operations, however.
  18. You should perform all file system reads through the API to actually take
  19. advantage of the benefits.
  20. """
  21. from __future__ import annotations
  22. import os
  23. import stat
  24. from mypy_extensions import mypyc_attr
  25. from mypy.util import hash_digest
  26. @mypyc_attr(allow_interpreted_subclasses=True) # for tests
  27. class FileSystemCache:
  28. def __init__(self) -> None:
  29. # The package root is not flushed with the caches.
  30. # It is set by set_package_root() below.
  31. self.package_root: list[str] = []
  32. self.flush()
  33. def set_package_root(self, package_root: list[str]) -> None:
  34. self.package_root = package_root
  35. def flush(self) -> None:
  36. """Start another transaction and empty all caches."""
  37. self.stat_cache: dict[str, os.stat_result] = {}
  38. self.stat_error_cache: dict[str, OSError] = {}
  39. self.listdir_cache: dict[str, list[str]] = {}
  40. self.listdir_error_cache: dict[str, OSError] = {}
  41. self.isfile_case_cache: dict[str, bool] = {}
  42. self.exists_case_cache: dict[str, bool] = {}
  43. self.read_cache: dict[str, bytes] = {}
  44. self.read_error_cache: dict[str, Exception] = {}
  45. self.hash_cache: dict[str, str] = {}
  46. self.fake_package_cache: set[str] = set()
  47. def stat(self, path: str) -> os.stat_result:
  48. if path in self.stat_cache:
  49. return self.stat_cache[path]
  50. if path in self.stat_error_cache:
  51. raise copy_os_error(self.stat_error_cache[path])
  52. try:
  53. st = os.stat(path)
  54. except OSError as err:
  55. if self.init_under_package_root(path):
  56. try:
  57. return self._fake_init(path)
  58. except OSError:
  59. pass
  60. # Take a copy to get rid of associated traceback and frame objects.
  61. # Just assigning to __traceback__ doesn't free them.
  62. self.stat_error_cache[path] = copy_os_error(err)
  63. raise err
  64. self.stat_cache[path] = st
  65. return st
  66. def init_under_package_root(self, path: str) -> bool:
  67. """Is this path an __init__.py under a package root?
  68. This is used to detect packages that don't contain __init__.py
  69. files, which is needed to support Bazel. The function should
  70. only be called for non-existing files.
  71. It will return True if it refers to a __init__.py file that
  72. Bazel would create, so that at runtime Python would think the
  73. directory containing it is a package. For this to work you
  74. must pass one or more package roots using the --package-root
  75. flag.
  76. As an exceptional case, any directory that is a package root
  77. itself will not be considered to contain a __init__.py file.
  78. This is different from the rules Bazel itself applies, but is
  79. necessary for mypy to properly distinguish packages from other
  80. directories.
  81. See https://docs.bazel.build/versions/master/be/python.html,
  82. where this behavior is described under legacy_create_init.
  83. """
  84. if not self.package_root:
  85. return False
  86. dirname, basename = os.path.split(path)
  87. if basename != "__init__.py":
  88. return False
  89. if not os.path.basename(dirname).isidentifier():
  90. # Can't put an __init__.py in a place that's not an identifier
  91. return False
  92. try:
  93. st = self.stat(dirname)
  94. except OSError:
  95. return False
  96. else:
  97. if not stat.S_ISDIR(st.st_mode):
  98. return False
  99. ok = False
  100. drive, path = os.path.splitdrive(path) # Ignore Windows drive name
  101. if os.path.isabs(path):
  102. path = os.path.relpath(path)
  103. path = os.path.normpath(path)
  104. for root in self.package_root:
  105. if path.startswith(root):
  106. if path == root + basename:
  107. # A package root itself is never a package.
  108. ok = False
  109. break
  110. else:
  111. ok = True
  112. return ok
  113. def _fake_init(self, path: str) -> os.stat_result:
  114. """Prime the cache with a fake __init__.py file.
  115. This makes code that looks for path believe an empty file by
  116. that name exists. Should only be called after
  117. init_under_package_root() returns True.
  118. """
  119. dirname, basename = os.path.split(path)
  120. assert basename == "__init__.py", path
  121. assert not os.path.exists(path), path # Not cached!
  122. dirname = os.path.normpath(dirname)
  123. st = self.stat(dirname) # May raise OSError
  124. # Get stat result as a list so we can modify it.
  125. seq: list[float] = list(st)
  126. seq[stat.ST_MODE] = stat.S_IFREG | 0o444
  127. seq[stat.ST_INO] = 1
  128. seq[stat.ST_NLINK] = 1
  129. seq[stat.ST_SIZE] = 0
  130. st = os.stat_result(seq)
  131. self.stat_cache[path] = st
  132. # Make listdir() and read() also pretend this file exists.
  133. self.fake_package_cache.add(dirname)
  134. return st
  135. def listdir(self, path: str) -> list[str]:
  136. path = os.path.normpath(path)
  137. if path in self.listdir_cache:
  138. res = self.listdir_cache[path]
  139. # Check the fake cache.
  140. if path in self.fake_package_cache and "__init__.py" not in res:
  141. res.append("__init__.py") # Updates the result as well as the cache
  142. return res
  143. if path in self.listdir_error_cache:
  144. raise copy_os_error(self.listdir_error_cache[path])
  145. try:
  146. results = os.listdir(path)
  147. except OSError as err:
  148. # Like above, take a copy to reduce memory use.
  149. self.listdir_error_cache[path] = copy_os_error(err)
  150. raise err
  151. self.listdir_cache[path] = results
  152. # Check the fake cache.
  153. if path in self.fake_package_cache and "__init__.py" not in results:
  154. results.append("__init__.py")
  155. return results
  156. def isfile(self, path: str) -> bool:
  157. try:
  158. st = self.stat(path)
  159. except OSError:
  160. return False
  161. return stat.S_ISREG(st.st_mode)
  162. def isfile_case(self, path: str, prefix: str) -> bool:
  163. """Return whether path exists and is a file.
  164. On case-insensitive filesystems (like Mac or Windows) this returns
  165. False if the case of path's last component does not exactly match
  166. the case found in the filesystem.
  167. We check also the case of other path components up to prefix.
  168. For example, if path is 'user-stubs/pack/mod.pyi' and prefix is 'user-stubs',
  169. we check that the case of 'pack' and 'mod.py' matches exactly, 'user-stubs' will be
  170. case insensitive on case insensitive filesystems.
  171. The caller must ensure that prefix is a valid file system prefix of path.
  172. """
  173. if not self.isfile(path):
  174. # Fast path
  175. return False
  176. if path in self.isfile_case_cache:
  177. return self.isfile_case_cache[path]
  178. head, tail = os.path.split(path)
  179. if not tail:
  180. self.isfile_case_cache[path] = False
  181. return False
  182. try:
  183. names = self.listdir(head)
  184. # This allows one to check file name case sensitively in
  185. # case-insensitive filesystems.
  186. res = tail in names
  187. except OSError:
  188. res = False
  189. if res:
  190. # Also recursively check the other path components in case sensitive way.
  191. res = self.exists_case(head, prefix)
  192. self.isfile_case_cache[path] = res
  193. return res
  194. def exists_case(self, path: str, prefix: str) -> bool:
  195. """Return whether path exists - checking path components in case sensitive
  196. fashion, up to prefix.
  197. """
  198. if path in self.exists_case_cache:
  199. return self.exists_case_cache[path]
  200. head, tail = os.path.split(path)
  201. if not head.startswith(prefix) or not tail:
  202. # Only perform the check for paths under prefix.
  203. self.exists_case_cache[path] = True
  204. return True
  205. try:
  206. names = self.listdir(head)
  207. # This allows one to check file name case sensitively in
  208. # case-insensitive filesystems.
  209. res = tail in names
  210. except OSError:
  211. res = False
  212. if res:
  213. # Also recursively check other path components.
  214. res = self.exists_case(head, prefix)
  215. self.exists_case_cache[path] = res
  216. return res
  217. def isdir(self, path: str) -> bool:
  218. try:
  219. st = self.stat(path)
  220. except OSError:
  221. return False
  222. return stat.S_ISDIR(st.st_mode)
  223. def exists(self, path: str) -> bool:
  224. try:
  225. self.stat(path)
  226. except FileNotFoundError:
  227. return False
  228. return True
  229. def read(self, path: str) -> bytes:
  230. if path in self.read_cache:
  231. return self.read_cache[path]
  232. if path in self.read_error_cache:
  233. raise self.read_error_cache[path]
  234. # Need to stat first so that the contents of file are from no
  235. # earlier instant than the mtime reported by self.stat().
  236. self.stat(path)
  237. dirname, basename = os.path.split(path)
  238. dirname = os.path.normpath(dirname)
  239. # Check the fake cache.
  240. if basename == "__init__.py" and dirname in self.fake_package_cache:
  241. data = b""
  242. else:
  243. try:
  244. with open(path, "rb") as f:
  245. data = f.read()
  246. except OSError as err:
  247. self.read_error_cache[path] = err
  248. raise
  249. self.read_cache[path] = data
  250. self.hash_cache[path] = hash_digest(data)
  251. return data
  252. def hash_digest(self, path: str) -> str:
  253. if path not in self.hash_cache:
  254. self.read(path)
  255. return self.hash_cache[path]
  256. def samefile(self, f1: str, f2: str) -> bool:
  257. s1 = self.stat(f1)
  258. s2 = self.stat(f2)
  259. return os.path.samestat(s1, s2)
  260. def copy_os_error(e: OSError) -> OSError:
  261. new = OSError(*e.args)
  262. new.errno = e.errno
  263. new.strerror = e.strerror
  264. new.filename = e.filename
  265. if e.filename2:
  266. new.filename2 = e.filename2
  267. return new