metastore.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  """Interfaces for accessing metadata.

  We provide two implementations.

   * The "classic" file system implementation, which uses a directory
     structure of files.
   * A hokey SQLite-backed implementation, which basically simulates
     the file system in an effort to work around poor file system performance
     on OS X.
  """
  9. from __future__ import annotations
  10. import binascii
  11. import os
  12. import time
  13. from abc import abstractmethod
  14. from typing import TYPE_CHECKING, Any, Iterable
  15. if TYPE_CHECKING:
  16. # We avoid importing sqlite3 unless we are using it so we can mostly work
  17. # on semi-broken pythons that are missing it.
  18. import sqlite3
  class MetadataStore:
      """Generic interface for metadata storage.

      Concrete stores implement every method below. Note that this class
      does not derive from ``abc.ABC``, so the ``@abstractmethod`` markers
      document intent but do not prevent instantiation.
      """

      @abstractmethod
      def getmtime(self, name: str) -> float:
          """Return the modification time of the metadata entry *name*.

          Raises FileNotFoundError if the entry does not exist.
          """

      @abstractmethod
      def read(self, name: str) -> str:
          """Return the contents of the metadata entry *name*.

          Raises FileNotFoundError if the entry does not exist.
          """

      @abstractmethod
      def write(self, name: str, data: str, mtime: float | None = None) -> bool:
          """Store *data* as the metadata entry *name*.

          When *mtime* is given it becomes the entry's mtime; otherwise the
          current time is used.

          Returns True if the entry was written successfully, False otherwise.
          """

      @abstractmethod
      def remove(self, name: str) -> None:
          """Delete the metadata entry *name*."""

      @abstractmethod
      def commit(self) -> None:
          """Commit pending changes, if the backing store needs that.

          N.B. this is not *guaranteed* to do anything, and there is no
          guarantee that changes stay unapplied until it is called.
          """

      @abstractmethod
      def list_all(self) -> Iterable[str]:
          """Iterate over the names of all entries in the store."""
  def random_string() -> str:
      """Return 16 lowercase hex digits encoding 8 bytes of OS randomness."""
      return os.urandom(8).hex()
  class FilesystemMetadataStore(MetadataStore):
      """Metadata store that keeps each entry as a file below a cache directory.

      When the cache directory is the null device the store is disabled:
      reads, mtime lookups and removals raise FileNotFoundError, writes
      report failure, and listing yields nothing.
      """

      def __init__(self, cache_dir_prefix: str) -> None:
          """Remember *cache_dir_prefix* as the root directory of the store."""
          # We check startswith instead of equality because the version
          # will have already been appended by the time the cache dir is
          # passed here.
          self.cache_dir_prefix: str | None = (
              None if cache_dir_prefix.startswith(os.devnull) else cache_dir_prefix
          )

      def getmtime(self, name: str) -> float:
          """Return the mtime of entry *name*, truncated to whole seconds.

          Raises FileNotFoundError if the store is disabled or the file is
          missing.
          """
          if not self.cache_dir_prefix:
              raise FileNotFoundError()
          # The value is deliberately passed through int(), dropping the
          # sub-second part of the file system timestamp.
          return int(os.path.getmtime(os.path.join(self.cache_dir_prefix, name)))

      def read(self, name: str) -> str:
          """Return the text stored in entry *name*.

          Raises FileNotFoundError if the store is disabled or the file is
          missing.
          """
          assert os.path.normpath(name) != os.path.abspath(name), "Don't use absolute paths!"
          if not self.cache_dir_prefix:
              raise FileNotFoundError()
          with open(os.path.join(self.cache_dir_prefix, name)) as f:
              return f.read()

      def write(self, name: str, data: str, mtime: float | None = None) -> bool:
          """Atomically store *data* as entry *name*.

          The data is first written to a uniquely named temporary file next
          to the target and then moved into place with os.replace(), so a
          reader never observes a partially written entry. If *mtime* is
          given, the entry's access and modification times are set to it.

          Returns True on success. Returns False if the store is disabled or
          any file system operation fails; in the latter case the temporary
          file is removed so that it does not show up in list_all().
          """
          assert os.path.normpath(name) != os.path.abspath(name), "Don't use absolute paths!"
          if not self.cache_dir_prefix:
              return False
          path = os.path.join(self.cache_dir_prefix, name)
          tmp_filename = path + "." + random_string()
          try:
              os.makedirs(os.path.dirname(path), exist_ok=True)
              with open(tmp_filename, "w") as f:
                  f.write(data)
              os.replace(tmp_filename, path)
              if mtime is not None:
                  os.utime(path, times=(mtime, mtime))
          except OSError:
              # Best-effort cleanup: the temporary file may never have been
              # created, or may already have been renamed onto the target.
              try:
                  os.remove(tmp_filename)
              except OSError:
                  pass
              return False
          return True

      def remove(self, name: str) -> None:
          """Delete the file backing entry *name*.

          Raises FileNotFoundError if the store is disabled or the file is
          missing.
          """
          if not self.cache_dir_prefix:
              raise FileNotFoundError()
          os.remove(os.path.join(self.cache_dir_prefix, name))

      def commit(self) -> None:
          """Do nothing; this store has no commit step."""
          pass

      def list_all(self) -> Iterable[str]:
          """Yield the path of every file below the cache directory.

          Paths are relative to the cache directory. Files located directly
          in the cache directory come out prefixed with ``os.curdir``
          (e.g. ``./name``) because that is what os.path.relpath() returns
          for the root itself.
          """
          if not self.cache_dir_prefix:
              return
          for dirpath, _, files in os.walk(self.cache_dir_prefix):
              rel_dir = os.path.relpath(dirpath, self.cache_dir_prefix)
              for file in files:
                  yield os.path.join(rel_dir, file)
  # DDL for the `files` table queried by SqliteMetadataStore: one row per
  # metadata entry, holding its path, mtime and data. Both statements use
  # IF NOT EXISTS, so the script can be run against a database that has
  # already been initialized.
  SCHEMA = """
  CREATE TABLE IF NOT EXISTS files (
      path TEXT UNIQUE NOT NULL,
      mtime REAL,
      data TEXT
  );
  CREATE INDEX IF NOT EXISTS path_idx on files(path);
  """
  # SQL scripts that connect_db() executes, in order, after SCHEMA.
  # No migrations yet
  MIGRATIONS: list[str] = []
  def connect_db(db_file: str) -> sqlite3.Connection:
      """Open the SQLite database at *db_file* and set up its schema.

      Runs the SCHEMA script and then every script in MIGRATIONS, in order,
      before returning the open connection.
      """
      # Imported inside the function rather than at module level so the
      # module can still be loaded on Pythons that lack sqlite3.
      import sqlite3.dbapi2

      connection = sqlite3.dbapi2.connect(db_file)
      connection.executescript(SCHEMA)
      for migration in MIGRATIONS:
          try:
              connection.executescript(migration)
          except sqlite3.OperationalError:
              # Deliberately ignored; presumably the migration was already
              # applied to this database -- TODO confirm.
              continue
      return connection
  class SqliteMetadataStore(MetadataStore):
      """Metadata store that keeps each entry as a row of a SQLite ``files`` table.

      When the cache directory is the null device no database is opened and
      the store is disabled: lookups and removals raise FileNotFoundError,
      writes report failure, and listing yields nothing.
      """

      def __init__(self, cache_dir_prefix: str) -> None:
          """Open (creating if needed) ``cache.db`` inside *cache_dir_prefix*."""
          # We check startswith instead of equality because the version
          # will have already been appended by the time the cache dir is
          # passed here.
          if cache_dir_prefix.startswith(os.devnull):
              self.db = None
          else:
              os.makedirs(cache_dir_prefix, exist_ok=True)
              self.db = connect_db(os.path.join(cache_dir_prefix, "cache.db"))

      def _query(self, name: str, field: str) -> Any:
          """Return column *field* of the row whose path is *name*.

          Raises FileNotFoundError, for consistency with the file system
          version, if the store is disabled or no such row exists.
          """
          if not self.db:
              raise FileNotFoundError()
          rows = self.db.execute(
              f"SELECT {field} FROM files WHERE path = ?", (name,)
          ).fetchall()
          if not rows:
              raise FileNotFoundError()
          assert len(rows) == 1
          (row,) = rows
          return row[0]

      def getmtime(self, name: str) -> float:
          """Return the stored mtime of entry *name*.

          Raises FileNotFoundError if the entry does not exist.
          """
          value = self._query(name, "mtime")
          assert isinstance(value, float)
          return value

      def read(self, name: str) -> str:
          """Return the stored data of entry *name*.

          Raises FileNotFoundError if the entry does not exist.
          """
          value = self._query(name, "data")
          assert isinstance(value, str)
          return value

      def write(self, name: str, data: str, mtime: float | None = None) -> bool:
          """Insert or replace the row for entry *name*.

          When *mtime* is None the current time is stored instead.

          Returns True on success, False if the store is disabled or SQLite
          raises OperationalError.
          """
          import sqlite3

          if not self.db:
              return False
          stamp = time.time() if mtime is None else mtime
          try:
              self.db.execute(
                  "INSERT OR REPLACE INTO files(path, mtime, data) VALUES(?, ?, ?)",
                  (name, stamp, data),
              )
          except sqlite3.OperationalError:
              return False
          else:
              return True

      def remove(self, name: str) -> None:
          """Delete the row for entry *name*.

          Raises FileNotFoundError if the store is disabled.
          """
          if not self.db:
              raise FileNotFoundError()
          self.db.execute("DELETE FROM files WHERE path = ?", (name,))

      def commit(self) -> None:
          """Commit the open database transaction, if there is a database."""
          if not self.db:
              return
          self.db.commit()

      def list_all(self) -> Iterable[str]:
          """Yield the path of every row in the ``files`` table."""
          if not self.db:
              return
          for (path, *_rest) in self.db.execute("SELECT path FROM files"):
              yield path