base.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. # Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
  2. #
  3. # This module is part of GitDB and is released under
  4. # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Contains implementations of database retrieving objects"""
  6. from gitdb.util import (
  7. join,
  8. LazyMixin,
  9. hex_to_bin
  10. )
  11. from gitdb.utils.encoding import force_text
  12. from gitdb.exc import (
  13. BadObject,
  14. AmbiguousObjectName
  15. )
  16. from itertools import chain
  17. from functools import reduce
  18. __all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')
  19. class ObjectDBR:
  20. """Defines an interface for object database lookup.
  21. Objects are identified either by their 20 byte bin sha"""
  22. def __contains__(self, sha):
  23. return self.has_obj
  24. #{ Query Interface
  25. def has_object(self, sha):
  26. """
  27. Whether the object identified by the given 20 bytes
  28. binary sha is contained in the database
  29. :return: True if the object identified by the given 20 bytes
  30. binary sha is contained in the database"""
  31. raise NotImplementedError("To be implemented in subclass")
  32. def info(self, sha):
  33. """ :return: OInfo instance
  34. :param sha: bytes binary sha
  35. :raise BadObject:"""
  36. raise NotImplementedError("To be implemented in subclass")
  37. def stream(self, sha):
  38. """:return: OStream instance
  39. :param sha: 20 bytes binary sha
  40. :raise BadObject:"""
  41. raise NotImplementedError("To be implemented in subclass")
  42. def size(self):
  43. """:return: amount of objects in this database"""
  44. raise NotImplementedError()
  45. def sha_iter(self):
  46. """Return iterator yielding 20 byte shas for all objects in this data base"""
  47. raise NotImplementedError()
  48. #} END query interface
  49. class ObjectDBW:
  50. """Defines an interface to create objects in the database"""
  51. def __init__(self, *args, **kwargs):
  52. self._ostream = None
  53. #{ Edit Interface
  54. def set_ostream(self, stream):
  55. """
  56. Adjusts the stream to which all data should be sent when storing new objects
  57. :param stream: if not None, the stream to use, if None the default stream
  58. will be used.
  59. :return: previously installed stream, or None if there was no override
  60. :raise TypeError: if the stream doesn't have the supported functionality"""
  61. cstream = self._ostream
  62. self._ostream = stream
  63. return cstream
  64. def ostream(self):
  65. """
  66. Return the output stream
  67. :return: overridden output stream this instance will write to, or None
  68. if it will write to the default stream"""
  69. return self._ostream
  70. def store(self, istream):
  71. """
  72. Create a new object in the database
  73. :return: the input istream object with its sha set to its corresponding value
  74. :param istream: IStream compatible instance. If its sha is already set
  75. to a value, the object will just be stored in the our database format,
  76. in which case the input stream is expected to be in object format ( header + contents ).
  77. :raise IOError: if data could not be written"""
  78. raise NotImplementedError("To be implemented in subclass")
  79. #} END edit interface
  80. class FileDBBase:
  81. """Provides basic facilities to retrieve files of interest, including
  82. caching facilities to help mapping hexsha's to objects"""
  83. def __init__(self, root_path):
  84. """Initialize this instance to look for its files at the given root path
  85. All subsequent operations will be relative to this path
  86. :raise InvalidDBRoot:
  87. **Note:** The base will not perform any accessablity checking as the base
  88. might not yet be accessible, but become accessible before the first
  89. access."""
  90. super().__init__()
  91. self._root_path = root_path
  92. #{ Interface
  93. def root_path(self):
  94. """:return: path at which this db operates"""
  95. return self._root_path
  96. def db_path(self, rela_path):
  97. """
  98. :return: the given relative path relative to our database root, allowing
  99. to pontentially access datafiles"""
  100. return join(self._root_path, force_text(rela_path))
  101. #} END interface
  102. class CachingDB:
  103. """A database which uses caches to speed-up access"""
  104. #{ Interface
  105. def update_cache(self, force=False):
  106. """
  107. Call this method if the underlying data changed to trigger an update
  108. of the internal caching structures.
  109. :param force: if True, the update must be performed. Otherwise the implementation
  110. may decide not to perform an update if it thinks nothing has changed.
  111. :return: True if an update was performed as something change indeed"""
  112. # END interface
  113. def _databases_recursive(database, output):
  114. """Fill output list with database from db, in order. Deals with Loose, Packed
  115. and compound databases."""
  116. if isinstance(database, CompoundDB):
  117. dbs = database.databases()
  118. output.extend(db for db in dbs if not isinstance(db, CompoundDB))
  119. for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
  120. _databases_recursive(cdb, output)
  121. else:
  122. output.append(database)
  123. # END handle database type
  124. class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
  125. """A database which delegates calls to sub-databases.
  126. Databases are stored in the lazy-loaded _dbs attribute.
  127. Define _set_cache_ to update it with your databases"""
  128. def _set_cache_(self, attr):
  129. if attr == '_dbs':
  130. self._dbs = list()
  131. elif attr == '_db_cache':
  132. self._db_cache = dict()
  133. else:
  134. super()._set_cache_(attr)
  135. def _db_query(self, sha):
  136. """:return: database containing the given 20 byte sha
  137. :raise BadObject:"""
  138. # most databases use binary representations, prevent converting
  139. # it every time a database is being queried
  140. try:
  141. return self._db_cache[sha]
  142. except KeyError:
  143. pass
  144. # END first level cache
  145. for db in self._dbs:
  146. if db.has_object(sha):
  147. self._db_cache[sha] = db
  148. return db
  149. # END for each database
  150. raise BadObject(sha)
  151. #{ ObjectDBR interface
  152. def has_object(self, sha):
  153. try:
  154. self._db_query(sha)
  155. return True
  156. except BadObject:
  157. return False
  158. # END handle exceptions
  159. def info(self, sha):
  160. return self._db_query(sha).info(sha)
  161. def stream(self, sha):
  162. return self._db_query(sha).stream(sha)
  163. def size(self):
  164. """:return: total size of all contained databases"""
  165. return reduce(lambda x, y: x + y, (db.size() for db in self._dbs), 0)
  166. def sha_iter(self):
  167. return chain(*(db.sha_iter() for db in self._dbs))
  168. #} END object DBR Interface
  169. #{ Interface
  170. def databases(self):
  171. """:return: tuple of database instances we use for lookups"""
  172. return tuple(self._dbs)
  173. def update_cache(self, force=False):
  174. # something might have changed, clear everything
  175. self._db_cache.clear()
  176. stat = False
  177. for db in self._dbs:
  178. if isinstance(db, CachingDB):
  179. stat |= db.update_cache(force)
  180. # END if is caching db
  181. # END for each database to update
  182. return stat
  183. def partial_to_complete_sha_hex(self, partial_hexsha):
  184. """
  185. :return: 20 byte binary sha1 from the given less-than-40 byte hexsha (bytes or str)
  186. :param partial_hexsha: hexsha with less than 40 byte
  187. :raise AmbiguousObjectName: """
  188. databases = list()
  189. _databases_recursive(self, databases)
  190. partial_hexsha = force_text(partial_hexsha)
  191. len_partial_hexsha = len(partial_hexsha)
  192. if len_partial_hexsha % 2 != 0:
  193. partial_binsha = hex_to_bin(partial_hexsha + "0")
  194. else:
  195. partial_binsha = hex_to_bin(partial_hexsha)
  196. # END assure successful binary conversion
  197. candidate = None
  198. for db in databases:
  199. full_bin_sha = None
  200. try:
  201. if hasattr(db, 'partial_to_complete_sha_hex'):
  202. full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
  203. else:
  204. full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
  205. # END handle database type
  206. except BadObject:
  207. continue
  208. # END ignore bad objects
  209. if full_bin_sha:
  210. if candidate and candidate != full_bin_sha:
  211. raise AmbiguousObjectName(partial_hexsha)
  212. candidate = full_bin_sha
  213. # END handle candidate
  214. # END for each db
  215. if not candidate:
  216. raise BadObject(partial_binsha)
  217. return candidate
  218. #} END interface