test_pack.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Test everything about packs reading and writing"""
  6. from gitdb.test.lib import (
  7. TestBase,
  8. with_rw_directory,
  9. fixture_path
  10. )
  11. from gitdb.stream import DeltaApplyReader
  12. from gitdb.pack import (
  13. PackEntity,
  14. PackIndexFile,
  15. PackFile
  16. )
  17. from gitdb.base import (
  18. OInfo,
  19. OStream,
  20. )
  21. from gitdb.fun import delta_types
  22. from gitdb.exc import UnsupportedOperation
  23. from gitdb.util import to_bin_sha
  24. import pytest
  25. import os
  26. import tempfile
  27. #{ Utilities
  28. def bin_sha_from_filename(filename):
  29. return to_bin_sha(os.path.splitext(os.path.basename(filename))[0][5:])
  30. #} END utilities
  31. class TestPack(TestBase):
  32. packindexfile_v1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx'), 1, 67)
  33. packindexfile_v2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx'), 2, 30)
  34. packindexfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx'), 2, 42)
  35. packfile_v2_1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack'), 2, packindexfile_v1[2])
  36. packfile_v2_2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'), 2, packindexfile_v2[2])
  37. packfile_v2_3_ascii = (
  38. fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2])
  39. def _assert_index_file(self, index, version, size):
  40. assert index.packfile_checksum() != index.indexfile_checksum()
  41. assert len(index.packfile_checksum()) == 20
  42. assert len(index.indexfile_checksum()) == 20
  43. assert index.version() == version
  44. assert index.size() == size
  45. assert len(index.offsets()) == size
  46. # get all data of all objects
  47. for oidx in range(index.size()):
  48. sha = index.sha(oidx)
  49. assert oidx == index.sha_to_index(sha)
  50. entry = index.entry(oidx)
  51. assert len(entry) == 3
  52. assert entry[0] == index.offset(oidx)
  53. assert entry[1] == sha
  54. assert entry[2] == index.crc(oidx)
  55. # verify partial sha
  56. for l in (4, 8, 11, 17, 20):
  57. assert index.partial_sha_to_index(sha[:l], l * 2) == oidx
  58. # END for each object index in indexfile
  59. self.assertRaises(ValueError, index.partial_sha_to_index, "\0", 2)
  60. def _assert_pack_file(self, pack, version, size):
  61. assert pack.version() == 2
  62. assert pack.size() == size
  63. assert len(pack.checksum()) == 20
  64. num_obj = 0
  65. for obj in pack.stream_iter():
  66. num_obj += 1
  67. info = pack.info(obj.pack_offset)
  68. stream = pack.stream(obj.pack_offset)
  69. assert info.pack_offset == stream.pack_offset
  70. assert info.type_id == stream.type_id
  71. assert hasattr(stream, 'read')
  72. # it should be possible to read from both streams
  73. assert obj.read() == stream.read()
  74. streams = pack.collect_streams(obj.pack_offset)
  75. assert streams
  76. # read the stream
  77. try:
  78. dstream = DeltaApplyReader.new(streams)
  79. except ValueError:
  80. # ignore these, old git versions use only ref deltas,
  81. # which we haven't resolved ( as we are without an index )
  82. # Also ignore non-delta streams
  83. continue
  84. # END get deltastream
  85. # read all
  86. data = dstream.read()
  87. assert len(data) == dstream.size
  88. # test seek
  89. dstream.seek(0)
  90. assert dstream.read() == data
  91. # read chunks
  92. # NOTE: the current implementation is safe, it basically transfers
  93. # all calls to the underlying memory map
  94. # END for each object
  95. assert num_obj == size
  96. def test_pack_index(self):
  97. # check version 1 and 2
  98. for indexfile, version, size in (self.packindexfile_v1, self.packindexfile_v2):
  99. index = PackIndexFile(indexfile)
  100. self._assert_index_file(index, version, size)
  101. # END run tests
  102. def test_pack(self):
  103. # there is this special version 3, but apparently its like 2 ...
  104. for packfile, version, size in (self.packfile_v2_3_ascii, self.packfile_v2_1, self.packfile_v2_2):
  105. pack = PackFile(packfile)
  106. self._assert_pack_file(pack, version, size)
  107. # END for each pack to test
  108. @with_rw_directory
  109. def test_pack_entity(self, rw_dir):
  110. pack_objs = list()
  111. for packinfo, indexinfo in ((self.packfile_v2_1, self.packindexfile_v1),
  112. (self.packfile_v2_2, self.packindexfile_v2),
  113. (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):
  114. packfile, version, size = packinfo
  115. indexfile, version, size = indexinfo
  116. entity = PackEntity(packfile)
  117. assert entity.pack().path() == packfile
  118. assert entity.index().path() == indexfile
  119. pack_objs.extend(entity.stream_iter())
  120. count = 0
  121. for info, stream in zip(entity.info_iter(), entity.stream_iter()):
  122. count += 1
  123. assert info.binsha == stream.binsha
  124. assert len(info.binsha) == 20
  125. assert info.type_id == stream.type_id
  126. assert info.size == stream.size
  127. # we return fully resolved items, which is implied by the sha centric access
  128. assert not info.type_id in delta_types
  129. # try all calls
  130. assert len(entity.collect_streams(info.binsha))
  131. oinfo = entity.info(info.binsha)
  132. assert isinstance(oinfo, OInfo)
  133. assert oinfo.binsha is not None
  134. ostream = entity.stream(info.binsha)
  135. assert isinstance(ostream, OStream)
  136. assert ostream.binsha is not None
  137. # verify the stream
  138. try:
  139. assert entity.is_valid_stream(info.binsha, use_crc=True)
  140. except UnsupportedOperation:
  141. pass
  142. # END ignore version issues
  143. assert entity.is_valid_stream(info.binsha, use_crc=False)
  144. # END for each info, stream tuple
  145. assert count == size
  146. # END for each entity
  147. # pack writing - write all packs into one
  148. # index path can be None
  149. pack_path1 = tempfile.mktemp('', "pack1", rw_dir)
  150. pack_path2 = tempfile.mktemp('', "pack2", rw_dir)
  151. index_path = tempfile.mktemp('', 'index', rw_dir)
  152. iteration = 0
  153. def rewind_streams():
  154. for obj in pack_objs:
  155. obj.stream.seek(0)
  156. # END utility
  157. for ppath, ipath, num_obj in zip((pack_path1, pack_path2),
  158. (index_path, None),
  159. (len(pack_objs), None)):
  160. iwrite = None
  161. if ipath:
  162. ifile = open(ipath, 'wb')
  163. iwrite = ifile.write
  164. # END handle ip
  165. # make sure we rewind the streams ... we work on the same objects over and over again
  166. if iteration > 0:
  167. rewind_streams()
  168. # END rewind streams
  169. iteration += 1
  170. with open(ppath, 'wb') as pfile:
  171. pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj)
  172. assert os.path.getsize(ppath) > 100
  173. # verify pack
  174. pf = PackFile(ppath)
  175. assert pf.size() == len(pack_objs)
  176. assert pf.version() == PackFile.pack_version_default
  177. assert pf.checksum() == pack_sha
  178. pf.close()
  179. # verify index
  180. if ipath is not None:
  181. ifile.close()
  182. assert os.path.getsize(ipath) > 100
  183. idx = PackIndexFile(ipath)
  184. assert idx.version() == PackIndexFile.index_version_default
  185. assert idx.packfile_checksum() == pack_sha
  186. assert idx.indexfile_checksum() == index_sha
  187. assert idx.size() == len(pack_objs)
  188. idx.close()
  189. # END verify files exist
  190. # END for each packpath, indexpath pair
  191. # verify the packs thoroughly
  192. rewind_streams()
  193. entity = PackEntity.create(pack_objs, rw_dir)
  194. count = 0
  195. for info in entity.info_iter():
  196. count += 1
  197. for use_crc in range(2):
  198. assert entity.is_valid_stream(info.binsha, use_crc)
  199. # END for each crc mode
  200. # END for each info
  201. assert count == len(pack_objs)
  202. entity.close()
  203. def test_pack_64(self):
  204. # TODO: hex-edit a pack helping us to verify that we can handle 64 byte offsets
  205. # of course without really needing such a huge pack
  206. pytest.skip('not implemented')