  1. # Contains standalone functions to accompany the index implementation and make it
  2. # more versatile
  3. # NOTE: Autodoc hates it if this is a docstring
  4. from io import BytesIO
  5. from pathlib import Path
  6. import os
  7. from stat import (
  8. S_IFDIR,
  9. S_IFLNK,
  10. S_ISLNK,
  11. S_ISDIR,
  12. S_IFMT,
  13. S_IFREG,
  14. S_IXUSR,
  15. )
  16. import subprocess
  17. from git.cmd import PROC_CREATIONFLAGS, handle_process_output
  18. from git.compat import (
  19. defenc,
  20. force_text,
  21. force_bytes,
  22. is_posix,
  23. is_win,
  24. safe_decode,
  25. )
  26. from git.exc import UnmergedEntriesError, HookExecutionError
  27. from git.objects.fun import (
  28. tree_to_stream,
  29. traverse_tree_recursive,
  30. traverse_trees_recursive,
  31. )
  32. from git.util import IndexFileSHA1Writer, finalize_process
  33. from gitdb.base import IStream
  34. from gitdb.typ import str_tree_type
  35. import os.path as osp
  36. from .typ import BaseIndexEntry, IndexEntry, CE_NAMEMASK, CE_STAGESHIFT
  37. from .util import pack, unpack
  38. # typing -----------------------------------------------------------------------------
  39. from typing import Dict, IO, List, Sequence, TYPE_CHECKING, Tuple, Type, Union, cast
  40. from git.types import PathLike
  41. if TYPE_CHECKING:
  42. from .base import IndexFile
  43. from git.db import GitCmdObjectDB
  44. from git.objects.tree import TreeCacheTup
  45. # from git.objects.fun import EntryTupOrNone
  46. # ------------------------------------------------------------------------------------
# Mode used by git for submodule (gitlink) entries: combines the symlink and
# directory type bits, matching neither a plain file, link, nor directory.
S_IFGITLINK = S_IFLNK | S_IFDIR  # a submodule
# Inverted name-length mask: used to clear the path-length bits from an
# entry's flags field before writing a new length into it.
CE_NAMEMASK_INV = ~CE_NAMEMASK

# Public API of this module.
__all__ = (
    "write_cache",
    "read_cache",
    "write_tree_from_cache",
    "entry_key",
    "stat_mode_to_index_mode",
    "S_IFGITLINK",
    "run_commit_hook",
    "hook_path",
)
  59. def hook_path(name: str, git_dir: PathLike) -> str:
  60. """:return: path to the given named hook in the given git repository directory"""
  61. return osp.join(git_dir, "hooks", name)
  62. def _has_file_extension(path):
  63. return osp.splitext(path)[1]
def run_commit_hook(name: str, index: "IndexFile", *args: str) -> None:
    """Run the commit hook of the given name. Silently ignores hooks that do not exist.

    :param name: name of hook, like 'pre-commit'
    :param index: IndexFile instance
    :param args: arguments passed to hook file
    :raises HookExecutionError: if the hook could not be started, or exited non-zero"""
    hp = hook_path(name, index.repo.git_dir)
    if not os.access(hp, os.X_OK):
        # A missing or non-executable hook is not an error — just skip it.
        return None

    env = os.environ.copy()
    # Point the hook at the same index file this IndexFile wraps.
    env["GIT_INDEX_FILE"] = safe_decode(str(index.path))
    # ':' is presumably used here as a no-op editor so hooks never block on
    # interactive input — NOTE(review): confirm against git's hook docs.
    env["GIT_EDITOR"] = ":"
    cmd = [hp]
    try:
        if is_win and not _has_file_extension(hp):
            # Windows only uses extensions to determine how to open files
            # (doesn't understand shebangs). Try using bash to run the hook.
            relative_hp = Path(hp).relative_to(index.repo.working_dir).as_posix()
            cmd = ["bash.exe", relative_hp]

        # `cmd` is rebound from the argv list to the Popen handle here.
        cmd = subprocess.Popen(
            cmd + list(args),
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=index.repo.working_dir,
            close_fds=is_posix,
            creationflags=PROC_CREATIONFLAGS,
        )
    except Exception as ex:
        raise HookExecutionError(hp, ex) from ex
    else:
        # Collect the hook's output; finalize_process waits for termination.
        stdout_list: List[str] = []
        stderr_list: List[str] = []
        handle_process_output(cmd, stdout_list.append, stderr_list.append, finalize_process)
        stdout = "".join(stdout_list)
        stderr = "".join(stderr_list)
        if cmd.returncode != 0:
            stdout = force_text(stdout, defenc)
            stderr = force_text(stderr, defenc)
            raise HookExecutionError(hp, cmd.returncode, stderr, stdout)
    # end handle return code
  105. def stat_mode_to_index_mode(mode: int) -> int:
  106. """Convert the given mode from a stat call to the corresponding index mode
  107. and return it"""
  108. if S_ISLNK(mode): # symlinks
  109. return S_IFLNK
  110. if S_ISDIR(mode) or S_IFMT(mode) == S_IFGITLINK: # submodules
  111. return S_IFGITLINK
  112. return S_IFREG | (mode & S_IXUSR and 0o755 or 0o644) # blobs with or without executable bit
  113. def write_cache(
  114. entries: Sequence[Union[BaseIndexEntry, "IndexEntry"]],
  115. stream: IO[bytes],
  116. extension_data: Union[None, bytes] = None,
  117. ShaStreamCls: Type[IndexFileSHA1Writer] = IndexFileSHA1Writer,
  118. ) -> None:
  119. """Write the cache represented by entries to a stream
  120. :param entries: **sorted** list of entries
  121. :param stream: stream to wrap into the AdapterStreamCls - it is used for
  122. final output.
  123. :param ShaStreamCls: Type to use when writing to the stream. It produces a sha
  124. while writing to it, before the data is passed on to the wrapped stream
  125. :param extension_data: any kind of data to write as a trailer, it must begin
  126. a 4 byte identifier, followed by its size ( 4 bytes )"""
  127. # wrap the stream into a compatible writer
  128. stream_sha = ShaStreamCls(stream)
  129. tell = stream_sha.tell
  130. write = stream_sha.write
  131. # header
  132. version = 2
  133. write(b"DIRC")
  134. write(pack(">LL", version, len(entries)))
  135. # body
  136. for entry in entries:
  137. beginoffset = tell()
  138. write(entry.ctime_bytes) # ctime
  139. write(entry.mtime_bytes) # mtime
  140. path_str = str(entry.path)
  141. path: bytes = force_bytes(path_str, encoding=defenc)
  142. plen = len(path) & CE_NAMEMASK # path length
  143. assert plen == len(path), "Path %s too long to fit into index" % entry.path
  144. flags = plen | (entry.flags & CE_NAMEMASK_INV) # clear possible previous values
  145. write(
  146. pack(
  147. ">LLLLLL20sH",
  148. entry.dev,
  149. entry.inode,
  150. entry.mode,
  151. entry.uid,
  152. entry.gid,
  153. entry.size,
  154. entry.binsha,
  155. flags,
  156. )
  157. )
  158. write(path)
  159. real_size = (tell() - beginoffset + 8) & ~7
  160. write(b"\0" * ((beginoffset + real_size) - tell()))
  161. # END for each entry
  162. # write previously cached extensions data
  163. if extension_data is not None:
  164. stream_sha.write(extension_data)
  165. # write the sha over the content
  166. stream_sha.write_sha()
  167. def read_header(stream: IO[bytes]) -> Tuple[int, int]:
  168. """Return tuple(version_long, num_entries) from the given stream"""
  169. type_id = stream.read(4)
  170. if type_id != b"DIRC":
  171. raise AssertionError("Invalid index file header: %r" % type_id)
  172. unpacked = cast(Tuple[int, int], unpack(">LL", stream.read(4 * 2)))
  173. version, num_entries = unpacked
  174. # TODO: handle version 3: extended data, see read-cache.c
  175. assert version in (1, 2)
  176. return version, num_entries
  177. def entry_key(*entry: Union[BaseIndexEntry, PathLike, int]) -> Tuple[PathLike, int]:
  178. """:return: Key suitable to be used for the index.entries dictionary
  179. :param entry: One instance of type BaseIndexEntry or the path and the stage"""
  180. # def is_entry_key_tup(entry_key: Tuple) -> TypeGuard[Tuple[PathLike, int]]:
  181. # return isinstance(entry_key, tuple) and len(entry_key) == 2
  182. if len(entry) == 1:
  183. entry_first = entry[0]
  184. assert isinstance(entry_first, BaseIndexEntry)
  185. return (entry_first.path, entry_first.stage)
  186. else:
  187. # assert is_entry_key_tup(entry)
  188. entry = cast(Tuple[PathLike, int], entry)
  189. return entry
  190. # END handle entry
  191. def read_cache(
  192. stream: IO[bytes],
  193. ) -> Tuple[int, Dict[Tuple[PathLike, int], "IndexEntry"], bytes, bytes]:
  194. """Read a cache file from the given stream
  195. :return: tuple(version, entries_dict, extension_data, content_sha)
  196. * version is the integer version number
  197. * entries dict is a dictionary which maps IndexEntry instances to a path at a stage
  198. * extension_data is '' or 4 bytes of type + 4 bytes of size + size bytes
  199. * content_sha is a 20 byte sha on all cache file contents"""
  200. version, num_entries = read_header(stream)
  201. count = 0
  202. entries: Dict[Tuple[PathLike, int], "IndexEntry"] = {}
  203. read = stream.read
  204. tell = stream.tell
  205. while count < num_entries:
  206. beginoffset = tell()
  207. ctime = unpack(">8s", read(8))[0]
  208. mtime = unpack(">8s", read(8))[0]
  209. (dev, ino, mode, uid, gid, size, sha, flags) = unpack(">LLLLLL20sH", read(20 + 4 * 6 + 2))
  210. path_size = flags & CE_NAMEMASK
  211. path = read(path_size).decode(defenc)
  212. real_size = (tell() - beginoffset + 8) & ~7
  213. read((beginoffset + real_size) - tell())
  214. entry = IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size))
  215. # entry_key would be the method to use, but we safe the effort
  216. entries[(path, entry.stage)] = entry
  217. count += 1
  218. # END for each entry
  219. # the footer contains extension data and a sha on the content so far
  220. # Keep the extension footer,and verify we have a sha in the end
  221. # Extension data format is:
  222. # 4 bytes ID
  223. # 4 bytes length of chunk
  224. # repeated 0 - N times
  225. extension_data = stream.read(~0)
  226. assert (
  227. len(extension_data) > 19
  228. ), "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(extension_data)
  229. content_sha = extension_data[-20:]
  230. # truncate the sha in the end as we will dynamically create it anyway
  231. extension_data = extension_data[:-20]
  232. return (version, entries, extension_data, content_sha)
def write_tree_from_cache(
    entries: List[IndexEntry], odb: "GitCmdObjectDB", sl: slice, si: int = 0
) -> Tuple[bytes, List["TreeCacheTup"]]:
    """Create a tree from the given sorted list of entries and put the respective
    trees into the given object database

    :param entries: **sorted** list of IndexEntries
    :param odb: object database to store the trees in
    :param si: start index at which we should start creating subtrees
    :param sl: slice indicating the range we should process on the entries list
    :return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of
        tree entries being a tuple of hexsha, mode, name
    :raises UnmergedEntriesError: if any entry in the range is not at stage 0"""
    tree_items: List["TreeCacheTup"] = []
    ci = sl.start
    end = sl.stop
    while ci < end:
        entry = entries[ci]
        if entry.stage != 0:
            raise UnmergedEntriesError(entry)
        # END abort on unmerged
        ci += 1
        # Find the next '/' after the current subtree prefix length si:
        # no '/' means the entry is a blob directly at this tree level.
        rbound = entry.path.find("/", si)
        if rbound == -1:
            # its not a tree
            tree_items.append((entry.binsha, entry.mode, entry.path[si:]))
        else:
            # find common base range: all consecutive entries sharing the same
            # directory component [si:rbound] belong to one subtree.
            base = entry.path[si:rbound]
            xi = ci
            while xi < end:
                oentry = entries[xi]
                orbound = oentry.path.find("/", si)
                if orbound == -1 or oentry.path[si:orbound] != base:
                    break
                # END abort on base mismatch
                xi += 1
            # END find common base
            # enter recursion
            # ci - 1 as we want to count our current item as well
            sha, _tree_entry_list = write_tree_from_cache(entries, odb, slice(ci - 1, xi), rbound + 1)
            tree_items.append((sha, S_IFDIR, base))
            # skip ahead over all entries consumed by the subtree
            ci = xi
        # END handle bounds
    # END for each entry
    # finally create the tree: serialize the collected items and store the
    # resulting tree object in the database to obtain its binsha.
    sio = BytesIO()
    tree_to_stream(tree_items, sio.write)  # writes to stream as bytes, but doesn't change tree_items
    sio.seek(0)
    istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
    return (istream.binsha, tree_items)
  283. def _tree_entry_to_baseindexentry(tree_entry: "TreeCacheTup", stage: int) -> BaseIndexEntry:
  284. return BaseIndexEntry((tree_entry[1], tree_entry[0], stage << CE_STAGESHIFT, tree_entry[2]))
def aggressive_tree_merge(odb: "GitCmdObjectDB", tree_shas: Sequence[bytes]) -> List[BaseIndexEntry]:
    """
    :return: list of BaseIndexEntries representing the aggressive merge of the given
        trees. All valid entries are on stage 0, whereas the conflicting ones are left
        on stage 1, 2 or 3, whereas stage 1 corresponds to the common ancestor tree,
        2 to our tree and 3 to 'their' tree.
    :param odb: object database from which the trees are traversed
    :param tree_shas: 1, 2 or 3 trees as identified by their binary 20 byte shas
        If 1 or two, the entries will effectively correspond to the last given tree
        If 3 are given, a 3 way merge is performed
    :raises ValueError: if more than 3 tree shas are given"""
    out: List[BaseIndexEntry] = []

    # one and two way is the same for us, as we don't have to handle an existing
    # index, instead
    if len(tree_shas) in (1, 2):
        for entry in traverse_tree_recursive(odb, tree_shas[-1], ""):
            out.append(_tree_entry_to_baseindexentry(entry, 0))
        # END for each entry
        return out
    # END handle single tree

    if len(tree_shas) > 3:
        raise ValueError("Cannot handle %i trees at once" % len(tree_shas))

    # three trees
    # Each traversal item is a (base, ours, theirs) triple; any element may be
    # None when the path is absent from that tree. Comparisons below look at
    # index [0] and [1] of each tuple to decide whether a side changed a path.
    for base, ours, theirs in traverse_trees_recursive(odb, tree_shas, ""):
        if base is not None:
            # base version exists
            if ours is not None:
                # ours exists
                if theirs is not None:
                    # it exists in all branches, if it was changed in both
                    # its a conflict, otherwise we take the changed version
                    # This should be the most common branch, so it comes first
                    if (base[0] != ours[0] and base[0] != theirs[0] and ours[0] != theirs[0]) or (
                        base[1] != ours[1] and base[1] != theirs[1] and ours[1] != theirs[1]
                    ):
                        # changed by both
                        out.append(_tree_entry_to_baseindexentry(base, 1))
                        out.append(_tree_entry_to_baseindexentry(ours, 2))
                        out.append(_tree_entry_to_baseindexentry(theirs, 3))
                    elif base[0] != ours[0] or base[1] != ours[1]:
                        # only we changed it
                        out.append(_tree_entry_to_baseindexentry(ours, 0))
                    else:
                        # either nobody changed it, or they did. In either
                        # case, use theirs
                        out.append(_tree_entry_to_baseindexentry(theirs, 0))
                    # END handle modification
                else:
                    if ours[0] != base[0] or ours[1] != base[1]:
                        # they deleted it, we changed it, conflict
                        out.append(_tree_entry_to_baseindexentry(base, 1))
                        out.append(_tree_entry_to_baseindexentry(ours, 2))
                    # else:
                    #   we didn't change it, ignore
                    #   pass
                    # END handle our change
                # END handle theirs
            else:
                if theirs is None:
                    # deleted in both, its fine - its out
                    pass
                else:
                    if theirs[0] != base[0] or theirs[1] != base[1]:
                        # deleted in ours, changed theirs, conflict
                        out.append(_tree_entry_to_baseindexentry(base, 1))
                        out.append(_tree_entry_to_baseindexentry(theirs, 3))
                    # END theirs changed
                    # else:
                    #   theirs didn't change
                    #   pass
                # END handle theirs
            # END handle ours
        else:
            # all three can't be None
            if ours is None:
                # added in their branch
                assert theirs is not None
                out.append(_tree_entry_to_baseindexentry(theirs, 0))
            elif theirs is None:
                # added in our branch
                out.append(_tree_entry_to_baseindexentry(ours, 0))
            else:
                # both have it, except for the base, see whether it changed
                if ours[0] != theirs[0] or ours[1] != theirs[1]:
                    out.append(_tree_entry_to_baseindexentry(ours, 2))
                    out.append(_tree_entry_to_baseindexentry(theirs, 3))
                else:
                    # it was added the same in both
                    out.append(_tree_entry_to_baseindexentry(ours, 0))
                # END handle two items
            # END handle heads
        # END handle base exists
    # END for each entries tuple
    return out