log.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. from mmap import mmap
  2. import re
  3. import time as _time
  4. from git.compat import defenc
  5. from git.objects.util import (
  6. parse_date,
  7. Serializable,
  8. altz_to_utctz_str,
  9. )
  10. from git.util import (
  11. Actor,
  12. LockedFD,
  13. LockFile,
  14. assure_directory_exists,
  15. to_native_path,
  16. bin_to_hex,
  17. file_contents_ro_filepath,
  18. )
  19. import os.path as osp
  20. # typing ------------------------------------------------------------------
  21. from typing import Iterator, List, Tuple, Union, TYPE_CHECKING
  22. from git.types import PathLike
  23. if TYPE_CHECKING:
  24. from git.refs import SymbolicReference
  25. from io import BytesIO
  26. from git.config import GitConfigParser, SectionConstraint # NOQA
  27. # ------------------------------------------------------------------------------
  28. __all__ = ["RefLog", "RefLogEntry"]
  29. class RefLogEntry(Tuple[str, str, Actor, Tuple[int, int], str]):
  30. """Named tuple allowing easy access to the revlog data fields"""
  31. _re_hexsha_only = re.compile("^[0-9A-Fa-f]{40}$")
  32. __slots__ = ()
  33. def __repr__(self) -> str:
  34. """Representation of ourselves in git reflog format"""
  35. return self.format()
  36. def format(self) -> str:
  37. """:return: a string suitable to be placed in a reflog file"""
  38. act = self.actor
  39. time = self.time
  40. return "{} {} {} <{}> {!s} {}\t{}\n".format(
  41. self.oldhexsha,
  42. self.newhexsha,
  43. act.name,
  44. act.email,
  45. time[0],
  46. altz_to_utctz_str(time[1]),
  47. self.message,
  48. )
  49. @property
  50. def oldhexsha(self) -> str:
  51. """The hexsha to the commit the ref pointed to before the change"""
  52. return self[0]
  53. @property
  54. def newhexsha(self) -> str:
  55. """The hexsha to the commit the ref now points to, after the change"""
  56. return self[1]
  57. @property
  58. def actor(self) -> Actor:
  59. """Actor instance, providing access"""
  60. return self[2]
  61. @property
  62. def time(self) -> Tuple[int, int]:
  63. """time as tuple:
  64. * [0] = int(time)
  65. * [1] = int(timezone_offset) in time.altzone format"""
  66. return self[3]
  67. @property
  68. def message(self) -> str:
  69. """Message describing the operation that acted on the reference"""
  70. return self[4]
  71. @classmethod
  72. def new(
  73. cls,
  74. oldhexsha: str,
  75. newhexsha: str,
  76. actor: Actor,
  77. time: int,
  78. tz_offset: int,
  79. message: str,
  80. ) -> "RefLogEntry": # skipcq: PYL-W0621
  81. """:return: New instance of a RefLogEntry"""
  82. if not isinstance(actor, Actor):
  83. raise ValueError("Need actor instance, got %s" % actor)
  84. # END check types
  85. return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), message))
  86. @classmethod
  87. def from_line(cls, line: bytes) -> "RefLogEntry":
  88. """:return: New RefLogEntry instance from the given revlog line.
  89. :param line: line bytes without trailing newline
  90. :raise ValueError: If line could not be parsed"""
  91. line_str = line.decode(defenc)
  92. fields = line_str.split("\t", 1)
  93. if len(fields) == 1:
  94. info, msg = fields[0], None
  95. elif len(fields) == 2:
  96. info, msg = fields
  97. else:
  98. raise ValueError("Line must have up to two TAB-separated fields." " Got %s" % repr(line_str))
  99. # END handle first split
  100. oldhexsha = info[:40]
  101. newhexsha = info[41:81]
  102. for hexsha in (oldhexsha, newhexsha):
  103. if not cls._re_hexsha_only.match(hexsha):
  104. raise ValueError("Invalid hexsha: %r" % (hexsha,))
  105. # END if hexsha re doesn't match
  106. # END for each hexsha
  107. email_end = info.find(">", 82)
  108. if email_end == -1:
  109. raise ValueError("Missing token: >")
  110. # END handle missing end brace
  111. actor = Actor._from_string(info[82 : email_end + 1])
  112. time, tz_offset = parse_date(info[email_end + 2 :]) # skipcq: PYL-W0621
  113. return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg))
  114. class RefLog(List[RefLogEntry], Serializable):
  115. """A reflog contains RefLogEntrys, each of which defines a certain state
  116. of the head in question. Custom query methods allow to retrieve log entries
  117. by date or by other criteria.
  118. Reflog entries are ordered, the first added entry is first in the list, the last
  119. entry, i.e. the last change of the head or reference, is last in the list."""
  120. __slots__ = ("_path",)
  121. def __new__(cls, filepath: Union[PathLike, None] = None) -> "RefLog":
  122. inst = super(RefLog, cls).__new__(cls)
  123. return inst
  124. def __init__(self, filepath: Union[PathLike, None] = None):
  125. """Initialize this instance with an optional filepath, from which we will
  126. initialize our data. The path is also used to write changes back using
  127. the write() method"""
  128. self._path = filepath
  129. if filepath is not None:
  130. self._read_from_file()
  131. # END handle filepath
  132. def _read_from_file(self) -> None:
  133. try:
  134. fmap = file_contents_ro_filepath(self._path, stream=True, allow_mmap=True)
  135. except OSError:
  136. # it is possible and allowed that the file doesn't exist !
  137. return
  138. # END handle invalid log
  139. try:
  140. self._deserialize(fmap)
  141. finally:
  142. fmap.close()
  143. # END handle closing of handle
  144. # { Interface
  145. @classmethod
  146. def from_file(cls, filepath: PathLike) -> "RefLog":
  147. """
  148. :return: a new RefLog instance containing all entries from the reflog
  149. at the given filepath
  150. :param filepath: path to reflog
  151. :raise ValueError: If the file could not be read or was corrupted in some way"""
  152. return cls(filepath)
  153. @classmethod
  154. def path(cls, ref: "SymbolicReference") -> str:
  155. """
  156. :return: string to absolute path at which the reflog of the given ref
  157. instance would be found. The path is not guaranteed to point to a valid
  158. file though.
  159. :param ref: SymbolicReference instance"""
  160. return osp.join(ref.repo.git_dir, "logs", to_native_path(ref.path))
  161. @classmethod
  162. def iter_entries(cls, stream: Union[str, "BytesIO", mmap]) -> Iterator[RefLogEntry]:
  163. """
  164. :return: Iterator yielding RefLogEntry instances, one for each line read
  165. sfrom the given stream.
  166. :param stream: file-like object containing the revlog in its native format
  167. or string instance pointing to a file to read"""
  168. new_entry = RefLogEntry.from_line
  169. if isinstance(stream, str):
  170. # default args return mmap on py>3
  171. _stream = file_contents_ro_filepath(stream)
  172. assert isinstance(_stream, mmap)
  173. else:
  174. _stream = stream
  175. # END handle stream type
  176. while True:
  177. line = _stream.readline()
  178. if not line:
  179. return
  180. yield new_entry(line.strip())
  181. # END endless loop
  182. @classmethod
  183. def entry_at(cls, filepath: PathLike, index: int) -> "RefLogEntry":
  184. """
  185. :return: RefLogEntry at the given index
  186. :param filepath: full path to the index file from which to read the entry
  187. :param index: python list compatible index, i.e. it may be negative to
  188. specify an entry counted from the end of the list
  189. :raise IndexError: If the entry didn't exist
  190. .. note:: This method is faster as it only parses the entry at index, skipping
  191. all other lines. Nonetheless, the whole file has to be read if
  192. the index is negative
  193. """
  194. with open(filepath, "rb") as fp:
  195. if index < 0:
  196. return RefLogEntry.from_line(fp.readlines()[index].strip())
  197. # read until index is reached
  198. for i in range(index + 1):
  199. line = fp.readline()
  200. if not line:
  201. raise IndexError(f"Index file ended at line {i+1}, before given index was reached")
  202. # END abort on eof
  203. # END handle runup
  204. return RefLogEntry.from_line(line.strip())
  205. # END handle index
  206. def to_file(self, filepath: PathLike) -> None:
  207. """Write the contents of the reflog instance to a file at the given filepath.
  208. :param filepath: path to file, parent directories are assumed to exist"""
  209. lfd = LockedFD(filepath)
  210. assure_directory_exists(filepath, is_file=True)
  211. fp = lfd.open(write=True, stream=True)
  212. try:
  213. self._serialize(fp)
  214. lfd.commit()
  215. except Exception:
  216. # on failure it rolls back automatically, but we make it clear
  217. lfd.rollback()
  218. raise
  219. # END handle change
  220. @classmethod
  221. def append_entry(
  222. cls,
  223. config_reader: Union[Actor, "GitConfigParser", "SectionConstraint", None],
  224. filepath: PathLike,
  225. oldbinsha: bytes,
  226. newbinsha: bytes,
  227. message: str,
  228. write: bool = True,
  229. ) -> "RefLogEntry":
  230. """Append a new log entry to the revlog at filepath.
  231. :param config_reader: configuration reader of the repository - used to obtain
  232. user information. May also be an Actor instance identifying the committer directly or None.
  233. :param filepath: full path to the log file
  234. :param oldbinsha: binary sha of the previous commit
  235. :param newbinsha: binary sha of the current commit
  236. :param message: message describing the change to the reference
  237. :param write: If True, the changes will be written right away. Otherwise
  238. the change will not be written
  239. :return: RefLogEntry objects which was appended to the log
  240. :note: As we are append-only, concurrent access is not a problem as we
  241. do not interfere with readers."""
  242. if len(oldbinsha) != 20 or len(newbinsha) != 20:
  243. raise ValueError("Shas need to be given in binary format")
  244. # END handle sha type
  245. assure_directory_exists(filepath, is_file=True)
  246. first_line = message.split("\n")[0]
  247. if isinstance(config_reader, Actor):
  248. committer = config_reader # mypy thinks this is Actor | Gitconfigparser, but why?
  249. else:
  250. committer = Actor.committer(config_reader)
  251. entry = RefLogEntry(
  252. (
  253. bin_to_hex(oldbinsha).decode("ascii"),
  254. bin_to_hex(newbinsha).decode("ascii"),
  255. committer,
  256. (int(_time.time()), _time.altzone),
  257. first_line,
  258. )
  259. )
  260. if write:
  261. lf = LockFile(filepath)
  262. lf._obtain_lock_or_raise()
  263. fd = open(filepath, "ab")
  264. try:
  265. fd.write(entry.format().encode(defenc))
  266. finally:
  267. fd.close()
  268. lf._release_lock()
  269. # END handle write operation
  270. return entry
  271. def write(self) -> "RefLog":
  272. """Write this instance's data to the file we are originating from
  273. :return: self"""
  274. if self._path is None:
  275. raise ValueError("Instance was not initialized with a path, use to_file(...) instead")
  276. # END assert path
  277. self.to_file(self._path)
  278. return self
  279. # } END interface
  280. # { Serializable Interface
  281. def _serialize(self, stream: "BytesIO") -> "RefLog":
  282. write = stream.write
  283. # write all entries
  284. for e in self:
  285. write(e.format().encode(defenc))
  286. # END for each entry
  287. return self
  288. def _deserialize(self, stream: "BytesIO") -> "RefLog":
  289. self.extend(self.iter_entries(stream))
  290. # } END serializable interface
  291. return self