buf.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. """Module with a simple buffer implementation using the memory manager"""
  2. import sys
  3. __all__ = ["SlidingWindowMapBuffer"]
  4. class SlidingWindowMapBuffer:
  5. """A buffer like object which allows direct byte-wise object and slicing into
  6. memory of a mapped file. The mapping is controlled by the provided cursor.
  7. The buffer is relative, that is if you map an offset, index 0 will map to the
  8. first byte at the offset you used during initialization or begin_access
  9. **Note:** Although this type effectively hides the fact that there are mapped windows
  10. underneath, it can unfortunately not be used in any non-pure python method which
  11. needs a buffer or string"""
  12. __slots__ = (
  13. '_c', # our cursor
  14. '_size', # our supposed size
  15. )
  16. def __init__(self, cursor=None, offset=0, size=sys.maxsize, flags=0):
  17. """Initalize the instance to operate on the given cursor.
  18. :param cursor: if not None, the associated cursor to the file you want to access
  19. If None, you have call begin_access before using the buffer and provide a cursor
  20. :param offset: absolute offset in bytes
  21. :param size: the total size of the mapping. Defaults to the maximum possible size
  22. From that point on, the __len__ of the buffer will be the given size or the file size.
  23. If the size is larger than the mappable area, you can only access the actually available
  24. area, although the length of the buffer is reported to be your given size.
  25. Hence it is in your own interest to provide a proper size !
  26. :param flags: Additional flags to be passed to os.open
  27. :raise ValueError: if the buffer could not achieve a valid state"""
  28. self._c = cursor
  29. if cursor and not self.begin_access(cursor, offset, size, flags):
  30. raise ValueError("Failed to allocate the buffer - probably the given offset is out of bounds")
  31. # END handle offset
  32. def __del__(self):
  33. self.end_access()
  34. def __enter__(self):
  35. return self
  36. def __exit__(self, exc_type, exc_value, traceback):
  37. self.end_access()
  38. def __len__(self):
  39. return self._size
  40. def __getitem__(self, i):
  41. if isinstance(i, slice):
  42. return self.__getslice__(i.start or 0, i.stop or self._size)
  43. c = self._c
  44. assert c.is_valid()
  45. if i < 0:
  46. i = self._size + i
  47. if not c.includes_ofs(i):
  48. c.use_region(i, 1)
  49. # END handle region usage
  50. return c.buffer()[i - c.ofs_begin()]
  51. def __getslice__(self, i, j):
  52. c = self._c
  53. # fast path, slice fully included - safes a concatenate operation and
  54. # should be the default
  55. assert c.is_valid()
  56. if i < 0:
  57. i = self._size + i
  58. if j == sys.maxsize:
  59. j = self._size
  60. if j < 0:
  61. j = self._size + j
  62. if (c.ofs_begin() <= i) and (j < c.ofs_end()):
  63. b = c.ofs_begin()
  64. return c.buffer()[i - b:j - b]
  65. else:
  66. l = j - i # total length
  67. ofs = i
  68. # It's fastest to keep tokens and join later, especially in py3, which was 7 times slower
  69. # in the previous iteration of this code
  70. md = list()
  71. while l:
  72. c.use_region(ofs, l)
  73. assert c.is_valid()
  74. d = c.buffer()[:l]
  75. ofs += len(d)
  76. l -= len(d)
  77. # Make sure we don't keep references, as c.use_region() might attempt to free resources, but
  78. # can't unless we use pure bytes
  79. if hasattr(d, 'tobytes'):
  80. d = d.tobytes()
  81. md.append(d)
  82. # END while there are bytes to read
  83. return bytes().join(md)
  84. # END fast or slow path
  85. #{ Interface
  86. def begin_access(self, cursor=None, offset=0, size=sys.maxsize, flags=0):
  87. """Call this before the first use of this instance. The method was already
  88. called by the constructor in case sufficient information was provided.
  89. For more information no the parameters, see the __init__ method
  90. :param path: if cursor is None the existing one will be used.
  91. :return: True if the buffer can be used"""
  92. if cursor:
  93. self._c = cursor
  94. # END update our cursor
  95. # reuse existing cursors if possible
  96. if self._c is not None and self._c.is_associated():
  97. res = self._c.use_region(offset, size, flags).is_valid()
  98. if res:
  99. # if given size is too large or default, we computer a proper size
  100. # If its smaller, we assume the combination between offset and size
  101. # as chosen by the user is correct and use it !
  102. # If not, the user is in trouble.
  103. if size > self._c.file_size():
  104. size = self._c.file_size() - offset
  105. # END handle size
  106. self._size = size
  107. # END set size
  108. return res
  109. # END use our cursor
  110. return False
  111. def end_access(self):
  112. """Call this method once you are done using the instance. It is automatically
  113. called on destruction, and should be called just in time to allow system
  114. resources to be freed.
  115. Once you called end_access, you must call begin access before reusing this instance!"""
  116. self._size = 0
  117. if self._c is not None:
  118. self._c.unuse_region()
  119. # END unuse region
  120. def cursor(self):
  121. """:return: the currently set cursor which provides access to the data"""
  122. return self._c
  123. #}END interface