mman.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. """Module containing a memory manager which provides a sliding window on a number of memory-mapped files"""
  2. from .util import (
  3. MapWindow,
  4. MapRegion,
  5. MapRegionList,
  6. is_64_bit,
  7. )
  8. import sys
  9. from functools import reduce
  10. __all__ = ["StaticWindowMapManager", "SlidingWindowMapManager", "WindowCursor"]
  11. #{ Utilities
  12. #}END utilities
  13. class WindowCursor:
  14. """
  15. Pointer into the mapped region of the memory manager, keeping the map
  16. alive until it is destroyed and no other client uses it.
  17. Cursors should not be created manually, but are instead returned by the SlidingWindowMapManager
  18. **Note:**: The current implementation is suited for static and sliding window managers, but it also means
  19. that it must be suited for the somewhat quite different sliding manager. It could be improved, but
  20. I see no real need to do so."""
  21. __slots__ = (
  22. '_manager', # the manger keeping all file regions
  23. '_rlist', # a regions list with regions for our file
  24. '_region', # our current class:`MapRegion` or None
  25. '_ofs', # relative offset from the actually mapped area to our start area
  26. '_size' # maximum size we should provide
  27. )
  28. def __init__(self, manager=None, regions=None):
  29. self._manager = manager
  30. self._rlist = regions
  31. self._region = None
  32. self._ofs = 0
  33. self._size = 0
  34. def __del__(self):
  35. self._destroy()
  36. def __enter__(self):
  37. return self
  38. def __exit__(self, exc_type, exc_value, traceback):
  39. self._destroy()
  40. def _destroy(self):
  41. """Destruction code to decrement counters"""
  42. self.unuse_region()
  43. if self._rlist is not None:
  44. # Actual client count, which doesn't include the reference kept by the manager, nor ours
  45. # as we are about to be deleted
  46. try:
  47. if len(self._rlist) == 0:
  48. # Free all resources associated with the mapped file
  49. self._manager._fdict.pop(self._rlist.path_or_fd())
  50. # END remove regions list from manager
  51. except (TypeError, KeyError):
  52. # sometimes, during shutdown, getrefcount is None. Its possible
  53. # to re-import it, however, its probably better to just ignore
  54. # this python problem (for now).
  55. # The next step is to get rid of the error prone getrefcount alltogether.
  56. pass
  57. # END exception handling
  58. # END handle regions
  59. def _copy_from(self, rhs):
  60. """Copy all data from rhs into this instance, handles usage count"""
  61. self._manager = rhs._manager
  62. self._rlist = type(rhs._rlist)(rhs._rlist)
  63. self._region = rhs._region
  64. self._ofs = rhs._ofs
  65. self._size = rhs._size
  66. for region in self._rlist:
  67. region.increment_client_count()
  68. if self._region is not None:
  69. self._region.increment_client_count()
  70. # END handle regions
  71. def __copy__(self):
  72. """copy module interface"""
  73. cpy = type(self)()
  74. cpy._copy_from(self)
  75. return cpy
  76. #{ Interface
  77. def assign(self, rhs):
  78. """Assign rhs to this instance. This is required in order to get a real copy.
  79. Alternativly, you can copy an existing instance using the copy module"""
  80. self._destroy()
  81. self._copy_from(rhs)
  82. def use_region(self, offset=0, size=0, flags=0):
  83. """Assure we point to a window which allows access to the given offset into the file
  84. :param offset: absolute offset in bytes into the file
  85. :param size: amount of bytes to map. If 0, all available bytes will be mapped
  86. :param flags: additional flags to be given to os.open in case a file handle is initially opened
  87. for mapping. Has no effect if a region can actually be reused.
  88. :return: this instance - it should be queried for whether it points to a valid memory region.
  89. This is not the case if the mapping failed because we reached the end of the file
  90. **Note:**: The size actually mapped may be smaller than the given size. If that is the case,
  91. either the file has reached its end, or the map was created between two existing regions"""
  92. need_region = True
  93. man = self._manager
  94. fsize = self._rlist.file_size()
  95. size = min(size or fsize, man.window_size() or fsize) # clamp size to window size
  96. if self._region is not None:
  97. if self._region.includes_ofs(offset):
  98. need_region = False
  99. else:
  100. self.unuse_region()
  101. # END handle existing region
  102. # END check existing region
  103. # offset too large ?
  104. if offset >= fsize:
  105. return self
  106. # END handle offset
  107. if need_region:
  108. self._region = man._obtain_region(self._rlist, offset, size, flags, False)
  109. self._region.increment_client_count()
  110. # END need region handling
  111. self._ofs = offset - self._region._b
  112. self._size = min(size, self._region.ofs_end() - offset)
  113. return self
  114. def unuse_region(self):
  115. """Unuse the current region. Does nothing if we have no current region
  116. **Note:** the cursor unuses the region automatically upon destruction. It is recommended
  117. to un-use the region once you are done reading from it in persistent cursors as it
  118. helps to free up resource more quickly"""
  119. if self._region is not None:
  120. self._region.increment_client_count(-1)
  121. self._region = None
  122. # note: should reset ofs and size, but we spare that for performance. Its not
  123. # allowed to query information if we are not valid !
  124. def buffer(self):
  125. """Return a buffer object which allows access to our memory region from our offset
  126. to the window size. Please note that it might be smaller than you requested when calling use_region()
  127. **Note:** You can only obtain a buffer if this instance is_valid() !
  128. **Note:** buffers should not be cached passed the duration of your access as it will
  129. prevent resources from being freed even though they might not be accounted for anymore !"""
  130. return memoryview(self._region.buffer())[self._ofs:self._ofs+self._size]
  131. def map(self):
  132. """
  133. :return: the underlying raw memory map. Please not that the offset and size is likely to be different
  134. to what you set as offset and size. Use it only if you are sure about the region it maps, which is the whole
  135. file in case of StaticWindowMapManager"""
  136. return self._region.map()
  137. def is_valid(self):
  138. """:return: True if we have a valid and usable region"""
  139. return self._region is not None
  140. def is_associated(self):
  141. """:return: True if we are associated with a specific file already"""
  142. return self._rlist is not None
  143. def ofs_begin(self):
  144. """:return: offset to the first byte pointed to by our cursor
  145. **Note:** only if is_valid() is True"""
  146. return self._region._b + self._ofs
  147. def ofs_end(self):
  148. """:return: offset to one past the last available byte"""
  149. # unroll method calls for performance !
  150. return self._region._b + self._ofs + self._size
  151. def size(self):
  152. """:return: amount of bytes we point to"""
  153. return self._size
  154. def region(self):
  155. """:return: our mapped region, or None if nothing is mapped yet
  156. :raise AssertionError: if we have no current region. This is only useful for debugging"""
  157. return self._region
  158. def includes_ofs(self, ofs):
  159. """:return: True if the given absolute offset is contained in the cursors
  160. current region
  161. **Note:** cursor must be valid for this to work"""
  162. # unroll methods
  163. return (self._region._b + self._ofs) <= ofs < (self._region._b + self._ofs + self._size)
  164. def file_size(self):
  165. """:return: size of the underlying file"""
  166. return self._rlist.file_size()
  167. def path_or_fd(self):
  168. """:return: path or file descriptor of the underlying mapped file"""
  169. return self._rlist.path_or_fd()
  170. def path(self):
  171. """:return: path of the underlying mapped file
  172. :raise ValueError: if attached path is not a path"""
  173. if isinstance(self._rlist.path_or_fd(), int):
  174. raise ValueError("Path queried although mapping was applied to a file descriptor")
  175. # END handle type
  176. return self._rlist.path_or_fd()
  177. def fd(self):
  178. """:return: file descriptor used to create the underlying mapping.
  179. **Note:** it is not required to be valid anymore
  180. :raise ValueError: if the mapping was not created by a file descriptor"""
  181. if isinstance(self._rlist.path_or_fd(), str):
  182. raise ValueError("File descriptor queried although mapping was generated from path")
  183. # END handle type
  184. return self._rlist.path_or_fd()
  185. #} END interface
  186. class StaticWindowMapManager:
  187. """Provides a manager which will produce single size cursors that are allowed
  188. to always map the whole file.
  189. Clients must be written to specifically know that they are accessing their data
  190. through a StaticWindowMapManager, as they otherwise have to deal with their window size.
  191. These clients would have to use a SlidingWindowMapBuffer to hide this fact.
  192. This type will always use a maximum window size, and optimize certain methods to
  193. accommodate this fact"""
  194. __slots__ = [
  195. '_fdict', # mapping of path -> StorageHelper (of some kind
  196. '_window_size', # maximum size of a window
  197. '_max_memory_size', # maximum amount of memory we may allocate
  198. '_max_handle_count', # maximum amount of handles to keep open
  199. '_memory_size', # currently allocated memory size
  200. '_handle_count', # amount of currently allocated file handles
  201. ]
  202. #{ Configuration
  203. MapRegionListCls = MapRegionList
  204. MapWindowCls = MapWindow
  205. MapRegionCls = MapRegion
  206. WindowCursorCls = WindowCursor
  207. #} END configuration
  208. _MB_in_bytes = 1024 * 1024
  209. def __init__(self, window_size=0, max_memory_size=0, max_open_handles=sys.maxsize):
  210. """initialize the manager with the given parameters.
  211. :param window_size: if -1, a default window size will be chosen depending on
  212. the operating system's architecture. It will internally be quantified to a multiple of the page size
  213. If 0, the window may have any size, which basically results in mapping the whole file at one
  214. :param max_memory_size: maximum amount of memory we may map at once before releasing mapped regions.
  215. If 0, a viable default will be set depending on the system's architecture.
  216. It is a soft limit that is tried to be kept, but nothing bad happens if we have to over-allocate
  217. :param max_open_handles: if not maxint, limit the amount of open file handles to the given number.
  218. Otherwise the amount is only limited by the system itself. If a system or soft limit is hit,
  219. the manager will free as many handles as possible"""
  220. self._fdict = dict()
  221. self._window_size = window_size
  222. self._max_memory_size = max_memory_size
  223. self._max_handle_count = max_open_handles
  224. self._memory_size = 0
  225. self._handle_count = 0
  226. if window_size < 0:
  227. coeff = 64
  228. if is_64_bit():
  229. coeff = 1024
  230. # END handle arch
  231. self._window_size = coeff * self._MB_in_bytes
  232. # END handle max window size
  233. if max_memory_size == 0:
  234. coeff = 1024
  235. if is_64_bit():
  236. coeff = 8192
  237. # END handle arch
  238. self._max_memory_size = coeff * self._MB_in_bytes
  239. # END handle max memory size
  240. #{ Internal Methods
  241. def _collect_lru_region(self, size):
  242. """Unmap the region which was least-recently used and has no client
  243. :param size: size of the region we want to map next (assuming its not already mapped partially or full
  244. if 0, we try to free any available region
  245. :return: Amount of freed regions
  246. .. Note::
  247. We don't raise exceptions anymore, in order to keep the system working, allowing temporary overallocation.
  248. If the system runs out of memory, it will tell.
  249. .. TODO::
  250. implement a case where all unusued regions are discarded efficiently.
  251. Currently its only brute force
  252. """
  253. num_found = 0
  254. while (size == 0) or (self._memory_size + size > self._max_memory_size):
  255. lru_region = None
  256. lru_list = None
  257. for regions in self._fdict.values():
  258. for region in regions:
  259. # check client count - if it's 1, it's just us
  260. if (region.client_count() == 1 and
  261. (lru_region is None or region._uc < lru_region._uc)):
  262. lru_region = region
  263. lru_list = regions
  264. # END update lru_region
  265. # END for each region
  266. # END for each regions list
  267. if lru_region is None:
  268. break
  269. # END handle region not found
  270. num_found += 1
  271. del(lru_list[lru_list.index(lru_region)])
  272. lru_region.increment_client_count(-1)
  273. self._memory_size -= lru_region.size()
  274. self._handle_count -= 1
  275. # END while there is more memory to free
  276. return num_found
  277. def _obtain_region(self, a, offset, size, flags, is_recursive):
  278. """Utilty to create a new region - for more information on the parameters,
  279. see MapCursor.use_region.
  280. :param a: A regions (a)rray
  281. :return: The newly created region"""
  282. if self._memory_size + size > self._max_memory_size:
  283. self._collect_lru_region(size)
  284. # END handle collection
  285. r = None
  286. if a:
  287. assert len(a) == 1
  288. r = a[0]
  289. else:
  290. try:
  291. r = self.MapRegionCls(a.path_or_fd(), 0, sys.maxsize, flags)
  292. except Exception:
  293. # apparently we are out of system resources or hit a limit
  294. # As many more operations are likely to fail in that condition (
  295. # like reading a file from disk, etc) we free up as much as possible
  296. # As this invalidates our insert position, we have to recurse here
  297. if is_recursive:
  298. # we already tried this, and still have no success in obtaining
  299. # a mapping. This is an exception, so we propagate it
  300. raise
  301. # END handle existing recursion
  302. self._collect_lru_region(0)
  303. return self._obtain_region(a, offset, size, flags, True)
  304. # END handle exceptions
  305. self._handle_count += 1
  306. self._memory_size += r.size()
  307. a.append(r)
  308. # END handle array
  309. assert r.includes_ofs(offset)
  310. return r
  311. #}END internal methods
  312. #{ Interface
  313. def make_cursor(self, path_or_fd):
  314. """
  315. :return: a cursor pointing to the given path or file descriptor.
  316. It can be used to map new regions of the file into memory
  317. **Note:** if a file descriptor is given, it is assumed to be open and valid,
  318. but may be closed afterwards. To refer to the same file, you may reuse
  319. your existing file descriptor, but keep in mind that new windows can only
  320. be mapped as long as it stays valid. This is why the using actual file paths
  321. are preferred unless you plan to keep the file descriptor open.
  322. **Note:** file descriptors are problematic as they are not necessarily unique, as two
  323. different files opened and closed in succession might have the same file descriptor id.
  324. **Note:** Using file descriptors directly is faster once new windows are mapped as it
  325. prevents the file to be opened again just for the purpose of mapping it."""
  326. regions = self._fdict.get(path_or_fd)
  327. if regions is None:
  328. regions = self.MapRegionListCls(path_or_fd)
  329. self._fdict[path_or_fd] = regions
  330. # END obtain region for path
  331. return self.WindowCursorCls(self, regions)
  332. def collect(self):
  333. """Collect all available free-to-collect mapped regions
  334. :return: Amount of freed handles"""
  335. return self._collect_lru_region(0)
  336. def num_file_handles(self):
  337. """:return: amount of file handles in use. Each mapped region uses one file handle"""
  338. return self._handle_count
  339. def num_open_files(self):
  340. """Amount of opened files in the system"""
  341. return reduce(lambda x, y: x + y, (1 for rlist in self._fdict.values() if len(rlist) > 0), 0)
  342. def window_size(self):
  343. """:return: size of each window when allocating new regions"""
  344. return self._window_size
  345. def mapped_memory_size(self):
  346. """:return: amount of bytes currently mapped in total"""
  347. return self._memory_size
  348. def max_file_handles(self):
  349. """:return: maximium amount of handles we may have opened"""
  350. return self._max_handle_count
  351. def max_mapped_memory_size(self):
  352. """:return: maximum amount of memory we may allocate"""
  353. return self._max_memory_size
  354. #} END interface
  355. #{ Special Purpose Interface
  356. def force_map_handle_removal_win(self, base_path):
  357. """ONLY AVAILABLE ON WINDOWS
  358. On windows removing files is not allowed if anybody still has it opened.
  359. If this process is ourselves, and if the whole process uses this memory
  360. manager (as far as the parent framework is concerned) we can enforce
  361. closing all memory maps whose path matches the given base path to
  362. allow the respective operation after all.
  363. The respective system must NOT access the closed memory regions anymore !
  364. This really may only be used if you know that the items which keep
  365. the cursors alive will not be using it anymore. They need to be recreated !
  366. :return: Amount of closed handles
  367. **Note:** does nothing on non-windows platforms"""
  368. if sys.platform != 'win32':
  369. return
  370. # END early bailout
  371. num_closed = 0
  372. for path, rlist in self._fdict.items():
  373. if path.startswith(base_path):
  374. for region in rlist:
  375. region.release()
  376. num_closed += 1
  377. # END path matches
  378. # END for each path
  379. return num_closed
  380. #} END special purpose interface
  381. class SlidingWindowMapManager(StaticWindowMapManager):
  382. """Maintains a list of ranges of mapped memory regions in one or more files and allows to easily
  383. obtain additional regions assuring there is no overlap.
  384. Once a certain memory limit is reached globally, or if there cannot be more open file handles
  385. which result from each mmap call, the least recently used, and currently unused mapped regions
  386. are unloaded automatically.
  387. **Note:** currently not thread-safe !
  388. **Note:** in the current implementation, we will automatically unload windows if we either cannot
  389. create more memory maps (as the open file handles limit is hit) or if we have allocated more than
  390. a safe amount of memory already, which would possibly cause memory allocations to fail as our address
  391. space is full."""
  392. __slots__ = tuple()
  393. def __init__(self, window_size=-1, max_memory_size=0, max_open_handles=sys.maxsize):
  394. """Adjusts the default window size to -1"""
  395. super().__init__(window_size, max_memory_size, max_open_handles)
  396. def _obtain_region(self, a, offset, size, flags, is_recursive):
  397. # bisect to find an existing region. The c++ implementation cannot
  398. # do that as it uses a linked list for regions.
  399. r = None
  400. lo = 0
  401. hi = len(a)
  402. while lo < hi:
  403. mid = (lo + hi) // 2
  404. ofs = a[mid]._b
  405. if ofs <= offset:
  406. if a[mid].includes_ofs(offset):
  407. r = a[mid]
  408. break
  409. # END have region
  410. lo = mid + 1
  411. else:
  412. hi = mid
  413. # END handle position
  414. # END while bisecting
  415. if r is None:
  416. window_size = self._window_size
  417. left = self.MapWindowCls(0, 0)
  418. mid = self.MapWindowCls(offset, size)
  419. right = self.MapWindowCls(a.file_size(), 0)
  420. # we want to honor the max memory size, and assure we have anough
  421. # memory available
  422. # Save calls !
  423. if self._memory_size + window_size > self._max_memory_size:
  424. self._collect_lru_region(window_size)
  425. # END handle collection
  426. # we assume the list remains sorted by offset
  427. insert_pos = 0
  428. len_regions = len(a)
  429. if len_regions == 1:
  430. if a[0]._b <= offset:
  431. insert_pos = 1
  432. # END maintain sort
  433. else:
  434. # find insert position
  435. insert_pos = len_regions
  436. for i, region in enumerate(a):
  437. if region._b > offset:
  438. insert_pos = i
  439. break
  440. # END if insert position is correct
  441. # END for each region
  442. # END obtain insert pos
  443. # adjust the actual offset and size values to create the largest
  444. # possible mapping
  445. if insert_pos == 0:
  446. if len_regions:
  447. right = self.MapWindowCls.from_region(a[insert_pos])
  448. # END adjust right side
  449. else:
  450. if insert_pos != len_regions:
  451. right = self.MapWindowCls.from_region(a[insert_pos])
  452. # END adjust right window
  453. left = self.MapWindowCls.from_region(a[insert_pos - 1])
  454. # END adjust surrounding windows
  455. mid.extend_left_to(left, window_size)
  456. mid.extend_right_to(right, window_size)
  457. mid.align()
  458. # it can happen that we align beyond the end of the file
  459. if mid.ofs_end() > right.ofs:
  460. mid.size = right.ofs - mid.ofs
  461. # END readjust size
  462. # insert new region at the right offset to keep the order
  463. try:
  464. if self._handle_count >= self._max_handle_count:
  465. raise Exception
  466. # END assert own imposed max file handles
  467. r = self.MapRegionCls(a.path_or_fd(), mid.ofs, mid.size, flags)
  468. except Exception:
  469. # apparently we are out of system resources or hit a limit
  470. # As many more operations are likely to fail in that condition (
  471. # like reading a file from disk, etc) we free up as much as possible
  472. # As this invalidates our insert position, we have to recurse here
  473. if is_recursive:
  474. # we already tried this, and still have no success in obtaining
  475. # a mapping. This is an exception, so we propagate it
  476. raise
  477. # END handle existing recursion
  478. self._collect_lru_region(0)
  479. return self._obtain_region(a, offset, size, flags, True)
  480. # END handle exceptions
  481. self._handle_count += 1
  482. self._memory_size += r.size()
  483. a.insert(insert_pos, r)
  484. # END create new region
  485. return r