test_buf.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. from .lib import TestBase, FileCreator
  2. from smmap.mman import (
  3. SlidingWindowMapManager,
  4. StaticWindowMapManager
  5. )
  6. from smmap.buf import SlidingWindowMapBuffer
  7. from random import randint
  8. from time import time
  9. import sys
  10. import os
  11. man_optimal = SlidingWindowMapManager()
  12. man_worst_case = SlidingWindowMapManager(
  13. window_size=TestBase.k_window_test_size // 100,
  14. max_memory_size=TestBase.k_window_test_size // 3,
  15. max_open_handles=15)
  16. static_man = StaticWindowMapManager()
  17. class TestBuf(TestBase):
  18. def test_basics(self):
  19. with FileCreator(self.k_window_test_size, "buffer_test") as fc:
  20. # invalid paths fail upon construction
  21. c = man_optimal.make_cursor(fc.path)
  22. self.assertRaises(ValueError, SlidingWindowMapBuffer, type(c)()) # invalid cursor
  23. self.assertRaises(ValueError, SlidingWindowMapBuffer, c, fc.size) # offset too large
  24. buf = SlidingWindowMapBuffer() # can create uninitailized buffers
  25. assert buf.cursor() is None
  26. # can call end access any time
  27. buf.end_access()
  28. buf.end_access()
  29. assert len(buf) == 0
  30. # begin access can revive it, if the offset is suitable
  31. offset = 100
  32. assert buf.begin_access(c, fc.size) == False
  33. assert buf.begin_access(c, offset) == True
  34. assert len(buf) == fc.size - offset
  35. assert buf.cursor().is_valid()
  36. # empty begin access keeps it valid on the same path, but alters the offset
  37. assert buf.begin_access() == True
  38. assert len(buf) == fc.size
  39. assert buf.cursor().is_valid()
  40. # simple access
  41. with open(fc.path, 'rb') as fp:
  42. data = fp.read()
  43. assert data[offset] == buf[0]
  44. assert data[offset:offset * 2] == buf[0:offset]
  45. # negative indices, partial slices
  46. assert buf[-1] == buf[len(buf) - 1]
  47. assert buf[-10:] == buf[len(buf) - 10:len(buf)]
  48. # end access makes its cursor invalid
  49. buf.end_access()
  50. assert not buf.cursor().is_valid()
  51. assert buf.cursor().is_associated() # but it remains associated
  52. # an empty begin access fixes it up again
  53. assert buf.begin_access() == True and buf.cursor().is_valid()
  54. del(buf) # ends access automatically
  55. del(c)
  56. assert man_optimal.num_file_handles() == 1
  57. # PERFORMANCE
  58. # blast away with random access and a full mapping - we don't want to
  59. # exaggerate the manager's overhead, but measure the buffer overhead
  60. # We do it once with an optimal setting, and with a worse manager which
  61. # will produce small mappings only !
  62. max_num_accesses = 100
  63. fd = os.open(fc.path, os.O_RDONLY)
  64. for item in (fc.path, fd):
  65. for manager, man_id in ((man_optimal, 'optimal'),
  66. (man_worst_case, 'worst case'),
  67. (static_man, 'static optimal')):
  68. buf = SlidingWindowMapBuffer(manager.make_cursor(item))
  69. assert manager.num_file_handles() == 1
  70. for access_mode in range(2): # single, multi
  71. num_accesses_left = max_num_accesses
  72. num_bytes = 0
  73. fsize = fc.size
  74. st = time()
  75. buf.begin_access()
  76. while num_accesses_left:
  77. num_accesses_left -= 1
  78. if access_mode: # multi
  79. ofs_start = randint(0, fsize)
  80. ofs_end = randint(ofs_start, fsize)
  81. d = buf[ofs_start:ofs_end]
  82. assert len(d) == ofs_end - ofs_start
  83. assert d == data[ofs_start:ofs_end]
  84. num_bytes += len(d)
  85. del d
  86. else:
  87. pos = randint(0, fsize)
  88. assert buf[pos] == data[pos]
  89. num_bytes += 1
  90. # END handle mode
  91. # END handle num accesses
  92. buf.end_access()
  93. assert manager.num_file_handles()
  94. assert manager.collect()
  95. assert manager.num_file_handles() == 0
  96. elapsed = max(time() - st, 0.001) # prevent zero division errors on windows
  97. mb = float(1000 * 1000)
  98. mode_str = (access_mode and "slice") or "single byte"
  99. print("%s: Made %i random %s accesses to buffer created from %s reading a total of %f mb in %f s (%f mb/s)"
  100. % (man_id, max_num_accesses, mode_str, type(item), num_bytes / mb, elapsed, (num_bytes / mb) / elapsed),
  101. file=sys.stderr)
  102. # END handle access mode
  103. del buf
  104. # END for each manager
  105. # END for each input
  106. os.close(fd)