diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2014-11-13 08:34:54 +0100 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2014-11-13 08:34:54 +0100 |
commit | f53ddc686c0d226b2c69cc3732406dd3796932cf (patch) | |
tree | 4f5e5dcbd922deda608a6df0e9cff1f1df045530 /smmap | |
parent | 1af4b42a2354acbb53c7956d647655922658fd80 (diff) | |
parent | 948a9274527d14702875581d7115389cf9aa8244 (diff) | |
download | smmap-f53ddc686c0d226b2c69cc3732406dd3796932cf.tar.gz |
Merge branch 'py2n3' (tag: v0.8.3)
Code cleanup and performance regression fixes in py3
Diffstat (limited to 'smmap')
-rw-r--r-- | smmap/__init__.py | 2 | ||||
-rw-r--r-- | smmap/buf.py | 12 | ||||
-rw-r--r-- | smmap/mman.py | 45 | ||||
-rw-r--r-- | smmap/test/test_buf.py | 54 | ||||
-rw-r--r-- | smmap/test/test_mman.py | 87 | ||||
-rw-r--r-- | smmap/test/test_util.py | 9 | ||||
-rw-r--r-- | smmap/util.py | 9 |
7 files changed, 117 insertions, 101 deletions
diff --git a/smmap/__init__.py b/smmap/__init__.py index 879ebea..c494648 100644 --- a/smmap/__init__.py +++ b/smmap/__init__.py @@ -3,7 +3,7 @@ __author__ = "Sebastian Thiel" __contact__ = "byronimo@gmail.com" __homepage__ = "https://github.com/Byron/smmap" -version_info = (0, 8, 2) +version_info = (0, 8, 3) __version__ = '.'.join(str(i) for i in version_info) # make everything available in root package for convenience diff --git a/smmap/buf.py b/smmap/buf.py index 2f27d4d..ef9d49e 100644 --- a/smmap/buf.py +++ b/smmap/buf.py @@ -1,6 +1,4 @@ """Module with a simple buffer implementation using the memory manager""" -from .mman import WindowCursor - import sys __all__ = ["SlidingWindowMapBuffer"] @@ -79,18 +77,18 @@ class SlidingWindowMapBuffer(object): else: l = j-i # total length ofs = i - # Keeping tokens in a list could possible be faster, but the list - # overhead outweighs the benefits (tested) ! - md = bytes() + # It's fastest to keep tokens and join later, especially in py3, which was 7 times slower + # in the previous iteration of this code + md = list() while l: c.use_region(ofs, l) assert c.is_valid() d = c.buffer()[:l] ofs += len(d) l -= len(d) - md += d + md.append(d) #END while there are bytes to read - return md + return bytes().join(md) # END fast or slow path #{ Interface diff --git a/smmap/mman.py b/smmap/mman.py index 7cbb535..da6fd81 100644 --- a/smmap/mman.py +++ b/smmap/mman.py @@ -1,13 +1,12 @@ """Module containing a memory memory manager which provides a sliding window on a number of memory mapped files""" from .util import ( - MapWindow, - MapRegion, - MapRegionList, - is_64_bit, - align_to_mmap, - string_types, - buffer, - ) + MapWindow, + MapRegion, + MapRegionList, + is_64_bit, + string_types, + buffer, + ) from weakref import ref import sys @@ -102,7 +101,7 @@ class WindowCursor(object): :param flags: additional flags to be given to os.open in case a file handle is initially opened for mapping. 
Has no effect if a region can actually be reused. :return: this instance - it should be queried for whether it points to a valid memory region. - This is not the case if the mapping failed becaues we reached the end of the file + This is not the case if the mapping failed because we reached the end of the file **Note:**: The size actually mapped may be smaller than the given size. If that is the case, either the file has reached its end, or the map was created between two existing regions""" @@ -138,7 +137,7 @@ class WindowCursor(object): """Unuse the ucrrent region. Does nothing if we have no current region **Note:** the cursor unuses the region automatically upon destruction. It is recommended - to unuse the region once you are done reading from it in persistent cursors as it + to un-use the region once you are done reading from it in persistent cursors as it helps to free up resource more quickly""" self._region = None # note: should reset ofs and size, but we spare that for performance. Its not @@ -204,7 +203,7 @@ class WindowCursor(object): return self._rlist.file_size() def path_or_fd(self): - """:return: path or file decriptor of the underlying mapped file""" + """:return: path or file descriptor of the underlying mapped file""" return self._rlist.path_or_fd() def path(self): @@ -238,12 +237,12 @@ class StaticWindowMapManager(object): These clients would have to use a SlidingWindowMapBuffer to hide this fact. 
This type will always use a maximum window size, and optimize certain methods to - acomodate this fact""" + accommodate this fact""" __slots__ = [ '_fdict', # mapping of path -> StorageHelper (of some kind '_window_size', # maximum size of a window - '_max_memory_size', # maximum amount ofmemory we may allocate + '_max_memory_size', # maximum amount of memory we may allocate '_max_handle_count', # maximum amount of handles to keep open '_memory_size', # currently allocated memory size '_handle_count', # amount of currently allocated file handles @@ -261,14 +260,14 @@ class StaticWindowMapManager(object): def __init__(self, window_size = 0, max_memory_size = 0, max_open_handles = sys.maxsize): """initialize the manager with the given parameters. :param window_size: if -1, a default window size will be chosen depending on - the operating system's architechture. It will internally be quantified to a multiple of the page size + the operating system's architecture. It will internally be quantified to a multiple of the page size If 0, the window may have any size, which basically results in mapping the whole file at one :param max_memory_size: maximum amount of memory we may map at once before releasing mapped regions. - If 0, a viable default iwll be set dependning on the system's architecture. - It is a soft limit that is tried to be kept, but nothing bad happens if we have to overallocate - :param max_open_handles: if not maxin, limit the amount of open file handles to the given number. - Otherwise the amount is only limited by the system iteself. If a system or soft limit is hit, - the manager will free as many handles as posisble""" + If 0, a viable default will be set depending on the system's architecture. + It is a soft limit that is tried to be kept, but nothing bad happens if we have to over-allocate + :param max_open_handles: if not maxint, limit the amount of open file handles to the given number. + Otherwise the amount is only limited by the system itself. 
If a system or soft limit is hit, + the manager will free as many handles as possible""" self._fdict = dict() self._window_size = window_size self._max_memory_size = max_memory_size @@ -277,7 +276,7 @@ class StaticWindowMapManager(object): self._handle_count = 0 if window_size < 0: - coeff = 32 + coeff = 64 if is_64_bit(): coeff = 1024 #END handle arch @@ -285,7 +284,7 @@ class StaticWindowMapManager(object): # END handle max window size if max_memory_size == 0: - coeff = 512 + coeff = 1024 if is_64_bit(): coeff = 8192 #END handle arch @@ -351,8 +350,6 @@ class StaticWindowMapManager(object): # As many more operations are likely to fail in that condition ( # like reading a file from disk, etc) we free up as much as possible # As this invalidates our insert position, we have to recurse here - # NOTE: The c++ version uses a linked list to curcumvent this, but - # using that in python is probably too slow anyway if is_recursive: # we already tried this, and still have no success in obtaining # a mapping. This is an exception, so we propagate it @@ -563,8 +560,6 @@ class SlidingWindowMapManager(StaticWindowMapManager): # As many more operations are likely to fail in that condition ( # like reading a file from disk, etc) we free up as much as possible # As this invalidates our insert position, we have to recurse here - # NOTE: The c++ version uses a linked list to curcumvent this, but - # using that in python is probably too slow anyway if is_recursive: # we already tried this, and still have no success in obtaining # a mapping. 
This is an exception, so we propagate it diff --git a/smmap/test/test_buf.py b/smmap/test/test_buf.py index 15dfb82..d3e51e2 100644 --- a/smmap/test/test_buf.py +++ b/smmap/test/test_buf.py @@ -1,12 +1,18 @@ +from __future__ import print_function + from .lib import TestBase, FileCreator -from smmap.mman import SlidingWindowMapManager, StaticWindowMapManager -from smmap.buf import * +from smmap.mman import ( + SlidingWindowMapManager, + StaticWindowMapManager + ) +from smmap.buf import SlidingWindowMapBuffer from random import randint from time import time import sys import os +import logging man_optimal = SlidingWindowMapManager() @@ -17,65 +23,66 @@ man_worst_case = SlidingWindowMapManager( static_man = StaticWindowMapManager() class TestBuf(TestBase): - + def test_basics(self): fc = FileCreator(self.k_window_test_size, "buffer_test") - + # invalid paths fail upon construction c = man_optimal.make_cursor(fc.path) self.assertRaises(ValueError, SlidingWindowMapBuffer, type(c)()) # invalid cursor self.assertRaises(ValueError, SlidingWindowMapBuffer, c, fc.size) # offset too large - + buf = SlidingWindowMapBuffer() # can create uninitailized buffers assert buf.cursor() is None - + # can call end access any time buf.end_access() buf.end_access() assert len(buf) == 0 - + # begin access can revive it, if the offset is suitable offset = 100 assert buf.begin_access(c, fc.size) == False assert buf.begin_access(c, offset) == True assert len(buf) == fc.size - offset assert buf.cursor().is_valid() - + # empty begin access keeps it valid on the same path, but alters the offset assert buf.begin_access() == True assert len(buf) == fc.size assert buf.cursor().is_valid() - + # simple access - data = open(fc.path, 'rb').read() + with open(fc.path, 'rb') as fp: + data = fp.read() assert data[offset] == buf[0] assert data[offset:offset*2] == buf[0:offset] - + # negative indices, partial slices assert buf[-1] == buf[len(buf)-1] assert buf[-10:] == buf[len(buf)-10:len(buf)] - + # end 
access makes its cursor invalid buf.end_access() assert not buf.cursor().is_valid() assert buf.cursor().is_associated() # but it remains associated - + # an empty begin access fixes it up again assert buf.begin_access() == True and buf.cursor().is_valid() del(buf) # ends access automatically del(c) - + assert man_optimal.num_file_handles() == 1 - + # PERFORMANCE - # blast away with rnadom access and a full mapping - we don't want to - # exagerate the manager's overhead, but measure the buffer overhead - # We do it once with an optimal setting, and with a worse manager which + # blast away with random access and a full mapping - we don't want to + # exaggerate the manager's overhead, but measure the buffer overhead + # We do it once with an optimal setting, and with a worse manager which # will produce small mappings only ! max_num_accesses = 100 fd = os.open(fc.path, os.O_RDONLY) for item in (fc.path, fd): - for manager, man_id in ( (man_optimal, 'optimal'), + for manager, man_id in ( (man_optimal, 'optimal'), (man_worst_case, 'worst case'), (static_man, 'static optimal')): buf = SlidingWindowMapBuffer(manager.make_cursor(item)) @@ -84,7 +91,7 @@ class TestBuf(TestBase): num_accesses_left = max_num_accesses num_bytes = 0 fsize = fc.size - + st = time() buf.begin_access() while num_accesses_left: @@ -102,7 +109,7 @@ class TestBuf(TestBase): num_bytes += 1 #END handle mode # END handle num accesses - + buf.end_access() assert manager.num_file_handles() assert manager.collect() @@ -110,8 +117,9 @@ class TestBuf(TestBase): elapsed = max(time() - st, 0.001) # prevent zero division errors on windows mb = float(1000*1000) mode_str = (access_mode and "slice") or "single byte" - sys.stderr.write("%s: Made %i random %s accesses to buffer created from %s reading a total of %f mb in %f s (%f mb/s)\n" - % (man_id, max_num_accesses, mode_str, type(item), num_bytes/mb, elapsed, (num_bytes/mb)/elapsed)) + print("%s: Made %i random %s accesses to buffer created from %s reading a 
total of %f mb in %f s (%f mb/s)" + % (man_id, max_num_accesses, mode_str, type(item), num_bytes/mb, elapsed, (num_bytes/mb)/elapsed), + file=sys.stderr) # END handle access mode # END for each manager # END for each input diff --git a/smmap/test/test_mman.py b/smmap/test/test_mman.py index e0516b2..cc5d914 100644 --- a/smmap/test/test_mman.py +++ b/smmap/test/test_mman.py @@ -1,9 +1,13 @@ +from __future__ import print_function + from .lib import TestBase, FileCreator -from smmap.mman import * -from smmap.mman import WindowCursor +from smmap.mman import ( + WindowCursor, + SlidingWindowMapManager, + StaticWindowMapManager + ) from smmap.util import align_to_mmap -from smmap.exc import RegionCollectionError from random import randint from time import time @@ -12,43 +16,43 @@ import sys from copy import copy class TestMMan(TestBase): - + def test_cursor(self): fc = FileCreator(self.k_window_test_size, "cursor_test") - + man = SlidingWindowMapManager() ci = WindowCursor(man) # invalid cursor assert not ci.is_valid() assert not ci.is_associated() assert ci.size() == 0 # this is cached, so we can query it in invalid state - + cv = man.make_cursor(fc.path) assert not cv.is_valid() # no region mapped yet assert cv.is_associated()# but it know where to map it from assert cv.file_size() == fc.size assert cv.path() == fc.path - + # copy module cio = copy(cv) assert not cio.is_valid() and cio.is_associated() - + # assign method assert not ci.is_associated() ci.assign(cv) assert not ci.is_valid() and ci.is_associated() - + # unuse non-existing region is fine cv.unuse_region() cv.unuse_region() - + # destruction is fine (even multiple times) cv._destroy() WindowCursor(man)._destroy() - + def test_memory_manager(self): slide_man = SlidingWindowMapManager() static_man = StaticWindowMapManager() - + for man in (static_man, slide_man): assert man.num_file_handles() == 0 assert man.num_open_files() == 0 @@ -59,15 +63,15 @@ class TestMMan(TestBase): assert man.window_size() > 
winsize_cmp_val assert man.mapped_memory_size() == 0 assert man.max_mapped_memory_size() > 0 - + # collection doesn't raise in 'any' mode man._collect_lru_region(0) # doesn't raise if we are within the limit man._collect_lru_region(10) - - # doesn't fail if we overallocate + + # doesn't fail if we over-allocate assert man._collect_lru_region(sys.maxsize) == 0 - + # use a region, verify most basic functionality fc = FileCreator(self.k_window_test_size, "manager_test") fd = os.open(fc.path, os.O_RDONLY) @@ -77,8 +81,9 @@ class TestMMan(TestBase): assert c.use_region(10, 10).is_valid() assert c.ofs_begin() == 10 assert c.size() == 10 - assert c.buffer()[:] == open(fc.path, 'rb').read(20)[10:] - + with open(fc.path, 'rb') as fp: + assert c.buffer()[:] == fp.read(20)[10:] + if isinstance(item, int): self.assertRaises(ValueError, c.path) else: @@ -87,38 +92,39 @@ class TestMMan(TestBase): #END for each input os.close(fd) # END for each manager type - + def test_memman_operation(self): # test more access, force it to actually unmap regions fc = FileCreator(self.k_window_test_size, "manager_operation_test") - data = open(fc.path, 'rb').read() + with open(fc.path, 'rb') as fp: + data = fp.read() fd = os.open(fc.path, os.O_RDONLY) max_num_handles = 15 - #small_size = + #small_size = for mtype, args in ( (StaticWindowMapManager, (0, fc.size // 3, max_num_handles)), (SlidingWindowMapManager, (fc.size // 100, fc.size // 3, max_num_handles)),): for item in (fc.path, fd): assert len(data) == fc.size - + # small windows, a reasonable max memory. Not too many regions at once man = mtype(window_size=args[0], max_memory_size=args[1], max_open_handles=args[2]) c = man.make_cursor(item) - + # still empty (more about that is tested in test_memory_manager() assert man.num_open_files() == 0 assert man.mapped_memory_size() == 0 - + base_offset = 5000 # window size is 0 for static managers, hence size will be 0. 
We take that into consideration size = man.window_size() // 2 assert c.use_region(base_offset, size).is_valid() rr = c.region_ref() assert rr().client_count() == 2 # the manager and the cursor and us - + assert man.num_open_files() == 1 assert man.num_file_handles() == 1 assert man.mapped_memory_size() == rr().size() - + #assert c.size() == size # the cursor may overallocate in its static version assert c.ofs_begin() == base_offset assert rr().ofs_begin() == 0 # it was aligned and expanded @@ -127,9 +133,9 @@ class TestMMan(TestBase): else: assert rr().size() == fc.size #END ignore static managers which dont use windows and are aligned to file boundaries - - assert c.buffer()[:] == data[base_offset:base_offset+(size or c.size())] - + + assert c.buffer()[:] == data[base_offset:base_offset+(size or c.size())] + # obtain second window, which spans the first part of the file - it is a still the same window nsize = (size or fc.size) - 10 assert c.use_region(0, nsize).is_valid() @@ -138,7 +144,7 @@ class TestMMan(TestBase): assert c.size() == nsize assert c.ofs_begin() == 0 assert c.buffer()[:] == data[:nsize] - + # map some part at the end, our requested size cannot be kept overshoot = 4000 base_offset = fc.size - (size or c.size()) + overshoot @@ -156,23 +162,23 @@ class TestMMan(TestBase): assert rr().ofs_begin() < c.ofs_begin() # it should have extended itself to the left assert rr().ofs_end() <= fc.size # it cannot be larger than the file assert c.buffer()[:] == data[base_offset:base_offset+(size or c.size())] - + # unising a region makes the cursor invalid c.unuse_region() assert not c.is_valid() if man.window_size(): - # but doesn't change anything regarding the handle count - we cache it and only + # but doesn't change anything regarding the handle count - we cache it and only # remove mapped regions if we have to assert man.num_file_handles() == 2 #END ignore this for static managers - + # iterate through the windows, verify data contents # this will trigger map 
collection after a while max_random_accesses = 5000 num_random_accesses = max_random_accesses memory_read = 0 st = time() - + # cache everything to get some more performance includes_ofs = c.includes_ofs max_mapped_memory_size = man.max_mapped_memory_size() @@ -182,7 +188,7 @@ class TestMMan(TestBase): while num_random_accesses: num_random_accesses -= 1 base_offset = randint(0, fc.size - 1) - + # precondition if man.window_size(): assert max_mapped_memory_size >= mapped_memory_size() @@ -192,19 +198,20 @@ class TestMMan(TestBase): csize = c.size() assert c.buffer()[:] == data[base_offset:base_offset+csize] memory_read += csize - + assert includes_ofs(base_offset) assert includes_ofs(base_offset+csize-1) assert not includes_ofs(base_offset+csize) # END while we should do an access elapsed = max(time() - st, 0.001) # prevent zero divison errors on windows mb = float(1000 * 1000) - sys.stderr.write("%s: Read %i mb of memory with %i random on cursor initialized with %s accesses in %fs (%f mb/s)\n" - % (mtype, memory_read/mb, max_random_accesses, type(item), elapsed, (memory_read/mb)/elapsed)) - + print("%s: Read %i mb of memory with %i random on cursor initialized with %s accesses in %fs (%f mb/s)\n" + % (mtype, memory_read/mb, max_random_accesses, type(item), elapsed, (memory_read/mb)/elapsed), + file=sys.stderr) + # an offset as large as the size doesn't work ! 
assert not c.use_region(fc.size, size).is_valid() - + # collection - it should be able to collect all assert man.num_file_handles() assert man.collect() diff --git a/smmap/test/test_util.py b/smmap/test/test_util.py index 8afba00..745da83 100644 --- a/smmap/test/test_util.py +++ b/smmap/test/test_util.py @@ -1,6 +1,13 @@ from .lib import TestBase, FileCreator -from smmap.util import * +from smmap.util import ( + MapWindow, + MapRegion, + MapRegionList, + ALLOCATIONGRANULARITY, + is_64_bit, + align_to_mmap + ) import os import sys diff --git a/smmap/util.py b/smmap/util.py index c37dfdd..a4d7d8f 100644 --- a/smmap/util.py +++ b/smmap/util.py @@ -23,8 +23,9 @@ try: except NameError: # Python 3 has no `buffer`; only `memoryview` def buffer(obj, offset, size): - return memoryview(obj[offset:offset+size]) - + # return memoryview(obj[offset:offset+size]) + # doing it directly is much faster ! + return obj[offset:offset+size] def string_types(): if sys.version_info[0] >= 3: @@ -112,7 +113,7 @@ class MapRegion(object): '__weakref__' ] _need_compat_layer = sys.version_info[0] < 3 and sys.version_info[1] < 6 - + if _need_compat_layer: __slots__.append('_mfb') # mapped memory buffer to provide offset #END handle additional slot @@ -282,4 +283,4 @@ class MapRegionList(list): #END update file size return self._file_size -#} END utilty classes +#} END utility classes |