summaryrefslogtreecommitdiff
path: root/smmap
diff options
context:
space:
mode:
author: Sebastian Thiel <byronimo@gmail.com> 2014-11-13 08:34:54 +0100
committer: Sebastian Thiel <byronimo@gmail.com> 2014-11-13 08:34:54 +0100
commit: f53ddc686c0d226b2c69cc3732406dd3796932cf (patch)
tree: 4f5e5dcbd922deda608a6df0e9cff1f1df045530 /smmap
parent: 1af4b42a2354acbb53c7956d647655922658fd80 (diff)
parent: 948a9274527d14702875581d7115389cf9aa8244 (diff)
download: smmap-f53ddc686c0d226b2c69cc3732406dd3796932cf.tar.gz
Merge branch 'py2n3' (tag: v0.8.3)
Code cleanup and performance regression fixes in py3
Diffstat (limited to 'smmap')
-rw-r--r-- smmap/__init__.py 2
-rw-r--r-- smmap/buf.py 12
-rw-r--r-- smmap/mman.py 45
-rw-r--r-- smmap/test/test_buf.py 54
-rw-r--r-- smmap/test/test_mman.py 87
-rw-r--r-- smmap/test/test_util.py 9
-rw-r--r-- smmap/util.py 9
7 files changed, 117 insertions, 101 deletions
diff --git a/smmap/__init__.py b/smmap/__init__.py
index 879ebea..c494648 100644
--- a/smmap/__init__.py
+++ b/smmap/__init__.py
@@ -3,7 +3,7 @@
__author__ = "Sebastian Thiel"
__contact__ = "byronimo@gmail.com"
__homepage__ = "https://github.com/Byron/smmap"
-version_info = (0, 8, 2)
+version_info = (0, 8, 3)
__version__ = '.'.join(str(i) for i in version_info)
# make everything available in root package for convenience
diff --git a/smmap/buf.py b/smmap/buf.py
index 2f27d4d..ef9d49e 100644
--- a/smmap/buf.py
+++ b/smmap/buf.py
@@ -1,6 +1,4 @@
"""Module with a simple buffer implementation using the memory manager"""
-from .mman import WindowCursor
-
import sys
__all__ = ["SlidingWindowMapBuffer"]
@@ -79,18 +77,18 @@ class SlidingWindowMapBuffer(object):
else:
l = j-i # total length
ofs = i
- # Keeping tokens in a list could possible be faster, but the list
- # overhead outweighs the benefits (tested) !
- md = bytes()
+ # It's fastest to keep tokens and join later, especially in py3, which was 7 times slower
+ # in the previous iteration of this code
+ md = list()
while l:
c.use_region(ofs, l)
assert c.is_valid()
d = c.buffer()[:l]
ofs += len(d)
l -= len(d)
- md += d
+ md.append(d)
#END while there are bytes to read
- return md
+ return bytes().join(md)
# END fast or slow path
#{ Interface
diff --git a/smmap/mman.py b/smmap/mman.py
index 7cbb535..da6fd81 100644
--- a/smmap/mman.py
+++ b/smmap/mman.py
@@ -1,13 +1,12 @@
"""Module containing a memory memory manager which provides a sliding window on a number of memory mapped files"""
from .util import (
- MapWindow,
- MapRegion,
- MapRegionList,
- is_64_bit,
- align_to_mmap,
- string_types,
- buffer,
- )
+ MapWindow,
+ MapRegion,
+ MapRegionList,
+ is_64_bit,
+ string_types,
+ buffer,
+ )
from weakref import ref
import sys
@@ -102,7 +101,7 @@ class WindowCursor(object):
:param flags: additional flags to be given to os.open in case a file handle is initially opened
for mapping. Has no effect if a region can actually be reused.
:return: this instance - it should be queried for whether it points to a valid memory region.
- This is not the case if the mapping failed becaues we reached the end of the file
+ This is not the case if the mapping failed because we reached the end of the file
**Note:**: The size actually mapped may be smaller than the given size. If that is the case,
either the file has reached its end, or the map was created between two existing regions"""
@@ -138,7 +137,7 @@ class WindowCursor(object):
"""Unuse the ucrrent region. Does nothing if we have no current region
**Note:** the cursor unuses the region automatically upon destruction. It is recommended
- to unuse the region once you are done reading from it in persistent cursors as it
+ to un-use the region once you are done reading from it in persistent cursors as it
helps to free up resource more quickly"""
self._region = None
# note: should reset ofs and size, but we spare that for performance. Its not
@@ -204,7 +203,7 @@ class WindowCursor(object):
return self._rlist.file_size()
def path_or_fd(self):
- """:return: path or file decriptor of the underlying mapped file"""
+ """:return: path or file descriptor of the underlying mapped file"""
return self._rlist.path_or_fd()
def path(self):
@@ -238,12 +237,12 @@ class StaticWindowMapManager(object):
These clients would have to use a SlidingWindowMapBuffer to hide this fact.
This type will always use a maximum window size, and optimize certain methods to
- acomodate this fact"""
+ accommodate this fact"""
__slots__ = [
'_fdict', # mapping of path -> StorageHelper (of some kind
'_window_size', # maximum size of a window
- '_max_memory_size', # maximum amount ofmemory we may allocate
+ '_max_memory_size', # maximum amount of memory we may allocate
'_max_handle_count', # maximum amount of handles to keep open
'_memory_size', # currently allocated memory size
'_handle_count', # amount of currently allocated file handles
@@ -261,14 +260,14 @@ class StaticWindowMapManager(object):
def __init__(self, window_size = 0, max_memory_size = 0, max_open_handles = sys.maxsize):
"""initialize the manager with the given parameters.
:param window_size: if -1, a default window size will be chosen depending on
- the operating system's architechture. It will internally be quantified to a multiple of the page size
+ the operating system's architecture. It will internally be quantified to a multiple of the page size
If 0, the window may have any size, which basically results in mapping the whole file at one
:param max_memory_size: maximum amount of memory we may map at once before releasing mapped regions.
- If 0, a viable default iwll be set dependning on the system's architecture.
- It is a soft limit that is tried to be kept, but nothing bad happens if we have to overallocate
- :param max_open_handles: if not maxin, limit the amount of open file handles to the given number.
- Otherwise the amount is only limited by the system iteself. If a system or soft limit is hit,
- the manager will free as many handles as posisble"""
+ If 0, a viable default will be set depending on the system's architecture.
+ It is a soft limit that is tried to be kept, but nothing bad happens if we have to over-allocate
+ :param max_open_handles: if not maxint, limit the amount of open file handles to the given number.
+ Otherwise the amount is only limited by the system itself. If a system or soft limit is hit,
+ the manager will free as many handles as possible"""
self._fdict = dict()
self._window_size = window_size
self._max_memory_size = max_memory_size
@@ -277,7 +276,7 @@ class StaticWindowMapManager(object):
self._handle_count = 0
if window_size < 0:
- coeff = 32
+ coeff = 64
if is_64_bit():
coeff = 1024
#END handle arch
@@ -285,7 +284,7 @@ class StaticWindowMapManager(object):
# END handle max window size
if max_memory_size == 0:
- coeff = 512
+ coeff = 1024
if is_64_bit():
coeff = 8192
#END handle arch
@@ -351,8 +350,6 @@ class StaticWindowMapManager(object):
# As many more operations are likely to fail in that condition (
# like reading a file from disk, etc) we free up as much as possible
# As this invalidates our insert position, we have to recurse here
- # NOTE: The c++ version uses a linked list to curcumvent this, but
- # using that in python is probably too slow anyway
if is_recursive:
# we already tried this, and still have no success in obtaining
# a mapping. This is an exception, so we propagate it
@@ -563,8 +560,6 @@ class SlidingWindowMapManager(StaticWindowMapManager):
# As many more operations are likely to fail in that condition (
# like reading a file from disk, etc) we free up as much as possible
# As this invalidates our insert position, we have to recurse here
- # NOTE: The c++ version uses a linked list to curcumvent this, but
- # using that in python is probably too slow anyway
if is_recursive:
# we already tried this, and still have no success in obtaining
# a mapping. This is an exception, so we propagate it
diff --git a/smmap/test/test_buf.py b/smmap/test/test_buf.py
index 15dfb82..d3e51e2 100644
--- a/smmap/test/test_buf.py
+++ b/smmap/test/test_buf.py
@@ -1,12 +1,18 @@
+from __future__ import print_function
+
from .lib import TestBase, FileCreator
-from smmap.mman import SlidingWindowMapManager, StaticWindowMapManager
-from smmap.buf import *
+from smmap.mman import (
+ SlidingWindowMapManager,
+ StaticWindowMapManager
+ )
+from smmap.buf import SlidingWindowMapBuffer
from random import randint
from time import time
import sys
import os
+import logging
man_optimal = SlidingWindowMapManager()
@@ -17,65 +23,66 @@ man_worst_case = SlidingWindowMapManager(
static_man = StaticWindowMapManager()
class TestBuf(TestBase):
-
+
def test_basics(self):
fc = FileCreator(self.k_window_test_size, "buffer_test")
-
+
# invalid paths fail upon construction
c = man_optimal.make_cursor(fc.path)
self.assertRaises(ValueError, SlidingWindowMapBuffer, type(c)()) # invalid cursor
self.assertRaises(ValueError, SlidingWindowMapBuffer, c, fc.size) # offset too large
-
+
buf = SlidingWindowMapBuffer() # can create uninitailized buffers
assert buf.cursor() is None
-
+
# can call end access any time
buf.end_access()
buf.end_access()
assert len(buf) == 0
-
+
# begin access can revive it, if the offset is suitable
offset = 100
assert buf.begin_access(c, fc.size) == False
assert buf.begin_access(c, offset) == True
assert len(buf) == fc.size - offset
assert buf.cursor().is_valid()
-
+
# empty begin access keeps it valid on the same path, but alters the offset
assert buf.begin_access() == True
assert len(buf) == fc.size
assert buf.cursor().is_valid()
-
+
# simple access
- data = open(fc.path, 'rb').read()
+ with open(fc.path, 'rb') as fp:
+ data = fp.read()
assert data[offset] == buf[0]
assert data[offset:offset*2] == buf[0:offset]
-
+
# negative indices, partial slices
assert buf[-1] == buf[len(buf)-1]
assert buf[-10:] == buf[len(buf)-10:len(buf)]
-
+
# end access makes its cursor invalid
buf.end_access()
assert not buf.cursor().is_valid()
assert buf.cursor().is_associated() # but it remains associated
-
+
# an empty begin access fixes it up again
assert buf.begin_access() == True and buf.cursor().is_valid()
del(buf) # ends access automatically
del(c)
-
+
assert man_optimal.num_file_handles() == 1
-
+
# PERFORMANCE
- # blast away with rnadom access and a full mapping - we don't want to
- # exagerate the manager's overhead, but measure the buffer overhead
- # We do it once with an optimal setting, and with a worse manager which
+ # blast away with random access and a full mapping - we don't want to
+ # exaggerate the manager's overhead, but measure the buffer overhead
+ # We do it once with an optimal setting, and with a worse manager which
# will produce small mappings only !
max_num_accesses = 100
fd = os.open(fc.path, os.O_RDONLY)
for item in (fc.path, fd):
- for manager, man_id in ( (man_optimal, 'optimal'),
+ for manager, man_id in ( (man_optimal, 'optimal'),
(man_worst_case, 'worst case'),
(static_man, 'static optimal')):
buf = SlidingWindowMapBuffer(manager.make_cursor(item))
@@ -84,7 +91,7 @@ class TestBuf(TestBase):
num_accesses_left = max_num_accesses
num_bytes = 0
fsize = fc.size
-
+
st = time()
buf.begin_access()
while num_accesses_left:
@@ -102,7 +109,7 @@ class TestBuf(TestBase):
num_bytes += 1
#END handle mode
# END handle num accesses
-
+
buf.end_access()
assert manager.num_file_handles()
assert manager.collect()
@@ -110,8 +117,9 @@ class TestBuf(TestBase):
elapsed = max(time() - st, 0.001) # prevent zero division errors on windows
mb = float(1000*1000)
mode_str = (access_mode and "slice") or "single byte"
- sys.stderr.write("%s: Made %i random %s accesses to buffer created from %s reading a total of %f mb in %f s (%f mb/s)\n"
- % (man_id, max_num_accesses, mode_str, type(item), num_bytes/mb, elapsed, (num_bytes/mb)/elapsed))
+ print("%s: Made %i random %s accesses to buffer created from %s reading a total of %f mb in %f s (%f mb/s)"
+ % (man_id, max_num_accesses, mode_str, type(item), num_bytes/mb, elapsed, (num_bytes/mb)/elapsed),
+ file=sys.stderr)
# END handle access mode
# END for each manager
# END for each input
diff --git a/smmap/test/test_mman.py b/smmap/test/test_mman.py
index e0516b2..cc5d914 100644
--- a/smmap/test/test_mman.py
+++ b/smmap/test/test_mman.py
@@ -1,9 +1,13 @@
+from __future__ import print_function
+
from .lib import TestBase, FileCreator
-from smmap.mman import *
-from smmap.mman import WindowCursor
+from smmap.mman import (
+ WindowCursor,
+ SlidingWindowMapManager,
+ StaticWindowMapManager
+ )
from smmap.util import align_to_mmap
-from smmap.exc import RegionCollectionError
from random import randint
from time import time
@@ -12,43 +16,43 @@ import sys
from copy import copy
class TestMMan(TestBase):
-
+
def test_cursor(self):
fc = FileCreator(self.k_window_test_size, "cursor_test")
-
+
man = SlidingWindowMapManager()
ci = WindowCursor(man) # invalid cursor
assert not ci.is_valid()
assert not ci.is_associated()
assert ci.size() == 0 # this is cached, so we can query it in invalid state
-
+
cv = man.make_cursor(fc.path)
assert not cv.is_valid() # no region mapped yet
assert cv.is_associated()# but it know where to map it from
assert cv.file_size() == fc.size
assert cv.path() == fc.path
-
+
# copy module
cio = copy(cv)
assert not cio.is_valid() and cio.is_associated()
-
+
# assign method
assert not ci.is_associated()
ci.assign(cv)
assert not ci.is_valid() and ci.is_associated()
-
+
# unuse non-existing region is fine
cv.unuse_region()
cv.unuse_region()
-
+
# destruction is fine (even multiple times)
cv._destroy()
WindowCursor(man)._destroy()
-
+
def test_memory_manager(self):
slide_man = SlidingWindowMapManager()
static_man = StaticWindowMapManager()
-
+
for man in (static_man, slide_man):
assert man.num_file_handles() == 0
assert man.num_open_files() == 0
@@ -59,15 +63,15 @@ class TestMMan(TestBase):
assert man.window_size() > winsize_cmp_val
assert man.mapped_memory_size() == 0
assert man.max_mapped_memory_size() > 0
-
+
# collection doesn't raise in 'any' mode
man._collect_lru_region(0)
# doesn't raise if we are within the limit
man._collect_lru_region(10)
-
- # doesn't fail if we overallocate
+
+ # doesn't fail if we over-allocate
assert man._collect_lru_region(sys.maxsize) == 0
-
+
# use a region, verify most basic functionality
fc = FileCreator(self.k_window_test_size, "manager_test")
fd = os.open(fc.path, os.O_RDONLY)
@@ -77,8 +81,9 @@ class TestMMan(TestBase):
assert c.use_region(10, 10).is_valid()
assert c.ofs_begin() == 10
assert c.size() == 10
- assert c.buffer()[:] == open(fc.path, 'rb').read(20)[10:]
-
+ with open(fc.path, 'rb') as fp:
+ assert c.buffer()[:] == fp.read(20)[10:]
+
if isinstance(item, int):
self.assertRaises(ValueError, c.path)
else:
@@ -87,38 +92,39 @@ class TestMMan(TestBase):
#END for each input
os.close(fd)
# END for each manager type
-
+
def test_memman_operation(self):
# test more access, force it to actually unmap regions
fc = FileCreator(self.k_window_test_size, "manager_operation_test")
- data = open(fc.path, 'rb').read()
+ with open(fc.path, 'rb') as fp:
+ data = fp.read()
fd = os.open(fc.path, os.O_RDONLY)
max_num_handles = 15
- #small_size =
+ #small_size =
for mtype, args in ( (StaticWindowMapManager, (0, fc.size // 3, max_num_handles)),
(SlidingWindowMapManager, (fc.size // 100, fc.size // 3, max_num_handles)),):
for item in (fc.path, fd):
assert len(data) == fc.size
-
+
# small windows, a reasonable max memory. Not too many regions at once
man = mtype(window_size=args[0], max_memory_size=args[1], max_open_handles=args[2])
c = man.make_cursor(item)
-
+
# still empty (more about that is tested in test_memory_manager()
assert man.num_open_files() == 0
assert man.mapped_memory_size() == 0
-
+
base_offset = 5000
# window size is 0 for static managers, hence size will be 0. We take that into consideration
size = man.window_size() // 2
assert c.use_region(base_offset, size).is_valid()
rr = c.region_ref()
assert rr().client_count() == 2 # the manager and the cursor and us
-
+
assert man.num_open_files() == 1
assert man.num_file_handles() == 1
assert man.mapped_memory_size() == rr().size()
-
+
#assert c.size() == size # the cursor may overallocate in its static version
assert c.ofs_begin() == base_offset
assert rr().ofs_begin() == 0 # it was aligned and expanded
@@ -127,9 +133,9 @@ class TestMMan(TestBase):
else:
assert rr().size() == fc.size
#END ignore static managers which dont use windows and are aligned to file boundaries
-
- assert c.buffer()[:] == data[base_offset:base_offset+(size or c.size())]
-
+
+ assert c.buffer()[:] == data[base_offset:base_offset+(size or c.size())]
+
# obtain second window, which spans the first part of the file - it is a still the same window
nsize = (size or fc.size) - 10
assert c.use_region(0, nsize).is_valid()
@@ -138,7 +144,7 @@ class TestMMan(TestBase):
assert c.size() == nsize
assert c.ofs_begin() == 0
assert c.buffer()[:] == data[:nsize]
-
+
# map some part at the end, our requested size cannot be kept
overshoot = 4000
base_offset = fc.size - (size or c.size()) + overshoot
@@ -156,23 +162,23 @@ class TestMMan(TestBase):
assert rr().ofs_begin() < c.ofs_begin() # it should have extended itself to the left
assert rr().ofs_end() <= fc.size # it cannot be larger than the file
assert c.buffer()[:] == data[base_offset:base_offset+(size or c.size())]
-
+
# unising a region makes the cursor invalid
c.unuse_region()
assert not c.is_valid()
if man.window_size():
- # but doesn't change anything regarding the handle count - we cache it and only
+ # but doesn't change anything regarding the handle count - we cache it and only
# remove mapped regions if we have to
assert man.num_file_handles() == 2
#END ignore this for static managers
-
+
# iterate through the windows, verify data contents
# this will trigger map collection after a while
max_random_accesses = 5000
num_random_accesses = max_random_accesses
memory_read = 0
st = time()
-
+
# cache everything to get some more performance
includes_ofs = c.includes_ofs
max_mapped_memory_size = man.max_mapped_memory_size()
@@ -182,7 +188,7 @@ class TestMMan(TestBase):
while num_random_accesses:
num_random_accesses -= 1
base_offset = randint(0, fc.size - 1)
-
+
# precondition
if man.window_size():
assert max_mapped_memory_size >= mapped_memory_size()
@@ -192,19 +198,20 @@ class TestMMan(TestBase):
csize = c.size()
assert c.buffer()[:] == data[base_offset:base_offset+csize]
memory_read += csize
-
+
assert includes_ofs(base_offset)
assert includes_ofs(base_offset+csize-1)
assert not includes_ofs(base_offset+csize)
# END while we should do an access
elapsed = max(time() - st, 0.001) # prevent zero divison errors on windows
mb = float(1000 * 1000)
- sys.stderr.write("%s: Read %i mb of memory with %i random on cursor initialized with %s accesses in %fs (%f mb/s)\n"
- % (mtype, memory_read/mb, max_random_accesses, type(item), elapsed, (memory_read/mb)/elapsed))
-
+ print("%s: Read %i mb of memory with %i random on cursor initialized with %s accesses in %fs (%f mb/s)\n"
+ % (mtype, memory_read/mb, max_random_accesses, type(item), elapsed, (memory_read/mb)/elapsed),
+ file=sys.stderr)
+
# an offset as large as the size doesn't work !
assert not c.use_region(fc.size, size).is_valid()
-
+
# collection - it should be able to collect all
assert man.num_file_handles()
assert man.collect()
diff --git a/smmap/test/test_util.py b/smmap/test/test_util.py
index 8afba00..745da83 100644
--- a/smmap/test/test_util.py
+++ b/smmap/test/test_util.py
@@ -1,6 +1,13 @@
from .lib import TestBase, FileCreator
-from smmap.util import *
+from smmap.util import (
+ MapWindow,
+ MapRegion,
+ MapRegionList,
+ ALLOCATIONGRANULARITY,
+ is_64_bit,
+ align_to_mmap
+ )
import os
import sys
diff --git a/smmap/util.py b/smmap/util.py
index c37dfdd..a4d7d8f 100644
--- a/smmap/util.py
+++ b/smmap/util.py
@@ -23,8 +23,9 @@ try:
except NameError:
# Python 3 has no `buffer`; only `memoryview`
def buffer(obj, offset, size):
- return memoryview(obj[offset:offset+size])
-
+ # return memoryview(obj[offset:offset+size])
+ # doing it directly is much faster !
+ return obj[offset:offset+size]
def string_types():
if sys.version_info[0] >= 3:
@@ -112,7 +113,7 @@ class MapRegion(object):
'__weakref__'
]
_need_compat_layer = sys.version_info[0] < 3 and sys.version_info[1] < 6
-
+
if _need_compat_layer:
__slots__.append('_mfb') # mapped memory buffer to provide offset
#END handle additional slot
@@ -282,4 +283,4 @@ class MapRegionList(list):
#END update file size
return self._file_size
-#} END utilty classes
+#} END utility classes