summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJulian Taylor <jtaylor.debian@googlemail.com>2014-04-09 19:25:27 +0200
committerJulian Taylor <jtaylor.debian@googlemail.com>2014-05-04 22:34:33 +0200
commite47a50efc284c677ba4d0337e11dc8514fca7e5b (patch)
tree21636d9c55b652a652e1c901788de3cf6719975e
parent753149eddd46c1c1823fe3932831b2b914bc8724 (diff)
downloadnumpy-e47a50efc284c677ba4d0337e11dc8514fca7e5b.tar.gz
ENH: add small memory chunk cache
Add very simplistic caching of small memory blocks (< 1024 bytes) This improves the performance of small array operations by about 15%-20%: without cache: In [1]: d = np.array([1,1]) In [2]: %timeit d*d*d*d 1000000 loops, best of 3: 2.1 µs per loop with cache: In [1]: d = np.array([1,1]) In [2]: %timeit d*d*d*d 1000000 loops, best of 3: 1.7 µs per loop The cache is a simple list of buckets of pointers, one for each size up to 1023 bytes. Two caches are required one for array data one one for dimensinos/strides as they use different allocators. All pointers can still be deleted with their matching deallocator but this implies the size of the block must be known at the time of the free in order to place the pointer in the right bucket. This should not be an issue for well behaved numpy users. Technically the ABI is broken in this change as the size of the dimension memory block has been decreased from 3 * nd to 2 * nd, the increase to 3 was a relic from the NA branch and is not needed anymore. It should be unlikely someone relies on this size.
-rw-r--r--numpy/core/src/multiarray/arrayobject.c5
-rw-r--r--numpy/core/src/multiarray/ctors.c113
-rw-r--r--numpy/core/src/multiarray/ctors.h5
-rw-r--r--numpy/core/tests/test_multiarray.py3
4 files changed, 116 insertions, 10 deletions
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index cd0912510..94437a44c 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -419,10 +419,11 @@ array_dealloc(PyArrayObject *self)
* self already...
*/
}
- PyDataMem_FREE(fa->data);
+ npy_free_cache(fa->data, PyArray_NBYTES(self));
}
- PyDimMem_FREE(fa->dimensions);
+ /* must match allocation in PyArray_NewFromDescr */
+ npy_free_cache_dim(fa->dimensions, 2 * fa->nd);
Py_DECREF(fa->descr);
Py_TYPE(self)->tp_free((PyObject *)self);
}
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 5f8fbc4dc..06d42f66a 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -28,6 +28,108 @@
#include "scalarmathmodule.h" /* for npy_mul_with_overflow_intp */
#include <assert.h>
+#define NBUCKETS 1024 /* number of buckets for data*/
+#define NBUCKETS_DIM 16 /* number of buckets for dimensions/strides */
+#define NCACHE 8 /* 1 + number of cache entries per bucket */
+static void * datacache[NBUCKETS][NCACHE];
+static void * dimcache[NBUCKETS_DIM][NCACHE];
+
+/*
+ * very simplistic small memory block cache to avoid more expensive libc
+ * allocations
+ * base function for data cache with 1 byte buckets and dimension cache with
+ * sizeof(npy_intp) byte buckets
+ */
+static NPY_INLINE void *
+_npy_alloc_cache(npy_uintp nelem, npy_uintp esz, npy_uint msz,
+ void * (*cache)[NCACHE], void * (*alloc)(size_t))
+{
+ assert((esz == 1 && cache == datacache) ||
+ (esz == sizeof(npy_intp) && cache == dimcache));
+ if (nelem < msz) {
+ /* first entry is used as fill counter */
+ npy_uintp * idx = (npy_uintp *)&(cache[nelem][0]);
+ if (*idx > 0) {
+ return cache[nelem][(*idx)--];
+ }
+ }
+ return alloc(nelem * esz);
+}
+
+/*
+ * return pointer p to cache, nelem is number of elements of the cache bucket
+ * size (1 or sizeof(npy_intp)) of the block pointed too
+ */
+static NPY_INLINE void
+_npy_free_cache(void * p, npy_uintp nelem, npy_uint msz,
+ void * (*cache)[NCACHE], void (*dealloc)(void *))
+{
+ if (p != NULL && nelem < msz) {
+ /* first entry is used as fill counter */
+ npy_uintp * idx = (npy_uintp *)&(cache[nelem][0]);
+ if (*idx < NCACHE - 1) {
+ cache[nelem][++(*idx)] = p;
+ return;
+ }
+ }
+ dealloc(p);
+}
+
+
+/*
+ * array data cache, sz is number of bytes to allocate
+ */
+static void *
+npy_alloc_cache(npy_uintp sz)
+{
+ return _npy_alloc_cache(sz, 1, NBUCKETS, datacache, &PyDataMem_NEW);
+}
+
+/* zero initialized data, sz is number of bytes to allocate */
+static void *
+npy_alloc_cache_zero(npy_uintp sz)
+{
+ if (sz < NBUCKETS) {
+ void * p = _npy_alloc_cache(sz, 1, NBUCKETS, datacache,
+ &PyDataMem_NEW);
+ memset(p, 0, sz);
+ return p;
+ }
+ return PyDataMem_NEW_ZEROED(sz, 1);
+}
+
+void
+npy_free_cache(void * p, npy_uintp sz)
+{
+ _npy_free_cache(p, sz, NBUCKETS, datacache, &PyDataMem_FREE);
+}
+
+/*
+ * dimension/stride cache, uses a different allocator and is always a multiple
+ * of npy_intp
+ */
+static void *
+npy_alloc_cache_dim(npy_uintp sz)
+{
+ /* dims + strides */
+ if (NPY_UNLIKELY(sz < 2)) {
+ sz = 2;
+ }
+ return _npy_alloc_cache(sz, sizeof(npy_intp), NBUCKETS_DIM, dimcache,
+ &PyArray_malloc);
+}
+
+void
+npy_free_cache_dim(void * p, npy_uintp sz)
+{
+ /* dims + strides */
+ if (NPY_UNLIKELY(sz < 2)) {
+ sz = 2;
+ }
+ _npy_free_cache(p, sz, NBUCKETS_DIM, dimcache,
+ &PyArray_free);
+}
+
/*
* Reading from a file or a string.
*
@@ -886,15 +988,12 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
int i;
size_t sd;
npy_intp size;
- assert(dims != NULL || (nd == 0));
if (descr->subarray) {
PyObject *ret;
npy_intp newdims[2*NPY_MAXDIMS];
npy_intp *newstrides = NULL;
- if (nd > 0) {
- memcpy(newdims, dims, nd * sizeof(npy_intp));
- }
+ memcpy(newdims, dims, nd*sizeof(npy_intp));
if (strides) {
newstrides = newdims + NPY_MAXDIMS;
memcpy(newstrides, strides, nd*sizeof(npy_intp));
@@ -994,7 +1093,7 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
fa->weakreflist = (PyObject *)NULL;
if (nd > 0) {
- fa->dimensions = PyDimMem_NEW(3*nd);
+ fa->dimensions = npy_alloc_cache_dim(2 * nd);
if (fa->dimensions == NULL) {
PyErr_NoMemory();
goto fail;
@@ -1034,10 +1133,10 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
* which could also be sub-fields of a VOID array
*/
if (zeroed || PyDataType_FLAGCHK(descr, NPY_NEEDS_INIT)) {
- data = PyDataMem_NEW_ZEROED(sd, 1);
+ data = npy_alloc_cache_zero(sd);
}
else {
- data = PyDataMem_NEW(sd);
+ data = npy_alloc_cache(sd);
}
if (data == NULL) {
PyErr_NoMemory();
diff --git a/numpy/core/src/multiarray/ctors.h b/numpy/core/src/multiarray/ctors.h
index 757362fb8..3042e52d1 100644
--- a/numpy/core/src/multiarray/ctors.h
+++ b/numpy/core/src/multiarray/ctors.h
@@ -79,5 +79,10 @@ PyArray_AssignFromSequence(PyArrayObject *self, PyObject *v);
NPY_NO_EXPORT PyArrayObject *
PyArray_SubclassWrap(PyArrayObject *arr_of_subclass, PyArrayObject *towrap);
+NPY_NO_EXPORT void
+npy_free_cache(void * p, npy_uintp sd);
+
+NPY_NO_EXPORT void
+npy_free_cache_dim(void * p, npy_uintp sd);
#endif
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 194ba0e37..ed1f71c49 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -3980,7 +3980,8 @@ class TestMemEventHook(TestCase):
# multiarray/multiarray_tests.c.src
test_pydatamem_seteventhook_start()
# force an allocation and free of a numpy array
- a = np.zeros(10)
+ # needs to be larger then limit of small memory cacher in ctors.c
+ a = np.zeros(1000)
del a
test_pydatamem_seteventhook_end()