summary | refs | log | tree | commit | diff
path: root/numpy/core/src
diff options
context:
space:
mode:
author Charles Harris <charlesr.harris@gmail.com> 2015-11-16 13:46:38 -0700
committer Charles Harris <charlesr.harris@gmail.com> 2015-11-16 13:46:38 -0700
commit 6d3ec6596b9287604ab912873f5a35ed4a4222af (patch)
tree 16d52e44e3956313925ee41cf116871ac24b54b5 /numpy/core/src
parent cf66c68c6a560c934f4a767934573c7f85dcb4ae (diff)
parent 904da7c202384c8a2a6ec88cece378f70e2dd956 (diff)
download numpy-6d3ec6596b9287604ab912873f5a35ed4a4222af.tar.gz
Merge pull request #6666 from juliantaylor/prefetch-sum
ENH: use prefetching for summation
Diffstat (limited to 'numpy/core/src')
-rw-r--r-- numpy/core/src/umath/loops.c.src 4
1 files changed, 4 insertions, 0 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 854c1e17a..aff6180c7 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1444,6 +1444,8 @@ pairwise_sum_@TYPE@(@dtype@ *a, npy_uintp n, npy_intp stride)
r[7] = @trf@(a[7 * stride]);
for (i = 8; i < n - (n % 8); i += 8) {
+ /* small blocksizes seems to mess with hardware prefetch */
+ NPY_PREFETCH(&a[(i + 512 / sizeof(a[0])) * stride], 0, 3);
r[0] += @trf@(a[(i + 0) * stride]);
r[1] += @trf@(a[(i + 1) * stride]);
r[2] += @trf@(a[(i + 2) * stride]);
@@ -2190,6 +2192,8 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, @ftype@ * a, npy_uintp n,
r[7] = a[6 * stride + 1];
for (i = 8; i < n - (n % 8); i += 8) {
+ /* small blocksizes seems to mess with hardware prefetch */
+ NPY_PREFETCH(&a[(i + 512 / sizeof(a[0])) * stride], 0, 3);
r[0] += a[(i + 0) * stride];
r[1] += a[(i + 0) * stride + 1];
r[2] += a[(i + 2) * stride];