summaryrefslogtreecommitdiff
path: root/numpy/testing
diff options
context:
space:
mode:
authorJulian Taylor <jtaylor.debian@googlemail.com>2013-05-19 17:04:27 +0200
committerJulian Taylor <jtaylor.debian@googlemail.com>2013-05-25 17:36:00 +0200
commit0adccaaa910ab495e993f453956fd983775604f3 (patch)
tree575e6b1bc7066bbe24ade1fee8576e4e31f2f7ef /numpy/testing
parent8ff5e37bff03925da4c1b121b38188f9fd779b4d (diff)
downloadnumpy-0adccaaa910ab495e993f453956fd983775604f3.tar.gz
ENH: vectorize sqrt ufunc using SSE2
Specialize the sqrt ufunc for float and double and vectorize it using SSE2. This improves performance by a factor of 4/2 for float/double if one is not memory bound due to non-cached data. Performance is always better on all tested machines (AMD Phenom X2, Intel Xeon 5xxx/7xxx, Core 2 Duo, Core i7). This version will not set errno on invalid input, but numpy only checks the FPU flags, so the behavior is the same. In principle the compiler could autovectorize it when setting -ffast-math (for no errno), specializing the loop for the vectorizable strides and giving it some hints (restrict, __builtin_assume_aligned, etc.), but it's simpler and more reliable to simply vectorize it by hand.
Diffstat (limited to 'numpy/testing')
-rw-r--r--numpy/testing/utils.py67
1 files changed, 67 insertions, 0 deletions
diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py
index 40c569c0f..7a3ea7a1c 100644
--- a/numpy/testing/utils.py
+++ b/numpy/testing/utils.py
@@ -10,6 +10,7 @@ import re
import operator
import warnings
from .nosetester import import_nose
+from numpy.core import float32, empty, arange
if sys.version_info[0] >= 3:
from io import StringIO
@@ -1523,3 +1524,69 @@ def assert_no_warnings(func, *args, **kw):
finally:
ctx.__exit__()
return result
+
+
def gen_alignment_data(dtype=float32, type='binary', max_size=24):
    """
    Generator producing data with different alignment and offsets
    to test simd vectorization.

    Parameters
    ----------
    dtype : dtype
        data type to produce
    type : string
        'unary': create data for unary operations, creates one input
                 and output array
        'binary': create data for binary operations, creates two input
                 and output array
        Any other value makes the generator yield nothing.
    max_size : integer
        exclusive upper bound on the size of the underlying buffers; at
        least one size is always produced for every start offset

    Returns
    -------
    if type is 'unary' yields one output, one input array and a message
        containing information on the data
    if type is 'binary' yields one output array, two input array and a message
        containing information on the data

    Notes
    -----
    For the 'in place' cases the yielded output *is* the yielded input
    (the very same array object), so writing to the output really
    overwrites the operand.  In every yielded tuple the output has the
    same shape as the input(s).

    """
    ufmt = 'unary offset=(%d, %d), size=%d, dtype=%r, %s'
    bfmt = 'binary offset=(%d, %d, %d), size=%d, dtype=%r, %s'
    for o in range(3):
        for s in range(o + 2, max(o + 3, max_size)):

            def fresh():
                # Each call builds a new buffer so that independent cases
                # never share memory by accident; slicing off the first
                # ``o`` elements shifts the start address of the data.
                return arange(s, dtype=dtype)[o:]

            if type == 'unary':
                out = empty((s,), dtype=dtype)[o:]
                yield out, fresh(), ufmt % (o, o, s, dtype, 'out of place')
                # in place: output and input must be the same object
                d = fresh()
                yield d, d, ufmt % (o, o, s, dtype, 'in place')
                yield out[1:], fresh()[:-1], ufmt % \
                    (o + 1, o, s - 1, dtype, 'out of place')
                yield out[:-1], fresh()[1:], ufmt % \
                    (o, o + 1, s - 1, dtype, 'out of place')
                yield fresh()[:-1], fresh()[1:], ufmt % \
                    (o, o + 1, s - 1, dtype, 'aliased')
                yield fresh()[1:], fresh()[:-1], ufmt % \
                    (o + 1, o, s - 1, dtype, 'aliased')
            if type == 'binary':
                out = empty((s,), dtype=dtype)[o:]
                yield out, fresh(), fresh(), bfmt % \
                    (o, o, o, s, dtype, 'out of place')
                # in place on the first operand
                d = fresh()
                yield d, d, fresh(), bfmt % \
                    (o, o, o, s, dtype, 'in place1')
                # in place on the second operand
                d = fresh()
                yield d, fresh(), d, bfmt % \
                    (o, o, o, s, dtype, 'in place2')
                yield out[1:], fresh()[:-1], fresh()[:-1], bfmt % \
                    (o + 1, o, o, s - 1, dtype, 'out of place')
                # ``[:-1]`` (drop the last element) keeps the output at
                # offset ``o`` with the same length as the shifted inputs;
                # ``[-1:]`` would be a single trailing element.
                yield out[:-1], fresh()[1:], fresh()[:-1], bfmt % \
                    (o, o + 1, o, s - 1, dtype, 'out of place')
                yield out[:-1], fresh()[:-1], fresh()[1:], bfmt % \
                    (o, o, o + 1, s - 1, dtype, 'out of place')
                yield fresh()[1:], fresh()[:-1], fresh()[:-1], bfmt % \
                    (o + 1, o, o, s - 1, dtype, 'aliased')
                yield fresh()[:-1], fresh()[1:], fresh()[:-1], bfmt % \
                    (o, o + 1, o, s - 1, dtype, 'aliased')
                yield fresh()[:-1], fresh()[:-1], fresh()[1:], bfmt % \
                    (o, o, o + 1, s - 1, dtype, 'aliased')