diff options
author | Julian Taylor <jtaylor.debian@googlemail.com> | 2013-05-19 17:04:27 +0200 |
---|---|---|
committer | Julian Taylor <jtaylor.debian@googlemail.com> | 2013-05-25 17:36:00 +0200 |
commit | 0adccaaa910ab495e993f453956fd983775604f3 (patch) | |
tree | 575e6b1bc7066bbe24ade1fee8576e4e31f2f7ef | |
parent | 8ff5e37bff03925da4c1b121b38188f9fd779b4d (diff) | |
download | numpy-0adccaaa910ab495e993f453956fd983775604f3.tar.gz |
ENH: vectorize sqrt ufunc using SSE2
specialize the sqrt ufunc for float and double and vectorize it using
SSE2.
improves performance by a factor of 4/2 for float/double when the operation
is not memory bound due to non-cached data.
performance is always better on all tested machines (amd phenom X2,
intel xeon 5xxx/7xxx, core2duo, corei7)
This version will not set errno on invalid input, but numpy only checks
the fpu flags so the behavior is the same.
In principle the compiler could autovectorize it when setting -ffast-math
(for no errno), specializing the loop for the vectorizable strides,
and giving it some hints (restrict, __builtin_assume_aligned, etc.),
but it's simpler and more reliable to vectorize it by hand.
-rw-r--r-- | numpy/core/code_generators/generate_umath.py | 2 | ||||
-rw-r--r-- | numpy/core/setup.py | 5 | ||||
-rw-r--r-- | numpy/core/setup_common.py | 4 | ||||
-rw-r--r-- | numpy/core/src/private/lowlevel_strided_loops.h | 46 | ||||
-rw-r--r-- | numpy/core/src/scalarmathmodule.c.src | 10 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 81 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.h | 356 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.h.src | 6 | ||||
-rw-r--r-- | numpy/core/tests/test_umath.py | 16 | ||||
-rw-r--r-- | numpy/testing/utils.py | 67 |
10 files changed, 417 insertions, 176 deletions
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index ad711f888..1bc22d777 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -207,6 +207,7 @@ cmplx = 'FDG' cmplxO = cmplx + O cmplxP = cmplx + P inexact = flts + cmplx +inexactvec = 'fd' noint = inexact+O nointP = inexact+P allP = bints+times+flts+cmplxP @@ -686,6 +687,7 @@ defdict = { Ufunc(1, 1, None, docstrings.get('numpy.core.umath.sqrt'), None, + TD(inexactvec), TD(inexact, f='sqrt', astype={'e':'f'}), TD(P, f='sqrt'), ), diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 5a1e6cf6e..a6ddfb843 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -161,6 +161,11 @@ def check_math_capabilities(config, moredefs, mathlibs): check_funcs(OPTIONAL_STDFUNCS) + + for h in OPTIONAL_HEADERS: + if config.check_func("", decl=False, call=False, headers=[h]): + moredefs.append((fname2def(h).replace(".", "_"), 1)) + for f, args in OPTIONAL_INTRINSICS: if config.check_func(f, decl=False, call=True, call_args=args): moredefs.append((fname2def(f), 1)) diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index e778e507b..847c4a624 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -97,6 +97,10 @@ OPTIONAL_STDFUNCS = ["expm1", "log1p", "acosh", "asinh", "atanh", "rint", "trunc", "exp2", "log2", "hypot", "atan2", "pow", "copysign", "nextafter"] + +OPTIONAL_HEADERS = ["emmintrin.h" # SSE2, amd64 only +] + # optional gcc compiler builtins and their call arguments # call arguments are required as the compiler will do strict signature checking OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'), diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h index c9fd1248f..742882a92 100644 --- a/numpy/core/src/private/lowlevel_strided_loops.h +++ b/numpy/core/src/private/lowlevel_strided_loops.h @@ -396,6 +396,52 @@ 
PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp *shape, char **out_dataB, npy_intp *out_stridesB, char **out_dataC, npy_intp *out_stridesC); + +/* + * Return number of elements that must be peeled from + * the start of 'addr' with 'nvals' elements of size 'esize' + * in order to reach 'alignment'. + * see npy_blocked_end for an example + */ +static NPY_INLINE npy_intp +npy_aligned_block_offset(const void * addr, const npy_intp esize, + const npy_intp alignment, const npy_intp nvals) +{ + const npy_intp offset = (npy_intp)addr & (alignment - 1); + npy_intp peel = offset ? (alignment - offset) / esize : 0; + peel = nvals < peel ? nvals : peel; + return peel; +} + +/* + * Return upper loop bound for an array of 'nvals' elements + * of size 'esize' peeled by 'offset' elements and blocking to + * a vector size of 'vsz' in bytes + * + * example usage: + * npy_intp i; + * double v[101]; + * npy_intp esize = sizeof(v[0]); + * npy_intp peel = npy_aligned_block_offset(v, esize, 16, n); + * // peel to alignment 16 + * for (i = 0; i < peel; i++) + * <scalar-op> + * // simd vectorized operation + * for (; i < npy_blocked_end(peel, esize, 16, n); i += 16 / esize) + * <blocked-op> + * // handle scalar rest + * for(; i < n; i++) + * <scalar-op> + */ +static NPY_INLINE npy_intp +npy_blocked_end(const npy_intp offset, const npy_intp esize, + const npy_intp vsz, const npy_intp nvals) +{ + return nvals - offset - (nvals - offset) % (vsz / esize); +} + + + /* Start raw iteration */ #define NPY_RAW_ITER_START(idim, ndim, coord, shape) \ memset((coord), 0, (ndim) * sizeof(coord[0])); \ diff --git a/numpy/core/src/scalarmathmodule.c.src b/numpy/core/src/scalarmathmodule.c.src index b87d9b405..47ad9de61 100644 --- a/numpy/core/src/scalarmathmodule.c.src +++ b/numpy/core/src/scalarmathmodule.c.src @@ -1766,8 +1766,14 @@ get_functions(void) } funcdata = ((PyUFuncObject *)obj)->data; signatures = ((PyUFuncObject *)obj)->types; - i = 0; - j = 0; + /* + * sqrt ufunc is specialized for 
double and float loops in + * generate_umath.py, the first to go into FLOAT/DOUBLE_sqrt + * they have the same signature as the scalar variants so we need to skip + * over them + */ + i = 4; + j = 2; while (signatures[i] != NPY_FLOAT) { i += 2; j++; } diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 040486a32..e307faa46 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -15,10 +15,16 @@ #include "numpy/ufuncobject.h" #include "numpy/npy_math.h" #include "numpy/halffloat.h" +#include "lowlevel_strided_loops.h" #include "npy_pycompat.h" #include "ufunc_object.h" +#include <assert.h> + +#ifdef HAVE_EMMINTRIN_H +#include <emmintrin.h> +#endif /* @@ -27,10 +33,25 @@ ***************************************************************************** */ + +static NPY_INLINE int npy_is_aligned(const void * p, const npy_intp alignment) +{ + return ((npy_intp)(p) & ((alignment) - 1)) == 0; +} + #define IS_BINARY_REDUCE ((args[0] == args[2])\ && (steps[0] == steps[2])\ && (steps[0] == 0)) +/* + * stride is equal to element size and input and destination are equal or + * don't overlap within one register + */ +#define IS_BLOCKABLE_UNARY(esize, vsize) \ + (steps[0] == (esize) && steps[0] == steps[1] && \ + (npy_is_aligned(args[0], esize) && npy_is_aligned(args[1], esize)) && \ + ((abs(args[1] - args[0]) >= (vsize)) || ((abs(args[1] - args[0]) == 0)))) + #define OUTPUT_LOOP\ char *op1 = args[1];\ npy_intp os1 = steps[1];\ @@ -52,6 +73,22 @@ npy_intp i;\ for(i = 0; i < n; i++, ip1 += is1, op1 += os1, op2 += os2) +/* align output to alignment + * store alignment is usually more important than load alignment */ +#define UNARY_LOOP_BLOCK_ALIGN_OUT(type, alignment)\ + type *ip1 = (type *)args[0], *op1 = (type *)args[1];\ + npy_intp n = dimensions[0];\ + npy_intp i, peel = npy_aligned_block_offset(op1, sizeof(type),\ + alignment, n);\ + for(i = 0; i < peel; i++) + +#define UNARY_LOOP_BLOCKED(type, vsize)\ + for(; i < 
npy_blocked_end(peel, sizeof(type), vsize, n);\ + i += (vsize / sizeof(type))) + +#define UNARY_LOOP_BLOCKED_END\ + for (; i < n; i++) + #define BINARY_LOOP\ char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];\ npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\ @@ -1268,6 +1305,50 @@ TIMEDELTA_mm_d_divide(char **args, npy_intp *dimensions, npy_intp *steps, void * /**begin repeat + * #type = npy_float, npy_double# + * #TYPE = FLOAT, DOUBLE# + * #scalarf = npy_sqrtf, npy_sqrt# + * #vtype = __m128, __m128d# + * #vsuf = ps, pd# + */ +NPY_NO_EXPORT void +@TYPE@_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ +#ifdef HAVE_EMMINTRIN_H + if (IS_BLOCKABLE_UNARY(sizeof(@type@), 16)) { + UNARY_LOOP_BLOCK_ALIGN_OUT(@type@, 16) { + op1[i] = @scalarf@(ip1[i]); + } + assert(npy_is_aligned(&op1[i], 16)); + if (npy_is_aligned(&ip1[i], 16)) { + UNARY_LOOP_BLOCKED(@type@, 16) { + @vtype@ d = _mm_load_@vsuf@(&ip1[i]); + _mm_store_@vsuf@(&op1[i], _mm_sqrt_@vsuf@(d)); + } + } + else { + UNARY_LOOP_BLOCKED(@type@, 16) { + @vtype@ d = _mm_loadu_@vsuf@(&ip1[i]); + _mm_store_@vsuf@(&op1[i], _mm_sqrt_@vsuf@(d)); + } + } + UNARY_LOOP_BLOCKED_END { + op1[i] = @scalarf@(ip1[i]); + } + } + else +#endif + { + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *(@type@ *)op1 = @scalarf@(in1); + } + } +} +/**end repeat**/ + + +/**begin repeat * Float types * #type = npy_float, npy_double, npy_longdouble# * #TYPE = FLOAT, DOUBLE, LONGDOUBLE# diff --git a/numpy/core/src/umath/loops.h b/numpy/core/src/umath/loops.h index c2123459b..7f49f1956 100644 --- a/numpy/core/src/umath/loops.h +++ b/numpy/core/src/umath/loops.h @@ -1541,56 +1541,64 @@ ULONGLONG_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NP ***************************************************************************** */ +#line 187 +NPY_NO_EXPORT void +FLOAT_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + +#line 187 +NPY_NO_EXPORT void 
+DOUBLE_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + -#line 191 +#line 197 -#line 198 +#line 204 NPY_NO_EXPORT void HALF_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 198 +#line 204 NPY_NO_EXPORT void HALF_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 198 +#line 204 NPY_NO_EXPORT void HALF_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 198 +#line 204 NPY_NO_EXPORT void HALF_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void HALF_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void HALF_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void HALF_less(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void HALF_less_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void HALF_greater(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void HALF_greater_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void HALF_logical_and(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void HALF_logical_or(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -1601,49 +1609,49 @@ HALF_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_U NPY_NO_EXPORT void HALF_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void HALF_isnan(char **args, npy_intp 
*dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void HALF_isinf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void HALF_isfinite(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void HALF_signbit(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void HALF_copysign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void HALF_nextafter(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void HALF_spacing(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 229 +#line 235 NPY_NO_EXPORT void HALF_maximum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 229 +#line 235 NPY_NO_EXPORT void HALF_minimum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 237 +#line 243 NPY_NO_EXPORT void HALF_fmax(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 237 +#line 243 NPY_NO_EXPORT void HALF_fmin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -1696,55 +1704,55 @@ HALF_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UN #define HALF_true_divide HALF_divide -#line 191 +#line 197 -#line 198 +#line 204 NPY_NO_EXPORT void FLOAT_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 198 +#line 204 NPY_NO_EXPORT void FLOAT_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 198 +#line 204 NPY_NO_EXPORT void FLOAT_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 198 +#line 204 NPY_NO_EXPORT void FLOAT_divide(char **args, npy_intp 
*dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void FLOAT_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void FLOAT_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void FLOAT_less(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void FLOAT_less_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void FLOAT_greater(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void FLOAT_greater_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void FLOAT_logical_and(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void FLOAT_logical_or(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -1755,49 +1763,49 @@ FLOAT_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_ NPY_NO_EXPORT void FLOAT_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void FLOAT_isnan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void FLOAT_isinf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void FLOAT_isfinite(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void FLOAT_signbit(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void FLOAT_copysign(char **args, npy_intp *dimensions, npy_intp *steps, void 
*NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void FLOAT_nextafter(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void FLOAT_spacing(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 229 +#line 235 NPY_NO_EXPORT void FLOAT_maximum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 229 +#line 235 NPY_NO_EXPORT void FLOAT_minimum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 237 +#line 243 NPY_NO_EXPORT void FLOAT_fmax(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 237 +#line 243 NPY_NO_EXPORT void FLOAT_fmin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -1850,55 +1858,55 @@ FLOAT_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_U #define FLOAT_true_divide FLOAT_divide -#line 191 +#line 197 -#line 198 +#line 204 NPY_NO_EXPORT void DOUBLE_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 198 +#line 204 NPY_NO_EXPORT void DOUBLE_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 198 +#line 204 NPY_NO_EXPORT void DOUBLE_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 198 +#line 204 NPY_NO_EXPORT void DOUBLE_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void DOUBLE_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void DOUBLE_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void DOUBLE_less(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void DOUBLE_less_equal(char **args, npy_intp *dimensions, 
npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void DOUBLE_greater(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void DOUBLE_greater_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void DOUBLE_logical_and(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void DOUBLE_logical_or(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -1909,49 +1917,49 @@ DOUBLE_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY NPY_NO_EXPORT void DOUBLE_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void DOUBLE_isnan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void DOUBLE_isinf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void DOUBLE_isfinite(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void DOUBLE_signbit(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void DOUBLE_copysign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void DOUBLE_nextafter(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void DOUBLE_spacing(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 229 +#line 235 NPY_NO_EXPORT void DOUBLE_maximum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 229 +#line 235 NPY_NO_EXPORT void DOUBLE_minimum(char **args, npy_intp *dimensions, npy_intp *steps, void 
*NPY_UNUSED(func)); -#line 237 +#line 243 NPY_NO_EXPORT void DOUBLE_fmax(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 237 +#line 243 NPY_NO_EXPORT void DOUBLE_fmin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2004,55 +2012,55 @@ DOUBLE_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_ #define DOUBLE_true_divide DOUBLE_divide -#line 191 +#line 197 -#line 198 +#line 204 NPY_NO_EXPORT void LONGDOUBLE_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 198 +#line 204 NPY_NO_EXPORT void LONGDOUBLE_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 198 +#line 204 NPY_NO_EXPORT void LONGDOUBLE_multiply(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 198 +#line 204 NPY_NO_EXPORT void LONGDOUBLE_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void LONGDOUBLE_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void LONGDOUBLE_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void LONGDOUBLE_less(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void LONGDOUBLE_less_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void LONGDOUBLE_greater(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void LONGDOUBLE_greater_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT void LONGDOUBLE_logical_and(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 207 +#line 213 NPY_NO_EXPORT 
void LONGDOUBLE_logical_or(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2063,49 +2071,49 @@ LONGDOUBLE_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void NPY_NO_EXPORT void LONGDOUBLE_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void LONGDOUBLE_isnan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void LONGDOUBLE_isinf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void LONGDOUBLE_isfinite(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void LONGDOUBLE_signbit(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void LONGDOUBLE_copysign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void LONGDOUBLE_nextafter(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 221 +#line 227 NPY_NO_EXPORT void LONGDOUBLE_spacing(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 229 +#line 235 NPY_NO_EXPORT void LONGDOUBLE_maximum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 229 +#line 235 NPY_NO_EXPORT void LONGDOUBLE_minimum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 237 +#line 243 NPY_NO_EXPORT void LONGDOUBLE_fmax(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 237 +#line 243 NPY_NO_EXPORT void LONGDOUBLE_fmin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2173,14 +2181,14 @@ LONGDOUBLE_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void * #define CEQ(xr,xi,yr,yi) (xr == yr && xi == yi); #define 
CNE(xr,xi,yr,yi) (xr != yr || xi != yi); -#line 310 - #line 316 + +#line 322 NPY_NO_EXPORT void CFLOAT_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 316 +#line 322 NPY_NO_EXPORT void CFLOAT_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2195,36 +2203,36 @@ CFLOAT_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUS NPY_NO_EXPORT void CFLOAT_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CFLOAT_greater(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CFLOAT_greater_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CFLOAT_less(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CFLOAT_less_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CFLOAT_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CFLOAT_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 343 +#line 349 NPY_NO_EXPORT void CFLOAT_logical_and(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 343 +#line 349 NPY_NO_EXPORT void CFLOAT_logical_or(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2234,15 +2242,15 @@ CFLOAT_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY NPY_NO_EXPORT void CFLOAT_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 357 +#line 363 NPY_NO_EXPORT void CFLOAT_isnan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 357 +#line 363 
NPY_NO_EXPORT void CFLOAT_isinf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 357 +#line 363 NPY_NO_EXPORT void CFLOAT_isfinite(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2268,20 +2276,20 @@ CFLOAT__arg(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED NPY_NO_EXPORT void CFLOAT_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 386 +#line 392 NPY_NO_EXPORT void CFLOAT_maximum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 386 +#line 392 NPY_NO_EXPORT void CFLOAT_minimum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 394 +#line 400 NPY_NO_EXPORT void CFLOAT_fmax(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 394 +#line 400 NPY_NO_EXPORT void CFLOAT_fmin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2289,14 +2297,14 @@ CFLOAT_fmin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED #define CFLOAT_true_divide CFLOAT_divide -#line 310 - #line 316 + +#line 322 NPY_NO_EXPORT void CDOUBLE_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 316 +#line 322 NPY_NO_EXPORT void CDOUBLE_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2311,36 +2319,36 @@ CDOUBLE_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNU NPY_NO_EXPORT void CDOUBLE_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CDOUBLE_greater(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CDOUBLE_greater_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CDOUBLE_less(char **args, npy_intp *dimensions, 
npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CDOUBLE_less_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CDOUBLE_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CDOUBLE_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 343 +#line 349 NPY_NO_EXPORT void CDOUBLE_logical_and(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 343 +#line 349 NPY_NO_EXPORT void CDOUBLE_logical_or(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2350,15 +2358,15 @@ CDOUBLE_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void *NP NPY_NO_EXPORT void CDOUBLE_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 357 +#line 363 NPY_NO_EXPORT void CDOUBLE_isnan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 357 +#line 363 NPY_NO_EXPORT void CDOUBLE_isinf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 357 +#line 363 NPY_NO_EXPORT void CDOUBLE_isfinite(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2384,20 +2392,20 @@ CDOUBLE__arg(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE NPY_NO_EXPORT void CDOUBLE_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 386 +#line 392 NPY_NO_EXPORT void CDOUBLE_maximum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 386 +#line 392 NPY_NO_EXPORT void CDOUBLE_minimum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 394 +#line 400 NPY_NO_EXPORT void CDOUBLE_fmax(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 394 +#line 400 NPY_NO_EXPORT void 
CDOUBLE_fmin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2405,14 +2413,14 @@ CDOUBLE_fmin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE #define CDOUBLE_true_divide CDOUBLE_divide -#line 310 - #line 316 + +#line 322 NPY_NO_EXPORT void CLONGDOUBLE_add(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 316 +#line 322 NPY_NO_EXPORT void CLONGDOUBLE_subtract(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2427,36 +2435,36 @@ CLONGDOUBLE_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY NPY_NO_EXPORT void CLONGDOUBLE_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CLONGDOUBLE_greater(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CLONGDOUBLE_greater_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CLONGDOUBLE_less(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CLONGDOUBLE_less_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CLONGDOUBLE_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 334 +#line 340 NPY_NO_EXPORT void CLONGDOUBLE_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 343 +#line 349 NPY_NO_EXPORT void CLONGDOUBLE_logical_and(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 343 +#line 349 NPY_NO_EXPORT void CLONGDOUBLE_logical_or(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2466,15 +2474,15 @@ CLONGDOUBLE_logical_xor(char **args, npy_intp *dimensions, npy_intp *steps, void NPY_NO_EXPORT void 
CLONGDOUBLE_logical_not(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 357 +#line 363 NPY_NO_EXPORT void CLONGDOUBLE_isnan(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 357 +#line 363 NPY_NO_EXPORT void CLONGDOUBLE_isinf(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 357 +#line 363 NPY_NO_EXPORT void CLONGDOUBLE_isfinite(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2500,20 +2508,20 @@ CLONGDOUBLE__arg(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_U NPY_NO_EXPORT void CLONGDOUBLE_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 386 +#line 392 NPY_NO_EXPORT void CLONGDOUBLE_maximum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 386 +#line 392 NPY_NO_EXPORT void CLONGDOUBLE_minimum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 394 +#line 400 NPY_NO_EXPORT void CLONGDOUBLE_fmax(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 394 +#line 400 NPY_NO_EXPORT void CLONGDOUBLE_fmin(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2544,81 +2552,81 @@ TIMEDELTA_absolute(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY NPY_NO_EXPORT void TIMEDELTA_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 427 +#line 433 NPY_NO_EXPORT void DATETIME__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); -#line 435 +#line 441 NPY_NO_EXPORT void DATETIME_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 435 +#line 441 NPY_NO_EXPORT void DATETIME_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 435 +#line 441 NPY_NO_EXPORT void DATETIME_greater(char **args, npy_intp *dimensions, 
npy_intp *steps, void *NPY_UNUSED(func)); -#line 435 +#line 441 NPY_NO_EXPORT void DATETIME_greater_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 435 +#line 441 NPY_NO_EXPORT void DATETIME_less(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 435 +#line 441 NPY_NO_EXPORT void DATETIME_less_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 443 +#line 449 NPY_NO_EXPORT void DATETIME_maximum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 443 +#line 449 NPY_NO_EXPORT void DATETIME_minimum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 427 +#line 433 NPY_NO_EXPORT void TIMEDELTA__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); -#line 435 +#line 441 NPY_NO_EXPORT void TIMEDELTA_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 435 +#line 441 NPY_NO_EXPORT void TIMEDELTA_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 435 +#line 441 NPY_NO_EXPORT void TIMEDELTA_greater(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 435 +#line 441 NPY_NO_EXPORT void TIMEDELTA_greater_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 435 +#line 441 NPY_NO_EXPORT void TIMEDELTA_less(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 435 +#line 441 NPY_NO_EXPORT void TIMEDELTA_less_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 443 +#line 449 NPY_NO_EXPORT void TIMEDELTA_maximum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 443 +#line 449 NPY_NO_EXPORT void TIMEDELTA_minimum(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); @@ -2683,27 +2691,27 @@ 
TIMEDELTA_mm_d_divide(char **args, npy_intp *dimensions, npy_intp *steps, void * ***************************************************************************** */ -#line 511 +#line 517 NPY_NO_EXPORT void OBJECT_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 511 +#line 517 NPY_NO_EXPORT void OBJECT_not_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 511 +#line 517 NPY_NO_EXPORT void OBJECT_greater(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 511 +#line 517 NPY_NO_EXPORT void OBJECT_greater_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 511 +#line 517 NPY_NO_EXPORT void OBJECT_less(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); -#line 511 +#line 517 NPY_NO_EXPORT void OBJECT_less_equal(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src index 87b47a754..a8a58c5de 100644 --- a/numpy/core/src/umath/loops.h.src +++ b/numpy/core/src/umath/loops.h.src @@ -181,6 +181,12 @@ U@TYPE@_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_ ***************************************************************************** */ +/**begin repeat + * #TYPE = FLOAT, DOUBLE# + */ +NPY_NO_EXPORT void +@TYPE@_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat**/ /**begin repeat * Float types diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index ea92ce7de..1967db547 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -4,6 +4,7 @@ import sys import platform from numpy.testing import * +from numpy.testing.utils import gen_alignment_data import numpy.core.umath as ncu import numpy as np @@ -100,6 +101,21 @@ class TestPower(TestCase): assert_almost_equal(x**(-1), [1., 0.5, 
1./3]) assert_almost_equal(x**(0.5), [1., ncu.sqrt(2), ncu.sqrt(3)]) + for out, inp, msg in gen_alignment_data(dtype=np.float32, + type='unary'): + exp = [ncu.sqrt(i) for i in inp] + assert_almost_equal(inp**(0.5), exp, err_msg=msg) + np.sqrt(inp, out=out) + assert_equal(out, exp, err_msg=msg) + + for out, inp, msg in gen_alignment_data(dtype=np.float64, + type='unary'): + exp = [ncu.sqrt(i) for i in inp] + assert_almost_equal(inp**(0.5), exp, err_msg=msg) + np.sqrt(inp, out=out) + assert_equal(out, exp, err_msg=msg) + + def test_power_complex(self): x = np.array([1+2j, 2+3j, 3+4j]) assert_equal(x**0, [1., 1., 1.]) diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py index 40c569c0f..7a3ea7a1c 100644 --- a/numpy/testing/utils.py +++ b/numpy/testing/utils.py @@ -10,6 +10,7 @@ import re import operator import warnings from .nosetester import import_nose +from numpy.core import float32, empty, arange if sys.version_info[0] >= 3: from io import StringIO @@ -1523,3 +1524,69 @@ def assert_no_warnings(func, *args, **kw): finally: ctx.__exit__() return result + + +def gen_alignment_data(dtype=float32, type='binary', max_size=24): + """ + generator producing data with different alignment and offsets + to test simd vectorization + + Parameters + ---------- + dtype : dtype + data type to produce + type : string + 'unary': create data for unary operations, creates one input + and output array + 'binary': create data for unary operations, creates two input + and output array + max_size : integer + maximum size of data to produce + + Returns + ------- + if type is 'unary' yields one output, one input array and a message + containing information on the data + if type is 'binary' yields one output array, two input array and a message + containing information on the data + + """ + ufmt = 'unary offset=(%d, %d), size=%d, dtype=%r, %s' + bfmt = 'binary offset=(%d, %d, %d), size=%d, dtype=%r, %s' + for o in range(3): + for s in range(o + 2, max(o + 3, max_size)): + if type == 
'unary': + inp = lambda : arange(s, dtype=dtype)[o:] + out = empty((s,), dtype=dtype)[o:] + yield out, inp(), ufmt % (o, o, s, dtype, 'out of place') + yield inp(), inp(), ufmt % (o, o, s, dtype, 'in place') + yield out[1:], inp()[:-1], ufmt % \ + (o + 1, o, s - 1, dtype, 'out of place') + yield out[:-1], inp()[1:], ufmt % \ + (o, o + 1, s - 1, dtype, 'out of place') + yield inp()[:-1], inp()[1:], ufmt % \ + (o, o + 1, s - 1, dtype, 'aliased') + yield inp()[1:], inp()[:-1], ufmt % \ + (o + 1, o, s - 1, dtype, 'aliased') + if type == 'binary': + inp1 = lambda :arange(s, dtype=dtype)[o:] + inp2 = lambda :arange(s, dtype=dtype)[o:] + out = empty((s,), dtype=dtype)[o:] + yield out, inp1(), inp2(), bfmt % \ + (o, o, o, s, dtype, 'out of place') + yield inp1(), inp1(), inp2(), bfmt % \ + (o, o, o, s, dtype, 'in place1') + yield inp2(), inp1(), inp2(), bfmt % \ + (o, o, o, s, dtype, 'in place2') + yield out[1:], inp1()[:-1], inp2()[:-1], bfmt % \ + (o + 1, o, o, s - 1, dtype, 'out of place') + yield out[-1:], inp1()[1:], inp2()[:-1], bfmt % \ + (o, o + 1, o, s - 1, dtype, 'out of place') + yield out[-1:], inp1()[:-1], inp2()[1:], bfmt % \ + (o, o, o + 1, s - 1, dtype, 'out of place') + yield inp1()[1:], inp1()[:-1], inp2()[:-1], bfmt % \ + (o + 1, o, o, s - 1, dtype, 'aliased') + yield inp1()[-1:], inp1()[1:], inp2()[:-1], bfmt % \ + (o, o + 1, o, s - 1, dtype, 'aliased') + yield inp1()[-1:], inp1()[:-1], inp2()[1:], bfmt % \ + (o, o, o + 1, s - 1, dtype, 'aliased') |