summary | refs | log | tree | commit | diff
path: root/numpy/core/src
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/core/src')
-rw-r--r-- numpy/core/src/umath/loops.c.src 26
-rw-r--r-- numpy/core/src/umath/simd.inc.src 20
2 files changed, 32 insertions, 14 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 89eeb0c47..a2649ed93 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1621,21 +1621,23 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE
NPY_NO_EXPORT NPY_GCC_OPT_3 void
FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
+ if (!run_unary_@isa@_@func@_FLOAT(args, dimensions, steps)) {
+ UNARY_LOOP {
+ /*
+ * We use the AVX function to compute exp/log for scalar elements as well.
+ * This is needed to ensure the output of strided and non-strided
+ * cases match. But this worsens the performance of strided arrays.
+ * There is a plan to fix this in a subsequent patch by using gather
+ * instructions for strided arrays in the AVX function.
+ */
#if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS
- @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
+ @ISA@_@func@_FLOAT((npy_float *)op1, (npy_float *)ip1, 1);
#else
- /*
- * This is the path it would take if ISA was runtime detected, but not
- * compiled for. It fixes the error on clang6.0 which fails to compile
- * AVX512F version. Not sure if I like this idea, if during runtime it
- * detects AXV512F, it will end up running the scalar version instead
- * of AVX2.
- */
- UNARY_LOOP {
- const npy_float in1 = *(npy_float *)ip1;
- *(npy_float *)op1 = @scalarf@(in1);
- }
+ const npy_float in1 = *(npy_float *)ip1;
+ *(npy_float *)op1 = @scalarf@(in1);
#endif
+ }
+ }
}
/**end repeat1**/
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 72493e308..1c6ac4426 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -122,20 +122,36 @@ abs_ptrdiff(char *a, char *b)
/**begin repeat
* #ISA = AVX2, AVX512F#
+ * #isa = avx2, avx512f#
+ * #REGISTER_SIZE = 32, 64#
*/
/* prototypes */
-#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
/**begin repeat1
* #func = exp, log#
*/
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
static NPY_INLINE void
@ISA@_@func@_FLOAT(npy_float *, npy_float *, const npy_intp n);
+#endif
-/**end repeat1**/
+static NPY_INLINE int
+run_unary_@isa@_@func@_FLOAT(char **args, npy_intp *dimensions, npy_intp *steps)
+{
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
+ if (IS_BLOCKABLE_UNARY(sizeof(npy_float), @REGISTER_SIZE@)) {
+ @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
+ return 1;
+ }
+ else
+ return 0;
#endif
+ return 0;
+}
+
+/**end repeat1**/
/**end repeat**/