author    Charles Harris <charlesr.harris@gmail.com>    2017-02-16 15:41:27 -0700
committer GitHub <noreply@github.com>                   2017-02-16 15:41:27 -0700
commit    d55f40b1e48d5a5fbed80e00a140c9db6e19732f (patch)
tree      733aa0ac1abdcc1aadf6452be5729b7b83095a09 /numpy
parent    ba1ddc421eac7b5c3accb99a4c633490a1814838 (diff)
parent    8e8ce442e8449916a93951093cdce16cec006bcc (diff)
Merge pull request #8043 from pv/ufunc-copy-overlap
ENH: umath: ensure ufuncs are well-defined with memory overlapping inputs
Diffstat (limited to 'numpy')
-rw-r--r--  numpy/add_newdocs.py                               7
-rw-r--r--  numpy/core/code_generators/cversions.txt           3
-rw-r--r--  numpy/core/code_generators/numpy_api.py            2
-rw-r--r--  numpy/core/include/numpy/ndarraytypes.h            8
-rw-r--r--  numpy/core/setup.py                                5
-rw-r--r--  numpy/core/setup_common.py                         3
-rw-r--r--  numpy/core/src/multiarray/mapping.c               88
-rw-r--r--  numpy/core/src/multiarray/nditer_constr.c        101
-rw-r--r--  numpy/core/src/multiarray/nditer_impl.h            2
-rw-r--r--  numpy/core/src/multiarray/nditer_pywrap.c         10
-rw-r--r--  numpy/core/src/private/lowlevel_strided_loops.h  125
-rw-r--r--  numpy/core/src/umath/reduction.c                  46
-rw-r--r--  numpy/core/src/umath/ufunc_object.c              179
-rw-r--r--  numpy/core/tests/test_mem_overlap.py             466
-rw-r--r--  numpy/core/tests/test_nditer.py                  100
-rw-r--r--  numpy/core/tests/test_umath.py                    16
16 files changed, 1035 insertions, 126 deletions
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py
index 41c560074..09f4e40c4 100644
--- a/numpy/add_newdocs.py
+++ b/numpy/add_newdocs.py
@@ -169,6 +169,10 @@ add_newdoc('numpy.core', 'nditer',
with one per iteration dimension, to be tracked.
* "common_dtype" causes all the operands to be converted to
a common data type, with copying or buffering as necessary.
+ * "copy_if_overlap" causes the iterator to determine if read
+ operands have overlap with write operands, and make temporary
+ copies as necessary to avoid overlap. False positives (needless
+ copying) are possible in some cases.
* "delay_bufalloc" delays allocation of the buffers until
a reset() call is made. Allows "allocate" operands to
be initialized before their values are copied into the buffers.
@@ -208,6 +212,9 @@ add_newdoc('numpy.core', 'nditer',
copies those elements indicated by this mask.
* 'writemasked' indicates that only elements where the chosen
'arraymask' operand is True will be written to.
+ * "overlap_assume_elementwise" can be used to mark operands that are
+ accessed only in the iterator order, to allow less conservative
+ copying when "copy_if_overlap" is present.
op_dtypes : dtype or tuple of dtype(s), optional
The required data type(s) of the operands. If copying or buffering
is enabled, the data will be converted to/from their original types.
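For illustration, a minimal Python sketch of the two new flags documented above, mirroring the behavior exercised in test_iter_copy_if_overlap further down (array shapes are arbitrary):

    import numpy as np

    x = np.arange(10)
    a, b = x[1:], x[:-1]   # overlapping views of the same buffer

    # "copy_if_overlap" makes the iterator copy the write operand, so
    # the read operand is no longer aliased during iteration.
    it = np.nditer([a, b], ['copy_if_overlap'],
                   [['readonly'], ['readwrite']])
    assert not np.shares_memory(*it.operands)

    # With views of exactly the same data, "overlap_assume_elementwise"
    # promises access in iterator order only, so no copy is made.
    it = np.nditer([x, x], ['copy_if_overlap'],
                   [['readonly', 'overlap_assume_elementwise'],
                    ['readwrite', 'overlap_assume_elementwise']])
    assert it.operands[0] is x and it.operands[1] is x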
diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt
index 9ade153f5..54140f24a 100644
--- a/numpy/core/code_generators/cversions.txt
+++ b/numpy/core/code_generators/cversions.txt
@@ -34,3 +34,6 @@
# Version 10 (NumPy 1.11) No change.
# Version 10 (NumPy 1.12) No change.
0x0000000a = 9b8bce614655d3eb02acddcb508203cb
+
+# Version 11 (NumPy 1.13) Added PyArray_MapIterArrayCopyIfOverlap
+0x0000000b = edb1ba83730c650fd9bc5772a919cda7
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index 972966627..d1406e3b2 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -344,6 +344,8 @@ multiarray_funcs_api = {
# End 1.9 API
'PyArray_CheckAnyScalarExact': (300, NonNull(1)),
# End 1.10 API
+ 'PyArray_MapIterArrayCopyIfOverlap': (301,),
+ # End 1.13 API
}
ufunc_types_api = {
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index a9848f434..7f6fe6524 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1008,6 +1008,12 @@ typedef void (NpyIter_GetMultiIndexFunc)(NpyIter *iter,
#define NPY_ITER_DELAY_BUFALLOC 0x00000800
/* When NPY_KEEPORDER is specified, disable reversing negative-stride axes */
#define NPY_ITER_DONT_NEGATE_STRIDES 0x00001000
+/*
+ * If output operands overlap with other operands (based on heuristics that
+ * have false positives but no false negatives), make temporary copies to
+ * eliminate overlap.
+ */
+#define NPY_ITER_COPY_IF_OVERLAP 0x00002000
/*** Per-operand flags that may be passed to the iterator constructors ***/
@@ -1039,6 +1045,8 @@ typedef void (NpyIter_GetMultiIndexFunc)(NpyIter *iter,
#define NPY_ITER_WRITEMASKED 0x10000000
/* This array is the mask for all WRITEMASKED operands */
#define NPY_ITER_ARRAYMASK 0x20000000
+/* Assume iterator order data access for COPY_IF_OVERLAP */
+#define NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE 0x40000000
#define NPY_ITER_GLOBAL_FLAGS 0x0000ffff
#define NPY_ITER_PER_OP_FLAGS 0xffff0000
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 01070cc74..f45042bec 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -901,7 +901,8 @@ def configuration(parent_package='',top_path=None):
join('src', 'umath', 'loops.c.src'),
join('src', 'umath', 'ufunc_object.c'),
join('src', 'umath', 'scalarmath.c.src'),
- join('src', 'umath', 'ufunc_type_resolution.c')]
+ join('src', 'umath', 'ufunc_type_resolution.c'),
+ join('src', 'private', 'mem_overlap.c')]
umath_deps = [
generate_umath_py,
@@ -911,6 +912,8 @@ def configuration(parent_package='',top_path=None):
join('src', 'private', 'templ_common.h.src'),
join('src', 'umath', 'simd.inc.src'),
join(codegen_dir, 'generate_ufunc_api.py'),
+ join('src', 'private', 'lowlevel_strided_loops.h'),
+ join('src', 'private', 'mem_overlap.h'),
join('src', 'private', 'ufunc_override.h')] + npymath_sources
config.add_extension('umath',
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 596b3996c..357051cdb 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -38,7 +38,8 @@ C_ABI_VERSION = 0x01000009
# 0x0000000a - 1.10.x
# 0x0000000a - 1.11.x
# 0x0000000a - 1.12.x
-C_API_VERSION = 0x0000000a
+# 0x0000000b - 1.13.x
+C_API_VERSION = 0x0000000b
class MismatchCAPIWarning(Warning):
pass
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 70affbc9b..7a009d237 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -19,6 +19,7 @@
#include "mapping.h"
#include "lowlevel_strided_loops.h"
#include "item_selection.h"
+#include "mem_overlap.h"
#define HAS_INTEGER 1
@@ -704,6 +705,39 @@ prepare_index(PyArrayObject *self, PyObject *index,
/**
+ * Check if self has memory overlap with one of the index arrays, or with extra_op.
+ *
+ * @returns 1 if memory overlap found, 0 if not.
+ */
+NPY_NO_EXPORT int
+index_has_memory_overlap(PyArrayObject *self,
+ int index_type, npy_index_info *indices, int num,
+ PyObject *extra_op)
+{
+ int i;
+
+ if (index_type & (HAS_FANCY | HAS_BOOL)) {
+ for (i = 0; i < num; ++i) {
+ if (indices[i].object != NULL &&
+ PyArray_Check(indices[i].object) &&
+ solve_may_share_memory(self,
+ (PyArrayObject *)indices[i].object,
+ 1) != 0) {
+ return 1;
+ }
+ }
+ }
+
+ if (extra_op != NULL && PyArray_Check(extra_op) &&
+ solve_may_share_memory(self, (PyArrayObject *)extra_op, 1) != 0) {
+ return 1;
+ }
+
+ return 0;
+}
+
+
+/**
* Get pointer for an integer index.
*
* For a purely integer index, set ptr to the memory address.
@@ -1923,7 +1957,9 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
* Either they are equivalent, or the values must
* be a scalar
*/
- (PyArray_EQUIVALENTLY_ITERABLE(ind, tmp_arr) ||
+ (PyArray_EQUIVALENTLY_ITERABLE(ind, tmp_arr,
+ PyArray_TRIVIALLY_ITERABLE_OP_READ,
+ PyArray_TRIVIALLY_ITERABLE_OP_READ) ||
(PyArray_NDIM(tmp_arr) == 0 &&
PyArray_TRIVIALLY_ITERABLE(tmp_arr))) &&
/* Check if the type is equivalent to INTP */
@@ -3130,19 +3166,22 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
/*NUMPY_API
*
- * Use advanced indexing to iterate an array. Please note
- * that most of this public API is currently not guaranteed
- * to stay the same between versions. If you plan on using
- * it, please consider adding more utility functions here
- * to accommodate new features.
+ * Same as PyArray_MapIterArray, but:
+ *
+ * If copy_if_overlap != 0, check if `a` has memory overlap with any of the
+ * arrays in `index` and with `extra_op`. If yes, make copies as appropriate
+ * to avoid problems if `a` is modified during the iteration.
+ * `iter->array` may contain a copied array (with UPDATEIFCOPY set).
*/
NPY_NO_EXPORT PyObject *
-PyArray_MapIterArray(PyArrayObject * a, PyObject * index)
+PyArray_MapIterArrayCopyIfOverlap(PyArrayObject * a, PyObject * index,
+ int copy_if_overlap, PyArrayObject *extra_op)
{
PyArrayMapIterObject * mit = NULL;
PyArrayObject *subspace = NULL;
npy_index_info indices[NPY_MAXDIMS * 2 + 1];
int i, index_num, ndim, fancy_ndim, index_type;
+ PyArrayObject *a_copy = NULL;
index_type = prepare_index(a, index, indices, &index_num,
&ndim, &fancy_ndim, 0);
@@ -3151,6 +3190,28 @@ PyArray_MapIterArray(PyArrayObject * a, PyObject * index)
return NULL;
}
+ if (copy_if_overlap && index_has_memory_overlap(a, index_type, indices,
+ index_num,
+ (PyObject *)extra_op)) {
+ /* Make a copy of the input array */
+ a_copy = (PyArrayObject *)PyArray_NewLikeArray(a, NPY_ANYORDER,
+ NULL, 0);
+ if (a_copy == NULL) {
+ goto fail;
+ }
+
+ if (PyArray_CopyInto(a_copy, a) != 0) {
+ goto fail;
+ }
+
+ Py_INCREF(a);
+ if (PyArray_SetUpdateIfCopyBase(a_copy, a) < 0) {
+ goto fail;
+ }
+
+ a = a_copy;
+ }
+
/* If it is not a pure fancy index, need to get the subspace */
if (index_type != HAS_FANCY) {
if (get_view_from_index(a, &subspace, indices, index_num, 1) < 0) {
@@ -3178,6 +3239,7 @@ PyArray_MapIterArray(PyArrayObject * a, PyObject * index)
goto fail;
}
+ Py_XDECREF(a_copy);
Py_XDECREF(subspace);
PyArray_MapIterReset(mit);
@@ -3188,6 +3250,7 @@ PyArray_MapIterArray(PyArrayObject * a, PyObject * index)
return (PyObject *)mit;
fail:
+ Py_XDECREF(a_copy);
Py_XDECREF(subspace);
Py_XDECREF((PyObject *)mit);
for (i=0; i < index_num; i++) {
@@ -3197,6 +3260,17 @@ PyArray_MapIterArray(PyArrayObject * a, PyObject * index)
}
+/*NUMPY_API
+ *
+ * Use advanced indexing to iterate an array.
+ */
+NPY_NO_EXPORT PyObject *
+PyArray_MapIterArray(PyArrayObject * a, PyObject * index)
+{
+ return PyArray_MapIterArrayCopyIfOverlap(a, index, 0, NULL);
+}
+
+
#undef HAS_INTEGER
#undef HAS_NEWAXIS
#undef HAS_SLICE
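The new entry point is reachable from Python through ufunc.at, which now passes copy_if_overlap (see the ufunc_object.c changes below). A small sketch of the resulting guarantee, mirroring test_ufunc_at_manual from the new tests:

    import numpy as np

    # The index array overlaps the array being updated; MapIter now
    # copies the operand, so the result matches the unaliased case.
    x = np.arange(10000, dtype=np.int16)
    a, ind = x[::-1], x

    a0 = a.copy()
    np.invert.at(a0, ind.copy())   # reference result, no overlap
    np.invert.at(a, ind)           # overlapping index and operand
    assert (a == a0).all()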
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index 48f4928db..b74aca01c 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -17,6 +17,8 @@
#include "arrayobject.h"
#include "templ_common.h"
+#include "mem_overlap.h"
+
/* Internal helper functions private to this file */
static int
@@ -2708,6 +2710,93 @@ npyiter_allocate_arrays(NpyIter *iter,
bufferdata = NIT_BUFFERDATA(iter);
}
+ if (flags & NPY_ITER_COPY_IF_OVERLAP) {
+ /*
+ * Perform operand memory overlap checks, if requested.
+ *
+ * If any write operand has memory overlap with any read operand,
+ * eliminate all overlap by making temporary copies, by enabling
+ * NPY_OP_ITFLAG_FORCECOPY for the write operand to force UPDATEIFCOPY.
+ *
+ * Operands with NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE enabled are not
+ * considered overlapping if the arrays are exactly the same. In this
+ * case, the iterator loops through them in the same order element by
+ * element. (As usual, the user-provided inner loop is assumed to be
+ * able to deal with this level of simple aliasing.)
+ */
+ for (iop = 0; iop < nop; ++iop) {
+ int may_share_memory = 0;
+ int iother;
+
+ if (op[iop] == NULL) {
+ /* Iterator will always allocate */
+ continue;
+ }
+
+ if (!(op_itflags[iop] & NPY_OP_ITFLAG_WRITE)) {
+ /*
+ * Copy output operands only, not inputs.
+ * A more sophisticated heuristic could be
+ * substituted here later.
+ */
+ continue;
+ }
+
+ for (iother = 0; iother < nop; ++iother) {
+ if (iother == iop || op[iother] == NULL) {
+ continue;
+ }
+
+ if (!(op_itflags[iother] & NPY_OP_ITFLAG_READ)) {
+ /* No data dependence for arrays not read from */
+ continue;
+ }
+
+ if (op_itflags[iother] & NPY_OP_ITFLAG_FORCECOPY) {
+ /* Already copied */
+ continue;
+ }
+
+ /*
+ * If the arrays are views to exactly the same data, no need
+ * to make copies, if the caller (eg ufunc) says it accesses
+ * data only in the iterator order.
+ *
+ * However, if there is internal overlap (e.g. a zero stride on
+ * a non-unit dimension), a copy cannot be avoided.
+ */
+ if ((op_flags[iop] & NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE) &&
+ (op_flags[iother] & NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE) &&
+ PyArray_BYTES(op[iop]) == PyArray_BYTES(op[iother]) &&
+ PyArray_NDIM(op[iop]) == PyArray_NDIM(op[iother]) &&
+ PyArray_CompareLists(PyArray_DIMS(op[iop]),
+ PyArray_DIMS(op[iother]),
+ PyArray_NDIM(op[iop])) &&
+ PyArray_CompareLists(PyArray_STRIDES(op[iop]),
+ PyArray_STRIDES(op[iother]),
+ PyArray_NDIM(op[iop])) &&
+ PyArray_DESCR(op[iop]) == PyArray_DESCR(op[iother]) &&
+ solve_may_have_internal_overlap(op[iop], 1) == 0) {
+
+ continue;
+ }
+
+ /*
+ * Use max work = 1. If the arrays are large, it might
+ * make sense to go further.
+ */
+ may_share_memory = solve_may_share_memory(op[iop],
+ op[iother],
+ 1);
+
+ if (may_share_memory) {
+ op_itflags[iop] |= NPY_OP_ITFLAG_FORCECOPY;
+ break;
+ }
+ }
+ }
+ }
+
for (iop = 0; iop < nop; ++iop) {
/*
* Check whether there are any WRITEMASKED REDUCE operands
@@ -2797,9 +2886,15 @@ npyiter_allocate_arrays(NpyIter *iter,
NBF_STRIDES(bufferdata)[iop] = 0;
}
}
- /* If casting is required and permitted */
- else if ((op_itflags[iop] & NPY_OP_ITFLAG_CAST) &&
- (op_flags[iop] & (NPY_ITER_COPY|NPY_ITER_UPDATEIFCOPY))) {
+ /*
+ * Make a temporary copy if,
+ * 1. If casting is required and permitted, or,
+ * 2. If force-copy is requested
+ */
+ else if (((op_itflags[iop] & NPY_OP_ITFLAG_CAST) &&
+ (op_flags[iop] &
+ (NPY_ITER_COPY|NPY_ITER_UPDATEIFCOPY))) ||
+ (op_itflags[iop] & NPY_OP_ITFLAG_FORCECOPY)) {
PyArrayObject *temp;
int ondim = PyArray_NDIM(op[iop]);
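The overlap check used here (solve_may_share_memory with max work 1) is the same machinery exposed in Python as np.may_share_memory and np.shares_memory; with a small work budget it can report false positives but never false negatives. A rough sketch of that asymmetry:

    import numpy as np

    x = np.zeros((17, 34), dtype=np.int16)
    a, b = x[::2], x[1::2]    # interleaved rows, no element in common

    # Cheap bounds check: the extents overlap, so "maybe" is returned.
    assert np.may_share_memory(a, b)
    # Exact solve: proves there is no actual overlap.
    assert not np.shares_memory(a, b)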
diff --git a/numpy/core/src/multiarray/nditer_impl.h b/numpy/core/src/multiarray/nditer_impl.h
index ae24f46e6..7788d327b 100644
--- a/numpy/core/src/multiarray/nditer_impl.h
+++ b/numpy/core/src/multiarray/nditer_impl.h
@@ -122,6 +122,8 @@
#define NPY_OP_ITFLAG_WRITEMASKED 0x0080
/* The operand's data pointer is pointing into its buffer */
#define NPY_OP_ITFLAG_USINGBUFFER 0x0100
+/* The operand must be copied (with UPDATEIFCOPY if also ITFLAG_WRITE) */
+#define NPY_OP_ITFLAG_FORCECOPY 0x0200
/*
* The data layout of the iterator is fully specified by
diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c
index 3d035390c..5f921fda0 100644
--- a/numpy/core/src/multiarray/nditer_pywrap.c
+++ b/numpy/core/src/multiarray/nditer_pywrap.c
@@ -148,6 +148,11 @@ NpyIter_GlobalFlagsConverter(PyObject *flags_in, npy_uint32 *flags)
flag = NPY_ITER_C_INDEX;
}
break;
+ case 'i':
+ if (strcmp(str, "copy_if_overlap") == 0) {
+ flag = NPY_ITER_COPY_IF_OVERLAP;
+ }
+ break;
case 'n':
if (strcmp(str, "common_dtype") == 0) {
flag = NPY_ITER_COMMON_DTYPE;
@@ -355,6 +360,11 @@ NpyIter_OpFlagsConverter(PyObject *op_flags_in,
break;
}
break;
+ case 'o':
+ if (strcmp(str, "overlap_assume_elementwise") == 0) {
+ flag = NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
+ }
+ break;
case 'r':
if (length > 4) switch (str[4]) {
case 'o':
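The string-to-constant mapping added here is what makes the Python spellings of the new flags work; a minimal sketch, matching the first case in test_iter_copy_if_overlap below:

    import numpy as np

    a = np.arange(10)
    # 'copy_if_overlap' maps to NPY_ITER_COPY_IF_OVERLAP; a single
    # read-only operand never needs a copy.
    it = np.nditer([a], ['copy_if_overlap'], [['readonly']])
    assert it.operands[0] is a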
diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h
index 02b8c73c1..f66de14b5 100644
--- a/numpy/core/src/private/lowlevel_strided_loops.h
+++ b/numpy/core/src/private/lowlevel_strided_loops.h
@@ -2,6 +2,7 @@
#define __LOWLEVEL_STRIDED_LOOPS_H
#include "common.h"
#include <npy_config.h>
+#include "mem_overlap.h"
/*
* NOTE: This API should remain private for the time being, to allow
@@ -662,7 +663,24 @@ npy_bswap8_unaligned(char * x)
* Note: Equivalently iterable macro requires one of arr1 or arr2 be
* trivially iterable to be valid.
*/
-#define PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2) ( \
+
+/**
+ * Determine whether two arrays are safe for trivial iteration in cases where
+ * some of the arrays may be modified.
+ *
+ * In-place iteration is safe if one of the following is true:
+ *
+ * - Both arrays are read-only
+ * - The arrays do not have overlapping memory (based on a check that may be too
+ * strict)
+ * - The strides match, and the non-read-only array base addresses are equal or
+ * before the read-only one, ensuring correct data dependency.
+ */
+
+#define PyArray_TRIVIALLY_ITERABLE_OP_NOREAD 0
+#define PyArray_TRIVIALLY_ITERABLE_OP_READ 1
+
+#define PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) ( \
PyArray_NDIM(arr1) == PyArray_NDIM(arr2) && \
PyArray_CompareLists(PyArray_DIMS(arr1), \
PyArray_DIMS(arr2), \
@@ -673,6 +691,67 @@ npy_bswap8_unaligned(char * x)
NPY_ARRAY_F_CONTIGUOUS)) \
)
+#define PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size, arr) ( \
+ size == 1 ? 0 : ((PyArray_NDIM(arr) == 1) ? \
+ PyArray_STRIDE(arr, 0) : \
+ PyArray_ITEMSIZE(arr)))
+
+static NPY_INLINE int
+PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(PyArrayObject *arr1, PyArrayObject *arr2,
+ int arr1_read, int arr2_read)
+{
+ npy_intp size1, size2, stride1, stride2;
+ int arr1_ahead = 0, arr2_ahead = 0;
+
+ if (arr1_read && arr2_read) {
+ return 1;
+ }
+
+ if (solve_may_share_memory(arr1, arr2, 1) == 0) {
+ return 1;
+ }
+
+ /*
+ * Arrays overlapping in memory may be equivalently iterable if input
+ * arrays stride ahead faster than output arrays.
+ */
+
+ size1 = PyArray_SIZE(arr1);
+ size2 = PyArray_SIZE(arr2);
+
+ stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1);
+ stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2);
+
+ /*
+ * Arrays with zero stride are never "ahead" since the element is reused
+ * (at this point we know the array extents overlap).
+ */
+
+ if (stride1 > 0) {
+ arr1_ahead = (stride1 >= stride2 &&
+ PyArray_BYTES(arr1) >= PyArray_BYTES(arr2));
+ }
+ else if (stride1 < 0) {
+ arr1_ahead = (stride1 <= stride2 &&
+ PyArray_BYTES(arr1) <= PyArray_BYTES(arr2));
+ }
+
+ if (stride2 > 0) {
+ arr2_ahead = (stride2 >= stride1 &&
+ PyArray_BYTES(arr2) >= PyArray_BYTES(arr1));
+ }
+ else if (stride2 < 0) {
+ arr2_ahead = (stride2 <= stride1 &&
+ PyArray_BYTES(arr2) <= PyArray_BYTES(arr1));
+ }
+
+ return (!arr1_read || arr1_ahead) && (!arr2_read || arr2_ahead);
+}
+
+#define PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2, arr1_read, arr2_read) ( \
+ PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) && \
+ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK( \
+ arr1, arr2, arr1_read, arr2_read))
#define PyArray_TRIVIALLY_ITERABLE(arr) ( \
PyArray_NDIM(arr) <= 1 || \
PyArray_CHKFLAGS(arr, NPY_ARRAY_C_CONTIGUOUS) || \
@@ -687,15 +766,16 @@ npy_bswap8_unaligned(char * x)
PyArray_ITEMSIZE(arr)));
-#define PyArray_TRIVIALLY_ITERABLE_PAIR(arr1, arr2) (\
+#define PyArray_TRIVIALLY_ITERABLE_PAIR(arr1, arr2, arr1_read, arr2_read) ( \
PyArray_TRIVIALLY_ITERABLE(arr1) && \
(PyArray_NDIM(arr2) == 0 || \
- PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2) || \
+ PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) || \
(PyArray_NDIM(arr1) == 0 && \
PyArray_TRIVIALLY_ITERABLE(arr2) \
) \
- ) \
- )
+ ) && \
+ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(arr1, arr2, arr1_read, arr2_read) \
+ )
#define PyArray_PREPARE_TRIVIAL_PAIR_ITERATION(arr1, arr2, \
count, \
data1, data2, \
@@ -705,33 +785,32 @@ npy_bswap8_unaligned(char * x)
count = ((size1 > size2) || size1 == 0) ? size1 : size2; \
data1 = PyArray_BYTES(arr1); \
data2 = PyArray_BYTES(arr2); \
- stride1 = (size1 == 1 ? 0 : ((PyArray_NDIM(arr1) == 1) ? \
- PyArray_STRIDE(arr1, 0) : \
- PyArray_ITEMSIZE(arr1))); \
- stride2 = (size2 == 1 ? 0 : ((PyArray_NDIM(arr2) == 1) ? \
- PyArray_STRIDE(arr2, 0) : \
- PyArray_ITEMSIZE(arr2))); \
+ stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1); \
+ stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2); \
}
-#define PyArray_TRIVIALLY_ITERABLE_TRIPLE(arr1, arr2, arr3) (\
+#define PyArray_TRIVIALLY_ITERABLE_TRIPLE(arr1, arr2, arr3, arr1_read, arr2_read, arr3_read) ( \
PyArray_TRIVIALLY_ITERABLE(arr1) && \
((PyArray_NDIM(arr2) == 0 && \
(PyArray_NDIM(arr3) == 0 || \
- PyArray_EQUIVALENTLY_ITERABLE(arr1, arr3) \
+ PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr3) \
) \
) || \
- (PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2) && \
+ (PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) && \
(PyArray_NDIM(arr3) == 0 || \
- PyArray_EQUIVALENTLY_ITERABLE(arr1, arr3) \
+ PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr3) \
) \
) || \
(PyArray_NDIM(arr1) == 0 && \
PyArray_TRIVIALLY_ITERABLE(arr2) && \
(PyArray_NDIM(arr3) == 0 || \
- PyArray_EQUIVALENTLY_ITERABLE(arr2, arr3) \
+ PyArray_EQUIVALENTLY_ITERABLE_BASE(arr2, arr3) \
) \
) \
- ) \
+ ) && \
+ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(arr1, arr2, arr1_read, arr2_read) && \
+ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(arr1, arr3, arr1_read, arr3_read) && \
+ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(arr2, arr3, arr2_read, arr3_read) \
)
#define PyArray_PREPARE_TRIVIAL_TRIPLE_ITERATION(arr1, arr2, arr3, \
@@ -746,15 +825,9 @@ npy_bswap8_unaligned(char * x)
data1 = PyArray_BYTES(arr1); \
data2 = PyArray_BYTES(arr2); \
data3 = PyArray_BYTES(arr3); \
- stride1 = (size1 == 1 ? 0 : ((PyArray_NDIM(arr1) == 1) ? \
- PyArray_STRIDE(arr1, 0) : \
- PyArray_ITEMSIZE(arr1))); \
- stride2 = (size2 == 1 ? 0 : ((PyArray_NDIM(arr2) == 1) ? \
- PyArray_STRIDE(arr2, 0) : \
- PyArray_ITEMSIZE(arr2))); \
- stride3 = (size3 == 1 ? 0 : ((PyArray_NDIM(arr3) == 1) ? \
- PyArray_STRIDE(arr3, 0) : \
- PyArray_ITEMSIZE(arr3))); \
+ stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1); \
+ stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2); \
+ stride3 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size3, arr3); \
}
#endif
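The "ahead" condition above is visible from Python for 1-D views with equal strides: when the input's base address is ahead of the output's, trivial in-place iteration is safe, and either way the result must match writing into a fresh array. A minimal sketch (cf. test_unary_ufunc_1d_manual below):

    import numpy as np

    v = np.arange(1, 2011, dtype=np.int16)
    x, y = v[10:1010], v[:1000]    # x's base address is ahead of y's

    expected = np.invert(x, out=y.copy())   # reference, no aliasing
    got = np.invert(x, out=y)               # overlapping views
    assert (got == expected).all()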
diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c
index 8079f7e0f..47598bed9 100644
--- a/numpy/core/src/umath/reduction.c
+++ b/numpy/core/src/umath/reduction.c
@@ -83,7 +83,8 @@ allocate_reduce_result(PyArrayObject *arr, npy_bool *axis_flags,
*/
static PyArrayObject *
conform_reduce_result(int ndim, npy_bool *axis_flags,
- PyArrayObject *out, int keepdims, const char *funcname)
+ PyArrayObject *out, int keepdims, const char *funcname,
+ int need_copy)
{
npy_intp strides[NPY_MAXDIMS], shape[NPY_MAXDIMS];
npy_intp *strides_out = PyArray_STRIDES(out);
@@ -151,6 +152,7 @@ conform_reduce_result(int ndim, npy_bool *axis_flags,
/* Allocate the view */
dtype = PyArray_DESCR(out);
Py_INCREF(dtype);
+
ret = (PyArrayObject_fields *)PyArray_NewFromDescr(&PyArray_Type,
dtype,
ndim, shape,
@@ -161,13 +163,41 @@ conform_reduce_result(int ndim, npy_bool *axis_flags,
if (ret == NULL) {
return NULL;
}
+
Py_INCREF(out);
if (PyArray_SetBaseObject((PyArrayObject *)ret, (PyObject *)out) < 0) {
Py_DECREF(ret);
return NULL;
}
- return (PyArrayObject *)ret;
+ if (need_copy) {
+ PyArrayObject *ret_copy;
+
+ ret_copy = (PyArrayObject *)PyArray_NewLikeArray(
+ (PyArrayObject *)ret, NPY_ANYORDER, NULL, 0);
+ if (ret_copy == NULL) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+
+ if (PyArray_CopyInto(ret_copy, (PyArrayObject *)ret) != 0) {
+ Py_DECREF(ret);
+ Py_DECREF(ret_copy);
+ return NULL;
+ }
+
+ Py_INCREF(ret);
+ if (PyArray_SetUpdateIfCopyBase(ret_copy, (PyArrayObject *)ret) < 0) {
+ Py_DECREF(ret);
+ Py_DECREF(ret_copy);
+ return NULL;
+ }
+
+ return ret_copy;
+ }
+ else {
+ return (PyArrayObject *)ret;
+ }
}
/*
@@ -201,11 +231,16 @@ PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out,
result = allocate_reduce_result(operand, axis_flags, dtype, subok);
}
else {
+ int need_copy = 0;
+
+ if (solve_may_share_memory(operand, out, 1) != 0) {
+ need_copy = 1;
+ }
+
/* Steal the dtype reference */
Py_XDECREF(dtype);
-
result = conform_reduce_result(PyArray_NDIM(operand), axis_flags,
- out, keepdims, funcname);
+ out, keepdims, funcname, need_copy);
}
return result;
@@ -445,6 +480,9 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
/*
* This either conforms 'out' to the ndim of 'operand', or allocates
* a new array appropriate for this reduction.
+ *
+ * A new array with UPDATEIFCOPY is allocated if operand and out have memory
+ * overlap.
*/
Py_INCREF(result_dtype);
result = PyArray_CreateReduceResult(operand, out,
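The UPDATEIFCOPY temporary made here is observable whenever out overlaps the reduction operand: the reduction must produce the same values as writing into a fresh array. A small sketch in the spirit of assert_copy_equivalent from the new tests:

    import numpy as np

    x = np.arange(16, dtype=np.int16).reshape(4, 4)
    out = x[0]                    # out aliases the first row of x

    expected = np.add.reduce(x, axis=0, out=out.copy())
    got = np.add.reduce(x, axis=0, out=out)
    assert (got == expected).all()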
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 62024d2e3..06ce22464 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -41,6 +41,7 @@
#include "lowlevel_strided_loops.h"
#include "ufunc_type_resolution.h"
#include "reduction.h"
+#include "mem_overlap.h"
#include "ufunc_object.h"
#include "ufunc_override.h"
@@ -1416,7 +1417,8 @@ iterator_loop(PyUFuncObject *ufunc,
/* Set up the flags */
for (i = 0; i < nin; ++i) {
op_flags[i] = NPY_ITER_READONLY |
- NPY_ITER_ALIGNED;
+ NPY_ITER_ALIGNED |
+ NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
/*
* If READWRITE flag has been set for this operand,
* then clear default READONLY flag
@@ -1431,7 +1433,8 @@ iterator_loop(PyUFuncObject *ufunc,
NPY_ITER_ALIGNED |
NPY_ITER_ALLOCATE |
NPY_ITER_NO_BROADCAST |
- NPY_ITER_NO_SUBTYPE;
+ NPY_ITER_NO_SUBTYPE |
+ NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
}
iter_flags = ufunc->iter_flags |
@@ -1440,7 +1443,22 @@ iterator_loop(PyUFuncObject *ufunc,
NPY_ITER_ZEROSIZE_OK |
NPY_ITER_BUFFERED |
NPY_ITER_GROWINNER |
- NPY_ITER_DELAY_BUFALLOC;
+ NPY_ITER_DELAY_BUFALLOC |
+ NPY_ITER_COPY_IF_OVERLAP;
+
+ /* Call the __array_prepare__ functions for already existing output arrays.
+ * Do this before creating the iterator, as the iterator may UPDATEIFCOPY
+ * some of them.
+ */
+ for (i = 0; i < nout; ++i) {
+ if (op[nin+i] == NULL) {
+ continue;
+ }
+ if (prepare_ufunc_output(ufunc, &op[nin+i],
+ arr_prep[i], arr_prep_args, i) < 0) {
+ return -1;
+ }
+ }
/*
* Allocate the iterator. Because the types of the inputs
@@ -1458,32 +1476,41 @@ iterator_loop(PyUFuncObject *ufunc,
/* Copy any allocated outputs */
op_it = NpyIter_GetOperandArray(iter);
- for (i = nin; i < nop; ++i) {
- if (op[i] == NULL) {
- op[i] = op_it[i];
- Py_INCREF(op[i]);
- }
- }
-
- /* Call the __array_prepare__ functions where necessary */
for (i = 0; i < nout; ++i) {
- if (prepare_ufunc_output(ufunc, &op[nin+i],
- arr_prep[i], arr_prep_args, i) < 0) {
- NpyIter_Deallocate(iter);
- return -1;
+ if (op[nin+i] == NULL) {
+ op[nin+i] = op_it[nin+i];
+ Py_INCREF(op[nin+i]);
+
+ /* Call the __array_prepare__ functions for the new array */
+ if (prepare_ufunc_output(ufunc, &op[nin+i],
+ arr_prep[i], arr_prep_args, i) < 0) {
+ NpyIter_Deallocate(iter);
+ return -1;
+ }
+
+ /*
+ * In case __array_prepare__ returned a different array, put the
+ * results directly there, ignoring the array allocated by the
+ * iterator.
+ *
+ * Here, we assume the user-provided __array_prepare__ behaves
+ * sensibly and doesn't return an array overlapping in memory
+ * with other operands --- the op[nin+i] array passed to it is newly
+ * allocated and doesn't have any overlap.
+ */
+ baseptrs[nin+i] = PyArray_BYTES(op[nin+i]);
+ }
+ else {
+ baseptrs[nin+i] = PyArray_BYTES(op_it[nin+i]);
}
}
/* Only do the loop if the iteration size is non-zero */
if (NpyIter_GetIterSize(iter) != 0) {
-
- /* Reset the iterator with the base pointers from the wrapped outputs */
+ /* Reset the iterator with the base pointers from possible __array_prepare__ */
for (i = 0; i < nin; ++i) {
baseptrs[i] = PyArray_BYTES(op_it[i]);
}
- for (i = nin; i < nop; ++i) {
- baseptrs[i] = PyArray_BYTES(op[i]);
- }
if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) {
NpyIter_Deallocate(iter);
return -1;
@@ -1581,7 +1608,9 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc,
}
else if (op[1] != NULL &&
PyArray_NDIM(op[1]) >= PyArray_NDIM(op[0]) &&
- PyArray_TRIVIALLY_ITERABLE_PAIR(op[0], op[1])) {
+ PyArray_TRIVIALLY_ITERABLE_PAIR(op[0], op[1],
+ PyArray_TRIVIALLY_ITERABLE_OP_READ,
+ PyArray_TRIVIALLY_ITERABLE_OP_NOREAD)) {
/* Call the __array_prepare__ if necessary */
if (prepare_ufunc_output(ufunc, &op[1],
@@ -1598,7 +1627,9 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc,
else if (nin == 2 && nout == 1) {
if (op[2] == NULL &&
(order == NPY_ANYORDER || order == NPY_KEEPORDER) &&
- PyArray_TRIVIALLY_ITERABLE_PAIR(op[0], op[1])) {
+ PyArray_TRIVIALLY_ITERABLE_PAIR(op[0], op[1],
+ PyArray_TRIVIALLY_ITERABLE_OP_READ,
+ PyArray_TRIVIALLY_ITERABLE_OP_READ)) {
PyArrayObject *tmp;
/*
* Have to choose the input with more dimensions to clone, as
@@ -1637,7 +1668,10 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc,
else if (op[2] != NULL &&
PyArray_NDIM(op[2]) >= PyArray_NDIM(op[0]) &&
PyArray_NDIM(op[2]) >= PyArray_NDIM(op[1]) &&
- PyArray_TRIVIALLY_ITERABLE_TRIPLE(op[0], op[1], op[2])) {
+ PyArray_TRIVIALLY_ITERABLE_TRIPLE(op[0], op[1], op[2],
+ PyArray_TRIVIALLY_ITERABLE_OP_READ,
+ PyArray_TRIVIALLY_ITERABLE_OP_READ,
+ PyArray_TRIVIALLY_ITERABLE_OP_NOREAD)) {
/* Call the __array_prepare__ if necessary */
if (prepare_ufunc_output(ufunc, &op[2],
@@ -1701,7 +1735,6 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc,
npy_intp *strides;
npy_intp *countptr;
- PyArrayObject **op_it;
npy_uint32 iter_flags;
if (wheremask != NULL) {
@@ -1719,7 +1752,8 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc,
for (i = 0; i < nin; ++i) {
op_flags[i] = default_op_in_flags |
NPY_ITER_READONLY |
- NPY_ITER_ALIGNED;
+ NPY_ITER_ALIGNED |
+ NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
/*
* If READWRITE flag has been set for this operand,
* then clear default READONLY flag
@@ -1730,12 +1764,19 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc,
}
}
for (i = nin; i < nop; ++i) {
+ /*
+ * We don't write to all elements, and the iterator may make
+ * UPDATEIFCOPY temporary copies. The output arrays must be considered
+ * READWRITE by the iterator, so that the elements we don't write to are
+ * copied to the possible temporary array.
+ */
op_flags[i] = default_op_out_flags |
- NPY_ITER_WRITEONLY |
+ NPY_ITER_READWRITE |
NPY_ITER_ALIGNED |
NPY_ITER_ALLOCATE |
NPY_ITER_NO_BROADCAST |
- NPY_ITER_NO_SUBTYPE;
+ NPY_ITER_NO_SUBTYPE |
+ NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
}
if (wheremask != NULL) {
op_flags[nop] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK;
@@ -1748,7 +1789,8 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc,
NPY_ITER_REFS_OK |
NPY_ITER_ZEROSIZE_OK |
NPY_ITER_BUFFERED |
- NPY_ITER_GROWINNER;
+ NPY_ITER_GROWINNER |
+ NPY_ITER_COPY_IF_OVERLAP;
/*
* Allocate the iterator. Because the types of the inputs
@@ -1768,22 +1810,37 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc,
needs_api = NpyIter_IterationNeedsAPI(iter);
- /* Copy any allocated outputs */
- op_it = NpyIter_GetOperandArray(iter);
+ /* Call the __array_prepare__ functions where necessary */
for (i = nin; i < nop; ++i) {
- if (op[i] == NULL) {
- op[i] = op_it[i];
- Py_INCREF(op[i]);
+ PyArrayObject *op_tmp;
+
+ /* prepare_ufunc_output may decref & replace pointer */
+ op_tmp = op[i];
+ Py_INCREF(op_tmp);
+
+ if (prepare_ufunc_output(ufunc, &op_tmp,
+ arr_prep[i], arr_prep_args, i) < 0) {
+ NpyIter_Deallocate(iter);
+ return -1;
}
- }
- /* Call the __array_prepare__ functions where necessary */
- for (i = 0; i < nout; ++i) {
- if (prepare_ufunc_output(ufunc, &op[nin+i],
- arr_prep[i], arr_prep_args, i) < 0) {
+ /* Validate that the prepare_ufunc_output didn't mess with pointers */
+ if (PyArray_BYTES(op_tmp) != PyArray_BYTES(op[i])) {
+ PyErr_SetString(PyExc_ValueError,
+ "The __array_prepare__ functions modified the data "
+ "pointer addresses in an invalid fashion");
+ Py_DECREF(op_tmp);
NpyIter_Deallocate(iter);
return -1;
}
+
+ /*
+ * Put the updated operand back and undo the DECREF above. If
+ * COPY_IF_OVERLAP made a temporary copy, the output will be copied in
+ * by UPDATEIFCOPY even if op[i] was changed.
+ */
+ op[i] = op_tmp;
+ Py_DECREF(op_tmp);
}
/* Only do the loop if the iteration size is non-zero */
@@ -1794,17 +1851,6 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc,
PyArray_Descr **iter_dtypes;
NPY_BEGIN_THREADS_DEF;
- /* Validate that the prepare_ufunc_output didn't mess with pointers */
- for (i = nin; i < nop; ++i) {
- if (PyArray_BYTES(op[i]) != PyArray_BYTES(op_it[i])) {
- PyErr_SetString(PyExc_ValueError,
- "The __array_prepare__ functions modified the data "
- "pointer addresses in an invalid fashion");
- NpyIter_Deallocate(iter);
- return -1;
- }
- }
-
/*
* Get the inner loop, with the possibility of specialization
* based on the fixed strides.
@@ -2265,7 +2311,8 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
for (i = 0; i < nin; ++i) {
op_flags[i] = NPY_ITER_READONLY |
NPY_ITER_COPY |
- NPY_ITER_ALIGNED;
+ NPY_ITER_ALIGNED |
+ NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
/*
* If READWRITE flag has been set for this operand,
* then clear default READONLY flag
@@ -2280,14 +2327,16 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
NPY_ITER_UPDATEIFCOPY|
NPY_ITER_ALIGNED|
NPY_ITER_ALLOCATE|
- NPY_ITER_NO_BROADCAST;
+ NPY_ITER_NO_BROADCAST|
+ NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
}
iter_flags = ufunc->iter_flags |
NPY_ITER_MULTI_INDEX |
NPY_ITER_REFS_OK |
NPY_ITER_REDUCE_OK |
- NPY_ITER_ZEROSIZE_OK;
+ NPY_ITER_ZEROSIZE_OK |
+ NPY_ITER_COPY_IF_OVERLAP;
/* Create the iterator */
iter = NpyIter_AdvancedNew(nop, op, iter_flags,
@@ -3174,11 +3223,16 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
!PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(out)))) {
need_outer_iterator = 1;
}
+ /* If input and output overlap in memory, use iterator to figure it out */
+ else if (out != NULL && solve_may_share_memory(out, arr, NPY_MAY_SHARE_BOUNDS) != 0) {
+ need_outer_iterator = 1;
+ }
if (need_outer_iterator) {
int ndim_iter = 0;
npy_uint32 flags = NPY_ITER_ZEROSIZE_OK|
- NPY_ITER_REFS_OK;
+ NPY_ITER_REFS_OK|
+ NPY_ITER_COPY_IF_OVERLAP;
PyArray_Descr **op_dtypes_param = NULL;
/*
@@ -3592,7 +3646,8 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
if (need_outer_iterator) {
npy_uint32 flags = NPY_ITER_ZEROSIZE_OK|
NPY_ITER_REFS_OK|
- NPY_ITER_MULTI_INDEX;
+ NPY_ITER_MULTI_INDEX|
+ NPY_ITER_COPY_IF_OVERLAP;
/*
* The way reduceat is set up, we can't do buffering,
@@ -3635,6 +3690,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
/* In case COPY or UPDATEIFCOPY occurred */
op[0] = NpyIter_GetOperandArray(iter)[0];
op[1] = NpyIter_GetOperandArray(iter)[1];
+ op[2] = NpyIter_GetOperandArray(iter)[2];
if (out == NULL) {
out = op[0];
@@ -5259,11 +5315,6 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
op1_array = (PyArrayObject *)op1;
- iter = (PyArrayMapIterObject *)PyArray_MapIterArray(op1_array, idx);
- if (iter == NULL) {
- goto fail;
- }
-
/* Create second operand from number array if needed. */
if (op2 != NULL) {
op2_array = (PyArrayObject *)PyArray_FromAny(op2, NULL,
@@ -5271,7 +5322,17 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
if (op2_array == NULL) {
goto fail;
}
+ }
+ /* Create map iterator */
+ iter = (PyArrayMapIterObject *)PyArray_MapIterArrayCopyIfOverlap(
+ op1_array, idx, 1, op2_array);
+ if (iter == NULL) {
+ goto fail;
+ }
+ op1_array = iter->array; /* May be updateifcopied on overlap */
+
+ if (op2 != NULL) {
/*
* May need to swap axes so that second operand is
* iterated over correctly
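With these changes, the ufunc entry points route read/write overlap through the iterator or MapIter. The headline behaviors, sketched (cf. test_binary_ufunc_accumulate_fuzz and test_inplace_op_simple_manual below):

    import numpy as np

    # accumulate with an output that aliases its input
    a = np.arange(16, dtype=np.int16)
    out = a[::-1]
    expected = np.add.accumulate(a, out=out.copy())
    got = np.add.accumulate(a, out=out)
    assert (got == expected).all()

    # in-place binary op with a transposed view (bigger than bufsize)
    x = np.random.rand(200, 200)
    x += x.T
    assert (x == x.T).all()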
diff --git a/numpy/core/tests/test_mem_overlap.py b/numpy/core/tests/test_mem_overlap.py
index acca53856..a95a03fa4 100644
--- a/numpy/core/tests/test_mem_overlap.py
+++ b/numpy/core/tests/test_mem_overlap.py
@@ -4,9 +4,11 @@ import sys
import itertools
import numpy as np
-from numpy.testing import run_module_suite, assert_, assert_raises, assert_equal
+from numpy.testing import (run_module_suite, assert_, assert_raises, assert_equal,
+ assert_array_equal, assert_allclose)
from numpy.core.multiarray_tests import solve_diophantine, internal_overlap
+from numpy.core import umath_tests
from numpy.lib.stride_tricks import as_strided
from numpy.compat import long
@@ -84,7 +86,7 @@ def _check_assignment(srcidx, dstidx):
def test_overlapping_assignments():
- """Test automatically generated assignments which overlap in memory."""
+ # Test automatically generated assignments which overlap in memory.
inds = _indices(ndims)
@@ -107,7 +109,6 @@ def test_diophantine_fuzz():
min_count = 500//(ndim + 1)
- numbers = []
while min(feasible_count, infeasible_count) < min_count:
# Ensure big and small integer problems
A_max = 1 + rng.randint(0, 11, dtype=np.intp)**6
@@ -252,13 +253,12 @@ def test_may_share_memory_manual():
check_may_share_memory_exact(x, x.copy())
-def check_may_share_memory_easy_fuzz(get_max_work, same_steps, min_count):
- # Check that overlap problems with common strides are solved with
- # little work.
- x = np.zeros([17,34,71,97], dtype=np.int16)
-
+def iter_random_view_pairs(x, same_steps=True, equal_size=False):
rng = np.random.RandomState(1234)
+ if equal_size and same_steps:
+ raise ValueError()
+
def random_slice(n, step):
start = rng.randint(0, n+1, dtype=np.intp)
stop = rng.randint(start, n+1, dtype=np.intp)
@@ -267,31 +267,93 @@ def check_may_share_memory_easy_fuzz(get_max_work, same_steps, min_count):
step *= -1
return slice(start, stop, step)
- feasible = 0
- infeasible = 0
+ def random_slice_fixed_size(n, step, size):
+ start = rng.randint(0, n+1 - size*step)
+ stop = start + (size-1)*step + 1
+ if rng.randint(0, 2) == 0:
+ stop, start = start-1, stop-1
+ if stop < 0:
+ stop = None
+ step *= -1
+ return slice(start, stop, step)
- while min(feasible, infeasible) < min_count:
+ # First a few regular views
+ yield x, x
+ for j in range(1, 7, 3):
+ yield x[j:], x[:-j]
+ yield x[...,j:], x[...,:-j]
+
+ # An array with zero stride internal overlap
+ strides = list(x.strides)
+ strides[0] = 0
+ xp = as_strided(x, shape=x.shape, strides=strides)
+ yield x, xp
+ yield xp, xp
+
+ # An array with non-zero stride internal overlap
+ strides = list(x.strides)
+ if strides[0] > 1:
+ strides[0] = 1
+ xp = as_strided(x, shape=x.shape, strides=strides)
+ yield x, xp
+ yield xp, xp
+
+ # Then discontiguous views
+ while True:
steps = tuple(rng.randint(1, 11, dtype=np.intp)
if rng.randint(0, 5, dtype=np.intp) == 0 else 1
for j in range(x.ndim))
- if same_steps:
+ s1 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps))
+
+ t1 = np.arange(x.ndim)
+ rng.shuffle(t1)
+
+ if equal_size:
+ t2 = t1
+ else:
+ t2 = np.arange(x.ndim)
+ rng.shuffle(t2)
+
+ a = x[s1]
+
+ if equal_size:
+ if a.size == 0:
+ continue
+
+ steps2 = tuple(rng.randint(1, max(2, p//(1+pa)))
+ if rng.randint(0, 5) == 0 else 1
+ for p, s, pa in zip(x.shape, s1, a.shape))
+ s2 = tuple(random_slice_fixed_size(p, s, pa)
+ for p, s, pa in zip(x.shape, steps2, a.shape))
+ elif same_steps:
steps2 = steps
else:
steps2 = tuple(rng.randint(1, 11, dtype=np.intp)
if rng.randint(0, 5, dtype=np.intp) == 0 else 1
for j in range(x.ndim))
- t1 = np.arange(x.ndim)
- rng.shuffle(t1)
-
- t2 = np.arange(x.ndim)
- rng.shuffle(t2)
+ if not equal_size:
+ s2 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps2))
- s1 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps))
- s2 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps2))
- a = x[s1].transpose(t1)
+ a = a.transpose(t1)
b = x[s2].transpose(t2)
+ yield a, b
+
+
+def check_may_share_memory_easy_fuzz(get_max_work, same_steps, min_count):
+ # Check that overlap problems with common strides are solved with
+ # little work.
+ x = np.zeros([17,34,71,97], dtype=np.int16)
+
+ feasible = 0
+ infeasible = 0
+
+ pair_iter = iter_random_view_pairs(x, same_steps)
+
+ while min(feasible, infeasible) < min_count:
+ a, b = next(pair_iter)
+
bounds_overlap = np.may_share_memory(a, b)
may_share_answer = np.may_share_memory(a, b)
easy_answer = np.may_share_memory(a, b, max_work=get_max_work(a, b))
@@ -299,11 +361,10 @@ def check_may_share_memory_easy_fuzz(get_max_work, same_steps, min_count):
if easy_answer != exact_answer:
# assert_equal is slow...
- assert_equal(easy_answer, exact_answer, err_msg=repr((s1, s2)))
+ assert_equal(easy_answer, exact_answer)
if may_share_answer != bounds_overlap:
- assert_equal(may_share_answer, bounds_overlap,
- err_msg=repr((s1, s2)))
+ assert_equal(may_share_answer, bounds_overlap)
if bounds_overlap:
if exact_answer:
@@ -524,5 +585,364 @@ def test_non_ndarray_inputs():
assert_(np.may_share_memory(cls(x[1::3]), x[::2]))
+def view_element_first_byte(x):
+ """Construct an array viewing the first byte of each element of `x`"""
+ from numpy.lib.stride_tricks import DummyArray
+ interface = dict(x.__array_interface__)
+ interface['typestr'] = '|b1'
+ interface['descr'] = [('', '|b1')]
+ return np.asarray(DummyArray(interface, x))
+
+
+def assert_copy_equivalent(operation, args, out, **kwargs):
+ """
+ Check that operation(*args, out=out) produces results
+ equivalent to out[...] = operation(*args, out=out.copy())
+ """
+
+ kwargs['out'] = out
+ kwargs2 = dict(kwargs)
+ kwargs2['out'] = out.copy()
+
+ out_orig = out.copy()
+ out[...] = operation(*args, **kwargs2)
+ expected = out.copy()
+ out[...] = out_orig
+
+ got = operation(*args, **kwargs).copy()
+
+ if (got != expected).any():
+ assert_equal(got, expected)
+
+
+class TestUFunc(object):
+ """
+ Test ufunc call memory overlap handling
+ """
+
+ def check_unary_fuzz(self, operation, get_out_axis_size, dtype=np.int16,
+ count=5000):
+ shapes = [7, 13, 8, 21, 29, 32]
+
+ rng = np.random.RandomState(1234)
+
+ for ndim in range(1, 6):
+ x = rng.randint(0, 2**16, size=shapes[:ndim]).astype(dtype)
+
+ it = iter_random_view_pairs(x, same_steps=False, equal_size=True)
+
+ min_count = count // (ndim + 1)**2
+
+ overlapping = 0
+ while overlapping < min_count:
+ a, b = next(it)
+
+ a_orig = a.copy()
+ b_orig = b.copy()
+
+ if get_out_axis_size is None:
+ assert_copy_equivalent(operation, [a], out=b)
+
+ if np.shares_memory(a, b):
+ overlapping += 1
+ else:
+ for axis in itertools.chain(range(ndim), [None]):
+ a[...] = a_orig
+ b[...] = b_orig
+
+ # Determine size for reduction axis (None if scalar)
+ outsize, scalarize = get_out_axis_size(a, b, axis)
+ if outsize == 'skip':
+ continue
+
+ # Slice b to get an output array of the correct size
+ sl = [slice(None)] * ndim
+ if axis is None:
+ if outsize is None:
+ sl = [slice(0, 1)] + [0]*(ndim - 1)
+ else:
+ sl = [slice(0, outsize)] + [0]*(ndim - 1)
+ else:
+ if outsize is None:
+ k = b.shape[axis]//2
+ if ndim == 1:
+ sl[axis] = slice(k, k + 1)
+ else:
+ sl[axis] = k
+ else:
+ assert b.shape[axis] >= outsize
+ sl[axis] = slice(0, outsize)
+ b_out = b[tuple(sl)]
+
+ if scalarize:
+ b_out = b_out.reshape([])
+
+ if np.shares_memory(a, b_out):
+ overlapping += 1
+
+ # Check result
+ assert_copy_equivalent(operation, [a], out=b_out, axis=axis)
+
+ def test_unary_ufunc_call_fuzz(self):
+ self.check_unary_fuzz(np.invert, None, np.int16)
+
+ def test_binary_ufunc_accumulate_fuzz(self):
+ def get_out_axis_size(a, b, axis):
+ if axis is None:
+ if a.ndim == 1:
+ return a.size, False
+ else:
+ return 'skip', False # accumulate doesn't support this
+ else:
+ return a.shape[axis], False
+
+ self.check_unary_fuzz(np.add.accumulate, get_out_axis_size,
+ dtype=np.int16, count=500)
+
+ def test_binary_ufunc_reduce_fuzz(self):
+ def get_out_axis_size(a, b, axis):
+ return None, (axis is None or a.ndim == 1)
+
+ self.check_unary_fuzz(np.add.reduce, get_out_axis_size,
+ dtype=np.int16, count=500)
+
+ def test_binary_ufunc_reduceat_fuzz(self):
+ def get_out_axis_size(a, b, axis):
+ if axis is None:
+ if a.ndim == 1:
+ return a.size, False
+ else:
+ return 'skip', False # reduceat doesn't support this
+ else:
+ return a.shape[axis], False
+
+ def do_reduceat(a, out, axis):
+ if axis is None:
+ size = len(a)
+ step = size//len(out)
+ else:
+ size = a.shape[axis]
+ step = a.shape[axis] // out.shape[axis]
+ idx = np.arange(0, size, step)
+ return np.add.reduceat(a, idx, out=out, axis=axis)
+
+ self.check_unary_fuzz(do_reduceat, get_out_axis_size,
+ dtype=np.int16, count=500)
+
+ def test_binary_ufunc_reduceat_manual(self):
+ def check(ufunc, a, ind, out):
+ c1 = ufunc.reduceat(a.copy(), ind.copy(), out=out.copy())
+ c2 = ufunc.reduceat(a, ind, out=out)
+ assert_array_equal(c1, c2)
+
+ # Exactly same input/output arrays
+ a = np.arange(10000, dtype=np.int16)
+ check(np.add, a, a[::-1].copy(), a)
+
+ # Overlap with index
+ a = np.arange(10000, dtype=np.int16)
+ check(np.add, a, a[::-1], a)
+
+ def test_unary_gufunc_fuzz(self):
+ shapes = [7, 13, 8, 21, 29, 32]
+ gufunc = umath_tests.euclidean_pdist
+
+ rng = np.random.RandomState(1234)
+
+ for ndim in range(2, 6):
+ x = rng.rand(*shapes[:ndim])
+
+ it = iter_random_view_pairs(x, same_steps=False, equal_size=True)
+
+ min_count = 500 // (ndim + 1)**2
+
+ overlapping = 0
+ while overlapping < min_count:
+ a, b = next(it)
+
+ if min(a.shape[-2:]) < 2 or min(b.shape[-2:]) < 2 or a.shape[-1] < 2:
+ continue
+
+ # Ensure the shapes are so that euclidean_pdist is happy
+ if b.shape[-1] > b.shape[-2]:
+ b = b[...,0,:]
+ else:
+ b = b[...,:,0]
+
+ n = a.shape[-2]
+ p = n * (n - 1) // 2
+ if p <= b.shape[-1] and p > 0:
+ b = b[...,:p]
+ else:
+ n = max(2, int(np.sqrt(b.shape[-1]))//2)
+ p = n * (n - 1) // 2
+ a = a[...,:n,:]
+ b = b[...,:p]
+
+ # Call
+ if np.shares_memory(a, b):
+ overlapping += 1
+
+ with np.errstate(over='ignore', invalid='ignore'):
+ assert_copy_equivalent(gufunc, [a], out=b)
+
+ def test_ufunc_at_manual(self):
+ def check(ufunc, a, ind, b=None):
+ a0 = a.copy()
+ if b is None:
+ ufunc.at(a0, ind.copy())
+ c1 = a0.copy()
+ ufunc.at(a, ind)
+ c2 = a.copy()
+ else:
+ ufunc.at(a0, ind.copy(), b.copy())
+ c1 = a0.copy()
+ ufunc.at(a, ind, b)
+ c2 = a.copy()
+ assert_array_equal(c1, c2)
+
+ # Overlap with index
+ a = np.arange(10000, dtype=np.int16)
+ check(np.invert, a[::-1], a)
+
+ # Overlap with second data array
+ a = np.arange(100, dtype=np.int16)
+ ind = np.arange(0, 100, 2, dtype=np.int16)
+ check(np.add, a, ind, a[25:75])
+
+ def test_unary_ufunc_1d_manual(self):
+ # Exercise branches in PyArray_EQUIVALENTLY_ITERABLE
+
+ def check(a, b):
+ a_orig = a.copy()
+ b_orig = b.copy()
+
+ b0 = b.copy()
+ c1 = ufunc(a, out=b0)
+ c2 = ufunc(a, out=b)
+ assert_array_equal(c1, c2)
+
+ # Trigger "fancy ufunc loop" code path
+ mask = view_element_first_byte(b).view(np.bool_)
+
+ a[...] = a_orig
+ b[...] = b_orig
+ c1 = ufunc(a, out=b.copy(), where=mask.copy()).copy()
+
+ a[...] = a_orig
+ b[...] = b_orig
+ c2 = ufunc(a, out=b, where=mask.copy()).copy()
+
+ # Also, mask overlapping with output
+ a[...] = a_orig
+ b[...] = b_orig
+ c3 = ufunc(a, out=b, where=mask).copy()
+
+ assert_array_equal(c1, c2)
+ assert_array_equal(c1, c3)
+
+ dtypes = [np.int8, np.int16, np.int32, np.int64, np.float32,
+ np.float64, np.complex64, np.complex128]
+ dtypes = [np.dtype(x) for x in dtypes]
+
+ for dtype in dtypes:
+ if np.issubdtype(dtype, np.integer):
+ ufunc = np.invert
+ else:
+ ufunc = np.reciprocal
+
+ n = 1000
+ k = 10
+ indices = [
+ np.index_exp[:n],
+ np.index_exp[k:k+n],
+ np.index_exp[n-1::-1],
+ np.index_exp[k+n-1:k-1:-1],
+ np.index_exp[:2*n:2],
+ np.index_exp[k:k+2*n:2],
+ np.index_exp[2*n-1::-2],
+ np.index_exp[k+2*n-1:k-1:-2],
+ ]
+
+ for xi, yi in itertools.product(indices, indices):
+ v = np.arange(1, 1 + n*2 + k, dtype=dtype)
+ x = v[xi]
+ y = v[yi]
+
+ with np.errstate(all='ignore'):
+ check(x, y)
+
+ # Scalar cases
+ check(x[:1], y)
+ check(x[-1:], y)
+ check(x[:1].reshape([]), y)
+ check(x[-1:].reshape([]), y)
+
+ def test_unary_ufunc_where_same(self):
+ # Check behavior at wheremask overlap
+ ufunc = np.invert
+
+ def check(a, out, mask):
+ c1 = ufunc(a, out=out.copy(), where=mask.copy())
+ c2 = ufunc(a, out=out, where=mask)
+ assert_array_equal(c1, c2)
+
+ # Check behavior with same input and output arrays
+ x = np.arange(100).astype(np.bool_)
+ check(x, x, x)
+ check(x, x.copy(), x)
+ check(x, x, x.copy())
+
+ def test_binary_ufunc_1d_manual(self):
+ ufunc = np.add
+
+ def check(a, b, c):
+ c0 = c.copy()
+ c1 = ufunc(a, b, out=c0)
+ c2 = ufunc(a, b, out=c)
+ assert_array_equal(c1, c2)
+
+ for dtype in [np.int8, np.int16, np.int32, np.int64,
+ np.float32, np.float64, np.complex64, np.complex128]:
+ # Check different data dependency orders
+
+ n = 1000
+ k = 10
+
+ indices = []
+ for p in [1, 2]:
+ indices.extend([
+ np.index_exp[:p*n:p],
+ np.index_exp[k:k+p*n:p],
+ np.index_exp[p*n-1::-p],
+ np.index_exp[k+p*n-1:k-1:-p],
+ ])
+
+ for x, y, z in itertools.product(indices, indices, indices):
+ v = np.arange(6*n).astype(dtype)
+ x = v[x]
+ y = v[y]
+ z = v[z]
+
+ check(x, y, z)
+
+ # Scalar cases
+ check(x[:1], y, z)
+ check(x[-1:], y, z)
+ check(x[:1].reshape([]), y, z)
+ check(x[-1:].reshape([]), y, z)
+ check(x, y[:1], z)
+ check(x, y[-1:], z)
+ check(x, y[:1].reshape([]), z)
+ check(x, y[-1:].reshape([]), z)
+
+ def test_inplace_op_simple_manual(self):
+ rng = np.random.RandomState(1234)
+ x = rng.rand(200, 200) # bigger than bufsize
+
+ x += x.T
+ assert_array_equal(x - x.T, 0)
+
+
if __name__ == "__main__":
run_module_suite()
diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py
index f5096e023..1863f36f7 100644
--- a/numpy/core/tests/test_nditer.py
+++ b/numpy/core/tests/test_nditer.py
@@ -1139,6 +1139,94 @@ def test_iter_common_dtype():
assert_equal(i.dtypes[1], np.dtype('c16'))
assert_equal(i.dtypes[2], np.dtype('c16'))
+def test_iter_copy_if_overlap():
+ # Ensure the iterator makes copies on read/write overlap, if requested
+
+ # Copy not needed, 1 op
+ for flag in ['readonly', 'writeonly', 'readwrite']:
+ a = arange(10)
+ i = nditer([a], ['copy_if_overlap'], [[flag]])
+ assert_(i.operands[0] is a)
+
+ # Copy needed, 2 ops, read-write overlap
+ x = arange(10)
+ a = x[1:]
+ b = x[:-1]
+ i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']])
+ assert_(not np.shares_memory(*i.operands))
+
+ # Copy not needed with elementwise, 2 ops, exactly same arrays
+ x = arange(10)
+ a = x
+ b = x
+ i = nditer([a, b], ['copy_if_overlap'], [['readonly', 'overlap_assume_elementwise'],
+ ['readwrite', 'overlap_assume_elementwise']])
+ assert_(i.operands[0] is a and i.operands[1] is b)
+ i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']])
+ assert_(i.operands[0] is a and not np.shares_memory(i.operands[1], b))
+
+ # Copy not needed, 2 ops, no overlap
+ x = arange(10)
+ a = x[::2]
+ b = x[1::2]
+ i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['writeonly']])
+ assert_(i.operands[0] is a and i.operands[1] is b)
+
+ # Copy needed, 2 ops, read-write overlap
+ x = arange(4, dtype=np.int8)
+ a = x[3:]
+ b = x.view(np.int32)[:1]
+ i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['writeonly']])
+ assert_(not np.shares_memory(*i.operands))
+
+ # Copy needed, 3 ops, read-write overlap
+ for flag in ['writeonly', 'readwrite']:
+ x = np.ones([10, 10])
+ a = x
+ b = x.T
+ c = x
+ i = nditer([a, b, c], ['copy_if_overlap'],
+ [['readonly'], ['readonly'], [flag]])
+ a2, b2, c2 = i.operands
+ assert_(not np.shares_memory(a2, c2))
+ assert_(not np.shares_memory(b2, c2))
+
+ # Copy not needed, 3 ops, read-only overlap
+ x = np.ones([10, 10])
+ a = x
+ b = x.T
+ c = x
+ i = nditer([a, b, c], ['copy_if_overlap'],
+ [['readonly'], ['readonly'], ['readonly']])
+ a2, b2, c2 = i.operands
+ assert_(a is a2)
+ assert_(b is b2)
+ assert_(c is c2)
+
+ # Copy not needed, 3 ops, read-only overlap
+ x = np.ones([10, 10])
+ a = x
+ b = np.ones([10, 10])
+ c = x.T
+ i = nditer([a, b, c], ['copy_if_overlap'],
+ [['readonly'], ['writeonly'], ['readonly']])
+ a2, b2, c2 = i.operands
+ assert_(a is a2)
+ assert_(b is b2)
+ assert_(c is c2)
+
+ # Copy not needed, 3 ops, write-only overlap
+ x = np.arange(7)
+ a = x[:3]
+ b = x[3:6]
+ c = x[4:7]
+ i = nditer([a, b, c], ['copy_if_overlap'],
+ [['readonly'], ['writeonly'], ['writeonly']])
+ a2, b2, c2 = i.operands
+ assert_(a is a2)
+ assert_(b is b2)
+ assert_(c is c2)
+
def test_iter_op_axes():
# Check that custom axes work
@@ -2320,7 +2408,11 @@ def test_iter_reduction():
assert_equal(i[1].strides, (0,))
# Do the reduction
for x, y in i:
- y[...] += x
+ # Use a for loop instead of ``y[...] += x``
+ # (equivalent to ``y[...] = y[...].copy() + x``),
+ # because y has the zero strides we rely on for the reduction
+ for j in range(len(y)):
+ y[j] += x[j]
# Since no axes were specified, should have allocated a scalar
assert_equal(i.operands[1].ndim, 0)
assert_equal(i.operands[1], np.sum(a))
@@ -2371,7 +2463,11 @@ def test_iter_buffering_reduction():
assert_equal(i[1].strides, (0,))
# Do the reduction
for x, y in i:
- y[...] += x
+ # Use a for loop instead of ``y[...] += x``
+ # (equivalent to ``y[...] = y[...].copy() + x``),
+ # because y has the zero strides we rely on for the reduction
+ for j in range(len(y)):
+ y[j] += x[j]
assert_equal(b, np.sum(a, axis=1))
# Iterator inner double loop was wrong on this one
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index 436cb0355..48d8f9379 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -1452,6 +1452,22 @@ class TestSpecialMethods(TestCase):
assert_equal(x, np.array(2))
assert_equal(type(x), with_prepare)
+ def test_prepare_out(self):
+
+ class with_prepare(np.ndarray):
+ __array_priority__ = 10
+
+ def __array_prepare__(self, arr, context):
+ return np.array(arr).view(type=with_prepare)
+
+ a = np.array([1]).view(type=with_prepare)
+ x = np.add(a, a, a)
+ # Returned array is new, because of the strange
+ # __array_prepare__ above
+ assert_(not np.shares_memory(x, a))
+ assert_equal(x, np.array([2]))
+ assert_equal(type(x), with_prepare)
+
def test_failing_prepare(self):
class A(object):