author     Charles Harris <charlesr.harris@gmail.com>    2017-02-16 15:41:27 -0700
committer  GitHub <noreply@github.com>                   2017-02-16 15:41:27 -0700
commit     d55f40b1e48d5a5fbed80e00a140c9db6e19732f (patch)
tree       733aa0ac1abdcc1aadf6452be5729b7b83095a09 /numpy
parent     ba1ddc421eac7b5c3accb99a4c633490a1814838 (diff)
parent     8e8ce442e8449916a93951093cdce16cec006bcc (diff)
download   numpy-d55f40b1e48d5a5fbed80e00a140c9db6e19732f.tar.gz
Merge pull request #8043 from pv/ufunc-copy-overlap
ENH: umath: ensure ufuncs are well-defined with memory overlapping inputs
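In user-visible terms, this merge guarantees that ufunc calls whose output overlaps an input behave as if the overlapping operand had been copied first. A minimal sketch of that contract, modeled on the tests added below (test_unary_ufunc_1d_manual, test_inplace_op_simple_manual):

    import numpy as np

    # Unary ufunc whose output view overlaps the input view: with this
    # change the result is as if a temporary copy had been used.
    x = np.arange(8)
    src = x[:-1].copy()
    np.negative(x[:-1], out=x[1:])
    assert (x[1:] == -src).all()

    # Binary in-place op whose operands alias each other element by element.
    y = np.arange(9.0).reshape(3, 3)
    expected = y + y.T
    y += y.T
    assert (y == expected).all()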
Diffstat (limited to 'numpy')
-rw-r--r--  numpy/add_newdocs.py                               7
-rw-r--r--  numpy/core/code_generators/cversions.txt           3
-rw-r--r--  numpy/core/code_generators/numpy_api.py            2
-rw-r--r--  numpy/core/include/numpy/ndarraytypes.h            8
-rw-r--r--  numpy/core/setup.py                                5
-rw-r--r--  numpy/core/setup_common.py                         3
-rw-r--r--  numpy/core/src/multiarray/mapping.c               88
-rw-r--r--  numpy/core/src/multiarray/nditer_constr.c        101
-rw-r--r--  numpy/core/src/multiarray/nditer_impl.h            2
-rw-r--r--  numpy/core/src/multiarray/nditer_pywrap.c         10
-rw-r--r--  numpy/core/src/private/lowlevel_strided_loops.h  125
-rw-r--r--  numpy/core/src/umath/reduction.c                  46
-rw-r--r--  numpy/core/src/umath/ufunc_object.c              179
-rw-r--r--  numpy/core/tests/test_mem_overlap.py             466
-rw-r--r--  numpy/core/tests/test_nditer.py                  100
-rw-r--r--  numpy/core/tests/test_umath.py                    16
16 files changed, 1035 insertions, 126 deletions
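Before the diff itself, a short orientation sketch of the new Python-level iterator flags ("copy_if_overlap" and "overlap_assume_elementwise", documented in the add_newdocs.py hunk and exercised in test_iter_copy_if_overlap below):

    import numpy as np

    x = np.arange(10)

    # Read/write overlap through the same buffer: the iterator resolves
    # it by making a temporary copy of the write operand.
    it = np.nditer([x[1:], x[:-1]], ['copy_if_overlap'],
                   [['readonly'], ['readwrite']])
    assert not np.shares_memory(*it.operands)

    # No overlap: operands are passed through unchanged.
    a, b = x[::2], x[1::2]
    it = np.nditer([a, b], ['copy_if_overlap'],
                   [['readonly'], ['writeonly']])
    assert it.operands[0] is a and it.operands[1] is b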
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 41c560074..09f4e40c4 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -169,6 +169,10 @@ add_newdoc('numpy.core', 'nditer', with one per iteration dimension, to be tracked. * "common_dtype" causes all the operands to be converted to a common data type, with copying or buffering as necessary. + * "copy_if_overlap" causes the iterator to determine if read + operands have overlap with write operands, and make temporary + copies as necessary to avoid overlap. False positives (needless + copying) are possible in some cases. * "delay_bufalloc" delays allocation of the buffers until a reset() call is made. Allows "allocate" operands to be initialized before their values are copied into the buffers. @@ -208,6 +212,9 @@ add_newdoc('numpy.core', 'nditer', copies those elements indicated by this mask. * 'writemasked' indicates that only elements where the chosen 'arraymask' operand is True will be written to. + * "overlap_assume_elementwise" can be used to mark operands that are + accessed only in the iterator order, to allow less conservative + copying when "copy_if_overlap" is present. op_dtypes : dtype or tuple of dtype(s), optional The required data type(s) of the operands. If copying or buffering is enabled, the data will be converted to/from their original types. diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt index 9ade153f5..54140f24a 100644 --- a/numpy/core/code_generators/cversions.txt +++ b/numpy/core/code_generators/cversions.txt @@ -34,3 +34,6 @@ # Version 10 (NumPy 1.11) No change. # Version 10 (NumPy 1.12) No change. 0x0000000a = 9b8bce614655d3eb02acddcb508203cb + +# Version 11 (NumPy 1.13) Added PyArray_MapIterArrayCopyIfOverlap +0x0000000b = edb1ba83730c650fd9bc5772a919cda7 diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py index 972966627..d1406e3b2 100644 --- a/numpy/core/code_generators/numpy_api.py +++ b/numpy/core/code_generators/numpy_api.py @@ -344,6 +344,8 @@ multiarray_funcs_api = { # End 1.9 API 'PyArray_CheckAnyScalarExact': (300, NonNull(1)), # End 1.10 API + 'PyArray_MapIterArrayCopyIfOverlap': (301,), + # End 1.13 API } ufunc_types_api = { diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index a9848f434..7f6fe6524 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -1008,6 +1008,12 @@ typedef void (NpyIter_GetMultiIndexFunc)(NpyIter *iter, #define NPY_ITER_DELAY_BUFALLOC 0x00000800 /* When NPY_KEEPORDER is specified, disable reversing negative-stride axes */ #define NPY_ITER_DONT_NEGATE_STRIDES 0x00001000 +/* + * If output operands overlap with other operands (based on heuristics that + * has false positives but no false negatives), make temporary copies to + * eliminate overlap. 
+ */ +#define NPY_ITER_COPY_IF_OVERLAP 0x00002000 /*** Per-operand flags that may be passed to the iterator constructors ***/ @@ -1039,6 +1045,8 @@ typedef void (NpyIter_GetMultiIndexFunc)(NpyIter *iter, #define NPY_ITER_WRITEMASKED 0x10000000 /* This array is the mask for all WRITEMASKED operands */ #define NPY_ITER_ARRAYMASK 0x20000000 +/* Assume iterator order data access for COPY_IF_OVERLAP */ +#define NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE 0x40000000 #define NPY_ITER_GLOBAL_FLAGS 0x0000ffff #define NPY_ITER_PER_OP_FLAGS 0xffff0000 diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 01070cc74..f45042bec 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -901,7 +901,8 @@ def configuration(parent_package='',top_path=None): join('src', 'umath', 'loops.c.src'), join('src', 'umath', 'ufunc_object.c'), join('src', 'umath', 'scalarmath.c.src'), - join('src', 'umath', 'ufunc_type_resolution.c')] + join('src', 'umath', 'ufunc_type_resolution.c'), + join('src', 'private', 'mem_overlap.c')] umath_deps = [ generate_umath_py, @@ -911,6 +912,8 @@ def configuration(parent_package='',top_path=None): join('src', 'private', 'templ_common.h.src'), join('src', 'umath', 'simd.inc.src'), join(codegen_dir, 'generate_ufunc_api.py'), + join('src', 'private', 'lowlevel_strided_loops.h'), + join('src', 'private', 'mem_overlap.h'), join('src', 'private', 'ufunc_override.h')] + npymath_sources config.add_extension('umath', diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 596b3996c..357051cdb 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -38,7 +38,8 @@ C_ABI_VERSION = 0x01000009 # 0x0000000a - 1.10.x # 0x0000000a - 1.11.x # 0x0000000a - 1.12.x -C_API_VERSION = 0x0000000a +# 0x0000000b - 1.13.x +C_API_VERSION = 0x0000000b class MismatchCAPIWarning(Warning): pass diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 70affbc9b..7a009d237 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -19,6 +19,7 @@ #include "mapping.h" #include "lowlevel_strided_loops.h" #include "item_selection.h" +#include "mem_overlap.h" #define HAS_INTEGER 1 @@ -704,6 +705,39 @@ prepare_index(PyArrayObject *self, PyObject *index, /** + * Check if self has memory overlap with one of the index arrays, or with extra_op. + * + * @returns 1 if memory overlap found, 0 if not. + */ +NPY_NO_EXPORT int +index_has_memory_overlap(PyArrayObject *self, + int index_type, npy_index_info *indices, int num, + PyObject *extra_op) +{ + int i; + + if (index_type & (HAS_FANCY | HAS_BOOL)) { + for (i = 0; i < num; ++i) { + if (indices[i].object != NULL && + PyArray_Check(indices[i].object) && + solve_may_share_memory(self, + (PyArrayObject *)indices[i].object, + 1) != 0) { + return 1; + } + } + } + + if (extra_op != NULL && PyArray_Check(extra_op) && + solve_may_share_memory(self, (PyArrayObject *)extra_op, 1) != 0) { + return 1; + } + + return 0; +} + + +/** * Get pointer for an integer index. * * For a purely integer index, set ptr to the memory address. 
@@ -1923,7 +1957,9 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op) * Either they are equivalent, or the values must * be a scalar */ - (PyArray_EQUIVALENTLY_ITERABLE(ind, tmp_arr) || + (PyArray_EQUIVALENTLY_ITERABLE(ind, tmp_arr, + PyArray_TRIVIALLY_ITERABLE_OP_READ, + PyArray_TRIVIALLY_ITERABLE_OP_READ) || (PyArray_NDIM(tmp_arr) == 0 && PyArray_TRIVIALLY_ITERABLE(tmp_arr))) && /* Check if the type is equivalent to INTP */ @@ -3130,19 +3166,22 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type, /*NUMPY_API * - * Use advanced indexing to iterate an array. Please note - * that most of this public API is currently not guaranteed - * to stay the same between versions. If you plan on using - * it, please consider adding more utility functions here - * to accommodate new features. + * Same as PyArray_MapIterArray, but: + * + * If copy_if_overlap != 0, check if `a` has memory overlap with any of the + * arrays in `index` and with `extra_op`. If yes, make copies as appropriate + * to avoid problems if `a` is modified during the iteration. + * `iter->array` may contain a copied array (with UPDATEIFCOPY set). */ NPY_NO_EXPORT PyObject * -PyArray_MapIterArray(PyArrayObject * a, PyObject * index) +PyArray_MapIterArrayCopyIfOverlap(PyArrayObject * a, PyObject * index, + int copy_if_overlap, PyArrayObject *extra_op) { PyArrayMapIterObject * mit = NULL; PyArrayObject *subspace = NULL; npy_index_info indices[NPY_MAXDIMS * 2 + 1]; int i, index_num, ndim, fancy_ndim, index_type; + PyArrayObject *a_copy = NULL; index_type = prepare_index(a, index, indices, &index_num, &ndim, &fancy_ndim, 0); @@ -3151,6 +3190,28 @@ PyArray_MapIterArray(PyArrayObject * a, PyObject * index) return NULL; } + if (copy_if_overlap && index_has_memory_overlap(a, index_type, indices, + index_num, + (PyObject *)extra_op)) { + /* Make a copy of the input array */ + a_copy = (PyArrayObject *)PyArray_NewLikeArray(a, NPY_ANYORDER, + NULL, 0); + if (a_copy == NULL) { + goto fail; + } + + if (PyArray_CopyInto(a_copy, a) != 0) { + goto fail; + } + + Py_INCREF(a); + if (PyArray_SetUpdateIfCopyBase(a_copy, a) < 0) { + goto fail; + } + + a = a_copy; + } + /* If it is not a pure fancy index, need to get the subspace */ if (index_type != HAS_FANCY) { if (get_view_from_index(a, &subspace, indices, index_num, 1) < 0) { @@ -3178,6 +3239,7 @@ PyArray_MapIterArray(PyArrayObject * a, PyObject * index) goto fail; } + Py_XDECREF(a_copy); Py_XDECREF(subspace); PyArray_MapIterReset(mit); @@ -3188,6 +3250,7 @@ PyArray_MapIterArray(PyArrayObject * a, PyObject * index) return (PyObject *)mit; fail: + Py_XDECREF(a_copy); Py_XDECREF(subspace); Py_XDECREF((PyObject *)mit); for (i=0; i < index_num; i++) { @@ -3197,6 +3260,17 @@ PyArray_MapIterArray(PyArrayObject * a, PyObject * index) } +/*NUMPY_API + * + * Use advanced indexing to iterate an array. 
+ */ +NPY_NO_EXPORT PyObject * +PyArray_MapIterArray(PyArrayObject * a, PyObject * index) +{ + return PyArray_MapIterArrayCopyIfOverlap(a, index, 0, NULL); +} + + #undef HAS_INTEGER #undef HAS_NEWAXIS #undef HAS_SLICE diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index 48f4928db..b74aca01c 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -17,6 +17,8 @@ #include "arrayobject.h" #include "templ_common.h" +#include "mem_overlap.h" + /* Internal helper functions private to this file */ static int @@ -2708,6 +2710,93 @@ npyiter_allocate_arrays(NpyIter *iter, bufferdata = NIT_BUFFERDATA(iter); } + if (flags & NPY_ITER_COPY_IF_OVERLAP) { + /* + * Perform operand memory overlap checks, if requested. + * + * If any write operand has memory overlap with any read operand, + * eliminate all overlap by making temporary copies, by enabling + * NPY_OP_ITFLAG_FORCECOPY for the write operand to force UPDATEIFCOPY. + * + * Operands with NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE enabled are not + * considered overlapping if the arrays are exactly the same. In this + * case, the iterator loops through them in the same order element by + * element. (As usual, the user-provided inner loop is assumed to be + * able to deal with this level of simple aliasing.) + */ + for (iop = 0; iop < nop; ++iop) { + int may_share_memory = 0; + int iother; + + if (op[iop] == NULL) { + /* Iterator will always allocate */ + continue; + } + + if (!(op_itflags[iop] & NPY_OP_ITFLAG_WRITE)) { + /* + * Copy output operands only, not inputs. + * A more sophisticated heuristic could be + * substituted here later. + */ + continue; + } + + for (iother = 0; iother < nop; ++iother) { + if (iother == iop || op[iother] == NULL) { + continue; + } + + if (!(op_itflags[iother] & NPY_OP_ITFLAG_READ)) { + /* No data dependence for arrays not read from */ + continue; + } + + if (op_itflags[iother] & NPY_OP_ITFLAG_FORCECOPY) { + /* Already copied */ + continue; + } + + /* + * If the arrays are views to exactly the same data, no need + * to make copies, if the caller (eg ufunc) says it accesses + * data only in the iterator order. + * + * However, if there is internal overlap (e.g. a zero stride on + * a non-unit dimension), a copy cannot be avoided. + */ + if ((op_flags[iop] & NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE) && + (op_flags[iother] & NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE) && + PyArray_BYTES(op[iop]) == PyArray_BYTES(op[iother]) && + PyArray_NDIM(op[iop]) == PyArray_NDIM(op[iother]) && + PyArray_CompareLists(PyArray_DIMS(op[iop]), + PyArray_DIMS(op[iother]), + PyArray_NDIM(op[iop])) && + PyArray_CompareLists(PyArray_STRIDES(op[iop]), + PyArray_STRIDES(op[iother]), + PyArray_NDIM(op[iop])) && + PyArray_DESCR(op[iop]) == PyArray_DESCR(op[iother]) && + solve_may_have_internal_overlap(op[iop], 1) == 0) { + + continue; + } + + /* + * Use max work = 1. If the arrays are large, it might + * make sense to go further. 
+ */ + may_share_memory = solve_may_share_memory(op[iop], + op[iother], + 1); + + if (may_share_memory) { + op_itflags[iop] |= NPY_OP_ITFLAG_FORCECOPY; + break; + } + } + } + } + for (iop = 0; iop < nop; ++iop) { /* * Check whether there are any WRITEMASKED REDUCE operands @@ -2797,9 +2886,15 @@ npyiter_allocate_arrays(NpyIter *iter, NBF_STRIDES(bufferdata)[iop] = 0; } } - /* If casting is required and permitted */ - else if ((op_itflags[iop] & NPY_OP_ITFLAG_CAST) && - (op_flags[iop] & (NPY_ITER_COPY|NPY_ITER_UPDATEIFCOPY))) { + /* + * Make a temporary copy if, + * 1. If casting is required and permitted, or, + * 2. If force-copy is requested + */ + else if (((op_itflags[iop] & NPY_OP_ITFLAG_CAST) && + (op_flags[iop] & + (NPY_ITER_COPY|NPY_ITER_UPDATEIFCOPY))) || + (op_itflags[iop] & NPY_OP_ITFLAG_FORCECOPY)) { PyArrayObject *temp; int ondim = PyArray_NDIM(op[iop]); diff --git a/numpy/core/src/multiarray/nditer_impl.h b/numpy/core/src/multiarray/nditer_impl.h index ae24f46e6..7788d327b 100644 --- a/numpy/core/src/multiarray/nditer_impl.h +++ b/numpy/core/src/multiarray/nditer_impl.h @@ -122,6 +122,8 @@ #define NPY_OP_ITFLAG_WRITEMASKED 0x0080 /* The operand's data pointer is pointing into its buffer */ #define NPY_OP_ITFLAG_USINGBUFFER 0x0100 +/* The operand must be copied (with UPDATEIFCOPY if also ITFLAG_WRITE) */ +#define NPY_OP_ITFLAG_FORCECOPY 0x0200 /* * The data layout of the iterator is fully specified by diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c index 3d035390c..5f921fda0 100644 --- a/numpy/core/src/multiarray/nditer_pywrap.c +++ b/numpy/core/src/multiarray/nditer_pywrap.c @@ -148,6 +148,11 @@ NpyIter_GlobalFlagsConverter(PyObject *flags_in, npy_uint32 *flags) flag = NPY_ITER_C_INDEX; } break; + case 'i': + if (strcmp(str, "copy_if_overlap") == 0) { + flag = NPY_ITER_COPY_IF_OVERLAP; + } + break; case 'n': if (strcmp(str, "common_dtype") == 0) { flag = NPY_ITER_COMMON_DTYPE; @@ -355,6 +360,11 @@ NpyIter_OpFlagsConverter(PyObject *op_flags_in, break; } break; + case 'o': + if (strcmp(str, "overlap_assume_elementwise") == 0) { + flag = NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; + } + break; case 'r': if (length > 4) switch (str[4]) { case 'o': diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h index 02b8c73c1..f66de14b5 100644 --- a/numpy/core/src/private/lowlevel_strided_loops.h +++ b/numpy/core/src/private/lowlevel_strided_loops.h @@ -2,6 +2,7 @@ #define __LOWLEVEL_STRIDED_LOOPS_H #include "common.h" #include <npy_config.h> +#include "mem_overlap.h" /* * NOTE: This API should remain private for the time being, to allow @@ -662,7 +663,24 @@ npy_bswap8_unaligned(char * x) * Note: Equivalently iterable macro requires one of arr1 or arr2 be * trivially iterable to be valid. */ -#define PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2) ( \ + +/** + * Determine whether two arrays are safe for trivial iteration in cases where + * some of the arrays may be modified. + * + * In-place iteration is safe if one of the following is true: + * + * - Both arrays are read-only + * - The arrays do not have overlapping memory (based on a check that may be too + * strict) + * - The strides match, and the non-read-only array base addresses are equal or + * before the read-only one, ensuring correct data dependency. 
+ */ + +#define PyArray_TRIVIALLY_ITERABLE_OP_NOREAD 0 +#define PyArray_TRIVIALLY_ITERABLE_OP_READ 1 + +#define PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) ( \ PyArray_NDIM(arr1) == PyArray_NDIM(arr2) && \ PyArray_CompareLists(PyArray_DIMS(arr1), \ PyArray_DIMS(arr2), \ @@ -673,6 +691,67 @@ npy_bswap8_unaligned(char * x) NPY_ARRAY_F_CONTIGUOUS)) \ ) +#define PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size, arr) ( \ + size == 1 ? 0 : ((PyArray_NDIM(arr) == 1) ? \ + PyArray_STRIDE(arr, 0) : \ + PyArray_ITEMSIZE(arr))) + +static NPY_INLINE int +PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(PyArrayObject *arr1, PyArrayObject *arr2, + int arr1_read, int arr2_read) +{ + npy_intp size1, size2, stride1, stride2; + int arr1_ahead = 0, arr2_ahead = 0; + + if (arr1_read && arr2_read) { + return 1; + } + + if (solve_may_share_memory(arr1, arr2, 1) == 0) { + return 1; + } + + /* + * Arrays overlapping in memory may be equivalently iterable if input + * arrays stride ahead faster than output arrays. + */ + + size1 = PyArray_SIZE(arr1); + size2 = PyArray_SIZE(arr2); + + stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1); + stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2); + + /* + * Arrays with zero stride are never "ahead" since the element is reused + * (at this point we know the array extents overlap). + */ + + if (stride1 > 0) { + arr1_ahead = (stride1 >= stride2 && + PyArray_BYTES(arr1) >= PyArray_BYTES(arr2)); + } + else if (stride1 < 0) { + arr1_ahead = (stride1 <= stride2 && + PyArray_BYTES(arr1) <= PyArray_BYTES(arr2)); + } + + if (stride2 > 0) { + arr2_ahead = (stride2 >= stride1 && + PyArray_BYTES(arr2) >= PyArray_BYTES(arr1)); + } + else if (stride2 < 0) { + arr2_ahead = (stride2 <= stride1 && + PyArray_BYTES(arr2) <= PyArray_BYTES(arr1)); + } + + return (!arr1_read || arr1_ahead) && (!arr2_read || arr2_ahead); +} + +#define PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2, arr1_read, arr2_read) ( \ + PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) && \ + PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK( \ + arr1, arr2, arr1_read, arr2_read)) #define PyArray_TRIVIALLY_ITERABLE(arr) ( \ PyArray_NDIM(arr) <= 1 || \ PyArray_CHKFLAGS(arr, NPY_ARRAY_C_CONTIGUOUS) || \ @@ -687,15 +766,16 @@ npy_bswap8_unaligned(char * x) PyArray_ITEMSIZE(arr))); -#define PyArray_TRIVIALLY_ITERABLE_PAIR(arr1, arr2) (\ +#define PyArray_TRIVIALLY_ITERABLE_PAIR(arr1, arr2, arr1_read, arr2_read) ( \ PyArray_TRIVIALLY_ITERABLE(arr1) && \ (PyArray_NDIM(arr2) == 0 || \ - PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2) || \ + PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) || \ (PyArray_NDIM(arr1) == 0 && \ PyArray_TRIVIALLY_ITERABLE(arr2) \ ) \ - ) \ - ) + ) && \ + PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(arr1, arr2, arr1_read, arr2_read) \ + ) #define PyArray_PREPARE_TRIVIAL_PAIR_ITERATION(arr1, arr2, \ count, \ data1, data2, \ @@ -705,33 +785,32 @@ npy_bswap8_unaligned(char * x) count = ((size1 > size2) || size1 == 0) ? size1 : size2; \ data1 = PyArray_BYTES(arr1); \ data2 = PyArray_BYTES(arr2); \ - stride1 = (size1 == 1 ? 0 : ((PyArray_NDIM(arr1) == 1) ? \ - PyArray_STRIDE(arr1, 0) : \ - PyArray_ITEMSIZE(arr1))); \ - stride2 = (size2 == 1 ? 0 : ((PyArray_NDIM(arr2) == 1) ? 
\ - PyArray_STRIDE(arr2, 0) : \ - PyArray_ITEMSIZE(arr2))); \ + stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1); \ + stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2); \ } -#define PyArray_TRIVIALLY_ITERABLE_TRIPLE(arr1, arr2, arr3) (\ +#define PyArray_TRIVIALLY_ITERABLE_TRIPLE(arr1, arr2, arr3, arr1_read, arr2_read, arr3_read) ( \ PyArray_TRIVIALLY_ITERABLE(arr1) && \ ((PyArray_NDIM(arr2) == 0 && \ (PyArray_NDIM(arr3) == 0 || \ - PyArray_EQUIVALENTLY_ITERABLE(arr1, arr3) \ + PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr3) \ ) \ ) || \ - (PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2) && \ + (PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) && \ (PyArray_NDIM(arr3) == 0 || \ - PyArray_EQUIVALENTLY_ITERABLE(arr1, arr3) \ + PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr3) \ ) \ ) || \ (PyArray_NDIM(arr1) == 0 && \ PyArray_TRIVIALLY_ITERABLE(arr2) && \ (PyArray_NDIM(arr3) == 0 || \ - PyArray_EQUIVALENTLY_ITERABLE(arr2, arr3) \ + PyArray_EQUIVALENTLY_ITERABLE_BASE(arr2, arr3) \ ) \ ) \ - ) \ + ) && \ + PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(arr1, arr2, arr1_read, arr2_read) && \ + PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(arr1, arr3, arr1_read, arr3_read) && \ + PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(arr2, arr3, arr2_read, arr3_read) \ ) #define PyArray_PREPARE_TRIVIAL_TRIPLE_ITERATION(arr1, arr2, arr3, \ @@ -746,15 +825,9 @@ npy_bswap8_unaligned(char * x) data1 = PyArray_BYTES(arr1); \ data2 = PyArray_BYTES(arr2); \ data3 = PyArray_BYTES(arr3); \ - stride1 = (size1 == 1 ? 0 : ((PyArray_NDIM(arr1) == 1) ? \ - PyArray_STRIDE(arr1, 0) : \ - PyArray_ITEMSIZE(arr1))); \ - stride2 = (size2 == 1 ? 0 : ((PyArray_NDIM(arr2) == 1) ? \ - PyArray_STRIDE(arr2, 0) : \ - PyArray_ITEMSIZE(arr2))); \ - stride3 = (size3 == 1 ? 0 : ((PyArray_NDIM(arr3) == 1) ? 
\ - PyArray_STRIDE(arr3, 0) : \ - PyArray_ITEMSIZE(arr3))); \ + stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1); \ + stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2); \ + stride3 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size3, arr3); \ } #endif diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c index 8079f7e0f..47598bed9 100644 --- a/numpy/core/src/umath/reduction.c +++ b/numpy/core/src/umath/reduction.c @@ -83,7 +83,8 @@ allocate_reduce_result(PyArrayObject *arr, npy_bool *axis_flags, */ static PyArrayObject * conform_reduce_result(int ndim, npy_bool *axis_flags, - PyArrayObject *out, int keepdims, const char *funcname) + PyArrayObject *out, int keepdims, const char *funcname, + int need_copy) { npy_intp strides[NPY_MAXDIMS], shape[NPY_MAXDIMS]; npy_intp *strides_out = PyArray_STRIDES(out); @@ -151,6 +152,7 @@ conform_reduce_result(int ndim, npy_bool *axis_flags, /* Allocate the view */ dtype = PyArray_DESCR(out); Py_INCREF(dtype); + ret = (PyArrayObject_fields *)PyArray_NewFromDescr(&PyArray_Type, dtype, ndim, shape, @@ -161,13 +163,41 @@ conform_reduce_result(int ndim, npy_bool *axis_flags, if (ret == NULL) { return NULL; } + Py_INCREF(out); if (PyArray_SetBaseObject((PyArrayObject *)ret, (PyObject *)out) < 0) { Py_DECREF(ret); return NULL; } - return (PyArrayObject *)ret; + if (need_copy) { + PyArrayObject *ret_copy; + + ret_copy = (PyArrayObject *)PyArray_NewLikeArray( + (PyArrayObject *)ret, NPY_ANYORDER, NULL, 0); + if (ret_copy == NULL) { + Py_DECREF(ret); + return NULL; + } + + if (PyArray_CopyInto(ret_copy, (PyArrayObject *)ret) != 0) { + Py_DECREF(ret); + Py_DECREF(ret_copy); + return NULL; + } + + Py_INCREF(ret); + if (PyArray_SetUpdateIfCopyBase(ret_copy, (PyArrayObject *)ret) < 0) { + Py_DECREF(ret); + Py_DECREF(ret_copy); + return NULL; + } + + return ret_copy; + } + else { + return (PyArrayObject *)ret; + } } /* @@ -201,11 +231,16 @@ PyArray_CreateReduceResult(PyArrayObject *operand, PyArrayObject *out, result = allocate_reduce_result(operand, axis_flags, dtype, subok); } else { + int need_copy = 0; + + if (solve_may_share_memory(operand, out, 1) != 0) { + need_copy = 1; + } + /* Steal the dtype reference */ Py_XDECREF(dtype); - result = conform_reduce_result(PyArray_NDIM(operand), axis_flags, - out, keepdims, funcname); + out, keepdims, funcname, need_copy); } return result; @@ -445,6 +480,9 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, /* * This either conforms 'out' to the ndim of 'operand', or allocates * a new array appropriate for this reduction. + * + * A new array with UPDATEIFCOPY is allocated if operand and out have memory + * overlap. 
*/ Py_INCREF(result_dtype); result = PyArray_CreateReduceResult(operand, out, diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 62024d2e3..06ce22464 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -41,6 +41,7 @@ #include "lowlevel_strided_loops.h" #include "ufunc_type_resolution.h" #include "reduction.h" +#include "mem_overlap.h" #include "ufunc_object.h" #include "ufunc_override.h" @@ -1416,7 +1417,8 @@ iterator_loop(PyUFuncObject *ufunc, /* Set up the flags */ for (i = 0; i < nin; ++i) { op_flags[i] = NPY_ITER_READONLY | - NPY_ITER_ALIGNED; + NPY_ITER_ALIGNED | + NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; /* * If READWRITE flag has been set for this operand, * then clear default READONLY flag @@ -1431,7 +1433,8 @@ iterator_loop(PyUFuncObject *ufunc, NPY_ITER_ALIGNED | NPY_ITER_ALLOCATE | NPY_ITER_NO_BROADCAST | - NPY_ITER_NO_SUBTYPE; + NPY_ITER_NO_SUBTYPE | + NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; } iter_flags = ufunc->iter_flags | @@ -1440,7 +1443,22 @@ iterator_loop(PyUFuncObject *ufunc, NPY_ITER_ZEROSIZE_OK | NPY_ITER_BUFFERED | NPY_ITER_GROWINNER | - NPY_ITER_DELAY_BUFALLOC; + NPY_ITER_DELAY_BUFALLOC | + NPY_ITER_COPY_IF_OVERLAP; + + /* Call the __array_prepare__ functions for already existing output arrays. + * Do this before creating the iterator, as the iterator may UPDATEIFCOPY + * some of them. + */ + for (i = 0; i < nout; ++i) { + if (op[nin+i] == NULL) { + continue; + } + if (prepare_ufunc_output(ufunc, &op[nin+i], + arr_prep[i], arr_prep_args, i) < 0) { + return -1; + } + } /* * Allocate the iterator. Because the types of the inputs @@ -1458,32 +1476,41 @@ iterator_loop(PyUFuncObject *ufunc, /* Copy any allocated outputs */ op_it = NpyIter_GetOperandArray(iter); - for (i = nin; i < nop; ++i) { - if (op[i] == NULL) { - op[i] = op_it[i]; - Py_INCREF(op[i]); - } - } - - /* Call the __array_prepare__ functions where necessary */ for (i = 0; i < nout; ++i) { - if (prepare_ufunc_output(ufunc, &op[nin+i], - arr_prep[i], arr_prep_args, i) < 0) { - NpyIter_Deallocate(iter); - return -1; + if (op[nin+i] == NULL) { + op[nin+i] = op_it[nin+i]; + Py_INCREF(op[nin+i]); + + /* Call the __array_prepare__ functions for the new array */ + if (prepare_ufunc_output(ufunc, &op[nin+i], + arr_prep[i], arr_prep_args, i) < 0) { + NpyIter_Deallocate(iter); + return -1; + } + + /* + * In case __array_prepare__ returned a different array, put the + * results directly there, ignoring the array allocated by the + * iterator. + * + * Here, we assume the user-provided __array_prepare__ behaves + * sensibly and doesn't return an array overlapping in memory + * with other operands --- the op[nin+i] array passed to it is newly + * allocated and doesn't have any overlap. 
+ */ + baseptrs[nin+i] = PyArray_BYTES(op[nin+i]); + } + else { + baseptrs[nin+i] = PyArray_BYTES(op_it[nin+i]); } } /* Only do the loop if the iteration size is non-zero */ if (NpyIter_GetIterSize(iter) != 0) { - - /* Reset the iterator with the base pointers from the wrapped outputs */ + /* Reset the iterator with the base pointers from possible __array_prepare__ */ for (i = 0; i < nin; ++i) { baseptrs[i] = PyArray_BYTES(op_it[i]); } - for (i = nin; i < nop; ++i) { - baseptrs[i] = PyArray_BYTES(op[i]); - } if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) { NpyIter_Deallocate(iter); return -1; @@ -1581,7 +1608,9 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc, } else if (op[1] != NULL && PyArray_NDIM(op[1]) >= PyArray_NDIM(op[0]) && - PyArray_TRIVIALLY_ITERABLE_PAIR(op[0], op[1])) { + PyArray_TRIVIALLY_ITERABLE_PAIR(op[0], op[1], + PyArray_TRIVIALLY_ITERABLE_OP_READ, + PyArray_TRIVIALLY_ITERABLE_OP_NOREAD)) { /* Call the __prepare_array__ if necessary */ if (prepare_ufunc_output(ufunc, &op[1], @@ -1598,7 +1627,9 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc, else if (nin == 2 && nout == 1) { if (op[2] == NULL && (order == NPY_ANYORDER || order == NPY_KEEPORDER) && - PyArray_TRIVIALLY_ITERABLE_PAIR(op[0], op[1])) { + PyArray_TRIVIALLY_ITERABLE_PAIR(op[0], op[1], + PyArray_TRIVIALLY_ITERABLE_OP_READ, + PyArray_TRIVIALLY_ITERABLE_OP_READ)) { PyArrayObject *tmp; /* * Have to choose the input with more dimensions to clone, as @@ -1637,7 +1668,10 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc, else if (op[2] != NULL && PyArray_NDIM(op[2]) >= PyArray_NDIM(op[0]) && PyArray_NDIM(op[2]) >= PyArray_NDIM(op[1]) && - PyArray_TRIVIALLY_ITERABLE_TRIPLE(op[0], op[1], op[2])) { + PyArray_TRIVIALLY_ITERABLE_TRIPLE(op[0], op[1], op[2], + PyArray_TRIVIALLY_ITERABLE_OP_READ, + PyArray_TRIVIALLY_ITERABLE_OP_READ, + PyArray_TRIVIALLY_ITERABLE_OP_NOREAD)) { /* Call the __prepare_array__ if necessary */ if (prepare_ufunc_output(ufunc, &op[2], @@ -1701,7 +1735,6 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, npy_intp *strides; npy_intp *countptr; - PyArrayObject **op_it; npy_uint32 iter_flags; if (wheremask != NULL) { @@ -1719,7 +1752,8 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, for (i = 0; i < nin; ++i) { op_flags[i] = default_op_in_flags | NPY_ITER_READONLY | - NPY_ITER_ALIGNED; + NPY_ITER_ALIGNED | + NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; /* * If READWRITE flag has been set for this operand, * then clear default READONLY flag @@ -1730,12 +1764,19 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, } } for (i = nin; i < nop; ++i) { + /* + * We don't write to all elements, and the iterator may make + * UPDATEIFCOPY temporary copies. The output arrays must be considered + * READWRITE by the iterator, so that the elements we don't write to are + * copied to the possible temporary array. + */ op_flags[i] = default_op_out_flags | - NPY_ITER_WRITEONLY | + NPY_ITER_READWRITE | NPY_ITER_ALIGNED | NPY_ITER_ALLOCATE | NPY_ITER_NO_BROADCAST | - NPY_ITER_NO_SUBTYPE; + NPY_ITER_NO_SUBTYPE | + NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; } if (wheremask != NULL) { op_flags[nop] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK; @@ -1748,7 +1789,8 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, NPY_ITER_REFS_OK | NPY_ITER_ZEROSIZE_OK | NPY_ITER_BUFFERED | - NPY_ITER_GROWINNER; + NPY_ITER_GROWINNER | + NPY_ITER_COPY_IF_OVERLAP; /* * Allocate the iterator. 
Because the types of the inputs @@ -1768,22 +1810,37 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, needs_api = NpyIter_IterationNeedsAPI(iter); - /* Copy any allocated outputs */ - op_it = NpyIter_GetOperandArray(iter); + /* Call the __array_prepare__ functions where necessary */ for (i = nin; i < nop; ++i) { - if (op[i] == NULL) { - op[i] = op_it[i]; - Py_INCREF(op[i]); + PyArrayObject *op_tmp; + + /* prepare_ufunc_output may decref & replace pointer */ + op_tmp = op[i]; + Py_INCREF(op_tmp); + + if (prepare_ufunc_output(ufunc, &op_tmp, + arr_prep[i], arr_prep_args, i) < 0) { + NpyIter_Deallocate(iter); + return -1; } - } - /* Call the __array_prepare__ functions where necessary */ - for (i = 0; i < nout; ++i) { - if (prepare_ufunc_output(ufunc, &op[nin+i], - arr_prep[i], arr_prep_args, i) < 0) { + /* Validate that the prepare_ufunc_output didn't mess with pointers */ + if (PyArray_BYTES(op_tmp) != PyArray_BYTES(op[i])) { + PyErr_SetString(PyExc_ValueError, + "The __array_prepare__ functions modified the data " + "pointer addresses in an invalid fashion"); + Py_DECREF(op_tmp); NpyIter_Deallocate(iter); return -1; } + + /* + * Put the updated operand back and undo the DECREF above. If + * COPY_IF_OVERLAP made a temporary copy, the output will be copied in + * by UPDATEIFCOPY even if op[i] was changed. + */ + op[i] = op_tmp; + Py_DECREF(op_tmp); } /* Only do the loop if the iteration size is non-zero */ @@ -1794,17 +1851,6 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, PyArray_Descr **iter_dtypes; NPY_BEGIN_THREADS_DEF; - /* Validate that the prepare_ufunc_output didn't mess with pointers */ - for (i = nin; i < nop; ++i) { - if (PyArray_BYTES(op[i]) != PyArray_BYTES(op_it[i])) { - PyErr_SetString(PyExc_ValueError, - "The __array_prepare__ functions modified the data " - "pointer addresses in an invalid fashion"); - NpyIter_Deallocate(iter); - return -1; - } - } - /* * Get the inner loop, with the possibility of specialization * based on the fixed strides. 
@@ -2265,7 +2311,8 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, for (i = 0; i < nin; ++i) { op_flags[i] = NPY_ITER_READONLY | NPY_ITER_COPY | - NPY_ITER_ALIGNED; + NPY_ITER_ALIGNED | + NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; /* * If READWRITE flag has been set for this operand, * then clear default READONLY flag @@ -2280,14 +2327,16 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, NPY_ITER_UPDATEIFCOPY| NPY_ITER_ALIGNED| NPY_ITER_ALLOCATE| - NPY_ITER_NO_BROADCAST; + NPY_ITER_NO_BROADCAST| + NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; } iter_flags = ufunc->iter_flags | NPY_ITER_MULTI_INDEX | NPY_ITER_REFS_OK | NPY_ITER_REDUCE_OK | - NPY_ITER_ZEROSIZE_OK; + NPY_ITER_ZEROSIZE_OK | + NPY_ITER_COPY_IF_OVERLAP; /* Create the iterator */ iter = NpyIter_AdvancedNew(nop, op, iter_flags, @@ -3174,11 +3223,16 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(out)))) { need_outer_iterator = 1; } + /* If input and output overlap in memory, use iterator to figure it out */ + else if (out != NULL && solve_may_share_memory(out, arr, NPY_MAY_SHARE_BOUNDS) != 0) { + need_outer_iterator = 1; + } if (need_outer_iterator) { int ndim_iter = 0; npy_uint32 flags = NPY_ITER_ZEROSIZE_OK| - NPY_ITER_REFS_OK; + NPY_ITER_REFS_OK| + NPY_ITER_COPY_IF_OVERLAP; PyArray_Descr **op_dtypes_param = NULL; /* @@ -3592,7 +3646,8 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, if (need_outer_iterator) { npy_uint32 flags = NPY_ITER_ZEROSIZE_OK| NPY_ITER_REFS_OK| - NPY_ITER_MULTI_INDEX; + NPY_ITER_MULTI_INDEX| + NPY_ITER_COPY_IF_OVERLAP; /* * The way reduceat is set up, we can't do buffering, @@ -3635,6 +3690,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, /* In case COPY or UPDATEIFCOPY occurred */ op[0] = NpyIter_GetOperandArray(iter)[0]; op[1] = NpyIter_GetOperandArray(iter)[1]; + op[2] = NpyIter_GetOperandArray(iter)[2]; if (out == NULL) { out = op[0]; @@ -5259,11 +5315,6 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) op1_array = (PyArrayObject *)op1; - iter = (PyArrayMapIterObject *)PyArray_MapIterArray(op1_array, idx); - if (iter == NULL) { - goto fail; - } - /* Create second operand from number array if needed. 
*/ if (op2 != NULL) { op2_array = (PyArrayObject *)PyArray_FromAny(op2, NULL, @@ -5271,7 +5322,17 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) if (op2_array == NULL) { goto fail; } + } + /* Create map iterator */ + iter = (PyArrayMapIterObject *)PyArray_MapIterArrayCopyIfOverlap( + op1_array, idx, 1, op2_array); + if (iter == NULL) { + goto fail; + } + op1_array = iter->array; /* May be updateifcopied on overlap */ + + if (op2 != NULL) { /* * May need to swap axes so that second operand is * iterated over correctly diff --git a/numpy/core/tests/test_mem_overlap.py b/numpy/core/tests/test_mem_overlap.py index acca53856..a95a03fa4 100644 --- a/numpy/core/tests/test_mem_overlap.py +++ b/numpy/core/tests/test_mem_overlap.py @@ -4,9 +4,11 @@ import sys import itertools import numpy as np -from numpy.testing import run_module_suite, assert_, assert_raises, assert_equal +from numpy.testing import (run_module_suite, assert_, assert_raises, assert_equal, + assert_array_equal, assert_allclose) from numpy.core.multiarray_tests import solve_diophantine, internal_overlap +from numpy.core import umath_tests from numpy.lib.stride_tricks import as_strided from numpy.compat import long @@ -84,7 +86,7 @@ def _check_assignment(srcidx, dstidx): def test_overlapping_assignments(): - """Test automatically generated assignments which overlap in memory.""" + # Test automatically generated assignments which overlap in memory. inds = _indices(ndims) @@ -107,7 +109,6 @@ def test_diophantine_fuzz(): min_count = 500//(ndim + 1) - numbers = [] while min(feasible_count, infeasible_count) < min_count: # Ensure big and small integer problems A_max = 1 + rng.randint(0, 11, dtype=np.intp)**6 @@ -252,13 +253,12 @@ def test_may_share_memory_manual(): check_may_share_memory_exact(x, x.copy()) -def check_may_share_memory_easy_fuzz(get_max_work, same_steps, min_count): - # Check that overlap problems with common strides are solved with - # little work. 
- x = np.zeros([17,34,71,97], dtype=np.int16) - +def iter_random_view_pairs(x, same_steps=True, equal_size=False): rng = np.random.RandomState(1234) + if equal_size and same_steps: + raise ValueError() + def random_slice(n, step): start = rng.randint(0, n+1, dtype=np.intp) stop = rng.randint(start, n+1, dtype=np.intp) @@ -267,31 +267,93 @@ def check_may_share_memory_easy_fuzz(get_max_work, same_steps, min_count): step *= -1 return slice(start, stop, step) - feasible = 0 - infeasible = 0 + def random_slice_fixed_size(n, step, size): + start = rng.randint(0, n+1 - size*step) + stop = start + (size-1)*step + 1 + if rng.randint(0, 2) == 0: + stop, start = start-1, stop-1 + if stop < 0: + stop = None + step *= -1 + return slice(start, stop, step) - while min(feasible, infeasible) < min_count: + # First a few regular views + yield x, x + for j in range(1, 7, 3): + yield x[j:], x[:-j] + yield x[...,j:], x[...,:-j] + + # An array with zero stride internal overlap + strides = list(x.strides) + strides[0] = 0 + xp = as_strided(x, shape=x.shape, strides=strides) + yield x, xp + yield xp, xp + + # An array with non-zero stride internal overlap + strides = list(x.strides) + if strides[0] > 1: + strides[0] = 1 + xp = as_strided(x, shape=x.shape, strides=strides) + yield x, xp + yield xp, xp + + # Then discontiguous views + while True: steps = tuple(rng.randint(1, 11, dtype=np.intp) if rng.randint(0, 5, dtype=np.intp) == 0 else 1 for j in range(x.ndim)) - if same_steps: + s1 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps)) + + t1 = np.arange(x.ndim) + rng.shuffle(t1) + + if equal_size: + t2 = t1 + else: + t2 = np.arange(x.ndim) + rng.shuffle(t2) + + a = x[s1] + + if equal_size: + if a.size == 0: + continue + + steps2 = tuple(rng.randint(1, max(2, p//(1+pa))) + if rng.randint(0, 5) == 0 else 1 + for p, s, pa in zip(x.shape, s1, a.shape)) + s2 = tuple(random_slice_fixed_size(p, s, pa) + for p, s, pa in zip(x.shape, steps2, a.shape)) + elif same_steps: steps2 = steps else: steps2 = tuple(rng.randint(1, 11, dtype=np.intp) if rng.randint(0, 5, dtype=np.intp) == 0 else 1 for j in range(x.ndim)) - t1 = np.arange(x.ndim) - rng.shuffle(t1) - - t2 = np.arange(x.ndim) - rng.shuffle(t2) + if not equal_size: + s2 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps2)) - s1 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps)) - s2 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps2)) - a = x[s1].transpose(t1) + a = a.transpose(t1) b = x[s2].transpose(t2) + yield a, b + + +def check_may_share_memory_easy_fuzz(get_max_work, same_steps, min_count): + # Check that overlap problems with common strides are solved with + # little work. + x = np.zeros([17,34,71,97], dtype=np.int16) + + feasible = 0 + infeasible = 0 + + pair_iter = iter_random_view_pairs(x, same_steps) + + while min(feasible, infeasible) < min_count: + a, b = next(pair_iter) + bounds_overlap = np.may_share_memory(a, b) may_share_answer = np.may_share_memory(a, b) easy_answer = np.may_share_memory(a, b, max_work=get_max_work(a, b)) @@ -299,11 +361,10 @@ def check_may_share_memory_easy_fuzz(get_max_work, same_steps, min_count): if easy_answer != exact_answer: # assert_equal is slow... 
- assert_equal(easy_answer, exact_answer, err_msg=repr((s1, s2))) + assert_equal(easy_answer, exact_answer) if may_share_answer != bounds_overlap: - assert_equal(may_share_answer, bounds_overlap, - err_msg=repr((s1, s2))) + assert_equal(may_share_answer, bounds_overlap) if bounds_overlap: if exact_answer: @@ -524,5 +585,364 @@ def test_non_ndarray_inputs(): assert_(np.may_share_memory(cls(x[1::3]), x[::2])) +def view_element_first_byte(x): + """Construct an array viewing the first byte of each element of `x`""" + from numpy.lib.stride_tricks import DummyArray + interface = dict(x.__array_interface__) + interface['typestr'] = '|b1' + interface['descr'] = [('', '|b1')] + return np.asarray(DummyArray(interface, x)) + + +def assert_copy_equivalent(operation, args, out, **kwargs): + """ + Check that operation(*args, out=out) produces results + equivalent to out[...] = operation(*args, out=out.copy()) + """ + + kwargs['out'] = out + kwargs2 = dict(kwargs) + kwargs2['out'] = out.copy() + + out_orig = out.copy() + out[...] = operation(*args, **kwargs2) + expected = out.copy() + out[...] = out_orig + + got = operation(*args, **kwargs).copy() + + if (got != expected).any(): + assert_equal(got, expected) + + +class TestUFunc(object): + """ + Test ufunc call memory overlap handling + """ + + def check_unary_fuzz(self, operation, get_out_axis_size, dtype=np.int16, + count=5000): + shapes = [7, 13, 8, 21, 29, 32] + + rng = np.random.RandomState(1234) + + for ndim in range(1, 6): + x = rng.randint(0, 2**16, size=shapes[:ndim]).astype(dtype) + + it = iter_random_view_pairs(x, same_steps=False, equal_size=True) + + min_count = count // (ndim + 1)**2 + + overlapping = 0 + while overlapping < min_count: + a, b = next(it) + + a_orig = a.copy() + b_orig = b.copy() + + if get_out_axis_size is None: + assert_copy_equivalent(operation, [a], out=b) + + if np.shares_memory(a, b): + overlapping += 1 + else: + for axis in itertools.chain(range(ndim), [None]): + a[...] = a_orig + b[...] 
= b_orig + + # Determine size for reduction axis (None if scalar) + outsize, scalarize = get_out_axis_size(a, b, axis) + if outsize == 'skip': + continue + + # Slice b to get an output array of the correct size + sl = [slice(None)] * ndim + if axis is None: + if outsize is None: + sl = [slice(0, 1)] + [0]*(ndim - 1) + else: + sl = [slice(0, outsize)] + [0]*(ndim - 1) + else: + if outsize is None: + k = b.shape[axis]//2 + if ndim == 1: + sl[axis] = slice(k, k + 1) + else: + sl[axis] = k + else: + assert b.shape[axis] >= outsize + sl[axis] = slice(0, outsize) + b_out = b[tuple(sl)] + + if scalarize: + b_out = b_out.reshape([]) + + if np.shares_memory(a, b_out): + overlapping += 1 + + # Check result + assert_copy_equivalent(operation, [a], out=b_out, axis=axis) + + def test_unary_ufunc_call_fuzz(self): + self.check_unary_fuzz(np.invert, None, np.int16) + + def test_binary_ufunc_accumulate_fuzz(self): + def get_out_axis_size(a, b, axis): + if axis is None: + if a.ndim == 1: + return a.size, False + else: + return 'skip', False # accumulate doesn't support this + else: + return a.shape[axis], False + + self.check_unary_fuzz(np.add.accumulate, get_out_axis_size, + dtype=np.int16, count=500) + + def test_binary_ufunc_reduce_fuzz(self): + def get_out_axis_size(a, b, axis): + return None, (axis is None or a.ndim == 1) + + self.check_unary_fuzz(np.add.reduce, get_out_axis_size, + dtype=np.int16, count=500) + + def test_binary_ufunc_reduceat_fuzz(self): + def get_out_axis_size(a, b, axis): + if axis is None: + if a.ndim == 1: + return a.size, False + else: + return 'skip', False # reduceat doesn't support this + else: + return a.shape[axis], False + + def do_reduceat(a, out, axis): + if axis is None: + size = len(a) + step = size//len(out) + else: + size = a.shape[axis] + step = a.shape[axis] // out.shape[axis] + idx = np.arange(0, size, step) + return np.add.reduceat(a, idx, out=out, axis=axis) + + self.check_unary_fuzz(do_reduceat, get_out_axis_size, + dtype=np.int16, count=500) + + def test_binary_ufunc_reduceat_manual(self): + def check(ufunc, a, ind, out): + c1 = ufunc.reduceat(a.copy(), ind.copy(), out=out.copy()) + c2 = ufunc.reduceat(a, ind, out=out) + assert_array_equal(c1, c2) + + # Exactly same input/output arrays + a = np.arange(10000, dtype=np.int16) + check(np.add, a, a[::-1].copy(), a) + + # Overlap with index + a = np.arange(10000, dtype=np.int16) + check(np.add, a, a[::-1], a) + + def test_unary_gufunc_fuzz(self): + shapes = [7, 13, 8, 21, 29, 32] + gufunc = umath_tests.euclidean_pdist + + rng = np.random.RandomState(1234) + + for ndim in range(2, 6): + x = rng.rand(*shapes[:ndim]) + + it = iter_random_view_pairs(x, same_steps=False, equal_size=True) + + min_count = 500 // (ndim + 1)**2 + + overlapping = 0 + while overlapping < min_count: + a, b = next(it) + + if min(a.shape[-2:]) < 2 or min(b.shape[-2:]) < 2 or a.shape[-1] < 2: + continue + + # Ensure the shapes are so that euclidean_pdist is happy + if b.shape[-1] > b.shape[-2]: + b = b[...,0,:] + else: + b = b[...,:,0] + + n = a.shape[-2] + p = n * (n - 1) // 2 + if p <= b.shape[-1] and p > 0: + b = b[...,:p] + else: + n = max(2, int(np.sqrt(b.shape[-1]))//2) + p = n * (n - 1) // 2 + a = a[...,:n,:] + b = b[...,:p] + + # Call + if np.shares_memory(a, b): + overlapping += 1 + + with np.errstate(over='ignore', invalid='ignore'): + assert_copy_equivalent(gufunc, [a], out=b) + + def test_ufunc_at_manual(self): + def check(ufunc, a, ind, b=None): + a0 = a.copy() + if b is None: + ufunc.at(a0, ind.copy()) + c1 = a0.copy() + ufunc.at(a, 
ind) + c2 = a.copy() + else: + ufunc.at(a0, ind.copy(), b.copy()) + c1 = a0.copy() + ufunc.at(a, ind, b) + c2 = a.copy() + assert_array_equal(c1, c2) + + # Overlap with index + a = np.arange(10000, dtype=np.int16) + check(np.invert, a[::-1], a) + + # Overlap with second data array + a = np.arange(100, dtype=np.int16) + ind = np.arange(0, 100, 2, dtype=np.int16) + check(np.add, a, ind, a[25:75]) + + def test_unary_ufunc_1d_manual(self): + # Exercise branches in PyArray_EQUIVALENTLY_ITERABLE + + def check(a, b): + a_orig = a.copy() + b_orig = b.copy() + + b0 = b.copy() + c1 = ufunc(a, out=b0) + c2 = ufunc(a, out=b) + assert_array_equal(c1, c2) + + # Trigger "fancy ufunc loop" code path + mask = view_element_first_byte(b).view(np.bool_) + + a[...] = a_orig + b[...] = b_orig + c1 = ufunc(a, out=b.copy(), where=mask.copy()).copy() + + a[...] = a_orig + b[...] = b_orig + c2 = ufunc(a, out=b, where=mask.copy()).copy() + + # Also, mask overlapping with output + a[...] = a_orig + b[...] = b_orig + c3 = ufunc(a, out=b, where=mask).copy() + + assert_array_equal(c1, c2) + assert_array_equal(c1, c3) + + dtypes = [np.int8, np.int16, np.int32, np.int64, np.float32, + np.float64, np.complex64, np.complex128] + dtypes = [np.dtype(x) for x in dtypes] + + for dtype in dtypes: + if np.issubdtype(dtype, np.integer): + ufunc = np.invert + else: + ufunc = np.reciprocal + + n = 1000 + k = 10 + indices = [ + np.index_exp[:n], + np.index_exp[k:k+n], + np.index_exp[n-1::-1], + np.index_exp[k+n-1:k-1:-1], + np.index_exp[:2*n:2], + np.index_exp[k:k+2*n:2], + np.index_exp[2*n-1::-2], + np.index_exp[k+2*n-1:k-1:-2], + ] + + for xi, yi in itertools.product(indices, indices): + v = np.arange(1, 1 + n*2 + k, dtype=dtype) + x = v[xi] + y = v[yi] + + with np.errstate(all='ignore'): + check(x, y) + + # Scalar cases + check(x[:1], y) + check(x[-1:], y) + check(x[:1].reshape([]), y) + check(x[-1:].reshape([]), y) + + def test_unary_ufunc_where_same(self): + # Check behavior at wheremask overlap + ufunc = np.invert + + def check(a, out, mask): + c1 = ufunc(a, out=out.copy(), where=mask.copy()) + c2 = ufunc(a, out=out, where=mask) + assert_array_equal(c1, c2) + + # Check behavior with same input and output arrays + x = np.arange(100).astype(np.bool_) + check(x, x, x) + check(x, x.copy(), x) + check(x, x, x.copy()) + + def test_binary_ufunc_1d_manual(self): + ufunc = np.add + + def check(a, b, c): + c0 = c.copy() + c1 = ufunc(a, b, out=c0) + c2 = ufunc(a, b, out=c) + assert_array_equal(c1, c2) + + for dtype in [np.int8, np.int16, np.int32, np.int64, + np.float32, np.float64, np.complex64, np.complex128]: + # Check different data dependency orders + + n = 1000 + k = 10 + + indices = [] + for p in [1, 2]: + indices.extend([ + np.index_exp[:p*n:p], + np.index_exp[k:k+p*n:p], + np.index_exp[p*n-1::-p], + np.index_exp[k+p*n-1:k-1:-p], + ]) + + for x, y, z in itertools.product(indices, indices, indices): + v = np.arange(6*n).astype(dtype) + x = v[x] + y = v[y] + z = v[z] + + check(x, y, z) + + # Scalar cases + check(x[:1], y, z) + check(x[-1:], y, z) + check(x[:1].reshape([]), y, z) + check(x[-1:].reshape([]), y, z) + check(x, y[:1], z) + check(x, y[-1:], z) + check(x, y[:1].reshape([]), z) + check(x, y[-1:].reshape([]), z) + + def test_inplace_op_simple_manual(self): + rng = np.random.RandomState(1234) + x = rng.rand(200, 200) # bigger than bufsize + + x += x.T + assert_array_equal(x - x.T, 0) + + if __name__ == "__main__": run_module_suite() diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py index 
f5096e023..1863f36f7 100644 --- a/numpy/core/tests/test_nditer.py +++ b/numpy/core/tests/test_nditer.py @@ -1139,6 +1139,94 @@ def test_iter_common_dtype(): assert_equal(i.dtypes[1], np.dtype('c16')) assert_equal(i.dtypes[2], np.dtype('c16')) +def test_iter_copy_if_overlap(): + # Ensure the iterator makes copies on read/write overlap, if requested + + # Copy not needed, 1 op + for flag in ['readonly', 'writeonly', 'readwrite']: + a = arange(10) + i = nditer([a], ['copy_if_overlap'], [[flag]]) + assert_(i.operands[0] is a) + + # Copy needed, 2 ops, read-write overlap + x = arange(10) + a = x[1:] + b = x[:-1] + i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) + assert_(not np.shares_memory(*i.operands)) + + # Copy not needed with elementwise, 2 ops, exactly same arrays + x = arange(10) + a = x + b = x + i = nditer([a, b], ['copy_if_overlap'], [['readonly', 'overlap_assume_elementwise'], + ['readwrite', 'overlap_assume_elementwise']]) + assert_(i.operands[0] is a and i.operands[1] is b) + i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) + assert_(i.operands[0] is a and not np.shares_memory(i.operands[1], b)) + + # Copy not needed, 2 ops, no overlap + x = arange(10) + a = x[::2] + b = x[1::2] + i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['writeonly']]) + assert_(i.operands[0] is a and i.operands[1] is b) + + # Copy needed, 2 ops, read-write overlap + x = arange(4, dtype=np.int8) + a = x[3:] + b = x.view(np.int32)[:1] + i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['writeonly']]) + assert_(not np.shares_memory(*i.operands)) + + # Copy needed, 3 ops, read-write overlap + for flag in ['writeonly', 'readwrite']: + x = np.ones([10, 10]) + a = x + b = x.T + c = x + i = nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['readonly'], [flag]]) + a2, b2, c2 = i.operands + assert_(not np.shares_memory(a2, c2)) + assert_(not np.shares_memory(b2, c2)) + + # Copy not needed, 3 ops, read-only overlap + x = np.ones([10, 10]) + a = x + b = x.T + c = x + i = nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['readonly'], ['readonly']]) + a2, b2, c2 = i.operands + assert_(a is a2) + assert_(b is b2) + assert_(c is c2) + + # Copy not needed, 3 ops, read-only overlap + x = np.ones([10, 10]) + a = x + b = np.ones([10, 10]) + c = x.T + i = nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['writeonly'], ['readonly']]) + a2, b2, c2 = i.operands + assert_(a is a2) + assert_(b is b2) + assert_(c is c2) + + # Copy not needed, 3 ops, write-only overlap + x = np.arange(7) + a = x[:3] + b = x[3:6] + c = x[4:7] + i = nditer([a, b, c], ['copy_if_overlap'], + [['readonly'], ['writeonly'], ['writeonly']]) + a2, b2, c2 = i.operands + assert_(a is a2) + assert_(b is b2) + assert_(c is c2) + def test_iter_op_axes(): # Check that custom axes work @@ -2320,7 +2408,11 @@ def test_iter_reduction(): assert_equal(i[1].strides, (0,)) # Do the reduction for x, y in i: - y[...] += x + # Use a for loop instead of ``y[...] += x`` + # (equivalent to ``y[...] = y[...].copy() + x``), + # because y has zero strides we use for the reduction + for j in range(len(y)): + y[j] += x[j] # Since no axes were specified, should have allocated a scalar assert_equal(i.operands[1].ndim, 0) assert_equal(i.operands[1], np.sum(a)) @@ -2371,7 +2463,11 @@ def test_iter_buffering_reduction(): assert_equal(i[1].strides, (0,)) # Do the reduction for x, y in i: - y[...] += x + # Use a for loop instead of ``y[...] += x`` + # (equivalent to ``y[...] 
= y[...].copy() + x``), + # because y has zero strides we use for the reduction + for j in range(len(y)): + y[j] += x[j] assert_equal(b, np.sum(a, axis=1)) # Iterator inner double loop was wrong on this one diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 436cb0355..48d8f9379 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -1452,6 +1452,22 @@ class TestSpecialMethods(TestCase): assert_equal(x, np.array(2)) assert_equal(type(x), with_prepare) + def test_prepare_out(self): + + class with_prepare(np.ndarray): + __array_priority__ = 10 + + def __array_prepare__(self, arr, context): + return np.array(arr).view(type=with_prepare) + + a = np.array([1]).view(type=with_prepare) + x = np.add(a, a, a) + # Returned array is new, because of the strange + # __array_prepare__ above + assert_(not np.shares_memory(x, a)) + assert_equal(x, np.array([2])) + assert_equal(type(x), with_prepare) + def test_failing_prepare(self): class A(object): |
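The same copy-on-overlap semantics extend to reductions and ufunc.at. A sketch mirroring the reduce and at tests above (assert_copy_equivalent there states the contract: operation(*args, out=out) must match out[...] = operation(*args, out=out.copy())):

    import numpy as np

    # Reduction whose output aliases the input (handled by the
    # reduction.c changes via a temporary UPDATEIFCOPY result).
    a = np.arange(16, dtype=np.int16).reshape(4, 4)
    expected = a.sum(axis=0)
    np.add.reduce(a, axis=0, out=a[0])      # out is the first row of a
    assert (a[0] == expected).all()

    # ufunc.at with an index array overlapping the operand (handled by
    # PyArray_MapIterArrayCopyIfOverlap copying the operand if needed).
    b = np.arange(10, dtype=np.int16)
    c = b.copy()
    np.invert.at(b[::-1], b)                # index overlaps the operand
    np.invert.at(c[::-1], c.copy())         # reference: copied index
    assert (b == c).all()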