diff options
author | Pauli Virtanen <pav@iki.fi> | 2017-01-19 22:40:09 +0100 |
---|---|---|
committer | Pauli Virtanen <pav@iki.fi> | 2017-01-19 22:47:19 +0100 |
commit | acedf6860823234460facf7957e42dc65ab5efad (patch) | |
tree | c055bb8cd417691ef67ba722563d7728ffec22d4 | |
parent | e4af3fd9f485e9f8d923339d4a632bc83f9ad8d7 (diff) | |
download | numpy-acedf6860823234460facf7957e42dc65ab5efad.tar.gz |

ENH: core: change NPY_ITER_OVERLAP_NOT_SAME to NPY_ITER_OVERLAP_ALLOW_SAME

It's clearer to explicitly specify which operands are accessed
elementwise, rather than to list exceptions.

-rw-r--r-- | doc/source/reference/c-api.iterator.rst | 39 | ||||
-rw-r--r-- | numpy/add_newdocs.py | 8 | ||||
-rw-r--r-- | numpy/core/include/numpy/ndarraytypes.h | 7 | ||||
-rw-r--r-- | numpy/core/src/multiarray/nditer_constr.c | 8 | ||||
-rw-r--r-- | numpy/core/src/multiarray/nditer_pywrap.c | 5 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 21 | ||||
-rw-r--r-- | numpy/core/tests/test_nditer.py | 7 |
7 files changed, 51 insertions, 44 deletions
diff --git a/doc/source/reference/c-api.iterator.rst b/doc/source/reference/c-api.iterator.rst index 5761e56c2..367dc15ef 100644 --- a/doc/source/reference/c-api.iterator.rst +++ b/doc/source/reference/c-api.iterator.rst @@ -463,28 +463,17 @@ Construction and Destruction .. c:var:: NPY_ITER_COPY_IF_OVERLAP - If a write operand has overlap with a read operand, eliminate all - overlap by making temporary copies (with UPDATEIFCOPY for write - operands). - - Overlapping means: - - - For a (read, write) pair of operands, there is a memory address - that contains data common to both arrays, which can be reached - via *different* index/dtype/shape combinations. - - - In particular, unless the arrays have the same shape, dtype, - strides, start address, and NPY_ITER_OVERLAP_NOT_SAME is not specified, - any shared common data byte accessible - by indexing implies overlap. + If any write operand has overlap with any read operand, eliminate all + overlap by making temporary copies (enabling UPDATEIFCOPY for write + operands, if necessary). A pair of operands has overlap if there is + a memory address that contains data common to both arrays. Because exact overlap detection has exponential runtime in the number of dimensions, the decision is made based on heuristics, which has false positives (needless copies in unusual cases) but has no false negatives. - If read/write overlap exists and write operands are modified in the - iterator loop element-wise, this flag ensures the result of the + If any read/write overlap exists, this flag ensures the result of the operation is the same as if all operands were copied. In cases where copies would need to be made, **the result of the computation may be undefined without this flag!** @@ -619,14 +608,20 @@ Construction and Destruction returns true from the corresponding element in the ARRAYMASK operand. - .. c:var:: NPY_ITER_OVERLAP_NOT_SAME + .. 
c:var:: NPY_ITER_OVERLAP_ALLOW_SAME + + In memory overlap checks, operands with ``NPY_ITER_OVERLAP_ALLOW_SAME`` + set are considered non-overlapping if they point to exactly the same array. + This means arrays with the same shape, dtype, strides, and start address. + In other cases, the default rules implied by + ``NPY_ITER_COPY_IF_OVERLAP`` apply. - In the memory overlap checks done when ``NPY_ITER_COPY_IF_OVERLAP`` - is specified, consider this array as overlapping even if it is - exactly the same as another array. + This flag can be enabled on the set of operands that are accessed + only in the iterator order, i.e. the operation is element-wise, + to avoid unnecessary copies. - This flag should be set on arrays that are not accessed in the - iterator order. + This flag has effect only if ``NPY_ITER_COPY_IF_OVERLAP`` is enabled + on the iterator. .. c:function:: NpyIter* NpyIter_AdvancedNew(npy_intp nop, PyArrayObject** op, npy_uint32 flags, NPY_ORDER order, NPY_CASTING casting, npy_uint32* op_flags, PyArray_Descr** op_dtypes, int oa_ndim, int** op_axes, npy_intp* itershape, npy_intp buffersize) diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index b7b000376..9fc28a4ae 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -170,9 +170,8 @@ add_newdoc('numpy.core', 'nditer', * "common_dtype" causes all the operands to be converted to a common data type, with copying or buffering as necessary. * "copy_if_overlap" causes the iterator to determine if read - operands have overlap with write operands (except if - the arrays are exactly the same), and make temporary copies - as necessary to avoid overlap. False positives (needless + operands have overlap with write operands, and make temporary + copies as necessary to avoid overlap. False positives (needless copying) are possible in some cases. * "delay_bufalloc" delays allocation of the buffers until a reset() call is made. 
Allows "allocate" operands to @@ -213,6 +212,9 @@ add_newdoc('numpy.core', 'nditer', copies those elements indicated by this mask. * 'writemasked' indicates that only elements where the chosen 'arraymask' operand is True will be written to. + * "overlap_allow_same" can be used to mark operands that are + accessed only in the iterator order, to allow less conservative + copying when "copy_if_overlap" is present. op_dtypes : dtype or tuple of dtype(s), optional The required data type(s) of the operands. If copying or buffering is enabled, the data will be converted to/from their original types. diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 3c5af9408..d2e73d73a 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -1045,11 +1045,8 @@ typedef void (NpyIter_GetMultiIndexFunc)(NpyIter *iter, #define NPY_ITER_WRITEMASKED 0x10000000 /* This array is the mask for all WRITEMASKED operands */ #define NPY_ITER_ARRAYMASK 0x20000000 -/* - * Consider this array as overlapping for COPY_IF_OVERLAP, - * even if it is exactly the same as another array. - */ -#define NPY_ITER_OVERLAP_NOT_SAME 0x40000000 +/* Consider identical arrays non-overlapping for COPY_IF_OVERLAP */ +#define NPY_ITER_OVERLAP_ALLOW_SAME 0x40000000 #define NPY_ITER_GLOBAL_FLAGS 0x0000ffff #define NPY_ITER_PER_OP_FLAGS 0xffff0000 diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index f8829d0b9..27906ca53 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -2747,11 +2747,11 @@ npyiter_allocate_arrays(NpyIter *iter, /* * If the arrays are views to exactly the same data, no need - * to make copies, because ufunc inner loops are assumed to - * deal with that + * to make copies, if the caller (eg ufunc) says it accesses + * data only in the iterator order. 
*/ - if (!(op_flags[iop] & NPY_ITER_OVERLAP_NOT_SAME) && - !(op_flags[iother] & NPY_ITER_OVERLAP_NOT_SAME) && + if ((op_flags[iop] & NPY_ITER_OVERLAP_ALLOW_SAME) && + (op_flags[iother] & NPY_ITER_OVERLAP_ALLOW_SAME) && PyArray_BYTES(op[iop]) == PyArray_BYTES(op[iother]) && PyArray_NDIM(op[iop]) == PyArray_NDIM(op[iother]) && PyArray_CompareLists(PyArray_DIMS(op[iop]), diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c index 26756dfc1..1f512bb7d 100644 --- a/numpy/core/src/multiarray/nditer_pywrap.c +++ b/numpy/core/src/multiarray/nditer_pywrap.c @@ -360,6 +360,11 @@ NpyIter_OpFlagsConverter(PyObject *op_flags_in, break; } break; + case 'o': + if (strcmp(str, "overlap_allow_same") == 0) { + flag = NPY_ITER_OVERLAP_ALLOW_SAME; + } + break; case 'r': if (length > 4) switch (str[4]) { case 'o': diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 4ade0e34c..bed395fc4 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -1417,7 +1417,8 @@ iterator_loop(PyUFuncObject *ufunc, /* Set up the flags */ for (i = 0; i < nin; ++i) { op_flags[i] = NPY_ITER_READONLY | - NPY_ITER_ALIGNED; + NPY_ITER_ALIGNED | + NPY_ITER_OVERLAP_ALLOW_SAME; /* * If READWRITE flag has been set for this operand, * then clear default READONLY flag @@ -1432,7 +1433,8 @@ iterator_loop(PyUFuncObject *ufunc, NPY_ITER_ALIGNED | NPY_ITER_ALLOCATE | NPY_ITER_NO_BROADCAST | - NPY_ITER_NO_SUBTYPE; + NPY_ITER_NO_SUBTYPE | + NPY_ITER_OVERLAP_ALLOW_SAME; } iter_flags = ufunc->iter_flags | @@ -1728,7 +1730,8 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, for (i = 0; i < nin; ++i) { op_flags[i] = default_op_in_flags | NPY_ITER_READONLY | - NPY_ITER_ALIGNED; + NPY_ITER_ALIGNED | + NPY_ITER_OVERLAP_ALLOW_SAME; /* * If READWRITE flag has been set for this operand, * then clear default READONLY flag @@ -1748,7 +1751,8 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, NPY_ITER_ALIGNED | 
NPY_ITER_ALLOCATE | NPY_ITER_NO_BROADCAST | - NPY_ITER_NO_SUBTYPE; + NPY_ITER_NO_SUBTYPE | + NPY_ITER_OVERLAP_ALLOW_SAME; } if (wheremask != NULL) { op_flags[nop] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK; @@ -2276,7 +2280,8 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, for (i = 0; i < nin; ++i) { op_flags[i] = NPY_ITER_READONLY | NPY_ITER_COPY | - NPY_ITER_ALIGNED; + NPY_ITER_ALIGNED | + NPY_ITER_OVERLAP_ALLOW_SAME; /* * If READWRITE flag has been set for this operand, * then clear default READONLY flag @@ -2291,7 +2296,8 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, NPY_ITER_UPDATEIFCOPY| NPY_ITER_ALIGNED| NPY_ITER_ALLOCATE| - NPY_ITER_NO_BROADCAST; + NPY_ITER_NO_BROADCAST| + NPY_ITER_OVERLAP_ALLOW_SAME; } iter_flags = ufunc->iter_flags | @@ -3627,8 +3633,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, NPY_ITER_ALIGNED; op_flags[1] = NPY_ITER_READONLY| NPY_ITER_COPY| - NPY_ITER_ALIGNED| - NPY_ITER_OVERLAP_NOT_SAME; + NPY_ITER_ALIGNED; op_flags[2] = NPY_ITER_READONLY; op_dtypes[1] = op_dtypes[0]; diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py index 41c2d5529..15735406c 100644 --- a/numpy/core/tests/test_nditer.py +++ b/numpy/core/tests/test_nditer.py @@ -1155,12 +1155,15 @@ def test_iter_copy_if_overlap(): i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) assert_(not np.shares_memory(*i.operands)) - # Copy not needed, 2 ops, exactly same arrays + # Copy not needed with allow_same, 2 ops, exactly same arrays x = arange(10) a = x b = x - i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) + i = nditer([a, b], ['copy_if_overlap'], [['readonly', 'overlap_allow_same'], + ['readwrite', 'overlap_allow_same']]) assert_(i.operands[0] is a and i.operands[1] is b) + i = nditer([a, b], ['copy_if_overlap'], [['readonly'], ['readwrite']]) + assert_(i.operands[0] is a and not np.shares_memory(i.operands[1], b)) # Copy not needed, 2 ops, no overlap x = 
arange(10) |