summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authornjsmith <njs@pobox.com>2013-04-11 12:31:22 -0700
committernjsmith <njs@pobox.com>2013-04-11 12:31:22 -0700
commitb9232f34bfb53bf7c574bfa350dd981f58d6a2d4 (patch)
tree6208488b45ea2341df2151dbb09d685857cf703c
parentb7053e8d065f820a6c2b3db7e8df3feaf5adbd71 (diff)
parent1675ad9e5b95605a851337f407e1fad33cf10c9c (diff)
downloadnumpy-b9232f34bfb53bf7c574bfa350dd981f58d6a2d4.tar.gz
Merge pull request #452 from seberg/enhdel
ENH: delete and insert generalization and speed improvements
-rw-r--r--doc/release/1.8.0-notes.rst14
-rw-r--r--numpy/lib/function_base.py249
-rw-r--r--numpy/lib/tests/test_function_base.py114
3 files changed, 314 insertions, 63 deletions
diff --git a/doc/release/1.8.0-notes.rst b/doc/release/1.8.0-notes.rst
index d7e5c2200..ad33b5513 100644
--- a/doc/release/1.8.0-notes.rst
+++ b/doc/release/1.8.0-notes.rst
@@ -85,6 +85,20 @@ The function np.take now allows 0-d arrays as indices.
The separate compilation mode is now enabled by default.
+Several changes to np.insert and np.delete:
+* Previously, negative indices and indices that pointed past the end of
+ the array were simply ignored. Now, this will raise a Future or Deprecation
+ Warning. In the future they will be treated like normal indexing treats
+ them -- negative indices will wrap around, and out-of-bound indices will
+ generate an error.
+* Previously, boolean indices were treated as if they were integers (always
+ referring to either the 0th or 1st item in the array). In the future, they
+ will be treated as masks. In this release, they raise a FutureWarning
+ warning of this coming change.
+* In Numpy 1.7. np.insert already allowed the syntax
+ `np.insert(arr, 3, [1,2,3])` to insert multiple items at a single position.
+ In Numpy 1.8. this is also possible for `np.insert(arr, [3], [1, 2, 3])`.
+
C-API
~~~~~
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 1e5a822a7..43bd3af8b 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -28,12 +28,14 @@ from numpy.core import atleast_1d, atleast_2d
from numpy.lib.twodim_base import diag
from ._compiled_base import _insert, add_docstring
from ._compiled_base import digitize, bincount, interp as compiled_interp
-from .arraysetops import setdiff1d
from .utils import deprecate
from ._compiled_base import add_newdoc_ufunc
import numpy as np
import collections
+# Force range to be a generator, for np.delete's usage.
+if sys.version_info[0] < 3:
+ range = xrange
def iterable(y):
"""
@@ -1737,9 +1739,9 @@ class vectorize(object):
Set of strings or integers representing the positional or keyword
arguments for which the function will not be vectorized. These will be
passed directly to `pyfunc` unmodified.
-
+
.. versionadded:: 1.7.0
-
+
cache : bool, optional
If `True`, then cache the first function call that determines the number
of outputs if `otypes` is not provided.
@@ -1866,7 +1868,7 @@ class vectorize(object):
the_args = list(args)
def func(*vargs):
for _n, _i in enumerate(inds):
- the_args[_i] = vargs[_n]
+ the_args[_i] = vargs[_n]
kwargs.update(zip(names, vargs[len(inds):]))
return self.pyfunc(*the_args, **kwargs)
@@ -1927,7 +1929,7 @@ class vectorize(object):
ufunc = frompyfunc(_func, len(args), nout)
return ufunc, otypes
-
+
def _vectorize_call(self, func, args):
"""Vectorized call to `func` over positional `args`."""
if not args:
@@ -1936,8 +1938,8 @@ class vectorize(object):
ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args)
# Convert args to object arrays first
- inputs = [array(_a, copy=False, subok=True, dtype=object)
- for _a in args]
+ inputs = [array(_a, copy=False, subok=True, dtype=object)
+ for _a in args]
outputs = ufunc(*inputs)
@@ -3386,7 +3388,8 @@ def meshgrid(*xi, **kwargs):
def delete(arr, obj, axis=None):
"""
- Return a new array with sub-arrays along an axis deleted.
+ Return a new array with sub-arrays along an axis deleted. For a one
+ dimensional array, this returns those entries not returned by `arr[obj]`.
Parameters
----------
@@ -3410,6 +3413,15 @@ def delete(arr, obj, axis=None):
insert : Insert elements into an array.
append : Append elements at the end of an array.
+ Notes
+ -----
+ Often it is preferable to use a boolean mask. For example:
+ >>> mask = np.ones(len(arr), dtype=bool)
+ >>> mask[[0,2,4]] = False
+ >>> result = arr[mask,...]
+ Is equivalent to `np.delete(arr, [0,2,4], axis=0)`, but allows further
+ use of `mask`.
+
Examples
--------
>>> arr = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
@@ -3436,7 +3448,6 @@ def delete(arr, obj, axis=None):
except AttributeError:
pass
-
arr = asarray(arr)
ndim = arr.ndim
if axis is None:
@@ -3445,34 +3456,35 @@ def delete(arr, obj, axis=None):
ndim = arr.ndim;
axis = ndim-1;
if ndim == 0:
+ warnings.warn("in the future the special handling of scalars "
+ "will be removed from delete and raise an error",
+ DeprecationWarning)
if wrap:
return wrap(arr)
else:
return arr.copy()
+
slobj = [slice(None)]*ndim
N = arr.shape[axis]
newshape = list(arr.shape)
- if isinstance(obj, (int, long, integer)):
- if (obj < 0): obj += N
- if (obj < 0 or obj >=N):
- raise ValueError(
- "invalid entry")
- newshape[axis]-=1;
- new = empty(newshape, arr.dtype, arr.flags.fnc)
- slobj[axis] = slice(None, obj)
- new[slobj] = arr[slobj]
- slobj[axis] = slice(obj,None)
- slobj2 = [slice(None)]*ndim
- slobj2[axis] = slice(obj+1,None)
- new[slobj] = arr[slobj2]
- elif isinstance(obj, slice):
+
+ if isinstance(obj, slice):
start, stop, step = obj.indices(N)
- numtodel = len(list(range(start, stop, step)))
+ xr = range(start, stop, step)
+ numtodel = len(xr)
+
if numtodel <= 0:
if wrap:
- return wrap(new)
+ return wrap(arr.copy())
else:
return arr.copy()
+
+ # Invert if step is negative:
+ if step < 0:
+ step = -step
+ start = xr[-1]
+ stop = xr[0] + 1
+
newshape[axis] -= numtodel
new = empty(newshape, arr.dtype, arr.flags.fnc)
# copy initial chunk
@@ -3493,24 +3505,77 @@ def delete(arr, obj, axis=None):
if step == 1:
pass
else: # use array indexing.
- obj = arange(start, stop, step, dtype=intp)
- all = arange(start, stop, dtype=intp)
- obj = setdiff1d(all, obj)
+ keep = ones(stop-start, dtype=bool)
+ keep[:stop-start:step] = False
slobj[axis] = slice(start, stop-numtodel)
slobj2 = [slice(None)]*ndim
- slobj2[axis] = obj
+ slobj2[axis] = slice(start, stop)
+ arr = arr[slobj2]
+ slobj2[axis] = keep
new[slobj] = arr[slobj2]
- else: # default behavior
- obj = array(obj, dtype=intp, copy=0, ndmin=1)
- all = arange(N, dtype=intp)
- obj = setdiff1d(all, obj)
- slobj[axis] = obj
+ if wrap:
+ return wrap(new)
+ else:
+ return new
+
+ _obj = obj
+ obj = np.asarray(obj)
+ # After removing the special handling of booleans and out of
+ # bounds values, the conversion to the array can be removed.
+ if obj.dtype == bool:
+ warnings.warn("in the future insert will treat boolean arrays "
+ "and array-likes as boolean index instead "
+ "of casting it to integer", FutureWarning)
+ obj = obj.astype(intp)
+ if isinstance(_obj, (int, long, integer)):
+ # optimization for a single value
+ obj = obj.item()
+ if (obj < -N or obj >=N):
+ raise IndexError("index %i is out of bounds for axis "
+ "%i with size %i" % (obj, axis, N))
+ if (obj < 0): obj += N
+ newshape[axis]-=1;
+ new = empty(newshape, arr.dtype, arr.flags.fnc)
+ slobj[axis] = slice(None, obj)
+ new[slobj] = arr[slobj]
+ slobj[axis] = slice(obj,None)
+ slobj2 = [slice(None)]*ndim
+ slobj2[axis] = slice(obj+1,None)
+ new[slobj] = arr[slobj2]
+ else:
+ if obj.size == 0 and not isinstance(_obj, np.ndarray):
+ obj = obj.astype(intp)
+ if not np.can_cast(obj, intp, 'same_kind'):
+ # obj.size = 1 special case always failed and would just
+ # give superfluous warnings.
+ warnings.warn("using a non-integer array as obj in delete "
+ "will result in an error in the future", DeprecationWarning)
+ obj = obj.astype(intp)
+ keep = ones(N, dtype=bool)
+
+ # Test if there are out of bound indices, this is deprecated
+ inside_bounds = (obj < N) & (obj >= -N)
+ if not inside_bounds.all():
+ warnings.warn("in the future out of bounds indices will raise an "
+ "error instead of being ignored by `numpy.delete`.",
+ DeprecationWarning)
+ obj = obj[inside_bounds]
+ positive_indices = obj >= 0
+ if not positive_indices.all():
+ warnings.warn("in the future negative indices will not be ignored "
+ "by `numpy.delete`.", FutureWarning)
+ obj = obj[positive_indices]
+
+ keep[obj,] = False
+ slobj[axis] = keep
new = arr[slobj]
+
if wrap:
return wrap(new)
else:
return new
+
def insert(arr, obj, values, axis=None):
"""
Insert values along the given axis before the given indices.
@@ -3522,9 +3587,16 @@ def insert(arr, obj, values, axis=None):
obj : int, slice or sequence of ints
Object that defines the index or indices before which `values` is
inserted.
+
+ .. versionadded:: 1.8.0
+
+ Support for multiple insertions when `obj` is a single scalar or a
+ sequence with one element (similar to calling insert multiple times).
values : array_like
Values to insert into `arr`. If the type of `values` is different
from that of `arr`, `values` is converted to the type of `arr`.
+ `values` should be shaped so that ``arr[...,obj,...] = values``
+ is legal.
axis : int, optional
Axis along which to insert `values`. If `axis` is None then `arr`
is flattened first.
@@ -3539,8 +3611,15 @@ def insert(arr, obj, values, axis=None):
See Also
--------
append : Append elements at the end of an array.
+ concatenate : Join a sequence of arrays together.
delete : Delete elements from an array.
+ Notes
+ -----
+ Note that for higher dimensional inserts `obj=0` behaves very different
+ from `obj=[0]` just like `arr[:,0,:] = values` is different from
+ `arr[:,[0],:] = values`.
+
Examples
--------
>>> a = np.array([[1, 1], [2, 2], [3, 3]])
@@ -3555,6 +3634,15 @@ def insert(arr, obj, values, axis=None):
[2, 5, 2],
[3, 5, 3]])
+ Difference between sequence and scalars:
+ >>> np.insert(a, [1], [[1],[2],[3]], axis=1)
+ array([[1, 1, 1],
+ [2, 2, 2],
+ [3, 3, 3]])
+ >>> np.array_equal(np.insert(a, 1, [1, 2, 3], axis=1),
+ ... np.insert(a, [1], [[1],[2],[3]], axis=1))
+ True
+
>>> b = a.flatten()
>>> b
array([1, 1, 2, 2, 3, 3])
@@ -3589,6 +3677,9 @@ def insert(arr, obj, values, axis=None):
ndim = arr.ndim
axis = ndim-1
if (ndim == 0):
+ warnings.warn("in the future the special handling of scalars "
+ "will be removed from insert and raise an error",
+ DeprecationWarning)
arr = arr.copy()
arr[...] = values
if wrap:
@@ -3599,38 +3690,76 @@ def insert(arr, obj, values, axis=None):
N = arr.shape[axis]
newshape = list(arr.shape)
- if isinstance(obj, (int, long, integer)):
- if (obj < 0): obj += N
- if obj < 0 or obj > N:
- raise ValueError(
- "index (%d) out of range (0<=index<=%d) "\
- "in dimension %d" % (obj, N, axis))
- if isscalar(values):
- obj = [obj]
- else:
- values = asarray(values)
- if ndim > values.ndim:
- obj = [obj]
- else:
- obj = [obj] * len(values)
-
- elif isinstance(obj, slice):
+ if isinstance(obj, slice):
# turn it into a range object
- obj = arange(*obj.indices(N),**{'dtype':intp})
+ indices = arange(*obj.indices(N),**{'dtype':intp})
+ else:
+ indices = np.asarray(obj)
+ if indices.dtype == bool:
+ # See also delete
+ warnings.warn("in the future insert will treat boolean arrays "
+ "and array-likes as a boolean index instead "
+ "of casting it to integer", FutureWarning)
+ indices = indices.astype(intp)
+ # Code after warning period:
+ #if obj.ndim != 1:
+ # raise ValueError('boolean array argument obj to insert '
+ # 'must be one dimensional')
+ #indices = np.flatnonzero(obj)
+ elif indices.ndim > 1:
+ raise ValueError("index array argument obj to insert must "
+ "be one dimensional or scalar")
+ if indices.size == 1:
+ index = indices.item()
+ if index < -N or index > N:
+ raise IndexError("index %i is out of bounds for axis "
+ "%i with size %i" % (obj, axis, N))
+ if (index < 0): index += N
+
+ values = array(values, copy=False, ndmin=arr.ndim)
+ if indices.ndim == 0:
+ # broadcasting is very different here, since a[:,0,:] = ... behaves
+ # very different from a[:,[0],:] = ...! This changes values so that
+ # it works likes the second case. (here a[:,0:1,:])
+ values = np.rollaxis(values, 0, axis+1)
+ numnew = values.shape[axis]
+ newshape[axis] += numnew
+ new = empty(newshape, arr.dtype, arr.flags.fnc)
+ slobj[axis] = slice(None, index)
+ new[slobj] = arr[slobj]
+ slobj[axis] = slice(index, index+numnew)
+ new[slobj] = values
+ slobj[axis] = slice(index+numnew, None)
+ slobj2 = [slice(None)] * ndim
+ slobj2[axis] = slice(index, None)
+ new[slobj] = arr[slobj2]
+ if wrap:
+ return wrap(new)
+ return new
+ elif indices.size == 0 and not isinstance(obj, np.ndarray):
+ # Can safely cast the empty list to intp
+ indices = indices.astype(intp)
- # get two sets of indices
- # one is the indices which will hold the new stuff
- # two is the indices where arr will be copied over
+ if not np.can_cast(indices, intp, 'same_kind'):
+ warnings.warn("using a non-integer array as obj in insert "
+ "will result in an error in the future",
+ DeprecationWarning)
+ indices = indices.astype(intp)
+
+ indices[indices < 0] += N
+
+ numnew = len(indices)
+ order = indices.argsort(kind='mergesort') # stable sort
+ indices[order] += np.arange(numnew)
- obj = asarray(obj, dtype=intp)
- numnew = len(obj)
- index1 = obj + arange(numnew)
- index2 = setdiff1d(arange(numnew+N),index1)
newshape[axis] += numnew
+ old_mask = ones(newshape[axis], dtype=bool)
+ old_mask[indices] = False
+
new = empty(newshape, arr.dtype, arr.flags.fnc)
slobj2 = [slice(None)]*ndim
- slobj[axis] = index1
- slobj2[axis] = index2
+ slobj[axis] = indices
+ slobj2[axis] = old_mask
new[slobj] = values
new[slobj2] = arr
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 28786dc3e..ae68be41f 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -5,7 +5,7 @@ import numpy as np
from numpy.testing import (
run_module_suite, TestCase, assert_, assert_equal,
assert_array_equal, assert_almost_equal, assert_array_almost_equal,
- assert_raises, assert_allclose, assert_array_max_ulp
+ assert_raises, assert_allclose, assert_array_max_ulp, assert_warns
)
from numpy.random import rand
from numpy.lib import *
@@ -174,17 +174,64 @@ class TestInsert(TestCase):
assert_equal(insert(a, 3, 1), [1, 2, 3, 1])
assert_equal(insert(a, [1, 1, 1], [1, 2, 3]), [1, 1, 2, 3, 2, 3])
assert_equal(insert(a, 1,[1,2,3]), [1, 1, 2, 3, 2, 3])
- assert_equal(insert(a,[1,2,3],9),[1,9,2,9,3,9])
+ assert_equal(insert(a,[1,-1,3],9),[1,9,2,9,3,9])
+ assert_equal(insert(a,slice(-1,None,-1), 9),[9,1,9,2,9,3])
+ assert_equal(insert(a,[-1,1,3], [7,8,9]),[1,8,2,7,3,9])
b = np.array([0, 1], dtype=np.float64)
assert_equal(insert(b, 0, b[0]), [0., 0., 1.])
+ assert_equal(insert(b, [], []), b)
+ # Bools will be treated differently in the future:
+ #assert_equal(insert(a, np.array([True]*4), 9), [9,1,9,2,9,3,9])
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', FutureWarning)
+ assert_equal(insert(a, np.array([True]*4), 9), [1,9,9,9,9,2,3])
+ assert_(w[0].category is FutureWarning)
+
def test_multidim(self):
a = [[1, 1, 1]]
r = [[2, 2, 2],
[1, 1, 1]]
+ assert_equal(insert(a, 0, [1]), [1,1,1,1])
assert_equal(insert(a, 0, [2, 2, 2], axis=0), r)
assert_equal(insert(a, 0, 2, axis=0), r)
assert_equal(insert(a, 2, 2, axis=1), [[1, 1, 2, 1]])
+ a = np.array([[1, 1], [2, 2], [3, 3]])
+ b = np.arange(1,4).repeat(3).reshape(3,3)
+ c = np.concatenate((a[:,0:1], np.arange(1,4).repeat(3).reshape(3,3).T,
+ a[:,1:2]), axis=1)
+ assert_equal(insert(a, [1], [[1],[2],[3]], axis=1), b)
+ assert_equal(insert(a, [1], [1, 2, 3], axis=1), c)
+ # scalars behave differently, in this case exactly opposite:
+ assert_equal(insert(a, 1, [1, 2, 3], axis=1), b)
+ assert_equal(insert(a, 1, [[1],[2],[3]], axis=1), c)
+
+ a = np.arange(4).reshape(2,2)
+ assert_equal(insert(a[:,:1], 1, a[:,1], axis=1), a)
+ assert_equal(insert(a[:1,:], 1, a[1,:], axis=0), a)
+
+ def test_0d(self):
+ # This is an error in the future
+ a = np.array(1)
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', DeprecationWarning)
+ assert_equal(insert(a, [], 2, axis=0), np.array(2))
+ assert_(w[0].category is DeprecationWarning)
+
+ def test_subclass(self):
+ class SubClass(np.ndarray):
+ pass
+ a = np.arange(10).view(SubClass)
+ assert_(isinstance(np.insert(a, 0, [0]), SubClass))
+ assert_(isinstance(np.insert(a, [], []), SubClass))
+ assert_(isinstance(np.insert(a, [0,1], [1,2]), SubClass))
+ assert_(isinstance(np.insert(a, slice(1,2), [1,2]), SubClass))
+ assert_(isinstance(np.insert(a, slice(1,-2), []), SubClass))
+ # This is an error in the future:
+ a = np.array(1).view(SubClass)
+ assert_(isinstance(np.insert(a, 0, [0]), SubClass))
+
+
class TestAmax(TestCase):
def test_basic(self):
a = [3, 4, 5, 10, -3, -5, 6.0]
@@ -302,6 +349,67 @@ class TestDiff(TestCase):
assert_array_equal(diff(x, n=2, axis=0), out4)
+class TestDelete(TestCase):
+ def setUp(self):
+ self.a = np.arange(5)
+ self.nd_a = np.arange(5).repeat(2).reshape(1,5,2)
+
+ def _check_inverse_of_slicing(self, indices):
+ a_del = delete(self.a, indices)
+ nd_a_del = delete(self.nd_a, indices, axis=1)
+ msg = 'Delete failed for obj: %r' % indices
+ # NOTE: The cast should be removed after warning phase for bools
+ if not isinstance(indices, (slice, int, long, np.integer)):
+ indices = np.asarray(indices, dtype=np.intp)
+ indices = indices[(indices >= 0) & (indices < 5)]
+ assert_array_equal(setxor1d(a_del, self.a[indices,]), self.a,
+ err_msg=msg)
+ xor = setxor1d(nd_a_del[0,:,0], self.nd_a[0,indices,0])
+ assert_array_equal(xor, self.nd_a[0,:,0], err_msg=msg)
+
+ def test_slices(self):
+ lims = [-6, -2, 0, 1, 2, 4, 5]
+ steps = [-3, -1, 1, 3]
+ for start in lims:
+ for stop in lims:
+ for step in steps:
+ s = slice(start, stop, step)
+ self._check_inverse_of_slicing(s)
+
+ def test_fancy(self):
+ # Deprecation/FutureWarning tests should be kept after change.
+ self._check_inverse_of_slicing(np.array([[0,1],[2,1]]))
+ assert_raises(DeprecationWarning, delete, self.a, [100])
+ assert_raises(DeprecationWarning, delete, self.a, [-100])
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', FutureWarning)
+ self._check_inverse_of_slicing([0, -1, 2, 2])
+ obj = np.array([True, False, False], dtype=bool)
+ self._check_inverse_of_slicing(obj)
+ assert_(w[0].category is FutureWarning)
+ assert_(w[1].category is FutureWarning)
+
+ def test_single(self):
+ self._check_inverse_of_slicing(0)
+ self._check_inverse_of_slicing(-4)
+
+ def test_0d(self):
+ a = np.array(1)
+ with warnings.catch_warnings(record=True) as w:
+ warnings.filterwarnings('always', '', DeprecationWarning)
+ assert_equal(delete(a, [], axis=0), a)
+ assert_(w[0].category is DeprecationWarning)
+
+ def test_subclass(self):
+ class SubClass(np.ndarray):
+ pass
+ a = self.a.view(SubClass)
+ assert_(isinstance(delete(a, 0), SubClass))
+ assert_(isinstance(delete(a, []), SubClass))
+ assert_(isinstance(delete(a, [0,1]), SubClass))
+ assert_(isinstance(delete(a, slice(1,2)), SubClass))
+ assert_(isinstance(delete(a, slice(1,-2)), SubClass))
+
class TestGradient(TestCase):
def test_basic(self):
v = [[1, 1], [3, 4]]
@@ -531,7 +639,7 @@ class TestVectorize(TestCase):
res2a = f2(np.arange(3))
assert_equal(res1a, res2a)
assert_equal(res1b, res2b)
-
+
def test_string_ticket_1892(self):
"""Test vectorization over strings: issue 1892."""
f = np.vectorize(lambda x:x)