summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharles Harris <charlesr.harris@gmail.com>2018-02-16 12:42:53 -0700
committerCharles Harris <charlesr.harris@gmail.com>2018-02-16 13:31:32 -0700
commitc9d4e6e1078238f5748fb4536b12da8956f63737 (patch)
treed35f6d5893e3957ac9af3f4dc5b656865c04c207
parent7311b961a6827abdee8179cf40f3ab4a2b682408 (diff)
downloadnumpy-c9d4e6e1078238f5748fb4536b12da8956f63737.tar.gz
BUG: Revert sort optimization in np.unique.
Backport of #10588. The optimization was to sort integer subarrays by treating them as strings of unsigned bytes. That worked fine for finding the unique subarrays, but the sort order of the results could be unexpected. Closes #10495.
-rw-r--r--numpy/lib/arraysetops.py36
-rw-r--r--numpy/lib/tests/test_arraysetops.py9
2 files changed, 30 insertions, 15 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 6fce1c047..c02b84104 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -135,16 +135,18 @@ def unique(ar, return_index=False, return_inverse=False,
return_counts : bool, optional
If True, also return the number of times each unique item appears
in `ar`.
+
.. versionadded:: 1.9.0
- axis : int or None, optional
- The axis to operate on. If None, `ar` will be flattened beforehand.
- Otherwise, duplicate items will be removed along the provided axis,
- with all the other axes belonging to the each of the unique elements.
- Object arrays or structured arrays that contain objects are not
- supported if the `axis` kwarg is used.
- .. versionadded:: 1.13.0
+ axis : int or None, optional
+ The axis to operate on. If None, `ar` will be flattened. If an integer,
+ the subarrays indexed by the given axis will be flattened and treated
+ as the elements of a 1-D array with the dimension of the given axis,
+ see the notes for more details. Object arrays or structured arrays
+ that contain objects are not supported if the `axis` kwarg is used. The
+ default is None.
+ .. versionadded:: 1.13.0
Returns
-------
@@ -166,6 +168,17 @@ def unique(ar, return_index=False, return_inverse=False,
numpy.lib.arraysetops : Module with a number of other functions for
performing set operations on arrays.
+ Notes
+ -----
+ When an axis is specified the subarrays indexed by the axis are sorted.
+ This is done by making the specified axis the first dimension of the array
+ and then flattening the subarrays in C order. The flattened subarrays are
+ then viewed as a structured type with each element given a label, with the
+ effect that we end up with a 1-D array of structured types that can be
+ treated in the same way as any other 1-D array. The result is that the
+ flattened subarrays are sorted in lexicographic order starting with the
+ first element.
+
Examples
--------
>>> np.unique([1, 1, 2, 2, 3, 3])
@@ -217,14 +230,7 @@ def unique(ar, return_index=False, return_inverse=False,
ar = ar.reshape(orig_shape[0], -1)
ar = np.ascontiguousarray(ar)
- if ar.dtype.char in (np.typecodes['AllInteger'] +
- np.typecodes['Datetime'] + 'S'):
- # Optimization: Creating a view of your data with a np.void data type of
- # size the number of bytes in a full row. Handles any type where items
- # have a unique binary representation, i.e. 0 is only 0, not +0 and -0.
- dtype = np.dtype((np.void, ar.dtype.itemsize * ar.shape[1]))
- else:
- dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
+ dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
try:
consolidated = ar.view(dtype)
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index c2ba7ac86..c293c7da3 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -453,6 +453,15 @@ class TestUnique(object):
assert_array_equal(v.data, v2.data, msg)
assert_array_equal(v.mask, v2.mask, msg)
+ def test_unique_sort_order_with_axis(self):
+ # These tests fail if sorting along axis is done by treating subarrays
+ # as unsigned byte strings. See gh-10495.
+ fmt = "sort order incorrect for integer type '%s'"
+ for dt in 'bhilq':
+ a = np.array([[-1],[0]], dt)
+ b = np.unique(a, axis=0)
+ assert_array_equal(a, b, fmt % dt)
+
def _run_axis_tests(self, dtype):
data = np.array([[0, 1, 0, 0],
[1, 0, 0, 0],