summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharles Harris <charlesr.harris@gmail.com>2018-02-16 17:09:03 -0700
committerGitHub <noreply@github.com>2018-02-16 17:09:03 -0700
commita35a0404633bc9c293cb27068b4263dce428f4e0 (patch)
treeeff7f1183095b5a4d0b36a8050c433ae69a7ddaa
parentc5bdcd5873a1ba2158f3c97fc3c21df88eb890f5 (diff)
parentc9d4e6e1078238f5748fb4536b12da8956f63737 (diff)
downloadnumpy-a35a0404633bc9c293cb27068b4263dce428f4e0.tar.gz
Merge pull request #10608 from charris/backport-10588
BUG: Revert sort optimization in np.unique.
-rw-r--r--numpy/lib/arraysetops.py36
-rw-r--r--numpy/lib/tests/test_arraysetops.py9
2 files changed, 30 insertions, 15 deletions
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index 6fce1c047..c02b84104 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -135,16 +135,18 @@ def unique(ar, return_index=False, return_inverse=False,
return_counts : bool, optional
If True, also return the number of times each unique item appears
in `ar`.
+
.. versionadded:: 1.9.0
- axis : int or None, optional
- The axis to operate on. If None, `ar` will be flattened beforehand.
- Otherwise, duplicate items will be removed along the provided axis,
- with all the other axes belonging to the each of the unique elements.
- Object arrays or structured arrays that contain objects are not
- supported if the `axis` kwarg is used.
- .. versionadded:: 1.13.0
+ axis : int or None, optional
+ The axis to operate on. If None, `ar` will be flattened. If an integer,
+ the subarrays indexed by the given axis will be flattened and treated
+ as the elements of a 1-D array with the dimension of the given axis,
+ see the notes for more details. Object arrays or structured arrays
+ that contain objects are not supported if the `axis` kwarg is used. The
+ default is None.
+ .. versionadded:: 1.13.0
Returns
-------
@@ -166,6 +168,17 @@ def unique(ar, return_index=False, return_inverse=False,
numpy.lib.arraysetops : Module with a number of other functions for
performing set operations on arrays.
+ Notes
+ -----
+ When an axis is specified the subarrays indexed by the axis are sorted.
+ This is done by making the specified axis the first dimension of the array
+ and then flattening the subarrays in C order. The flattened subarrays are
+ then viewed as a structured type with each element given a label, with the
+ effect that we end up with a 1-D array of structured types that can be
+ treated in the same way as any other 1-D array. The result is that the
+ flattened subarrays are sorted in lexicographic order starting with the
+ first element.
+
Examples
--------
>>> np.unique([1, 1, 2, 2, 3, 3])
@@ -217,14 +230,7 @@ def unique(ar, return_index=False, return_inverse=False,
ar = ar.reshape(orig_shape[0], -1)
ar = np.ascontiguousarray(ar)
- if ar.dtype.char in (np.typecodes['AllInteger'] +
- np.typecodes['Datetime'] + 'S'):
- # Optimization: Creating a view of your data with a np.void data type of
- # size the number of bytes in a full row. Handles any type where items
- # have a unique binary representation, i.e. 0 is only 0, not +0 and -0.
- dtype = np.dtype((np.void, ar.dtype.itemsize * ar.shape[1]))
- else:
- dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
+ dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
try:
consolidated = ar.view(dtype)
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index c2ba7ac86..c293c7da3 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -453,6 +453,15 @@ class TestUnique(object):
assert_array_equal(v.data, v2.data, msg)
assert_array_equal(v.mask, v2.mask, msg)
+ def test_unique_sort_order_with_axis(self):
+ # These tests fail if sorting along axis is done by treating subarrays
+ # as unsigned byte strings. See gh-10495.
+ fmt = "sort order incorrect for integer type '%s'"
+ for dt in 'bhilq':
+ a = np.array([[-1],[0]], dt)
+ b = np.unique(a, axis=0)
+ assert_array_equal(a, b, fmt % dt)
+
def _run_axis_tests(self, dtype):
data = np.array([[0, 1, 0, 0],
[1, 0, 0, 0],