summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- benchmarks/benchmarks/bench_lib.py 24
-rw-r--r-- doc/release/upcoming_changes/18070.improvement.rst 12
-rw-r--r-- numpy/lib/arraysetops.py 21
-rw-r--r-- numpy/lib/tests/test_arraysetops.py 46
4 files changed, 102 insertions, 1 deletions
diff --git a/benchmarks/benchmarks/bench_lib.py b/benchmarks/benchmarks/bench_lib.py
index c22ceaa5e..f7884cd6c 100644
--- a/benchmarks/benchmarks/bench_lib.py
+++ b/benchmarks/benchmarks/bench_lib.py
@@ -53,6 +53,7 @@ class Pad(Benchmark):
def time_pad(self, shape, pad_width, mode):
np.pad(self.array, pad_width, mode)
+
class Nan(Benchmark):
"""Benchmarks for nan functions"""
@@ -113,3 +114,26 @@ class Nan(Benchmark):
def time_nanpercentile(self, array_size, percent_nans):
np.nanpercentile(self.arr, q=50)
+
+
+class Unique(Benchmark):
+ """Benchmark for np.unique with np.nan values."""
+
+ param_names = ["array_size", "percent_nans"]  # names of the two parameters received by setup/time_unique
+ params = [
+ # lengths of the 1-D input arrays (200 and 200000 elements)
+ [200, int(2e5)],
+ # percentage (0-100, not a fraction) of entries replaced by np.nan
+ [0, 0.1, 2., 50., 90.],
+ ]
+
+ def setup(self, array_size, percent_nans):
+ np.random.seed(123)  # fixed seed: the same array is generated on every run
+ # draw uniform random samples, then overwrite those below
+ # percent_nans / 100 with np.nan (approximately percent_nans percent)
+ base_array = np.random.uniform(size=array_size)
+ base_array[base_array < percent_nans / 100.] = np.nan
+ self.arr = base_array  # read by time_unique
+
+ def time_unique(self, array_size, percent_nans):
+ np.unique(self.arr)  # the timed call; its result is discarded
diff --git a/doc/release/upcoming_changes/18070.improvement.rst b/doc/release/upcoming_changes/18070.improvement.rst
new file mode 100644
index 000000000..ae750fb12
--- /dev/null
+++ b/doc/release/upcoming_changes/18070.improvement.rst
@@ -0,0 +1,12 @@
+``np.unique`` now returns a single ``NaN``
+------------------------------------------
+When ``np.unique`` operated on an array with multiple ``NaN`` entries,
+its return included a ``NaN`` for each entry that was ``NaN`` in the original array.
+This is now improved such that the returned array contains just one ``NaN`` as the
+last element.
+
+Also for complex arrays all ``NaN`` values are considered equivalent
+(no matter whether the ``NaN`` is in the real or imaginary part). As the
+representative for the returned array the smallest one in the
+lexicographical order is chosen - see ``np.sort`` for how the lexicographical
+order is defined for complex arrays.
\ No newline at end of file
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
index eb5c488e4..7600e17be 100644
--- a/numpy/lib/arraysetops.py
+++ b/numpy/lib/arraysetops.py
@@ -209,6 +209,16 @@ def unique(ar, return_index=False, return_inverse=False,
flattened subarrays are sorted in lexicographic order starting with the
first element.
+ .. versionchanged:: 1.21
+ If nan values are in the input array, a single nan is put
+ to the end of the sorted unique values.
+
+ Also for complex arrays all NaN values are considered equivalent
+ (no matter whether the NaN is in the real or imaginary part).
+ As the representative for the returned array the smallest one in the
+ lexicographical order is chosen - see np.sort for how the lexicographical
+ order is defined for complex arrays.
+
Examples
--------
>>> np.unique([1, 1, 2, 2, 3, 3])
@@ -324,7 +334,16 @@ def _unique1d(ar, return_index=False, return_inverse=False,
aux = ar
mask = np.empty(aux.shape, dtype=np.bool_)
mask[:1] = True
- mask[1:] = aux[1:] != aux[:-1]
+ if aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and np.isnan(aux[-1]):
+ if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent
+ aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')
+ else:
+ aux_firstnan = np.searchsorted(aux, aux[-1], side='left')
+ mask[1:aux_firstnan] = (aux[1:aux_firstnan] != aux[:aux_firstnan - 1])
+ mask[aux_firstnan] = True
+ mask[aux_firstnan + 1:] = False
+ else:
+ mask[1:] = aux[1:] != aux[:-1]
ret = (aux[mask],)
if return_index:
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index de2ef255c..d62da9efb 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -564,6 +564,52 @@ class TestUnique:
assert_equal(a3_idx.dtype, np.intp)
assert_equal(a3_inv.dtype, np.intp)
+ # test for ticket 2111 - float
+ a = [2.0, np.nan, 1.0, np.nan]
+ ua = [1.0, 2.0, np.nan]
+ ua_idx = [2, 0, 1]
+ ua_inv = [1, 2, 0, 2]
+ ua_cnt = [1, 1, 2]
+ assert_equal(np.unique(a), ua)
+ assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
+ assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
+ assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
+
+ # test for ticket 2111 - complex
+ a = [2.0-1j, np.nan, 1.0+1j, complex(0.0, np.nan), complex(1.0, np.nan)]
+ ua = [1.0+1j, 2.0-1j, complex(0.0, np.nan)]
+ ua_idx = [2, 0, 3]
+ ua_inv = [1, 2, 0, 2, 2]
+ ua_cnt = [1, 1, 3]
+ assert_equal(np.unique(a), ua)
+ assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
+ assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
+ assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
+
+ # test for ticket 2111 - datetime64
+ nat = np.datetime64('nat')
+ a = [np.datetime64('2020-12-26'), nat, np.datetime64('2020-12-24'), nat]
+ ua = [np.datetime64('2020-12-24'), np.datetime64('2020-12-26'), nat]
+ ua_idx = [2, 0, 1]
+ ua_inv = [1, 2, 0, 2]
+ ua_cnt = [1, 1, 2]
+ assert_equal(np.unique(a), ua)
+ assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
+ assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
+ assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
+
+ # test for ticket 2111 - timedelta
+ nat = np.timedelta64('nat')
+ a = [np.timedelta64(1, 'D'), nat, np.timedelta64(1, 'h'), nat]
+ ua = [np.timedelta64(1, 'h'), np.timedelta64(1, 'D'), nat]
+ ua_idx = [2, 0, 1]
+ ua_inv = [1, 2, 0, 2]
+ ua_cnt = [1, 1, 2]
+ assert_equal(np.unique(a), ua)
+ assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
+ assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
+ assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
+
def test_unique_axis_errors(self):
assert_raises(TypeError, self._run_axis_tests, object)
assert_raises(TypeError, self._run_axis_tests,