diff options
author | Eric Wieser <wieser.eric@gmail.com> | 2017-12-23 11:47:26 +0000 |
---|---|---|
committer | Eric Wieser <wieser.eric@gmail.com> | 2017-12-24 22:32:05 +0000 |
commit | f0ffd58e7dbe8f4b111b0c090e8df0c3b779e1fe (patch) | |
tree | b792cb73d4aa87560fbefb4bb8e442b0bba74e5c /numpy/lib | |
parent | d45cf0b261b4a563f44fa238d34bd55f051f315f (diff) | |
download | numpy-f0ffd58e7dbe8f4b111b0c090e8df0c3b779e1fe.tar.gz |
BUG: Allow nan values in the data when the bins are explicit
Fixes gh-7503
Closes gh-8984
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/histograms.py | 6 | ||||
-rw-r--r-- | numpy/lib/tests/test_histograms.py | 25 |
2 files changed, 29 insertions, 2 deletions
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 9c4ba8efc..ccae9de22 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -259,8 +259,6 @@ def _get_bin_edges(a, bins, range, weights): The upper bound, lower bound, and number of bins, used in the optimized implementation of `histogram` that works on uniform bins. """ - first_edge, last_edge = _get_outer_edges(a, range) - # parse the overloaded bins argument n_equal_bins = None bin_edges = None @@ -276,6 +274,8 @@ def _get_bin_edges(a, bins, range, weights): raise TypeError("Automated estimation of the number of " "bins is not supported for weighted data") + first_edge, last_edge = _get_outer_edges(a, range) + # truncate the range if needed if range is not None: keep = (a >= first_edge) @@ -304,6 +304,8 @@ def _get_bin_edges(a, bins, range, weights): if n_equal_bins < 1: raise ValueError('`bins` must be positive, when an integer') + first_edge, last_edge = _get_outer_edges(a, range) + elif np.ndim(bins) == 1: bin_edges = np.asarray(bins) if np.any(bin_edges[:-1] > bin_edges[1:]): diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py index 8319041b8..0986ad16b 100644 --- a/numpy/lib/tests/test_histograms.py +++ b/numpy/lib/tests/test_histograms.py @@ -249,6 +249,31 @@ class TestHistogram(object): np.histogram([np.array([0.5]) for i in range(10)] + [.500000000000001]) np.histogram([np.array([0.5]) for i in range(10)] + [.5]) + def test_some_nan_values(self): + # gh-7503 + one_nan = np.array([0, 1, np.nan]) + all_nan = np.array([np.nan, np.nan]) + + # the internal comparisons with NaN give warnings + sup = suppress_warnings() + sup.filter(RuntimeWarning) + with sup: + # can't infer range with nan + assert_raises(ValueError, histogram, one_nan, bins='auto') + assert_raises(ValueError, histogram, all_nan, bins='auto') + + # explicit range solves the problem + h, b = histogram(one_nan, bins='auto', range=(0, 1)) + assert_equal(h.sum(), 2) # nan is not 
counted + h, b = histogram(all_nan, bins='auto', range=(0, 1)) + assert_equal(h.sum(), 0) # nan is not counted + + # as does an explicit set of bins + h, b = histogram(one_nan, bins=[0, 1]) + assert_equal(h.sum(), 2) # nan is not counted + h, b = histogram(all_nan, bins=[0, 1]) + assert_equal(h.sum(), 0) # nan is not counted + class TestHistogramOptimBinNums(object): """ |