summary refs log tree commit diff
path: root/numpy/lib
diff options
context:
space:
mode:
author Eric Wieser <wieser.eric@gmail.com> 2017-12-23 11:47:26 +0000
committer Eric Wieser <wieser.eric@gmail.com> 2017-12-24 22:32:05 +0000
commit f0ffd58e7dbe8f4b111b0c090e8df0c3b779e1fe (patch)
tree b792cb73d4aa87560fbefb4bb8e442b0bba74e5c /numpy/lib
parent d45cf0b261b4a563f44fa238d34bd55f051f315f (diff)
download numpy-f0ffd58e7dbe8f4b111b0c090e8df0c3b779e1fe.tar.gz
BUG: Allow nan values in the data when the bins are explicit
Fixes gh-7503 Closes gh-8984
Diffstat (limited to 'numpy/lib')
-rw-r--r-- numpy/lib/histograms.py 6
-rw-r--r-- numpy/lib/tests/test_histograms.py 25
2 files changed, 29 insertions, 2 deletions
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index 9c4ba8efc..ccae9de22 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -259,8 +259,6 @@ def _get_bin_edges(a, bins, range, weights):
The upper bound, lower bound, and number of bins, used in the optimized
implementation of `histogram` that works on uniform bins.
"""
- first_edge, last_edge = _get_outer_edges(a, range)
-
# parse the overloaded bins argument
n_equal_bins = None
bin_edges = None
@@ -276,6 +274,8 @@ def _get_bin_edges(a, bins, range, weights):
raise TypeError("Automated estimation of the number of "
"bins is not supported for weighted data")
+ first_edge, last_edge = _get_outer_edges(a, range)
+
# truncate the range if needed
if range is not None:
keep = (a >= first_edge)
@@ -304,6 +304,8 @@ def _get_bin_edges(a, bins, range, weights):
if n_equal_bins < 1:
raise ValueError('`bins` must be positive, when an integer')
+ first_edge, last_edge = _get_outer_edges(a, range)
+
elif np.ndim(bins) == 1:
bin_edges = np.asarray(bins)
if np.any(bin_edges[:-1] > bin_edges[1:]):
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
index 8319041b8..0986ad16b 100644
--- a/numpy/lib/tests/test_histograms.py
+++ b/numpy/lib/tests/test_histograms.py
@@ -249,6 +249,31 @@ class TestHistogram(object):
np.histogram([np.array([0.5]) for i in range(10)] + [.500000000000001])
np.histogram([np.array([0.5]) for i in range(10)] + [.5])
+ def test_some_nan_values(self):
+ # gh-7503
+ one_nan = np.array([0, 1, np.nan])
+ all_nan = np.array([np.nan, np.nan])
+
+ # the internal comparisons with NaN give warnings
+ sup = suppress_warnings()
+ sup.filter(RuntimeWarning)
+ with sup:
+ # can't infer range with nan
+ assert_raises(ValueError, histogram, one_nan, bins='auto')
+ assert_raises(ValueError, histogram, all_nan, bins='auto')
+
+ # explicit range solves the problem
+ h, b = histogram(one_nan, bins='auto', range=(0, 1))
+ assert_equal(h.sum(), 2) # nan is not counted
+ h, b = histogram(all_nan, bins='auto', range=(0, 1))
+ assert_equal(h.sum(), 0) # nan is not counted
+
+ # as does an explicit set of bins
+ h, b = histogram(one_nan, bins=[0, 1])
+ assert_equal(h.sum(), 2) # nan is not counted
+ h, b = histogram(all_nan, bins=[0, 1])
+ assert_equal(h.sum(), 0) # nan is not counted
+
class TestHistogramOptimBinNums(object):
"""