summaryrefslogtreecommitdiff
path: root/numpy/lib
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib')
-rw-r--r--numpy/lib/function_base.py21
-rw-r--r--numpy/lib/tests/test_function_base.py11
2 files changed, 27 insertions, 5 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 3533a59fc..24afa39c2 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -639,6 +639,9 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
# Pre-compute histogram scaling factor
norm = bins / (mx - mn)
+ # Compute the bin edges for potential correction.
+ bin_edges = linspace(mn, mx, bins + 1, endpoint=True)
+
# We iterate over blocks here for two reasons: the first is that for
# large arrays, it is actually faster (for example for a 10^8 array it
# is 2x as fast) and it results in a memory footprint 3x lower in the
@@ -657,14 +660,22 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
tmp_a = tmp_a[keep]
if tmp_w is not None:
tmp_w = tmp_w[keep]
- tmp_a = tmp_a.astype(float)
- tmp_a -= mn
+ tmp_a_data = tmp_a.astype(float)
+ tmp_a = tmp_a_data - mn
tmp_a *= norm
# Compute the bin indices, and for values that lie exactly on mx we
# need to subtract one
indices = tmp_a.astype(np.intp)
- indices[indices == bins] -= 1
+ equals_endpoint = (indices == bins)
+ indices[equals_endpoint] -= 1
+
+ # The index computation is not guaranteed to give exactly
+ # consistent results within ~1 ULP of the bin edges.
+ decrement = tmp_a_data < bin_edges[indices]
+ indices[decrement] -= 1
+ increment = (tmp_a_data >= bin_edges[indices + 1]) & ~equals_endpoint
+ indices[increment] += 1
# We now compute the histogram using bincount
if ntype.kind == 'c':
@@ -673,8 +684,8 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
else:
n += np.bincount(indices, weights=tmp_w, minlength=bins).astype(ntype)
- # We now compute the bin edges since these are returned
- bins = linspace(mn, mx, bins + 1, endpoint=True)
+ # Rename the bin edges for return.
+ bins = bin_edges
else:
bins = asarray(bins)
if (np.diff(bins) < 0).any():
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 0f71393ad..868a28036 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -1407,6 +1407,17 @@ class TestHistogram(TestCase):
assert_raises(ValueError, histogram, vals, range=[np.nan,0.75])
assert_raises(ValueError, histogram, vals, range=[0.25,np.inf])
+ def test_bin_edge_cases(self):
+ # Ensure that floating-point computations correctly place edge cases.
+ arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
+ hist, edges = np.histogram(arr, bins=8296, range=(2, 2280))
+ mask = hist > 0
+ left_edges = edges[:-1][mask]
+ right_edges = edges[1:][mask]
+ for x, left, right in zip(arr, left_edges, right_edges):
+ self.assertGreaterEqual(x, left)
+ self.assertLess(x, right)
+
class TestHistogramOptimBinNums(TestCase):
"""