diff options
author | Robert Kern <rkern@enthought.com> | 2016-05-24 11:05:48 +0100 |
---|---|---|
committer | Robert Kern <rkern@enthought.com> | 2016-05-24 11:07:03 +0100 |
commit | 65279ff854825f3421cc210450e4cf24acdd3866 (patch) | |
tree | 5d12863b6a946164fee8933c1517f0750e4fe2f2 /numpy/lib | |
parent | 8bc4e9ad19c14873b0ea5c2622cc86a1dfdd4017 (diff) | |
download | numpy-65279ff854825f3421cc210450e4cf24acdd3866.tar.gz |
ENH: correct initial index estimate in histogram.
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/function_base.py | 21 |
1 file changed, 16 insertions, 5 deletions
```diff
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 3533a59fc..24afa39c2 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -639,6 +639,9 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
         # Pre-compute histogram scaling factor
         norm = bins / (mx - mn)
 
+        # Compute the bin edges for potential correction.
+        bin_edges = linspace(mn, mx, bins + 1, endpoint=True)
+
         # We iterate over blocks here for two reasons: the first is that for
         # large arrays, it is actually faster (for example for a 10^8 array it
         # is 2x as fast) and it results in a memory footprint 3x lower in the
@@ -657,14 +660,22 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
                 tmp_a = tmp_a[keep]
                 if tmp_w is not None:
                     tmp_w = tmp_w[keep]
-            tmp_a = tmp_a.astype(float)
-            tmp_a -= mn
+            tmp_a_data = tmp_a.astype(float)
+            tmp_a = tmp_a_data - mn
             tmp_a *= norm
 
             # Compute the bin indices, and for values that lie exactly on mx we
             # need to subtract one
             indices = tmp_a.astype(np.intp)
-            indices[indices == bins] -= 1
+            equals_endpoint = (indices == bins)
+            indices[equals_endpoint] -= 1
+
+            # The index computation is not guaranteed to give exactly
+            # consistent results within ~1 ULP of the bin edges.
+            decrement = tmp_a_data < bin_edges[indices]
+            indices[decrement] -= 1
+            increment = (tmp_a_data >= bin_edges[indices + 1]) & ~equals_endpoint
+            indices[increment] += 1
 
             # We now compute the histogram using bincount
             if ntype.kind == 'c':
@@ -673,8 +684,8 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
             else:
                 n += np.bincount(indices, weights=tmp_w, minlength=bins).astype(ntype)
 
-        # We now compute the bin edges since these are returned
-        bins = linspace(mn, mx, bins + 1, endpoint=True)
+        # Rename the bin edges for return.
+        bins = bin_edges
     else:
         bins = asarray(bins)
         if (np.diff(bins) < 0).any():
```