summary refs log tree commit diff
path: root/numpy/lib/histograms.py
diff options
context:
space:
mode:
author Eric Wieser <wieser.eric@gmail.com> 2018-01-03 23:13:12 -0800
committer Eric Wieser <wieser.eric@gmail.com> 2018-02-02 01:04:34 -0800
commit 1122303c1834eff71f9441901d1e85e74f200f93 (patch)
tree 917dd70117c9af6ff81c708b7270eda8e13b982b /numpy/lib/histograms.py
parent 728af07231b7bea3218f4149dfc60b8ff952b7b2 (diff)
download numpy-1122303c1834eff71f9441901d1e85e74f200f93.tar.gz
BUG: Fix crashes when using float32 values in uniform histograms
Fixes #8123, closes #9189, fixes #10319. This is a workaround for #10322, not a fix for it. Adds tests for cases where bounds are more precise than the data, which led to inconsistencies in the optimized path.
Diffstat (limited to 'numpy/lib/histograms.py')
-rw-r--r-- numpy/lib/histograms.py 25
1 files changed, 18 insertions, 7 deletions
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index ec2d0fe81..c5679ace8 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -318,9 +318,17 @@ def _get_bin_edges(a, bins, range, weights):
raise ValueError('`bins` must be 1d, when an array')
if n_equal_bins is not None:
+ # gh-10322 means that type resolution rules are dependent on array
+ # shapes. To avoid this causing problems, we pick a type now and stick
+ # with it throughout.
+ bin_type = np.result_type(first_edge, last_edge, a)
+ if np.issubdtype(bin_type, np.integer):
+ bin_type = np.result_type(bin_type, float)
+
# bin edges must be computed
bin_edges = np.linspace(
- first_edge, last_edge, n_equal_bins + 1, endpoint=True)
+ first_edge, last_edge, n_equal_bins + 1,
+ endpoint=True, dtype=bin_type)
return bin_edges, (first_edge, last_edge, n_equal_bins)
else:
return bin_edges, None
@@ -605,21 +613,24 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
tmp_a = tmp_a[keep]
if tmp_w is not None:
tmp_w = tmp_w[keep]
- tmp_a_data = tmp_a.astype(float)
- tmp_a = tmp_a_data - first_edge
- tmp_a *= norm
+
+ # This cast ensures no type promotions occur below, which gh-10322
+ # makes unpredictable. Getting it wrong leads to precision errors
+ # like gh-8123.
+ tmp_a = tmp_a.astype(bin_edges.dtype, copy=False)
# Compute the bin indices, and for values that lie exactly on
# last_edge we need to subtract one
- indices = tmp_a.astype(np.intp)
+ f_indices = (tmp_a - first_edge) * norm
+ indices = f_indices.astype(np.intp)
indices[indices == n_equal_bins] -= 1
# The index computation is not guaranteed to give exactly
# consistent results within ~1 ULP of the bin edges.
- decrement = tmp_a_data < bin_edges[indices]
+ decrement = tmp_a < bin_edges[indices]
indices[decrement] -= 1
# The last bin includes the right edge. The other bins do not.
- increment = ((tmp_a_data >= bin_edges[indices + 1])
+ increment = ((tmp_a >= bin_edges[indices + 1])
& (indices != n_equal_bins - 1))
indices[increment] += 1