diff options
author | Eric Wieser <wieser.eric@gmail.com> | 2018-01-03 23:13:12 -0800 |
---|---|---|
committer | Eric Wieser <wieser.eric@gmail.com> | 2018-02-02 01:04:34 -0800 |
commit | 1122303c1834eff71f9441901d1e85e74f200f93 (patch) | |
tree | 917dd70117c9af6ff81c708b7270eda8e13b982b /numpy/lib/histograms.py | |
parent | 728af07231b7bea3218f4149dfc60b8ff952b7b2 (diff) | |
download | numpy-1122303c1834eff71f9441901d1e85e74f200f93.tar.gz |
BUG: Fix crashes when using float32 values in uniform histograms
Fixes #8123, closes #9189, fixes #10319
This is a workaround to #10322, not a fix for it.
Adds tests for cases where bounds are more precise than the data, which led to inconsistencies in the optimized path.
Diffstat (limited to 'numpy/lib/histograms.py')
-rw-r--r-- | numpy/lib/histograms.py | 25 |
1 files changed, 18 insertions, 7 deletions
```diff
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index ec2d0fe81..c5679ace8 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -318,9 +318,17 @@ def _get_bin_edges(a, bins, range, weights):
             raise ValueError('`bins` must be 1d, when an array')
 
     if n_equal_bins is not None:
+        # gh-10322 means that type resolution rules are dependent on array
+        # shapes. To avoid this causing problems, we pick a type now and stick
+        # with it throughout.
+        bin_type = np.result_type(first_edge, last_edge, a)
+        if np.issubdtype(bin_type, np.integer):
+            bin_type = np.result_type(bin_type, float)
+
         # bin edges must be computed
         bin_edges = np.linspace(
-            first_edge, last_edge, n_equal_bins + 1, endpoint=True)
+            first_edge, last_edge, n_equal_bins + 1,
+            endpoint=True, dtype=bin_type)
         return bin_edges, (first_edge, last_edge, n_equal_bins)
     else:
         return bin_edges, None
@@ -605,21 +613,24 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
                 tmp_a = tmp_a[keep]
                 if tmp_w is not None:
                     tmp_w = tmp_w[keep]
-            tmp_a_data = tmp_a.astype(float)
-            tmp_a = tmp_a_data - first_edge
-            tmp_a *= norm
+
+            # This cast ensures no type promotions occur below, which gh-10322
+            # make unpredictable. Getting it wrong leads to precision errors
+            # like gh-8123.
+            tmp_a = tmp_a.astype(bin_edges.dtype, copy=False)
 
             # Compute the bin indices, and for values that lie exactly on
             # last_edge we need to subtract one
-            indices = tmp_a.astype(np.intp)
+            f_indices = (tmp_a - first_edge) * norm
+            indices = f_indices.astype(np.intp)
             indices[indices == n_equal_bins] -= 1
 
             # The index computation is not guaranteed to give exactly
             # consistent results within ~1 ULP of the bin edges.
-            decrement = tmp_a_data < bin_edges[indices]
+            decrement = tmp_a < bin_edges[indices]
             indices[decrement] -= 1
             # The last bin includes the right edge. The other bins do not.
-            increment = ((tmp_a_data >= bin_edges[indices + 1])
+            increment = ((tmp_a >= bin_edges[indices + 1])
                          & (indices != n_equal_bins - 1))
             indices[increment] += 1
 
```