diff options
author | Eric Wieser <wieser.eric@gmail.com> | 2017-12-09 17:10:10 -0800 |
---|---|---|
committer | Eric Wieser <wieser.eric@gmail.com> | 2017-12-24 10:26:26 +0000 |
commit | 425068b019fc5f3644f6b7cdc030ab3c7de857f3 (patch) | |
tree | 5da247b601cbb4d23db72bd53de6e4bab824b44e /numpy/lib/histograms.py | |
parent | aecee019491defc79c7a881a2418803164887d74 (diff) | |
download | numpy-425068b019fc5f3644f6b7cdc030ab3c7de857f3.tar.gz |
MAINT: extract private helper function to compute histogram bin edges
Diffstat (limited to 'numpy/lib/histograms.py')
-rw-r--r-- | numpy/lib/histograms.py | 159 |
1 files changed, 91 insertions, 68 deletions
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 2dc2df31b..d75a18fce 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -238,6 +238,90 @@ def _get_outer_edges(a, range): return first_edge, last_edge +def _get_bin_edges(a, bins, range, weights): + """ + Computes the bins used internally by `histogram`. + + Parameters + ========== + a : ndarray + Ravelled data array + bins, range + Forwarded arguments from `histogram`. + weights : ndarray, optional + Ravelled weights array, or None + + Returns + ======= + bin_edges : ndarray + Array of bin edges + uniform_bins : (Number, Number, int): + The upper bound, lowerbound, and number of bins, used in the optimized + implementation of `histogram` that works on uniform bins. + """ + first_edge, last_edge = _get_outer_edges(a, range) + + # parse the overloaded bins argument + n_equal_bins = None + bin_edges = None + + if isinstance(bins, basestring): + bin_name = bins + # if `bins` is a string for an automatic method, + # this will replace it with the number of bins calculated + if bin_name not in _hist_bin_selectors: + raise ValueError( + "{!r} is not a valid estimator for `bins`".format(bin_name)) + if weights is not None: + raise TypeError("Automated estimation of the number of " + "bins is not supported for weighted data") + + # truncate the range if needed + if range is not None: + keep = (a >= first_edge) + keep &= (a <= last_edge) + if not np.logical_and.reduce(keep): + a = a[keep] + + if a.size == 0: + n_equal_bins = 1 + else: + # Do not call selectors on empty arrays + width = _hist_bin_selectors[bin_name](a) + if width: + n_equal_bins = int(np.ceil((last_edge - first_edge) / width)) + else: + # Width can be zero for some estimators, e.g. FD when + # the IQR of the data is zero. + n_equal_bins = 1 + + elif np.ndim(bins) == 0: + try: + n_equal_bins = operator.index(bins) + except TypeError: + raise TypeError( + '`bins` must be an integer, a string, or an array') + if n_equal_bins < 1: + raise ValueError('`bins` must be positive, when an integer') + + elif np.ndim(bins) == 1: + bin_edges = np.asarray(bins) + if np.any(bin_edges[:-1] > bin_edges[1:]): + raise ValueError( + '`bins` must increase monotonically, when an array') + + else: + raise ValueError('`bins` must be 1d, when an array') + + if n_equal_bins is not None: + # bin edges must be computed + bin_edges = np.linspace( + first_edge, last_edge, n_equal_bins + 1, endpoint=True) + return bin_edges, (first_edge, last_edge, n_equal_bins) + else: + return bin_edges, None + + def histogram(a, bins=10, range=None, normed=False, weights=None, density=None): r""" @@ -453,75 +537,9 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, >>> plt.show() """ - - # density overrides the normed keyword - if density is not None: - normed = False - a, weights = _ravel_and_check_weights(a, weights) - # Do not modify the original value of range so we can check for `None` - first_edge, last_edge = _get_outer_edges(a, range) - - # parse the overloaded bins argument - n_equal_bins = None - bin_edges = None - - if isinstance(bins, basestring): - bin_name = bins - # if `bins` is a string for an automatic method, - # this will replace it with the number of bins calculated - if bin_name not in _hist_bin_selectors: - raise ValueError( - "{!r} is not a valid estimator for `bins`".format(bin_name)) - if weights is not None: - raise TypeError("Automated estimation of the number of " - "bins is not supported for weighted data") - # Make a reference to `a` - b = a - # Update the reference if the range needs truncation - if range is not None: - keep = (a >= first_edge) - keep &= (a <= last_edge) - if not np.logical_and.reduce(keep): - b = a[keep] - - if b.size == 0: - n_equal_bins = 1 - else: - # Do not call selectors on empty arrays - width = _hist_bin_selectors[bin_name](b) - if width: - n_equal_bins = int(np.ceil((last_edge - first_edge) / width)) - else: - # Width can be zero for some estimators, e.g. FD when - # the IQR of the data is zero. - n_equal_bins = 1 - - elif np.ndim(bins) == 0: - try: - n_equal_bins = operator.index(bins) - except TypeError: - raise TypeError( - '`bins` must be an integer, a string, or an array') - if n_equal_bins < 1: - raise ValueError('`bins` must be positive, when an integer') - - elif np.ndim(bins) == 1: - bin_edges = np.asarray(bins) - if np.any(bin_edges[:-1] > bin_edges[1:]): - raise ValueError( - '`bins` must increase monotonically, when an array') - - else: - raise ValueError('`bins` must be 1d, when an array') - - del bins - - # compute the bins if only the count was specified - if n_equal_bins is not None: - bin_edges = np.linspace( - first_edge, last_edge, n_equal_bins + 1, endpoint=True) + bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights) # Histogram is an integer or a float array depending on the weights. if weights is None: @@ -541,10 +559,11 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, np.can_cast(weights.dtype, complex) ) - if n_equal_bins is not None and simple_weights: + if uniform_bins is not None and simple_weights: # Fast algorithm for equal bins # We now convert values of a to bin indices, under the assumption of # equal bin widths (which is valid here). + first_edge, last_edge, n_equal_bins = uniform_bins # Initialize empty histogram n = np.zeros(n_equal_bins, ntype) @@ -620,6 +639,10 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, n = np.diff(cum_n) + # density overrides the normed keyword + if density is not None: + normed = False + if density: db = np.array(np.diff(bin_edges), float) return n/db/n.sum(), bin_edges |