summaryrefslogtreecommitdiff
path: root/numpy/lib/histograms.py
diff options
context:
space:
mode:
authorEric Wieser <wieser.eric@gmail.com>2017-12-09 17:10:10 -0800
committerEric Wieser <wieser.eric@gmail.com>2017-12-24 10:26:26 +0000
commit425068b019fc5f3644f6b7cdc030ab3c7de857f3 (patch)
tree5da247b601cbb4d23db72bd53de6e4bab824b44e /numpy/lib/histograms.py
parentaecee019491defc79c7a881a2418803164887d74 (diff)
downloadnumpy-425068b019fc5f3644f6b7cdc030ab3c7de857f3.tar.gz
MAINT: extract private helper function to compute histogram bin edges
Diffstat (limited to 'numpy/lib/histograms.py')
-rw-r--r--numpy/lib/histograms.py159
1 files changed, 91 insertions, 68 deletions
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index 2dc2df31b..d75a18fce 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -238,6 +238,90 @@ def _get_outer_edges(a, range):
return first_edge, last_edge
+def _get_bin_edges(a, bins, range, weights):
+ """
+ Computes the bins used internally by `histogram`.
+
+ Parameters
+ ==========
+ a : ndarray
+ Ravelled data array
+ bins, range
+ Forwarded arguments from `histogram`.
+ weights : ndarray, optional
+ Ravelled weights array, or None
+
+ Returns
+ =======
+ bin_edges : ndarray
+ Array of bin edges
+ uniform_bins : (Number, Number, int):
+ The upper bound, lowerbound, and number of bins, used in the optimized
+ implementation of `histogram` that works on uniform bins.
+ """
+ first_edge, last_edge = _get_outer_edges(a, range)
+
+ # parse the overloaded bins argument
+ n_equal_bins = None
+ bin_edges = None
+
+ if isinstance(bins, basestring):
+ bin_name = bins
+ # if `bins` is a string for an automatic method,
+ # this will replace it with the number of bins calculated
+ if bin_name not in _hist_bin_selectors:
+ raise ValueError(
+ "{!r} is not a valid estimator for `bins`".format(bin_name))
+ if weights is not None:
+ raise TypeError("Automated estimation of the number of "
+ "bins is not supported for weighted data")
+
+ # truncate the range if needed
+ if range is not None:
+ keep = (a >= first_edge)
+ keep &= (a <= last_edge)
+ if not np.logical_and.reduce(keep):
+ a = a[keep]
+
+ if a.size == 0:
+ n_equal_bins = 1
+ else:
+ # Do not call selectors on empty arrays
+ width = _hist_bin_selectors[bin_name](a)
+ if width:
+ n_equal_bins = int(np.ceil((last_edge - first_edge) / width))
+ else:
+ # Width can be zero for some estimators, e.g. FD when
+ # the IQR of the data is zero.
+ n_equal_bins = 1
+
+ elif np.ndim(bins) == 0:
+ try:
+ n_equal_bins = operator.index(bins)
+ except TypeError:
+ raise TypeError(
+ '`bins` must be an integer, a string, or an array')
+ if n_equal_bins < 1:
+ raise ValueError('`bins` must be positive, when an integer')
+
+ elif np.ndim(bins) == 1:
+ bin_edges = np.asarray(bins)
+ if np.any(bin_edges[:-1] > bin_edges[1:]):
+ raise ValueError(
+ '`bins` must increase monotonically, when an array')
+
+ else:
+ raise ValueError('`bins` must be 1d, when an array')
+
+ if n_equal_bins is not None:
+ # bin edges must be computed
+ bin_edges = np.linspace(
+ first_edge, last_edge, n_equal_bins + 1, endpoint=True)
+ return bin_edges, (first_edge, last_edge, n_equal_bins)
+ else:
+ return bin_edges, None
+
+
def histogram(a, bins=10, range=None, normed=False, weights=None,
density=None):
r"""
@@ -453,75 +537,9 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
>>> plt.show()
"""
-
- # density overrides the normed keyword
- if density is not None:
- normed = False
-
a, weights = _ravel_and_check_weights(a, weights)
- # Do not modify the original value of range so we can check for `None`
- first_edge, last_edge = _get_outer_edges(a, range)
-
- # parse the overloaded bins argument
- n_equal_bins = None
- bin_edges = None
-
- if isinstance(bins, basestring):
- bin_name = bins
- # if `bins` is a string for an automatic method,
- # this will replace it with the number of bins calculated
- if bin_name not in _hist_bin_selectors:
- raise ValueError(
- "{!r} is not a valid estimator for `bins`".format(bin_name))
- if weights is not None:
- raise TypeError("Automated estimation of the number of "
- "bins is not supported for weighted data")
- # Make a reference to `a`
- b = a
- # Update the reference if the range needs truncation
- if range is not None:
- keep = (a >= first_edge)
- keep &= (a <= last_edge)
- if not np.logical_and.reduce(keep):
- b = a[keep]
-
- if b.size == 0:
- n_equal_bins = 1
- else:
- # Do not call selectors on empty arrays
- width = _hist_bin_selectors[bin_name](b)
- if width:
- n_equal_bins = int(np.ceil((last_edge - first_edge) / width))
- else:
- # Width can be zero for some estimators, e.g. FD when
- # the IQR of the data is zero.
- n_equal_bins = 1
-
- elif np.ndim(bins) == 0:
- try:
- n_equal_bins = operator.index(bins)
- except TypeError:
- raise TypeError(
- '`bins` must be an integer, a string, or an array')
- if n_equal_bins < 1:
- raise ValueError('`bins` must be positive, when an integer')
-
- elif np.ndim(bins) == 1:
- bin_edges = np.asarray(bins)
- if np.any(bin_edges[:-1] > bin_edges[1:]):
- raise ValueError(
- '`bins` must increase monotonically, when an array')
-
- else:
- raise ValueError('`bins` must be 1d, when an array')
-
- del bins
-
- # compute the bins if only the count was specified
- if n_equal_bins is not None:
- bin_edges = np.linspace(
- first_edge, last_edge, n_equal_bins + 1, endpoint=True)
+ bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights)
# Histogram is an integer or a float array depending on the weights.
if weights is None:
@@ -541,10 +559,11 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
np.can_cast(weights.dtype, complex)
)
- if n_equal_bins is not None and simple_weights:
+ if uniform_bins is not None and simple_weights:
# Fast algorithm for equal bins
# We now convert values of a to bin indices, under the assumption of
# equal bin widths (which is valid here).
+ first_edge, last_edge, n_equal_bins = uniform_bins
# Initialize empty histogram
n = np.zeros(n_equal_bins, ntype)
@@ -620,6 +639,10 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
n = np.diff(cum_n)
+ # density overrides the normed keyword
+ if density is not None:
+ normed = False
+
if density:
db = np.array(np.diff(bin_edges), float)
return n/db/n.sum(), bin_edges