summaryrefslogtreecommitdiff
path: root/numpy/lib/function_base.py
diff options
context:
space:
mode:
author Varun Nayyar <nayyarv@gmail.com> 2015-09-22 19:38:34 +1000
committer Joseph Fox-Rabinovitz <jfoxrabinovitz@gmail.com> 2016-02-13 23:15:28 -0500
commit 62bb0cb9681f638b56dc29b970228cb36c2104b6 (patch)
tree 10dd5fd6834cbeb16638efa90ee0e8436fec5117 /numpy/lib/function_base.py
parent 26af0ce08f2c6660d8623446d79cb0569f20c2f6 (diff)
download numpy-62bb0cb9681f638b56dc29b970228cb36c2104b6.tar.gz
ENH: Adding support to the range keyword for estimation of the optimal number of bins and associated tests
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r-- numpy/lib/function_base.py 26
1 files changed, 23 insertions, 3 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 06d1ee4a7..66213c5e0 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -76,7 +76,7 @@ def iterable(y):
return True
-def _hist_optim_numbins_estimator(a, estimator):
+def _hist_optim_numbins_estimator(a, estimator, data_range=None, data_weights=None):
"""
A helper function to be called from ``histogram`` to deal with
estimating optimal number of bins.
@@ -84,15 +84,34 @@ def _hist_optim_numbins_estimator(a, estimator):
A description of the estimators can be found at
https://en.wikipedia.org/wiki/Histogram#Number_of_bins_and_width
+ Parameters
+ ----------
+ a : array_like
+ The data with which to estimate the number of bins
estimator: str
If ``estimator`` is one of ['auto', 'fd', 'scott', 'doane',
'rice', 'sturges', 'sqrt'], this function will choose the
appropriate estimation method and return the optimal number of
bins it calculates.
+ data_range: tuple (min, max)
+ The range that the data to be binned should be restricted to.
+ data_weights:
+ weights are not supported, so this field must be empty or None.
"""
if a.size == 0:
return 1
+ if data_weights is not None:
+ raise TypeError("Automated estimation of the number of "
+ "bins is not supported for weighted data")
+
+ if data_range is not None:
+ mn, mx = data_range
+ keep = (a >= mn)
+ keep &= (a <= mx)
+ if not np.logical_and.reduce(keep):
+ a = a[keep]
+
def sqrt(x):
"""
Square Root Estimator
@@ -223,7 +242,8 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
If `bins` is a string from the list below, `histogram` will use
the method chosen to calculate the optimal number of bins (see
Notes for more detail on the estimators). For visualisation, we
- suggest using the 'auto' option.
+ suggest using the 'auto' option. Weighted data is not supported
+ for automated bin size selection.
'auto'
Maximum of the 'sturges' and 'fd' estimators. Provides good
@@ -426,7 +446,7 @@ def histogram(a, bins=10, range=None, normed=False, weights=None,
if isinstance(bins, basestring):
- bins = _hist_optim_numbins_estimator(a, bins)
+ bins = _hist_optim_numbins_estimator(a, bins, range, weights)
# if `bins` is a string for an automatic method,
# this will replace it with the number of bins calculated