summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eric Wieser <wieser.eric@gmail.com> 2017-12-22 15:39:23 -0800
committer Eric Wieser <wieser.eric@gmail.com> 2017-12-26 08:47:32 +0000
commit 1e08c83a1b84a3b847c101d478814c18adda56f2 (patch)
tree 98a4a15c124f09a910c87a799e825ee20680e35d
parent 59084fa132c244c0ae622cae8f7a58a363eaa7e8 (diff)
download numpy-1e08c83a1b84a3b847c101d478814c18adda56f2.tar.gz
ENH: Add support for datetimes to histograms
Currently only supported for explicit bins
-rw-r--r-- doc/release/1.15.0-notes.rst 5
-rw-r--r-- numpy/lib/histograms.py 20
-rw-r--r-- numpy/lib/tests/test_histograms.py 25
3 files changed, 41 insertions, 9 deletions
diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst
index a24156ea0..bd0dccb63 100644
--- a/doc/release/1.15.0-notes.rst
+++ b/doc/release/1.15.0-notes.rst
@@ -74,6 +74,11 @@ Note that calling `histogram` on NaN values continues to raise the
`RuntimeWarning`s typical of working with nan values, which can be silenced
as usual with `errstate`.
+``histogram`` works on datetime types, when explicit bin edges are given
+------------------------------------------------------------------------
+Dates, times, and timedeltas can now be histogrammed. The bin edges must be
+passed explicitly, and are not yet computed automatically.
+
``np.r_`` works with 0d arrays, and ``np.ma.mr_`` works with ``np.ma.masked``
-----------------------------------------------------------------------------
0d arrays passed to the `r_` and `mr_` concatenation helpers are now treated as
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index ccae9de22..a19ff07d8 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -217,23 +217,25 @@ def _get_outer_edges(a, range):
Determine the outer bin edges to use, from either the data or the range
argument
"""
- if range is None:
- if a.size == 0:
- # handle empty arrays. Can't determine range, so use 0-1.
- first_edge, last_edge = 0.0, 1.0
- else:
- first_edge, last_edge = a.min() + 0.0, a.max() + 0.0
+ if range is not None:
+ first_edge, last_edge = range
+ elif a.size == 0:
+ # handle empty arrays. Can't determine range, so use 0-1.
+ first_edge, last_edge = 0, 1
else:
- first_edge, last_edge = [mi + 0.0 for mi in range]
+ first_edge, last_edge = a.min(), a.max()
+
if first_edge > last_edge:
raise ValueError(
'max must be larger than min in range parameter.')
if not (np.isfinite(first_edge) and np.isfinite(last_edge)):
raise ValueError(
'range parameter must be finite.')
+
+ # expand empty range to avoid divide by zero
if first_edge == last_edge:
- first_edge -= 0.5
- last_edge += 0.5
+ first_edge = first_edge - 0.5
+ last_edge = last_edge + 0.5
return first_edge, last_edge
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
index 0986ad16b..58547dc17 100644
--- a/numpy/lib/tests/test_histograms.py
+++ b/numpy/lib/tests/test_histograms.py
@@ -274,6 +274,31 @@ class TestHistogram(object):
h, b = histogram(all_nan, bins=[0, 1])
assert_equal(h.sum(), 0) # nan is not counted
+ def test_datetime(self):
+ begin = np.datetime64('2000-01-01', 'D')
+ offsets = np.array([0, 0, 1, 1, 2, 3, 5, 10, 20])
+ bins = np.array([0, 2, 7, 20])
+ dates = begin + offsets
+ date_bins = begin + bins
+
+ td = np.dtype('timedelta64[D]')
+
+ # Results should be the same for integer offsets or datetime values.
+ # For now, only explicit bins are supported, since linspace does not
+ # work on datetimes or timedeltas
+ d_count, d_edge = histogram(dates, bins=date_bins)
+ t_count, t_edge = histogram(offsets.astype(td), bins=bins.astype(td))
+ i_count, i_edge = histogram(offsets, bins=bins)
+
+ assert_equal(d_count, i_count)
+ assert_equal(t_count, i_count)
+
+ assert_equal((d_edge - begin).astype(int), i_edge)
+ assert_equal(t_edge.astype(int), i_edge)
+
+ assert_equal(d_edge.dtype, dates.dtype)
+ assert_equal(t_edge.dtype, td)
+
class TestHistogramOptimBinNums(object):
"""