diff options
author | Antony Lee <anntzer.lee@gmail.com> | 2015-12-05 20:46:09 -0800 |
---|---|---|
committer | Antony Lee <anntzer.lee@gmail.com> | 2016-01-16 20:24:25 -0800 |
commit | 5be93a2580a232705e897984d0f920bc6346990e (patch) | |
tree | 2f464cc5e9d461b81c5da6b8bfdc4191ea7f253e /numpy | |
parent | 865c3e375a598e5a0f7d9e690eda4702de8658af (diff) | |
download | numpy-5be93a2580a232705e897984d0f920bc6346990e.tar.gz |
MAINT: memcpy-based ~4x faster, typed shuffle.
Only for 1d-ndarrays exactly, as subtypes (e.g. masked arrays) may not
allow direct shuffle of the underlying buffer (in fact, the old
implementation destroyed the underlying values of masked arrays while
shuffling).
Also handles struct-containing-object 1d ndarrays properly.
See #6776 for an earlier, less general (but even faster: ~6x)
improvement attempt, #5514 for the original issue.
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/random/mtrand/mtrand.pyx | 45 | ||||
-rw-r--r-- | numpy/random/tests/test_random.py | 51 |
2 files changed, 56 insertions, 40 deletions
diff --git a/numpy/random/mtrand/mtrand.pyx b/numpy/random/mtrand/mtrand.pyx index ff8171d45..f70f578cc 100644 --- a/numpy/random/mtrand/mtrand.pyx +++ b/numpy/random/mtrand/mtrand.pyx @@ -24,6 +24,8 @@ include "Python.pxi" include "numpy.pxd" +from libc cimport string + cdef extern from "math.h": double exp(double x) double log(double x) @@ -4979,33 +4981,42 @@ cdef class RandomState: [0, 1, 2]]) """ - cdef npy_intp i, j - - i = len(x) - 1 - - # Logic adapted from random.shuffle() - if isinstance(x, np.ndarray) and \ - (x.ndim > 1 or x.dtype.fields is not None): - # For a multi-dimensional ndarray, indexing returns a view onto - # each row. So we can't just use ordinary assignment to swap the - # rows; we need a bounce buffer. + cdef: + npy_intp i, j, n = len(x) + size_t stride, nbytes + char* x_ptr + char* buf_ptr + + if type(x) is np.ndarray and x.ndim == 1 and x.size: + # Fast, statically typed path: shuffle the underlying buffer. + # Only for non-empty, 1d objects of class ndarray (subclasses such + # as MaskedArrays may not support this approach). + x_ptr = <char*><size_t>x.ctypes.data + stride = x.strides[0] + nbytes = x[:1].nbytes + buf = np.empty_like(x[0]) # GC'd at function exit + buf_ptr = <char*><size_t>buf.ctypes.data + with self.lock: + for i in reversed(range(1, n)): + j = rk_interval(i, self.internal_state) + string.memcpy(buf_ptr, x_ptr + j * stride, nbytes) + string.memcpy(x_ptr + j * stride, x_ptr + i * stride, nbytes) + string.memcpy(x_ptr + i * stride, buf_ptr, nbytes) + elif isinstance(x, np.ndarray) and x.ndim > 1 and x.size: + # Multidimensional ndarrays require a bounce buffer. buf = np.empty_like(x[0]) with self.lock: - while i > 0: + for i in reversed(range(1, n)): j = rk_interval(i, self.internal_state) buf[...] = x[j] x[j] = x[i] x[i] = buf - i = i - 1 else: - # For single-dimensional arrays, lists, and any other Python - # sequence types, indexing returns a real object that's - # independent of the array contents, so we can just swap directly. + # Untyped path. with self.lock: - while i > 0: + for i in reversed(range(1, n)): j = rk_interval(i, self.internal_state) x[i], x[j] = x[j], x[i] - i = i - 1 def permutation(self, object x): """ diff --git a/numpy/random/tests/test_random.py b/numpy/random/tests/test_random.py index 37c1876bf..e3391a9a2 100644 --- a/numpy/random/tests/test_random.py +++ b/numpy/random/tests/test_random.py @@ -10,6 +10,7 @@ import sys import warnings + class TestSeed(TestCase): def test_scalar(self): s = np.random.RandomState(0) @@ -40,6 +41,7 @@ class TestSeed(TestCase): assert_raises(ValueError, np.random.RandomState, [1, 2, 4294967296]) assert_raises(ValueError, np.random.RandomState, [1, -2, 4294967296]) + class TestBinomial(TestCase): def test_n_zero(self): # Tests the corner case of n == 0 for the binomial distribution. @@ -130,6 +132,7 @@ class TestSetState(TestCase): # arguments without truncation. self.prng.negative_binomial(0.5, 0.5) + class TestRandint(TestCase): rfunc = np.random.randint @@ -207,12 +210,13 @@ class TestRandint(TestCase): assert_(tgt[np.dtype(np.bool).name] == res) -class TestRandomDist(TestCase): +class TestRandomDist: # Make sure the random distribution returns the correct value for a # given seed - def setUp(self): - self.seed = 1234567890 + @classmethod + def setup_class(cls): + cls.seed = 1234567890 def test_rand(self): np.random.seed(self.seed) @@ -368,40 +372,41 @@ class TestRandomDist(TestCase): np.testing.assert_equal(actual, desired) def test_shuffle(self): - # Test lists, arrays, and multidimensional versions of both: - for conv in [lambda x: x, - np.asarray, + # Test lists, arrays (of various dtypes), and multidimensional versions + # of both, c-contiguous or not: + for conv in [lambda x: np.array([]), + lambda x: x, + lambda x: np.asarray(x).astype(np.int8), + lambda x: np.asarray(x).astype(np.float32), + lambda x: np.asarray(x).astype(np.complex64), + lambda x: np.asarray(x).astype(object), lambda x: [(i, i) for i in x], - lambda x: np.asarray([(i, i) for i in x])]: + lambda x: np.asarray([[i, i] for i in x]), + lambda x: np.vstack([x, x]).T, + # gh-4270 + lambda x: np.asarray([(i, i) for i in x], + [("a", object, 1), + ("b", np.int32, 1)])]: np.random.seed(self.seed) alist = conv([1, 2, 3, 4, 5, 6, 7, 8, 9, 0]) np.random.shuffle(alist) actual = alist desired = conv([0, 1, 9, 6, 2, 4, 5, 8, 7, 3]) - np.testing.assert_array_equal(actual, desired) - - def test_shuffle_flexible(self): - # gh-4270 - arr = [(0, 1), (2, 3)] - dt = np.dtype([('a', np.int32, 1), ('b', np.int32, 1)]) - nparr = np.array(arr, dtype=dt) - a, b = nparr[0].copy(), nparr[1].copy() - for i in range(50): - np.random.shuffle(nparr) - assert_(a in nparr) - assert_(b in nparr) + yield np.testing.assert_array_equal, actual, desired def test_shuffle_masked(self): # gh-3263 a = np.ma.masked_values(np.reshape(range(20), (5,4)) % 3 - 1, -1) b = np.ma.masked_values(np.arange(20) % 3 - 1, -1) - ma = np.ma.count_masked(a) - mb = np.ma.count_masked(b) + a_orig = a.copy() + b_orig = b.copy() for i in range(50): np.random.shuffle(a) - self.assertEqual(ma, np.ma.count_masked(a)) + assert_equal( + sorted(a.data[~a.mask]), sorted(a_orig.data[~a_orig.mask])) np.random.shuffle(b) - self.assertEqual(mb, np.ma.count_masked(b)) + assert_equal( + sorted(b.data[~b.mask]), sorted(b_orig.data[~b_orig.mask])) def test_beta(self): np.random.seed(self.seed) |