diff options
author | Stephan Hoyer <shoyer@gmail.com> | 2016-10-17 21:32:40 -0400 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2016-10-17 21:32:40 -0400 |
commit | 0a02bb6f62a5c09103cf748bafe7a622f3dfe98b (patch) | |
tree | 04ccebede3afc55afd08eae732a3ececa3846f69 /numpy/lib | |
parent | 15e82e2181d9ae92b6a1a6fb80454bc3a4aa2cb5 (diff) | |
download | numpy-0a02bb6f62a5c09103cf748bafe7a622f3dfe98b.tar.gz |
ENH: add signature argument to vectorize for vectorizing like generalized ufuncs (#8054)
* ENH: add signature argument to vectorize for generalized ufuncs
Example usage (from the docstring):
Vectorized convolution:
>>> convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)')
>>> convolve(np.eye(4), [1, 2, 1])
array([[ 1., 2., 1., 0., 0., 0.],
[ 0., 1., 2., 1., 0., 0.],
[ 0., 0., 1., 2., 1., 0.],
[ 0., 0., 0., 1., 2., 1.]])
* Use str.format rather than %
* Fix spelling typo
* BUG: fix np.vectorize for size 0 inputs
* DOC: add vectorize to 1.12.0 release notes
* [ci-skip] Remove outdated comment
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/function_base.py | 288 | ||||
-rw-r--r-- | numpy/lib/tests/test_function_base.py | 154 |
2 files changed, 407 insertions, 35 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index cb1a04f47..98b0413a1 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -1,9 +1,10 @@ from __future__ import division, absolute_import, print_function -import warnings -import sys import collections import operator +import re +import sys +import warnings import numpy as np import numpy.core.numeric as _nx @@ -24,16 +25,19 @@ from numpy.core.numerictypes import typecodes, number from numpy.lib.twodim_base import diag from .utils import deprecate from numpy.core.multiarray import ( - _insert, add_docstring, digitize, bincount, + _insert, add_docstring, digitize, bincount, interp as compiled_interp, interp_complex as compiled_interp_complex ) from numpy.core.umath import _add_newdoc_ufunc as add_newdoc_ufunc from numpy.compat import long from numpy.compat.py3k import basestring -# Force range to be a generator, for np.delete's usage. if sys.version_info[0] < 3: + # Force range to be a generator, for np.delete's usage. range = xrange + import __builtin__ as builtins +else: + import builtins __all__ = [ @@ -1792,7 +1796,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): Returns ------- - y : float or complex (corresponding to fp) or ndarray + y : float or complex (corresponding to fp) or ndarray The interpolated values, same shape as `x`. Raises @@ -1899,7 +1903,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): return interp_func(x, xp, fp, left, right) else: return interp_func(x, xp, fp, left, right).item() - + def angle(z, deg=0): """ Return the angle of the complex argument. @@ -2242,17 +2246,126 @@ def disp(mesg, device=None, linefeed=True): return +# See http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html +_DIMENSION_NAME = r'\w+' +_CORE_DIMENSION_LIST = '(?:{0:}(?:,{0:})*)?'.format(_DIMENSION_NAME) +_ARGUMENT = r'\({}\)'.format(_CORE_DIMENSION_LIST) +_ARGUMENT_LIST = '{0:}(?:,{0:})*'.format(_ARGUMENT) +_SIGNATURE = '^{0:}->{0:}$'.format(_ARGUMENT_LIST) + + +def _parse_gufunc_signature(signature): + """ + Parse string signatures for a generalized universal function. + + Arguments + --------- + signature : string + Generalized universal function signature, e.g., ``(m,n),(n,p)->(m,p)`` + for ``np.matmul``. + + Returns + ------- + Tuple of input and output core dimensions parsed from the signature, each + of the form List[Tuple[str, ...]]. + """ + if not re.match(_SIGNATURE, signature): + raise ValueError( + 'not a valid gufunc signature: {}'.format(signature)) + return tuple([tuple(re.findall(_DIMENSION_NAME, arg)) + for arg in re.findall(_ARGUMENT, arg_list)] + for arg_list in signature.split('->')) + + +def _update_dim_sizes(dim_sizes, arg, core_dims): + """ + Incrementally check and update core dimension sizes for a single argument. + + Arguments + --------- + dim_sizes : Dict[str, int] + Sizes of existing core dimensions. Will be updated in-place. + arg : ndarray + Argument to examine. + core_dims : Tuple[str, ...] + Core dimensions for this argument. + """ + if not core_dims: + return + + num_core_dims = len(core_dims) + if arg.ndim < num_core_dims: + raise ValueError( + '%d-dimensional argument does not have enough ' + 'dimensions for all core dimensions %r' + % (arg.ndim, core_dims)) + + core_shape = arg.shape[-num_core_dims:] + for dim, size in zip(core_dims, core_shape): + if dim in dim_sizes: + if size != dim_sizes[dim]: + raise ValueError( + 'inconsistent size for core dimension %r: %r vs %r' + % (dim, size, dim_sizes[dim])) + else: + dim_sizes[dim] = size + + +def _parse_input_dimensions(args, input_core_dims): + """ + Parse broadcast and core dimensions for vectorize with a signature. + + Arguments + --------- + args : Tuple[ndarray, ...] + Tuple of input arguments to examine. + input_core_dims : List[Tuple[str, ...]] + List of core dimensions corresponding to each input. + + Returns + ------- + broadcast_shape : Tuple[int, ...] + Common shape to broadcast all non-core dimensions to. + dim_sizes : Dict[str, int] + Common sizes for named core dimensions. + """ + broadcast_args = [] + dim_sizes = {} + for arg, core_dims in zip(args, input_core_dims): + _update_dim_sizes(dim_sizes, arg, core_dims) + ndim = arg.ndim - len(core_dims) + dummy_array = np.lib.stride_tricks.as_strided(0, arg.shape[:ndim]) + broadcast_args.append(dummy_array) + broadcast_shape = np.lib.stride_tricks._broadcast_shape(*broadcast_args) + return broadcast_shape, dim_sizes + + +def _calculate_shapes(broadcast_shape, dim_sizes, list_of_core_dims): + """Helper for calculating broadcast shapes with core dimensions.""" + return [broadcast_shape + tuple(dim_sizes[dim] for dim in core_dims) + for core_dims in list_of_core_dims] + + +def _create_arrays(broadcast_shape, dim_sizes, list_of_core_dims, dtypes): + """Helper for creating output arrays in vectorize.""" + shapes = _calculate_shapes(broadcast_shape, dim_sizes, list_of_core_dims) + arrays = tuple(np.empty(shape, dtype=dtype) + for shape, dtype in zip(shapes, dtypes)) + return arrays + + class vectorize(object): """ - vectorize(pyfunc, otypes='', doc=None, excluded=None, cache=False) + vectorize(pyfunc, otypes=None, doc=None, excluded=None, cache=False, + signature=None) Generalized function class. - Define a vectorized function which takes a nested sequence - of objects or numpy arrays as inputs and returns a - numpy array as output. The vectorized function evaluates `pyfunc` over - successive tuples of the input arrays like the python map function, - except it uses the broadcasting rules of numpy. + Define a vectorized function which takes a nested sequence of objects or + numpy arrays as inputs and returns an single or tuple of numpy array as + output. The vectorized function evaluates `pyfunc` over successive tuples + of the input arrays like the python map function, except it uses the + broadcasting rules of numpy. The data type of the output of `vectorized` is determined by calling the function with the first element of the input. This can be avoided @@ -2282,6 +2395,15 @@ class vectorize(object): .. versionadded:: 1.7.0 + signature : string, optional + Generalized universal function signature, e.g., ``(m,n),(n)->(m)`` for + vectorized matrix-vector multiplication. If provided, ``pyfunc`` will + be called with (and expected to return) arrays with shapes given by the + size of corresponding core dimensions. By default, ``pyfunc`` is + assumed to take scalars as input and output. + + .. versionadded:: 1.12.0 + Returns ------- vectorized : callable @@ -2301,7 +2423,7 @@ class vectorize(object): array([3, 4, 1, 2]) The docstring is taken from the input function to `vectorize` unless it - is specified + is specified: >>> vfunc.__doc__ 'Return a-b if a>b, otherwise return a+b' @@ -2310,7 +2432,7 @@ class vectorize(object): 'Vectorized `myfunc`' The output type is determined by evaluating the first element of the input, - unless it is specified + unless it is specified: >>> out = vfunc([1, 2, 3, 4], 2) >>> type(out[0]) @@ -2340,6 +2462,25 @@ class vectorize(object): >>> vpolyval([1, 2, 3], x=[0, 1]) array([3, 6]) + The `signature` argument allows for vectorizing functions that act on + non-scalar arrays of fixed length. For example, you can use it for a + vectorized calculation of Pearson correlation coefficient and its p-value: + + >>> import scipy.stats + >>> pearsonr = np.vectorize(scipy.stats.pearsonr, + ... signature='(n),(n)->(),()') + >>> pearsonr([[0, 1, 2, 3]], [[1, 2, 3, 4], [4, 3, 2, 1]]) + (array([ 1., -1.]), array([ 0., 0.])) + + Or for a vectorized convolution: + + >>> convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)') + >>> convolve(np.eye(4), [1, 2, 1]) + array([[ 1., 2., 1., 0., 0., 0.], + [ 0., 1., 2., 1., 0., 0.], + [ 0., 0., 1., 2., 1., 0.], + [ 0., 0., 0., 1., 2., 1.]]) + See Also -------- frompyfunc : Takes an arbitrary Python function and returns a ufunc @@ -2359,12 +2500,17 @@ class vectorize(object): The new keyword argument interface and `excluded` argument support further degrades performance. + References + ---------- + .. [1] NumPy Reference, section `Generalized Universal Function API + <http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_. """ - def __init__(self, pyfunc, otypes='', doc=None, excluded=None, - cache=False): + def __init__(self, pyfunc, otypes=None, doc=None, excluded=None, + cache=False, signature=None): self.pyfunc = pyfunc self.cache = cache + self.signature = signature self._ufunc = None # Caching to improve default performance if doc is None: @@ -2373,22 +2519,25 @@ class vectorize(object): self.__doc__ = doc if isinstance(otypes, str): - self.otypes = otypes - for char in self.otypes: + for char in otypes: if char not in typecodes['All']: - raise ValueError( - "Invalid otype specified: %s" % (char,)) + raise ValueError("Invalid otype specified: %s" % (char,)) elif iterable(otypes): - self.otypes = ''.join([_nx.dtype(x).char for x in otypes]) - else: - raise ValueError( - "Invalid otype specification") + otypes = ''.join([_nx.dtype(x).char for x in otypes]) + elif otypes is not None: + raise ValueError("Invalid otype specification") + self.otypes = otypes # Excluded variable support if excluded is None: excluded = set() self.excluded = set(excluded) + if signature is not None: + self._in_and_out_core_dims = _parse_gufunc_signature(signature) + else: + self._in_and_out_core_dims = None + def __call__(self, *args, **kwargs): """ Return arrays with the results of `pyfunc` broadcast (vectorized) over @@ -2425,7 +2574,7 @@ class vectorize(object): if not args: raise ValueError('args can not be empty') - if self.otypes: + if self.otypes is not None: otypes = self.otypes nout = len(otypes) @@ -2441,7 +2590,12 @@ class vectorize(object): # the subsequent call when the ufunc is evaluated. # Assumes that ufunc first evaluates the 0th elements in the input # arrays (the input values are not checked to ensure this) - inputs = [asarray(_a).flat[0] for _a in args] + args = [asarray(arg) for arg in args] + if builtins.any(arg.size == 0 for arg in args): + raise ValueError('cannot call `vectorize` on size 0 inputs ' + 'unless `otypes` is set') + + inputs = [arg.flat[0] for arg in args] outputs = func(*inputs) # Performance note: profiling indicates that -- for simple @@ -2477,24 +2631,88 @@ class vectorize(object): def _vectorize_call(self, func, args): """Vectorized call to `func` over positional `args`.""" - if not args: - _res = func() + if self.signature is not None: + res = self._vectorize_call_with_signature(func, args) + elif not args: + res = func() else: ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args) # Convert args to object arrays first - inputs = [array(_a, copy=False, subok=True, dtype=object) - for _a in args] + inputs = [array(a, copy=False, subok=True, dtype=object) + for a in args] outputs = ufunc(*inputs) if ufunc.nout == 1: - _res = array(outputs, - copy=False, subok=True, dtype=otypes[0]) + res = array(outputs, copy=False, subok=True, dtype=otypes[0]) else: - _res = tuple([array(_x, copy=False, subok=True, dtype=_t) - for _x, _t in zip(outputs, otypes)]) - return _res + res = tuple([array(x, copy=False, subok=True, dtype=t) + for x, t in zip(outputs, otypes)]) + return res + + def _vectorize_call_with_signature(self, func, args): + """Vectorized call over positional arguments with a signature.""" + input_core_dims, output_core_dims = self._in_and_out_core_dims + + if len(args) != len(input_core_dims): + raise TypeError('wrong number of positional arguments: ' + 'expected %r, got %r' + % (len(input_core_dims), len(args))) + args = tuple(asanyarray(arg) for arg in args) + + broadcast_shape, dim_sizes = _parse_input_dimensions( + args, input_core_dims) + input_shapes = _calculate_shapes(broadcast_shape, dim_sizes, + input_core_dims) + args = [np.broadcast_to(arg, shape, subok=True) + for arg, shape in zip(args, input_shapes)] + + outputs = None + otypes = self.otypes + nout = len(output_core_dims) + + for index in np.ndindex(*broadcast_shape): + results = func(*(arg[index] for arg in args)) + + n_results = len(results) if isinstance(results, tuple) else 1 + + if nout != n_results: + raise ValueError( + 'wrong number of outputs from pyfunc: expected %r, got %r' + % (nout, n_results)) + + if nout == 1: + results = (results,) + + if outputs is None: + for result, core_dims in zip(results, output_core_dims): + _update_dim_sizes(dim_sizes, result, core_dims) + + if otypes is None: + otypes = [asarray(result).dtype for result in results] + + outputs = _create_arrays(broadcast_shape, dim_sizes, + output_core_dims, otypes) + + for output, result in zip(outputs, results): + output[index] = result + + if outputs is None: + # did not call the function even once + if otypes is None: + raise ValueError('cannot call `vectorize` on size 0 inputs ' + 'unless `otypes` is set') + if builtins.any(dim not in dim_sizes + for dims in output_core_dims + for dim in dims): + raise ValueError('cannot call `vectorize` with a signature ' + 'including new output dimensions on size 0 ' + 'inputs') + outputs = _create_arrays(broadcast_shape, dim_sizes, + output_core_dims, otypes) + + return outputs[0] if nout == 1 else outputs def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 4535c1e7f..6327aaf7c 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1,5 +1,6 @@ from __future__ import division, absolute_import, print_function +import operator import warnings import sys @@ -1013,8 +1014,12 @@ class TestVectorize(TestCase): def test_assigning_docstring(self): def foo(x): + """Original documentation""" return x + f = vectorize(foo) + assert_equal(f.__doc__, foo.__doc__) + doc = "Provided documentation" f = vectorize(foo, doc=doc) assert_equal(f.__doc__, doc) @@ -1069,6 +1074,155 @@ class TestVectorize(TestCase): x = np.arange(5) assert_array_equal(f(x), x) + def test_parse_gufunc_signature(self): + assert_equal(nfb._parse_gufunc_signature('(x)->()'), ([('x',)], [()])) + assert_equal(nfb._parse_gufunc_signature('(x,y)->()'), + ([('x', 'y')], [()])) + assert_equal(nfb._parse_gufunc_signature('(x),(y)->()'), + ([('x',), ('y',)], [()])) + assert_equal(nfb._parse_gufunc_signature('(x)->(y)'), + ([('x',)], [('y',)])) + assert_equal(nfb._parse_gufunc_signature('(x)->(y),()'), + ([('x',)], [('y',), ()])) + assert_equal(nfb._parse_gufunc_signature('(),(a,b,c),(d)->(d,e)'), + ([(), ('a', 'b', 'c'), ('d',)], [('d', 'e')])) + with assert_raises(ValueError): + nfb._parse_gufunc_signature('(x)(y)->()') + with assert_raises(ValueError): + nfb._parse_gufunc_signature('(x),(y)->') + with assert_raises(ValueError): + nfb._parse_gufunc_signature('((x))->(x)') + + def test_signature_simple(self): + def addsubtract(a, b): + if a > b: + return a - b + else: + return a + b + + f = vectorize(addsubtract, signature='(),()->()') + r = f([0, 3, 6, 9], [1, 3, 5, 7]) + assert_array_equal(r, [1, 6, 1, 2]) + + def test_signature_mean_last(self): + def mean(a): + return a.mean() + + f = vectorize(mean, signature='(n)->()') + r = f([[1, 3], [2, 4]]) + assert_array_equal(r, [2, 3]) + + def test_signature_center(self): + def center(a): + return a - a.mean() + + f = vectorize(center, signature='(n)->(n)') + r = f([[1, 3], [2, 4]]) + assert_array_equal(r, [[-1, 1], [-1, 1]]) + + def test_signature_two_outputs(self): + f = vectorize(lambda x: (x, x), signature='()->(),()') + r = f([1, 2, 3]) + assert_(isinstance(r, tuple) and len(r) == 2) + assert_array_equal(r[0], [1, 2, 3]) + assert_array_equal(r[1], [1, 2, 3]) + + def test_signature_outer(self): + f = vectorize(np.outer, signature='(a),(b)->(a,b)') + r = f([1, 2], [1, 2, 3]) + assert_array_equal(r, [[1, 2, 3], [2, 4, 6]]) + + r = f([[[1, 2]]], [1, 2, 3]) + assert_array_equal(r, [[[[1, 2, 3], [2, 4, 6]]]]) + + r = f([[1, 0], [2, 0]], [1, 2, 3]) + assert_array_equal(r, [[[1, 2, 3], [0, 0, 0]], + [[2, 4, 6], [0, 0, 0]]]) + + r = f([1, 2], [[1, 2, 3], [0, 0, 0]]) + assert_array_equal(r, [[[1, 2, 3], [2, 4, 6]], + [[0, 0, 0], [0, 0, 0]]]) + + def test_signature_computed_size(self): + f = vectorize(lambda x: x[:-1], signature='(n)->(m)') + r = f([1, 2, 3]) + assert_array_equal(r, [1, 2]) + + r = f([[1, 2, 3], [2, 3, 4]]) + assert_array_equal(r, [[1, 2], [2, 3]]) + + def test_signature_excluded(self): + + def foo(a, b=1): + return a + b + + f = vectorize(foo, signature='()->()', excluded={'b'}) + assert_array_equal(f([1, 2, 3]), [2, 3, 4]) + assert_array_equal(f([1, 2, 3], b=0), [1, 2, 3]) + + def test_signature_otypes(self): + f = vectorize(lambda x: x, signature='(n)->(n)', otypes=['float64']) + r = f([1, 2, 3]) + assert_equal(r.dtype, np.dtype('float64')) + assert_array_equal(r, [1, 2, 3]) + + def test_signature_invalid_inputs(self): + f = vectorize(operator.add, signature='(n),(n)->(n)') + with assert_raises_regex(TypeError, 'wrong number of positional'): + f([1, 2]) + with assert_raises_regex( + ValueError, 'does not have enough dimensions'): + f(1, 2) + with assert_raises_regex( + ValueError, 'inconsistent size for core dimension'): + f([1, 2], [1, 2, 3]) + + f = vectorize(operator.add, signature='()->()') + with assert_raises_regex(TypeError, 'wrong number of positional'): + f(1, 2) + + def test_signature_invalid_outputs(self): + + f = vectorize(lambda x: x[:-1], signature='(n)->(n)') + with assert_raises_regex( + ValueError, 'inconsistent size for core dimension'): + f([1, 2, 3]) + + f = vectorize(lambda x: x, signature='()->(),()') + with assert_raises_regex(ValueError, 'wrong number of outputs'): + f(1) + + f = vectorize(lambda x: (x, x), signature='()->()') + with assert_raises_regex(ValueError, 'wrong number of outputs'): + f([1, 2]) + + def test_size_zero_output(self): + # see issue 5868 + f = np.vectorize(lambda x: x) + x = np.zeros([0, 5], dtype=int) + with assert_raises_regex(ValueError, 'otypes'): + f(x) + + f.otypes = 'i' + assert_array_equal(f(x), x) + + f = np.vectorize(lambda x: x, signature='()->()') + with assert_raises_regex(ValueError, 'otypes'): + f(x) + + f = np.vectorize(lambda x: x, signature='()->()', otypes='i') + assert_array_equal(f(x), x) + + f = np.vectorize(lambda x: x, signature='(n)->(n)', otypes='i') + assert_array_equal(f(x), x) + + f = np.vectorize(lambda x: x, signature='(n)->(n)') + assert_array_equal(f(x.T), x.T) + + f = np.vectorize(lambda x: [x], signature='()->(n)', otypes='i') + with assert_raises_regex(ValueError, 'new output dimensions'): + f(x) + class TestDigitize(TestCase): |