summaryrefslogtreecommitdiff
path: root/numpy/lib
diff options
context:
space:
mode:
authorStephan Hoyer <shoyer@gmail.com>2016-10-17 21:32:40 -0400
committerCharles Harris <charlesr.harris@gmail.com>2016-10-17 21:32:40 -0400
commit0a02bb6f62a5c09103cf748bafe7a622f3dfe98b (patch)
tree04ccebede3afc55afd08eae732a3ececa3846f69 /numpy/lib
parent15e82e2181d9ae92b6a1a6fb80454bc3a4aa2cb5 (diff)
downloadnumpy-0a02bb6f62a5c09103cf748bafe7a622f3dfe98b.tar.gz
ENH: add signature argument to vectorize for vectorizing like generalized ufuncs (#8054)
* ENH: add signature argument to vectorize for generalized ufuncs Example usage (from the docstring): Vectorized convolution: >>> convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)') >>> convolve(np.eye(4), [1, 2, 1]) array([[ 1., 2., 1., 0., 0., 0.], [ 0., 1., 2., 1., 0., 0.], [ 0., 0., 1., 2., 1., 0.], [ 0., 0., 0., 1., 2., 1.]]) * Use str.format rather than % * Fix spelling typo * BUG: fix np.vectorize for size 0 inputs * DOC: add vectorize to 1.12.0 release notes * [ci-skip] Remove outdated comment
Diffstat (limited to 'numpy/lib')
-rw-r--r--numpy/lib/function_base.py288
-rw-r--r--numpy/lib/tests/test_function_base.py154
2 files changed, 407 insertions, 35 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index cb1a04f47..98b0413a1 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1,9 +1,10 @@
from __future__ import division, absolute_import, print_function
-import warnings
-import sys
import collections
import operator
+import re
+import sys
+import warnings
import numpy as np
import numpy.core.numeric as _nx
@@ -24,16 +25,19 @@ from numpy.core.numerictypes import typecodes, number
from numpy.lib.twodim_base import diag
from .utils import deprecate
from numpy.core.multiarray import (
- _insert, add_docstring, digitize, bincount,
+ _insert, add_docstring, digitize, bincount,
interp as compiled_interp, interp_complex as compiled_interp_complex
)
from numpy.core.umath import _add_newdoc_ufunc as add_newdoc_ufunc
from numpy.compat import long
from numpy.compat.py3k import basestring
-# Force range to be a generator, for np.delete's usage.
if sys.version_info[0] < 3:
+ # Force range to be a generator, for np.delete's usage.
range = xrange
+ import __builtin__ as builtins
+else:
+ import builtins
__all__ = [
@@ -1792,7 +1796,7 @@ def interp(x, xp, fp, left=None, right=None, period=None):
Returns
-------
- y : float or complex (corresponding to fp) or ndarray
+ y : float or complex (corresponding to fp) or ndarray
The interpolated values, same shape as `x`.
Raises
@@ -1899,7 +1903,7 @@ def interp(x, xp, fp, left=None, right=None, period=None):
return interp_func(x, xp, fp, left, right)
else:
return interp_func(x, xp, fp, left, right).item()
-
+
def angle(z, deg=0):
"""
Return the angle of the complex argument.
@@ -2242,17 +2246,126 @@ def disp(mesg, device=None, linefeed=True):
return
+# See http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html
+_DIMENSION_NAME = r'\w+'
+_CORE_DIMENSION_LIST = '(?:{0:}(?:,{0:})*)?'.format(_DIMENSION_NAME)
+_ARGUMENT = r'\({}\)'.format(_CORE_DIMENSION_LIST)
+_ARGUMENT_LIST = '{0:}(?:,{0:})*'.format(_ARGUMENT)
+_SIGNATURE = '^{0:}->{0:}$'.format(_ARGUMENT_LIST)
+
+
+def _parse_gufunc_signature(signature):
+ """
+ Parse string signatures for a generalized universal function.
+
+ Arguments
+ ---------
+ signature : string
+ Generalized universal function signature, e.g., ``(m,n),(n,p)->(m,p)``
+ for ``np.matmul``.
+
+ Returns
+ -------
+ Tuple of input and output core dimensions parsed from the signature, each
+ of the form List[Tuple[str, ...]].
+ """
+ if not re.match(_SIGNATURE, signature):
+ raise ValueError(
+ 'not a valid gufunc signature: {}'.format(signature))
+ return tuple([tuple(re.findall(_DIMENSION_NAME, arg))
+ for arg in re.findall(_ARGUMENT, arg_list)]
+ for arg_list in signature.split('->'))
+
+
+def _update_dim_sizes(dim_sizes, arg, core_dims):
+ """
+ Incrementally check and update core dimension sizes for a single argument.
+
+ Arguments
+ ---------
+ dim_sizes : Dict[str, int]
+ Sizes of existing core dimensions. Will be updated in-place.
+ arg : ndarray
+ Argument to examine.
+ core_dims : Tuple[str, ...]
+ Core dimensions for this argument.
+ """
+ if not core_dims:
+ return
+
+ num_core_dims = len(core_dims)
+ if arg.ndim < num_core_dims:
+ raise ValueError(
+ '%d-dimensional argument does not have enough '
+ 'dimensions for all core dimensions %r'
+ % (arg.ndim, core_dims))
+
+ core_shape = arg.shape[-num_core_dims:]
+ for dim, size in zip(core_dims, core_shape):
+ if dim in dim_sizes:
+ if size != dim_sizes[dim]:
+ raise ValueError(
+ 'inconsistent size for core dimension %r: %r vs %r'
+ % (dim, size, dim_sizes[dim]))
+ else:
+ dim_sizes[dim] = size
+
+
+def _parse_input_dimensions(args, input_core_dims):
+ """
+ Parse broadcast and core dimensions for vectorize with a signature.
+
+ Arguments
+ ---------
+ args : Tuple[ndarray, ...]
+ Tuple of input arguments to examine.
+ input_core_dims : List[Tuple[str, ...]]
+ List of core dimensions corresponding to each input.
+
+ Returns
+ -------
+ broadcast_shape : Tuple[int, ...]
+ Common shape to broadcast all non-core dimensions to.
+ dim_sizes : Dict[str, int]
+ Common sizes for named core dimensions.
+ """
+ broadcast_args = []
+ dim_sizes = {}
+ for arg, core_dims in zip(args, input_core_dims):
+ _update_dim_sizes(dim_sizes, arg, core_dims)
+ ndim = arg.ndim - len(core_dims)
+ dummy_array = np.lib.stride_tricks.as_strided(0, arg.shape[:ndim])
+ broadcast_args.append(dummy_array)
+ broadcast_shape = np.lib.stride_tricks._broadcast_shape(*broadcast_args)
+ return broadcast_shape, dim_sizes
+
+
+def _calculate_shapes(broadcast_shape, dim_sizes, list_of_core_dims):
+ """Helper for calculating broadcast shapes with core dimensions."""
+ return [broadcast_shape + tuple(dim_sizes[dim] for dim in core_dims)
+ for core_dims in list_of_core_dims]
+
+
+def _create_arrays(broadcast_shape, dim_sizes, list_of_core_dims, dtypes):
+ """Helper for creating output arrays in vectorize."""
+ shapes = _calculate_shapes(broadcast_shape, dim_sizes, list_of_core_dims)
+ arrays = tuple(np.empty(shape, dtype=dtype)
+ for shape, dtype in zip(shapes, dtypes))
+ return arrays
+
+
class vectorize(object):
"""
- vectorize(pyfunc, otypes='', doc=None, excluded=None, cache=False)
+ vectorize(pyfunc, otypes=None, doc=None, excluded=None, cache=False,
+ signature=None)
Generalized function class.
- Define a vectorized function which takes a nested sequence
- of objects or numpy arrays as inputs and returns a
- numpy array as output. The vectorized function evaluates `pyfunc` over
- successive tuples of the input arrays like the python map function,
- except it uses the broadcasting rules of numpy.
+ Define a vectorized function which takes a nested sequence of objects or
+ numpy arrays as inputs and returns an single or tuple of numpy array as
+ output. The vectorized function evaluates `pyfunc` over successive tuples
+ of the input arrays like the python map function, except it uses the
+ broadcasting rules of numpy.
The data type of the output of `vectorized` is determined by calling
the function with the first element of the input. This can be avoided
@@ -2282,6 +2395,15 @@ class vectorize(object):
.. versionadded:: 1.7.0
+ signature : string, optional
+ Generalized universal function signature, e.g., ``(m,n),(n)->(m)`` for
+ vectorized matrix-vector multiplication. If provided, ``pyfunc`` will
+ be called with (and expected to return) arrays with shapes given by the
+ size of corresponding core dimensions. By default, ``pyfunc`` is
+ assumed to take scalars as input and output.
+
+ .. versionadded:: 1.12.0
+
Returns
-------
vectorized : callable
@@ -2301,7 +2423,7 @@ class vectorize(object):
array([3, 4, 1, 2])
The docstring is taken from the input function to `vectorize` unless it
- is specified
+ is specified:
>>> vfunc.__doc__
'Return a-b if a>b, otherwise return a+b'
@@ -2310,7 +2432,7 @@ class vectorize(object):
'Vectorized `myfunc`'
The output type is determined by evaluating the first element of the input,
- unless it is specified
+ unless it is specified:
>>> out = vfunc([1, 2, 3, 4], 2)
>>> type(out[0])
@@ -2340,6 +2462,25 @@ class vectorize(object):
>>> vpolyval([1, 2, 3], x=[0, 1])
array([3, 6])
+ The `signature` argument allows for vectorizing functions that act on
+ non-scalar arrays of fixed length. For example, you can use it for a
+ vectorized calculation of Pearson correlation coefficient and its p-value:
+
+ >>> import scipy.stats
+ >>> pearsonr = np.vectorize(scipy.stats.pearsonr,
+ ... signature='(n),(n)->(),()')
+ >>> pearsonr([[0, 1, 2, 3]], [[1, 2, 3, 4], [4, 3, 2, 1]])
+ (array([ 1., -1.]), array([ 0., 0.]))
+
+ Or for a vectorized convolution:
+
+ >>> convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)')
+ >>> convolve(np.eye(4), [1, 2, 1])
+ array([[ 1., 2., 1., 0., 0., 0.],
+ [ 0., 1., 2., 1., 0., 0.],
+ [ 0., 0., 1., 2., 1., 0.],
+ [ 0., 0., 0., 1., 2., 1.]])
+
See Also
--------
frompyfunc : Takes an arbitrary Python function and returns a ufunc
@@ -2359,12 +2500,17 @@ class vectorize(object):
The new keyword argument interface and `excluded` argument support
further degrades performance.
+ References
+ ----------
+ .. [1] NumPy Reference, section `Generalized Universal Function API
+ <http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_.
"""
- def __init__(self, pyfunc, otypes='', doc=None, excluded=None,
- cache=False):
+ def __init__(self, pyfunc, otypes=None, doc=None, excluded=None,
+ cache=False, signature=None):
self.pyfunc = pyfunc
self.cache = cache
+ self.signature = signature
self._ufunc = None # Caching to improve default performance
if doc is None:
@@ -2373,22 +2519,25 @@ class vectorize(object):
self.__doc__ = doc
if isinstance(otypes, str):
- self.otypes = otypes
- for char in self.otypes:
+ for char in otypes:
if char not in typecodes['All']:
- raise ValueError(
- "Invalid otype specified: %s" % (char,))
+ raise ValueError("Invalid otype specified: %s" % (char,))
elif iterable(otypes):
- self.otypes = ''.join([_nx.dtype(x).char for x in otypes])
- else:
- raise ValueError(
- "Invalid otype specification")
+ otypes = ''.join([_nx.dtype(x).char for x in otypes])
+ elif otypes is not None:
+ raise ValueError("Invalid otype specification")
+ self.otypes = otypes
# Excluded variable support
if excluded is None:
excluded = set()
self.excluded = set(excluded)
+ if signature is not None:
+ self._in_and_out_core_dims = _parse_gufunc_signature(signature)
+ else:
+ self._in_and_out_core_dims = None
+
def __call__(self, *args, **kwargs):
"""
Return arrays with the results of `pyfunc` broadcast (vectorized) over
@@ -2425,7 +2574,7 @@ class vectorize(object):
if not args:
raise ValueError('args can not be empty')
- if self.otypes:
+ if self.otypes is not None:
otypes = self.otypes
nout = len(otypes)
@@ -2441,7 +2590,12 @@ class vectorize(object):
# the subsequent call when the ufunc is evaluated.
# Assumes that ufunc first evaluates the 0th elements in the input
# arrays (the input values are not checked to ensure this)
- inputs = [asarray(_a).flat[0] for _a in args]
+ args = [asarray(arg) for arg in args]
+ if builtins.any(arg.size == 0 for arg in args):
+ raise ValueError('cannot call `vectorize` on size 0 inputs '
+ 'unless `otypes` is set')
+
+ inputs = [arg.flat[0] for arg in args]
outputs = func(*inputs)
# Performance note: profiling indicates that -- for simple
@@ -2477,24 +2631,88 @@ class vectorize(object):
def _vectorize_call(self, func, args):
"""Vectorized call to `func` over positional `args`."""
- if not args:
- _res = func()
+ if self.signature is not None:
+ res = self._vectorize_call_with_signature(func, args)
+ elif not args:
+ res = func()
else:
ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args)
# Convert args to object arrays first
- inputs = [array(_a, copy=False, subok=True, dtype=object)
- for _a in args]
+ inputs = [array(a, copy=False, subok=True, dtype=object)
+ for a in args]
outputs = ufunc(*inputs)
if ufunc.nout == 1:
- _res = array(outputs,
- copy=False, subok=True, dtype=otypes[0])
+ res = array(outputs, copy=False, subok=True, dtype=otypes[0])
else:
- _res = tuple([array(_x, copy=False, subok=True, dtype=_t)
- for _x, _t in zip(outputs, otypes)])
- return _res
+ res = tuple([array(x, copy=False, subok=True, dtype=t)
+ for x, t in zip(outputs, otypes)])
+ return res
+
+ def _vectorize_call_with_signature(self, func, args):
+ """Vectorized call over positional arguments with a signature."""
+ input_core_dims, output_core_dims = self._in_and_out_core_dims
+
+ if len(args) != len(input_core_dims):
+ raise TypeError('wrong number of positional arguments: '
+ 'expected %r, got %r'
+ % (len(input_core_dims), len(args)))
+ args = tuple(asanyarray(arg) for arg in args)
+
+ broadcast_shape, dim_sizes = _parse_input_dimensions(
+ args, input_core_dims)
+ input_shapes = _calculate_shapes(broadcast_shape, dim_sizes,
+ input_core_dims)
+ args = [np.broadcast_to(arg, shape, subok=True)
+ for arg, shape in zip(args, input_shapes)]
+
+ outputs = None
+ otypes = self.otypes
+ nout = len(output_core_dims)
+
+ for index in np.ndindex(*broadcast_shape):
+ results = func(*(arg[index] for arg in args))
+
+ n_results = len(results) if isinstance(results, tuple) else 1
+
+ if nout != n_results:
+ raise ValueError(
+ 'wrong number of outputs from pyfunc: expected %r, got %r'
+ % (nout, n_results))
+
+ if nout == 1:
+ results = (results,)
+
+ if outputs is None:
+ for result, core_dims in zip(results, output_core_dims):
+ _update_dim_sizes(dim_sizes, result, core_dims)
+
+ if otypes is None:
+ otypes = [asarray(result).dtype for result in results]
+
+ outputs = _create_arrays(broadcast_shape, dim_sizes,
+ output_core_dims, otypes)
+
+ for output, result in zip(outputs, results):
+ output[index] = result
+
+ if outputs is None:
+ # did not call the function even once
+ if otypes is None:
+ raise ValueError('cannot call `vectorize` on size 0 inputs '
+ 'unless `otypes` is set')
+ if builtins.any(dim not in dim_sizes
+ for dims in output_core_dims
+ for dim in dims):
+ raise ValueError('cannot call `vectorize` with a signature '
+ 'including new output dimensions on size 0 '
+ 'inputs')
+ outputs = _create_arrays(broadcast_shape, dim_sizes,
+ output_core_dims, otypes)
+
+ return outputs[0] if nout == 1 else outputs
def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None,
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 4535c1e7f..6327aaf7c 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -1,5 +1,6 @@
from __future__ import division, absolute_import, print_function
+import operator
import warnings
import sys
@@ -1013,8 +1014,12 @@ class TestVectorize(TestCase):
def test_assigning_docstring(self):
def foo(x):
+ """Original documentation"""
return x
+ f = vectorize(foo)
+ assert_equal(f.__doc__, foo.__doc__)
+
doc = "Provided documentation"
f = vectorize(foo, doc=doc)
assert_equal(f.__doc__, doc)
@@ -1069,6 +1074,155 @@ class TestVectorize(TestCase):
x = np.arange(5)
assert_array_equal(f(x), x)
+ def test_parse_gufunc_signature(self):
+ assert_equal(nfb._parse_gufunc_signature('(x)->()'), ([('x',)], [()]))
+ assert_equal(nfb._parse_gufunc_signature('(x,y)->()'),
+ ([('x', 'y')], [()]))
+ assert_equal(nfb._parse_gufunc_signature('(x),(y)->()'),
+ ([('x',), ('y',)], [()]))
+ assert_equal(nfb._parse_gufunc_signature('(x)->(y)'),
+ ([('x',)], [('y',)]))
+ assert_equal(nfb._parse_gufunc_signature('(x)->(y),()'),
+ ([('x',)], [('y',), ()]))
+ assert_equal(nfb._parse_gufunc_signature('(),(a,b,c),(d)->(d,e)'),
+ ([(), ('a', 'b', 'c'), ('d',)], [('d', 'e')]))
+ with assert_raises(ValueError):
+ nfb._parse_gufunc_signature('(x)(y)->()')
+ with assert_raises(ValueError):
+ nfb._parse_gufunc_signature('(x),(y)->')
+ with assert_raises(ValueError):
+ nfb._parse_gufunc_signature('((x))->(x)')
+
+ def test_signature_simple(self):
+ def addsubtract(a, b):
+ if a > b:
+ return a - b
+ else:
+ return a + b
+
+ f = vectorize(addsubtract, signature='(),()->()')
+ r = f([0, 3, 6, 9], [1, 3, 5, 7])
+ assert_array_equal(r, [1, 6, 1, 2])
+
+ def test_signature_mean_last(self):
+ def mean(a):
+ return a.mean()
+
+ f = vectorize(mean, signature='(n)->()')
+ r = f([[1, 3], [2, 4]])
+ assert_array_equal(r, [2, 3])
+
+ def test_signature_center(self):
+ def center(a):
+ return a - a.mean()
+
+ f = vectorize(center, signature='(n)->(n)')
+ r = f([[1, 3], [2, 4]])
+ assert_array_equal(r, [[-1, 1], [-1, 1]])
+
+ def test_signature_two_outputs(self):
+ f = vectorize(lambda x: (x, x), signature='()->(),()')
+ r = f([1, 2, 3])
+ assert_(isinstance(r, tuple) and len(r) == 2)
+ assert_array_equal(r[0], [1, 2, 3])
+ assert_array_equal(r[1], [1, 2, 3])
+
+ def test_signature_outer(self):
+ f = vectorize(np.outer, signature='(a),(b)->(a,b)')
+ r = f([1, 2], [1, 2, 3])
+ assert_array_equal(r, [[1, 2, 3], [2, 4, 6]])
+
+ r = f([[[1, 2]]], [1, 2, 3])
+ assert_array_equal(r, [[[[1, 2, 3], [2, 4, 6]]]])
+
+ r = f([[1, 0], [2, 0]], [1, 2, 3])
+ assert_array_equal(r, [[[1, 2, 3], [0, 0, 0]],
+ [[2, 4, 6], [0, 0, 0]]])
+
+ r = f([1, 2], [[1, 2, 3], [0, 0, 0]])
+ assert_array_equal(r, [[[1, 2, 3], [2, 4, 6]],
+ [[0, 0, 0], [0, 0, 0]]])
+
+ def test_signature_computed_size(self):
+ f = vectorize(lambda x: x[:-1], signature='(n)->(m)')
+ r = f([1, 2, 3])
+ assert_array_equal(r, [1, 2])
+
+ r = f([[1, 2, 3], [2, 3, 4]])
+ assert_array_equal(r, [[1, 2], [2, 3]])
+
+ def test_signature_excluded(self):
+
+ def foo(a, b=1):
+ return a + b
+
+ f = vectorize(foo, signature='()->()', excluded={'b'})
+ assert_array_equal(f([1, 2, 3]), [2, 3, 4])
+ assert_array_equal(f([1, 2, 3], b=0), [1, 2, 3])
+
+ def test_signature_otypes(self):
+ f = vectorize(lambda x: x, signature='(n)->(n)', otypes=['float64'])
+ r = f([1, 2, 3])
+ assert_equal(r.dtype, np.dtype('float64'))
+ assert_array_equal(r, [1, 2, 3])
+
+ def test_signature_invalid_inputs(self):
+ f = vectorize(operator.add, signature='(n),(n)->(n)')
+ with assert_raises_regex(TypeError, 'wrong number of positional'):
+ f([1, 2])
+ with assert_raises_regex(
+ ValueError, 'does not have enough dimensions'):
+ f(1, 2)
+ with assert_raises_regex(
+ ValueError, 'inconsistent size for core dimension'):
+ f([1, 2], [1, 2, 3])
+
+ f = vectorize(operator.add, signature='()->()')
+ with assert_raises_regex(TypeError, 'wrong number of positional'):
+ f(1, 2)
+
+ def test_signature_invalid_outputs(self):
+
+ f = vectorize(lambda x: x[:-1], signature='(n)->(n)')
+ with assert_raises_regex(
+ ValueError, 'inconsistent size for core dimension'):
+ f([1, 2, 3])
+
+ f = vectorize(lambda x: x, signature='()->(),()')
+ with assert_raises_regex(ValueError, 'wrong number of outputs'):
+ f(1)
+
+ f = vectorize(lambda x: (x, x), signature='()->()')
+ with assert_raises_regex(ValueError, 'wrong number of outputs'):
+ f([1, 2])
+
+ def test_size_zero_output(self):
+ # see issue 5868
+ f = np.vectorize(lambda x: x)
+ x = np.zeros([0, 5], dtype=int)
+ with assert_raises_regex(ValueError, 'otypes'):
+ f(x)
+
+ f.otypes = 'i'
+ assert_array_equal(f(x), x)
+
+ f = np.vectorize(lambda x: x, signature='()->()')
+ with assert_raises_regex(ValueError, 'otypes'):
+ f(x)
+
+ f = np.vectorize(lambda x: x, signature='()->()', otypes='i')
+ assert_array_equal(f(x), x)
+
+ f = np.vectorize(lambda x: x, signature='(n)->(n)', otypes='i')
+ assert_array_equal(f(x), x)
+
+ f = np.vectorize(lambda x: x, signature='(n)->(n)')
+ assert_array_equal(f(x.T), x.T)
+
+ f = np.vectorize(lambda x: [x], signature='()->(n)', otypes='i')
+ with assert_raises_regex(ValueError, 'new output dimensions'):
+ f(x)
+
class TestDigitize(TestCase):