summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- numpy/lib/function_base.py 320
-rw-r--r-- numpy/lib/tests/test_function_base.py 155
2 files changed, 307 insertions, 168 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 2de5c6193..473c8ea23 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1,12 +1,13 @@
__docformat__ = "restructuredtext en"
__all__ = ['select', 'piecewise', 'trim_zeros', 'copy', 'iterable',
- 'percentile', 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex',
- 'disp', 'extract', 'place', 'nansum', 'nanmax', 'nanargmax',
- 'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average',
- 'histogram', 'histogramdd', 'bincount', 'digitize', 'cov', 'corrcoef',
- 'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett',
- 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring',
- 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc']
+ 'percentile', 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex',
+ 'disp', 'extract', 'place', 'nansum', 'nanmax', 'nanargmax',
+ 'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average',
+ 'histogram', 'histogramdd', 'bincount', 'digitize', 'cov',
+ 'corrcoef', 'msort', 'median', 'sinc', 'hamming', 'hanning',
+ 'bartlett', 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc',
+ 'add_docstring', 'meshgrid', 'delete', 'insert', 'append', 'interp',
+ 'add_newdoc_ufunc']
import warnings
import types
@@ -1698,80 +1699,9 @@ def disp(mesg, device=None, linefeed=True):
device.flush()
return
-# return number of input arguments and
-# number of default arguments
-
-def _get_nargs(obj):
- import re
-
- terr = re.compile(r'.*? takes (exactly|at least) (?P<exargs>(\d+)|(\w+))' +
- r' argument(s|) \((?P<gargs>(\d+)|(\w+)) given\)')
- def _convert_to_int(strval):
- try:
- result = int(strval)
- except ValueError:
- if strval=='zero':
- result = 0
- elif strval=='one':
- result = 1
- elif strval=='two':
- result = 2
- # How high to go? English only?
- else:
- raise
- return result
-
- if not callable(obj):
- raise TypeError(
- "Object is not callable.")
- if sys.version_info[0] >= 3:
- # inspect currently fails for binary extensions
- # like math.cos. So fall back to other methods if
- # it fails.
- import inspect
- try:
- spec = inspect.getargspec(obj)
- nargs = len(spec.args)
- if spec.defaults:
- ndefaults = len(spec.defaults)
- else:
- ndefaults = 0
- if inspect.ismethod(obj):
- nargs -= 1
- return nargs, ndefaults
- except:
- pass
-
- if hasattr(obj,'func_code'):
- fcode = obj.func_code
- nargs = fcode.co_argcount
- if obj.func_defaults is not None:
- ndefaults = len(obj.func_defaults)
- else:
- ndefaults = 0
- if isinstance(obj, types.MethodType):
- nargs -= 1
- return nargs, ndefaults
-
- try:
- obj()
- return 0, 0
- except TypeError, msg:
- m = terr.match(str(msg))
- if m:
- nargs = _convert_to_int(m.group('exargs'))
- ndefaults = _convert_to_int(m.group('gargs'))
- if isinstance(obj, types.MethodType):
- nargs -= 1
- return nargs, ndefaults
-
- raise ValueError(
- "failed to determine the number of arguments for %s" % (obj))
-
-
class vectorize(object):
"""
- vectorize(pyfunc, otypes='', doc=None)
+ vectorize(pyfunc, otypes='', doc=None, excluded=None, cache=False)
Generalized function class.
@@ -1794,13 +1724,30 @@ class vectorize(object):
typecode characters or a list of data type specifiers. There should
be one data type specifier for each output.
doc : str, optional
- The docstring for the function. If None, the docstring will be the
- `pyfunc` one.
+ The docstring for the function. If `None`, the docstring will be
+ ``pyfunc.__doc__``.
+ excluded : set, optional
+ Set of strings or integers representing the positional or keyword
+ arguments for which the function will not be vectorized. These will be
+ passed directly to `pyfunc` unmodified.
+
+ .. versionadded:: 1.7.0
+
+ cache : bool, optional
+ If `True`, then cache the first function call that determines the number
+ of outputs if `otypes` is not provided.
+
+ .. versionadded:: 1.7.0
+
+ Returns
+ -------
+ vectorized : callable
+ Vectorized function.
Examples
--------
>>> def myfunc(a, b):
- ... \"\"\"Return a-b if a>b, otherwise return a+b\"\"\"
+ ... "Return a-b if a>b, otherwise return a+b"
... if a > b:
... return a - b
... else:
@@ -1830,78 +1777,169 @@ class vectorize(object):
>>> type(out[0])
<type 'numpy.float64'>
+ The `excluded` argument can be used to prevent vectorizing over certain
+ arguments. This can be useful for array-like arguments of a fixed length
+ such as the coefficients for a polynomial as in `polyval`:
+
+ >>> def mypolyval(p, x):
+ ... _p = list(p)
+ ... res = _p.pop(0)
+ ... while _p:
+ ... res = res*x + _p.pop(0)
+ ... return res
+ >>> vpolyval = np.vectorize(mypolyval, excluded=['p'])
+ >>> vpolyval(p=[1, 2, 3], x=[0, 1])
+ array([3, 6])
+
+ Positional arguments may also be excluded by specifying their position:
+
+ >>> vpolyval.excluded.add(0)
+ >>> vpolyval([1, 2, 3], x=[0, 1])
+ array([3, 6])
+
+ Notes
+ -----
+ The `vectorize` function is provided primarily for convenience, not for
+ performance. The implementation is essentially a for loop.
+
+ If `otypes` is not specified, then a call to the function with the first
+ element of each argument will be used to determine the number of outputs.
+ The results of this call will be cached if `cache` is `True` to prevent
+ calling the function twice. However, to implement the cache, the original
+ function must be wrapped, which will slow down subsequent calls, so only do
+ this if your function is expensive.
+
+ The new keyword argument interface and `excluded` argument support further
+ degrade performance.
"""
- def __init__(self, pyfunc, otypes='', doc=None):
- self.thefunc = pyfunc
- self.ufunc = None
- nin, ndefault = _get_nargs(pyfunc)
- if nin == 0 and ndefault == 0:
- self.nin = None
- self.nin_wo_defaults = None
- else:
- self.nin = nin
- self.nin_wo_defaults = nin - ndefault
- self.nout = None
+ def __init__(self, pyfunc, otypes='', doc=None, excluded=None, cache=False):
+ self.pyfunc = pyfunc
+ self.cache = cache
+
if doc is None:
self.__doc__ = pyfunc.__doc__
else:
self.__doc__ = doc
+
if isinstance(otypes, str):
self.otypes = otypes
for char in self.otypes:
if char not in typecodes['All']:
- raise ValueError(
- "invalid otype specified")
+ raise ValueError("Invalid otype specified: %s" % (char,))
elif iterable(otypes):
self.otypes = ''.join([_nx.dtype(x).char for x in otypes])
else:
- raise ValueError(
- "Invalid otype specification")
- self.lastcallargs = 0
-
- def __call__(self, *args):
- # get number of outputs and output types by calling
- # the function on the first entries of args
- nargs = len(args)
- if self.nin:
- if (nargs > self.nin) or (nargs < self.nin_wo_defaults):
- raise ValueError(
- "Invalid number of arguments")
-
- # we need a new ufunc if this is being called with more arguments.
- if (self.lastcallargs != nargs):
- self.lastcallargs = nargs
- self.ufunc = None
- self.nout = None
-
- if self.nout is None or self.otypes == '':
- newargs = []
- for arg in args:
- newargs.append(asarray(arg).flat[0])
- theout = self.thefunc(*newargs)
- if isinstance(theout, tuple):
- self.nout = len(theout)
+ raise ValueError("Invalid otype specification")
+
+ # Excluded variable support
+ if excluded is None:
+ excluded = set()
+ self.excluded = set(excluded)
+
+ if self.otypes and not self.excluded:
+ self._ufunc = None # Caching to improve default performance
+
+ def __call__(self, *args, **kwargs):
+ """
+ Return arrays with the results of `pyfunc` broadcast (vectorized) over
+ `args` and `kwargs` not in `excluded`.
+ """
+ excluded = self.excluded
+ if not kwargs and not excluded:
+ func = self.pyfunc
+ vargs = args
+ else:
+ # The wrapper accepts only positional arguments: we use `names` and
+ # `inds` to mutate `the_args` and `kwargs` to pass to the original
+ # function.
+ nargs = len(args)
+
+ names = [_n for _n in kwargs if _n not in excluded]
+ inds = [_i for _i in range(nargs) if _i not in excluded]
+ the_args = list(args)
+ def func(*vargs):
+ for _n, _i in enumerate(inds):
+ the_args[_i] = vargs[_n]
+ kwargs.update(zip(names, vargs[len(inds):]))
+ return self.pyfunc(*the_args, **kwargs)
+
+ vargs = [args[_i] for _i in inds]
+ vargs.extend([kwargs[_n] for _n in names])
+
+ return self._vectorize_call(func=func, args=vargs)
+
+ def _get_ufunc_and_otypes(self, func, args):
+ """Return (ufunc, otypes)."""
+ # frompyfunc will fail if args is empty
+ assert args
+
+ if self.otypes:
+ otypes = self.otypes
+ nout = len(otypes)
+
+ # Note logic here: We only *use* self._ufunc if func is self.pyfunc
+ # even though we set self._ufunc regardless.
+ if func is self.pyfunc and self._ufunc is not None:
+ ufunc = self._ufunc
+ else:
+ ufunc = self._ufunc = frompyfunc(func, len(args), nout)
+ else:
+ # Get number of outputs and output types by calling the function on
+ # the first entries of args. We also cache the result to prevent
+ # the subsequent call when the ufunc is evaluated.
+ # Assumes that ufunc first evaluates the 0th elements in the input
+ # arrays (the input values are not checked to ensure this)
+ inputs = [asarray(_a).flat[0] for _a in args]
+ outputs = func(*inputs)
+
+ # Performance note: profiling indicates that -- for simple functions
+ # at least -- this wrapping can almost double the execution time.
+ # Hence we make it optional.
+ if self.cache:
+ _cache = [outputs]
+ def _func(*vargs):
+ if _cache:
+ return _cache.pop()
+ else:
+ return func(*vargs)
+ else:
+ _func = func
+
+ if isinstance(outputs, tuple):
+ nout = len(outputs)
else:
- self.nout = 1
- theout = (theout,)
- if self.otypes == '':
- otypes = []
- for k in range(self.nout):
- otypes.append(asarray(theout[k]).dtype.char)
- self.otypes = ''.join(otypes)
-
- # Create ufunc if not already created
- if (self.ufunc is None):
- self.ufunc = frompyfunc(self.thefunc, nargs, self.nout)
-
- # Convert to object arrays first
- newargs = [array(arg,copy=False,subok=True,dtype=object) for arg in args]
- if self.nout == 1:
- _res = array(self.ufunc(*newargs),copy=False,
- subok=True,dtype=self.otypes[0])
+ nout = 1
+ outputs = (outputs,)
+
+ otypes = ''.join([asarray(outputs[_k]).dtype.char
+ for _k in range(nout)])
+
+ # Performance note: profiling indicates that creating the ufunc is
+ # not a significant cost compared with wrapping so it seems not
+ # worth trying to cache this.
+ ufunc = frompyfunc(_func, len(args), nout)
+
+ return ufunc, otypes
+
+ def _vectorize_call(self, func, args):
+ """Vectorized call to `func` over positional `args`."""
+ if not args:
+ _res = func()
else:
- _res = tuple([array(x,copy=False,subok=True,dtype=c) \
- for x, c in zip(self.ufunc(*newargs), self.otypes)])
+ ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args)
+
+ # Convert args to object arrays first
+ inputs = [array(_a, copy=False, subok=True, dtype=object)
+ for _a in args]
+
+ outputs = ufunc(*inputs)
+
+ if ufunc.nout == 1:
+ _res = array(outputs,
+ copy=False, subok=True, dtype=otypes[0])
+ else:
+ _res = tuple([array(_x, copy=False, subok=True, dtype=_t)
+ for _x, _t in zip(outputs, otypes)])
return _res
def cov(m, y=None, rowvar=1, bias=0, ddof=None):
@@ -2595,7 +2633,7 @@ def i0(x):
References
----------
- .. [1] C. W. Clenshaw, "Chebyshev series for mathematical functions," in
+ .. [1] C. W. Clenshaw, "Chebyshev series for mathematical functions", in
*National Physical Laboratory Mathematical Tables*, vol. 5, London:
Her Majesty's Stationery Office, 1962.
.. [2] M. Abramowitz and I. A. Stegun, *Handbook of Mathematical
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 7245b8962..95b32e47c 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -274,7 +274,7 @@ class TestGradient(TestCase):
assert_array_equal(gradient(v), dx)
def test_badargs(self):
- # for 2D array, gradient can take 0,1, or 2 extra args
+ # for 2D array, gradient can take 0, 1, or 2 extra args
x = np.array([[1, 1], [3, 4]])
assert_raises(SyntaxError, gradient, x, np.array([1., 1.]),
np.array([1., 1.]), np.array([1., 1.]))
@@ -394,12 +394,12 @@ class TestVectorize(TestCase):
def foo(a, b=1):
return a + b
f = vectorize(foo)
- args = np.array([1,2,3])
+ args = np.array([1, 2, 3])
r1 = f(args)
- r2 = np.array([2,3,4])
+ r2 = np.array([2, 3, 4])
assert_array_equal(r1, r2)
r1 = f(args, 2)
- r2 = np.array([3,4,5])
+ r2 = np.array([3, 4, 5])
assert_array_equal(r1, r2)
def test_keywords_no_func_code(self):
@@ -412,6 +412,107 @@ class TestVectorize(TestCase):
except:
raise AssertionError()
+ def test_keywords2_ticket_2100(self):
+ r"""Test kwarg support: enhancement ticket 2100"""
+ import math
+ def foo(a, b=1):
+ return a + b
+ f = vectorize(foo)
+ args = np.array([1, 2, 3])
+ r1 = f(a=args)
+ r2 = np.array([2, 3, 4])
+ assert_array_equal(r1, r2)
+ r1 = f(b=1, a=args)
+ assert_array_equal(r1, r2)
+ r1 = f(args, b=2)
+ r2 = np.array([3, 4, 5])
+ assert_array_equal(r1, r2)
+
+ def test_keywords3_ticket_2100(self):
+ """Test excluded with mixed positional and kwargs: ticket 2100"""
+ def mypolyval(x, p):
+ _p = list(p)
+ res = _p.pop(0)
+ while _p:
+ res = res*x + _p.pop(0)
+ return res
+ vpolyval = np.vectorize(mypolyval, excluded=['p',1])
+ ans = [3, 6]
+ assert_array_equal(ans, vpolyval(x=[0, 1], p=[1, 2, 3]))
+ assert_array_equal(ans, vpolyval([0, 1], p=[1, 2, 3]))
+ assert_array_equal(ans, vpolyval([0, 1], [1, 2, 3]))
+
+ def test_keywords4_ticket_2100(self):
+ """Test vectorizing function with no positional args."""
+ @vectorize
+ def f(**kw):
+ res = 1.0
+ for _k in kw:
+ res *= kw[_k]
+ return res
+ assert_array_equal(f(a=[1, 2], b=[3, 4]), [3, 8])
+
+ def test_keywords5_ticket_2100(self):
+ """Test vectorizing function with no kwargs args."""
+ @vectorize
+ def f(*v):
+ return np.prod(v)
+ assert_array_equal(f([1, 2], [3, 4]), [3, 8])
+
+ def test_coverage1_ticket_2100(self):
+ def foo():
+ return 1
+ f = vectorize(foo)
+ assert_array_equal(f(), 1)
+
+ def test_assigning_docstring(self):
+ def foo(x):
+ return x
+ doc = "Provided documentation"
+ f = vectorize(foo, doc=doc)
+ assert_equal(f.__doc__, doc)
+
+ def test_UnboundMethod_ticket_1156(self):
+ """Regression test for issue 1156"""
+ class Foo:
+ b = 2
+ def bar(self, a):
+ return a**self.b
+ assert_array_equal(vectorize(Foo().bar)(np.arange(9)),
+ np.arange(9)**2)
+ assert_array_equal(vectorize(Foo.bar)(Foo(), np.arange(9)),
+ np.arange(9)**2)
+
+ def test_execution_order_ticket_1487(self):
+ """Regression test for dependence on execution order: issue 1487"""
+ f1 = vectorize(lambda x: x)
+ res1a = f1(np.arange(3))
+ res1b = f1(np.arange(0.1, 3))
+ f2 = vectorize(lambda x: x)
+ res2b = f2(np.arange(0.1, 3))
+ res2a = f2(np.arange(3))
+ assert_equal(res1a, res2a)
+ assert_equal(res1b, res2b)
+
+ def test_string_ticket_1892(self):
+ """Test vectorization over strings: issue 1892."""
+ f = np.vectorize(lambda x:x)
+ s = '0123456789'*10
+ assert_equal(s, f(s))
+ #z = f(np.array([s,s]))
+ #assert_array_equal([s,s], f(s))
+
+ def test_cache(self):
+ """Ensure that vectorized func called exactly once per argument."""
+ _calls = [0]
+ @vectorize
+ def f(x):
+ _calls[0] += 1
+ return x**2
+ f.cache = True
+ x = np.arange(5)
+ assert_array_equal(f(x), x*x)
+ assert_equal(_calls[0], len(x))
class TestDigitize(TestCase):
def test_forward(self):
@@ -430,17 +531,17 @@ class TestDigitize(TestCase):
assert_(np.all(digitize(x, bin) != 0))
def test_right_basic(self):
- x = [1,5,4,10,8,11,0]
- bins = [1,5,10]
- default_answer = [1,2,1,3,2,3,0]
+ x = [1, 5, 4, 10, 8, 11, 0]
+ bins = [1, 5, 10]
+ default_answer = [1, 2, 1, 3, 2, 3, 0]
assert_array_equal(digitize(x, bins), default_answer)
- right_answer = [0,1,1,2,2,3,0]
+ right_answer = [0, 1, 1, 2, 2, 3, 0]
assert_array_equal(digitize(x, bins, True), right_answer)
def test_right_open(self):
x = np.arange(-6, 5)
bins = np.arange(-6, 4)
- assert_array_equal(digitize(x,bins,True), np.arange(11))
+ assert_array_equal(digitize(x, bins, True), np.arange(11))
def test_right_open_reverse(self):
x = np.arange(5, -6, -1)
@@ -598,10 +699,10 @@ class TestHistogram(TestCase):
def test_one_bin(self):
# Ticket 632
hist, edges = histogram([1, 2, 3, 4], [1, 2])
- assert_array_equal(hist, [2, ])
+ assert_array_equal(hist, [2,])
assert_array_equal(edges, [1, 2])
assert_raises(ValueError, histogram, [1, 2], bins=0)
- h, e = histogram([1,2], bins=1)
+ h, e = histogram([1, 2], bins=1)
assert_equal(h, np.array([2]))
assert_allclose(e, np.array([1., 2.]))
@@ -630,7 +731,7 @@ class TestHistogram(TestCase):
# Check with non-constant bin widths
v = np.arange(10)
- bins = [0,1,3,6,10]
+ bins = [0, 1, 3, 6, 10]
a, b = histogram(v, bins, density=True)
assert_array_equal(a, .1)
assert_equal(np.sum(a*diff(b)), 1)
@@ -638,13 +739,13 @@ class TestHistogram(TestCase):
# Variable bin widths are especially useful to deal with
# infinities.
v = np.arange(10)
- bins = [0,1,3,6,np.inf]
+ bins = [0, 1, 3, 6, np.inf]
a, b = histogram(v, bins, density=True)
- assert_array_equal(a, [.1,.1,.1,0.])
+ assert_array_equal(a, [.1, .1, .1, 0.])
# Taken from a bug report from N. Becker on the numpy-discussion
# mailing list Aug. 6, 2010.
- counts, dmy = np.histogram([1,2,3,4], [0.5,1.5,np.inf], density=True)
+ counts, dmy = np.histogram([1, 2, 3, 4], [0.5, 1.5, np.inf], density=True)
assert_equal(counts, [.25, 0])
def test_outliers(self):
@@ -709,12 +810,12 @@ class TestHistogram(TestCase):
assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4)
# Check weights with non-uniform bin widths
- a,b = histogram(np.arange(9), [0,1,3,6,10], \
- weights=[2,1,1,1,1,1,1,1,1], density=True)
+ a, b = histogram(np.arange(9), [0, 1, 3, 6, 10], \
+ weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True)
assert_almost_equal(a, [.2, .1, .1, .075])
def test_empty(self):
- a, b = histogram([], bins=([0,1]))
+ a, b = histogram([], bins=([0, 1]))
assert_array_equal(a, np.array([0]))
assert_array_equal(b, np.array([0, 1]))
@@ -792,7 +893,7 @@ class TestHistogramdd(TestCase):
assert_array_equal(edges[0], np.array([-0.5, 0. , 0.5]))
def test_empty(self):
- a, b = histogramdd([[], []], bins=([0,1], [0,1]))
+ a, b = histogramdd([[], []], bins=([0, 1], [0, 1]))
assert_array_max_ulp(a, np.array([[ 0.]]))
a, b = np.histogramdd([[], [], []], bins=2)
assert_array_max_ulp(a, np.zeros((2, 2, 2)))
@@ -1011,7 +1112,7 @@ class TestCorrCoef(TestCase):
class TestCov(TestCase):
def test_basic(self):
x = np.array([[0, 2], [1, 1], [2, 0]]).T
- assert_allclose(np.cov(x), np.array([[ 1.,-1.], [-1.,1.]]))
+ assert_allclose(np.cov(x), np.array([[ 1., -1.], [-1., 1.]]))
def test_empty(self):
assert_equal(cov(np.array([])).size, 0)
@@ -1162,7 +1263,7 @@ class TestBincount(TestCase):
def test_empty(self):
x = np.array([], dtype=int)
y = np.bincount(x)
- assert_array_equal(x,y)
+ assert_array_equal(x, y)
def test_empty_with_minlength(self):
x = np.array([], dtype=int)
@@ -1182,10 +1283,10 @@ class TestInterp(TestCase):
assert_almost_equal(np.interp(x0, x, y), x0)
def test_right_left_behavior(self):
- assert_equal(interp([-1, 0, 1], [0], [1]), [1,1,1])
- assert_equal(interp([-1, 0, 1], [0], [1], left=0), [0,1,1])
- assert_equal(interp([-1, 0, 1], [0], [1], right=0), [1,1,0])
- assert_equal(interp([-1, 0, 1], [0], [1], left=0, right=0), [0,1,0])
+ assert_equal(interp([-1, 0, 1], [0], [1]), [1, 1, 1])
+ assert_equal(interp([-1, 0, 1], [0], [1], left=0), [0, 1, 1])
+ assert_equal(interp([-1, 0, 1], [0], [1], right=0), [1, 1, 0])
+ assert_equal(interp([-1, 0, 1], [0], [1], left=0, right=0), [0, 1, 0])
def test_scalar_interpolation_point(self):
x = np.linspace(0, 1, 5)
@@ -1255,10 +1356,10 @@ class TestAdd_newdoc_ufunc(TestCase):
def test_ufunc_arg(self):
assert_raises(TypeError, add_newdoc_ufunc, 2, "blah")
- assert_raises(ValueError, add_newdoc_ufunc,np.add, "blah")
+ assert_raises(ValueError, add_newdoc_ufunc, np.add, "blah")
def test_string_arg(self):
- assert_raises(TypeError, add_newdoc_ufunc,np.add, 3)
+ assert_raises(TypeError, add_newdoc_ufunc, np.add, 3)