diff options
-rw-r--r-- | numpy/lib/function_base.py | 320 | ||||
-rw-r--r-- | numpy/lib/tests/test_function_base.py | 155 |
2 files changed, 307 insertions, 168 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 2de5c6193..473c8ea23 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -1,12 +1,13 @@ __docformat__ = "restructuredtext en" __all__ = ['select', 'piecewise', 'trim_zeros', 'copy', 'iterable', - 'percentile', 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', - 'disp', 'extract', 'place', 'nansum', 'nanmax', 'nanargmax', - 'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average', - 'histogram', 'histogramdd', 'bincount', 'digitize', 'cov', 'corrcoef', - 'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett', - 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring', - 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc'] + 'percentile', 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', + 'disp', 'extract', 'place', 'nansum', 'nanmax', 'nanargmax', + 'nanargmin', 'nanmin', 'vectorize', 'asarray_chkfinite', 'average', + 'histogram', 'histogramdd', 'bincount', 'digitize', 'cov', + 'corrcoef', 'msort', 'median', 'sinc', 'hamming', 'hanning', + 'bartlett', 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', + 'add_docstring', 'meshgrid', 'delete', 'insert', 'append', 'interp', + 'add_newdoc_ufunc'] import warnings import types @@ -1698,80 +1699,9 @@ def disp(mesg, device=None, linefeed=True): device.flush() return -# return number of input arguments and -# number of default arguments - -def _get_nargs(obj): - import re - - terr = re.compile(r'.*? takes (exactly|at least) (?P<exargs>(\d+)|(\w+))' + - r' argument(s|) \((?P<gargs>(\d+)|(\w+)) given\)') - def _convert_to_int(strval): - try: - result = int(strval) - except ValueError: - if strval=='zero': - result = 0 - elif strval=='one': - result = 1 - elif strval=='two': - result = 2 - # How high to go? English only? - else: - raise - return result - - if not callable(obj): - raise TypeError( - "Object is not callable.") - if sys.version_info[0] >= 3: - # inspect currently fails for binary extensions - # like math.cos. So fall back to other methods if - # it fails. - import inspect - try: - spec = inspect.getargspec(obj) - nargs = len(spec.args) - if spec.defaults: - ndefaults = len(spec.defaults) - else: - ndefaults = 0 - if inspect.ismethod(obj): - nargs -= 1 - return nargs, ndefaults - except: - pass - - if hasattr(obj,'func_code'): - fcode = obj.func_code - nargs = fcode.co_argcount - if obj.func_defaults is not None: - ndefaults = len(obj.func_defaults) - else: - ndefaults = 0 - if isinstance(obj, types.MethodType): - nargs -= 1 - return nargs, ndefaults - - try: - obj() - return 0, 0 - except TypeError, msg: - m = terr.match(str(msg)) - if m: - nargs = _convert_to_int(m.group('exargs')) - ndefaults = _convert_to_int(m.group('gargs')) - if isinstance(obj, types.MethodType): - nargs -= 1 - return nargs, ndefaults - - raise ValueError( - "failed to determine the number of arguments for %s" % (obj)) - - class vectorize(object): """ - vectorize(pyfunc, otypes='', doc=None) + vectorize(pyfunc, otypes='', doc=None, excluded=None, cache=False) Generalized function class. @@ -1794,13 +1724,30 @@ class vectorize(object): typecode characters or a list of data type specifiers. There should be one data type specifier for each output. doc : str, optional - The docstring for the function. If None, the docstring will be the - `pyfunc` one. + The docstring for the function. If `None`, the docstring will be the + ``pyfunc.__doc__``. + excluded : set, optional + Set of strings or integers representing the positional or keyword + arguments for which the function will not be vectorized. These will be + passed directly to `pyfunc` unmodified. + + .. versionadded:: 1.7.0 + + cache : bool, optional + If `True`, then cache the first function call that determines the number + of outputs if `otypes` is not provided. + + .. versionadded:: 1.7.0 + + Returns + ------- + vectorized : callable + Vectorized function. Examples -------- >>> def myfunc(a, b): - ... \"\"\"Return a-b if a>b, otherwise return a+b\"\"\" + ... "Return a-b if a>b, otherwise return a+b" ... if a > b: ... return a - b ... else: @@ -1830,78 +1777,169 @@ class vectorize(object): >>> type(out[0]) <type 'numpy.float64'> + The `excluded` argument can be used to prevent vectorizing over certain + arguments. This can be useful for array-like arguments of a fixed length + such as the coefficients for a polynomial as in `polyval`: + + >>> def mypolyval(p, x): + ... _p = list(p) + ... res = _p.pop(0) + ... while _p: + ... res = res*x + _p.pop(0) + ... return res + >>> vpolyval = np.vectorize(mypolyval, excluded=['p']) + >>> vpolyval(p=[1, 2, 3], x=[0, 1]) + array([3, 6]) + + Positional arguments may also be excluded by specifying their position: + + >>> vpolyval.excluded.add(0) + >>> vpolyval([1, 2, 3], x=[0, 1]) + array([3, 6]) + + Notes + ----- + The `vectorize` function is provided primarily for convenience, not for + performance. The implementation is essentially a for loop. + + If `otypes` is not specified, then a call to the function with the first + argument will be used to determine the number of outputs. The results of + this call will be cached if `cache` is `True` to prevent calling the + function twice. However, to implement the cache, the original function must + be wrapped which will slow down subsequent calls, so only do this if your + function is expensive. + + The new keyword argument interface and `excluded` argument support further + degrades performance. """ - def __init__(self, pyfunc, otypes='', doc=None): - self.thefunc = pyfunc - self.ufunc = None - nin, ndefault = _get_nargs(pyfunc) - if nin == 0 and ndefault == 0: - self.nin = None - self.nin_wo_defaults = None - else: - self.nin = nin - self.nin_wo_defaults = nin - ndefault - self.nout = None + def __init__(self, pyfunc, otypes='', doc=None, excluded=None, cache=False): + self.pyfunc = pyfunc + self.cache = cache + if doc is None: self.__doc__ = pyfunc.__doc__ else: self.__doc__ = doc + if isinstance(otypes, str): self.otypes = otypes for char in self.otypes: if char not in typecodes['All']: - raise ValueError( - "invalid otype specified") + raise ValueError("Invalid otype specified: %s" % (char,)) elif iterable(otypes): self.otypes = ''.join([_nx.dtype(x).char for x in otypes]) else: - raise ValueError( - "Invalid otype specification") - self.lastcallargs = 0 - - def __call__(self, *args): - # get number of outputs and output types by calling - # the function on the first entries of args - nargs = len(args) - if self.nin: - if (nargs > self.nin) or (nargs < self.nin_wo_defaults): - raise ValueError( - "Invalid number of arguments") - - # we need a new ufunc if this is being called with more arguments. - if (self.lastcallargs != nargs): - self.lastcallargs = nargs - self.ufunc = None - self.nout = None - - if self.nout is None or self.otypes == '': - newargs = [] - for arg in args: - newargs.append(asarray(arg).flat[0]) - theout = self.thefunc(*newargs) - if isinstance(theout, tuple): - self.nout = len(theout) + raise ValueError("Invalid otype specification") + + # Excluded variable support + if excluded is None: + excluded = set() + self.excluded = set(excluded) + + if self.otypes and not self.excluded: + self._ufunc = None # Caching to improve default performance + + def __call__(self, *args, **kwargs): + """ + Return arrays with the results of `pyfunc` broadcast (vectorized) over + `args` and `kwargs` not in `excluded`. + """ + excluded = self.excluded + if not kwargs and not excluded: + func = self.pyfunc + vargs = args + else: + # The wrapper accepts only positional arguments: we use `names` and + # `inds` to mutate `the_args` and `kwargs` to pass to the original + # function. + nargs = len(args) + + names = [_n for _n in kwargs if _n not in excluded] + inds = [_i for _i in range(nargs) if _i not in excluded] + the_args = list(args) + def func(*vargs): + for _n, _i in enumerate(inds): + the_args[_i] = vargs[_n] + kwargs.update(zip(names, vargs[len(inds):])) + return self.pyfunc(*the_args, **kwargs) + + vargs = [args[_i] for _i in inds] + vargs.extend([kwargs[_n] for _n in names]) + + return self._vectorize_call(func=func, args=vargs) + + def _get_ufunc_and_otypes(self, func, args): + """Return (ufunc, otypes).""" + # frompyfunc will fail if args is empty + assert args + + if self.otypes: + otypes = self.otypes + nout = len(otypes) + + # Note logic here: We only *use* self._ufunc if func is self.pyfunc + # even though we set self._ufunc regardless. + if func is self.pyfunc and self._ufunc is not None: + ufunc = self._ufunc + else: + ufunc = self._ufunc = frompyfunc(func, len(args), nout) + else: + # Get number of outputs and output types by calling the function on + # the first entries of args. We also cache the result to prevent + # the subsequent call when the ufunc is evaluated. + # Assumes that ufunc first evaluates the 0th elements in the input + # arrays (the input values are not checked to ensure this) + inputs = [asarray(_a).flat[0] for _a in args] + outputs = func(*inputs) + + # Performance note: profiling indicates that -- for simple functions + # at least -- this wrapping can almost double the execution time. + # Hence we make it optional. + if self.cache: + _cache = [outputs] + def _func(*vargs): + if _cache: + return _cache.pop() + else: + return func(*vargs) + else: + _func = func + + if isinstance(outputs, tuple): + nout = len(outputs) else: - self.nout = 1 - theout = (theout,) - if self.otypes == '': - otypes = [] - for k in range(self.nout): - otypes.append(asarray(theout[k]).dtype.char) - self.otypes = ''.join(otypes) - - # Create ufunc if not already created - if (self.ufunc is None): - self.ufunc = frompyfunc(self.thefunc, nargs, self.nout) - - # Convert to object arrays first - newargs = [array(arg,copy=False,subok=True,dtype=object) for arg in args] - if self.nout == 1: - _res = array(self.ufunc(*newargs),copy=False, - subok=True,dtype=self.otypes[0]) + nout = 1 + outputs = (outputs,) + + otypes = ''.join([asarray(outputs[_k]).dtype.char + for _k in range(nout)]) + + # Performance note: profiling indicates that creating the ufunc is + # not a significant cost compared with wrapping so it seems not + # worth trying to cache this. + ufunc = frompyfunc(_func, len(args), nout) + + return ufunc, otypes + + def _vectorize_call(self, func, args): + """Vectorized call to `func` over positional `args`.""" + if not args: + _res = func() else: - _res = tuple([array(x,copy=False,subok=True,dtype=c) \ - for x, c in zip(self.ufunc(*newargs), self.otypes)]) + ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args) + + # Convert args to object arrays first + inputs = [array(_a, copy=False, subok=True, dtype=object) + for _a in args] + + outputs = ufunc(*inputs) + + if ufunc.nout == 1: + _res = array(outputs, + copy=False, subok=True, dtype=otypes[0]) + else: + _res = tuple([array(_x, copy=False, subok=True, dtype=_t) + for _x, _t in zip(outputs, otypes)]) return _res def cov(m, y=None, rowvar=1, bias=0, ddof=None): @@ -2595,7 +2633,7 @@ def i0(x): References ---------- - .. [1] C. W. Clenshaw, "Chebyshev series for mathematical functions," in + .. [1] C. W. Clenshaw, "Chebyshev series for mathematical functions", in *National Physical Laboratory Mathematical Tables*, vol. 5, London: Her Majesty's Stationery Office, 1962. .. [2] M. Abramowitz and I. A. Stegun, *Handbook of Mathematical diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 7245b8962..95b32e47c 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -274,7 +274,7 @@ class TestGradient(TestCase): assert_array_equal(gradient(v), dx) def test_badargs(self): - # for 2D array, gradient can take 0,1, or 2 extra args + # for 2D array, gradient can take 0, 1, or 2 extra args x = np.array([[1, 1], [3, 4]]) assert_raises(SyntaxError, gradient, x, np.array([1., 1.]), np.array([1., 1.]), np.array([1., 1.])) @@ -394,12 +394,12 @@ class TestVectorize(TestCase): def foo(a, b=1): return a + b f = vectorize(foo) - args = np.array([1,2,3]) + args = np.array([1, 2, 3]) r1 = f(args) - r2 = np.array([2,3,4]) + r2 = np.array([2, 3, 4]) assert_array_equal(r1, r2) r1 = f(args, 2) - r2 = np.array([3,4,5]) + r2 = np.array([3, 4, 5]) assert_array_equal(r1, r2) def test_keywords_no_func_code(self): @@ -412,6 +412,107 @@ class TestVectorize(TestCase): except: raise AssertionError() + def test_keywords2_ticket_2100(self): + r"""Test kwarg support: enhancement ticket 2100""" + import math + def foo(a, b=1): + return a + b + f = vectorize(foo) + args = np.array([1, 2, 3]) + r1 = f(a=args) + r2 = np.array([2, 3, 4]) + assert_array_equal(r1, r2) + r1 = f(b=1, a=args) + assert_array_equal(r1, r2) + r1 = f(args, b=2) + r2 = np.array([3, 4, 5]) + assert_array_equal(r1, r2) + + def test_keywords3_ticket_2100(self): + """Test excluded with mixed positional and kwargs: ticket 2100""" + def mypolyval(x, p): + _p = list(p) + res = _p.pop(0) + while _p: + res = res*x + _p.pop(0) + return res + vpolyval = np.vectorize(mypolyval, excluded=['p',1]) + ans = [3, 6] + assert_array_equal(ans, vpolyval(x=[0, 1], p=[1, 2, 3])) + assert_array_equal(ans, vpolyval([0, 1], p=[1, 2, 3])) + assert_array_equal(ans, vpolyval([0, 1], [1, 2, 3])) + + def test_keywords4_ticket_2100(self): + """Test vectorizing function with no positional args.""" + @vectorize + def f(**kw): + res = 1.0 + for _k in kw: + res *= kw[_k] + return res + assert_array_equal(f(a=[1, 2], b=[3, 4]), [3, 8]) + + def test_keywords5_ticket_2100(self): + """Test vectorizing function with no kwargs args.""" + @vectorize + def f(*v): + return np.prod(v) + assert_array_equal(f([1, 2], [3, 4]), [3, 8]) + + def test_coverage1_ticket_2100(self): + def foo(): + return 1 + f = vectorize(foo) + assert_array_equal(f(), 1) + + def test_assigning_docstring(self): + def foo(x): + return x + doc = "Provided documentation" + f = vectorize(foo, doc=doc) + assert_equal(f.__doc__, doc) + + def test_UnboundMethod_ticket_1156(self): + """Regression test for issue 1156""" + class Foo: + b = 2 + def bar(self, a): + return a**self.b + assert_array_equal(vectorize(Foo().bar)(np.arange(9)), + np.arange(9)**2) + assert_array_equal(vectorize(Foo.bar)(Foo(), np.arange(9)), + np.arange(9)**2) + + def test_execution_order_ticket_1487(self): + """Regression test for dependence on execution order: issue 1487""" + f1 = vectorize(lambda x: x) + res1a = f1(np.arange(3)) + res1b = f1(np.arange(0.1, 3)) + f2 = vectorize(lambda x: x) + res2b = f2(np.arange(0.1, 3)) + res2a = f2(np.arange(3)) + assert_equal(res1a, res2a) + assert_equal(res1b, res2b) + + def test_string_ticket_1892(self): + """Test vectorization over strings: issue 1892.""" + f = np.vectorize(lambda x:x) + s = '0123456789'*10 + assert_equal(s, f(s)) + #z = f(np.array([s,s])) + #assert_array_equal([s,s], f(s)) + + def test_cache(self): + """Ensure that vectorized func called exactly once per argument.""" + _calls = [0] + @vectorize + def f(x): + _calls[0] += 1 + return x**2 + f.cache = True + x = np.arange(5) + assert_array_equal(f(x), x*x) + assert_equal(_calls[0], len(x)) class TestDigitize(TestCase): def test_forward(self): @@ -430,17 +531,17 @@ class TestDigitize(TestCase): assert_(np.all(digitize(x, bin) != 0)) def test_right_basic(self): - x = [1,5,4,10,8,11,0] - bins = [1,5,10] - default_answer = [1,2,1,3,2,3,0] + x = [1, 5, 4, 10, 8, 11, 0] + bins = [1, 5, 10] + default_answer = [1, 2, 1, 3, 2, 3, 0] assert_array_equal(digitize(x, bins), default_answer) - right_answer = [0,1,1,2,2,3,0] + right_answer = [0, 1, 1, 2, 2, 3, 0] assert_array_equal(digitize(x, bins, True), right_answer) def test_right_open(self): x = np.arange(-6, 5) bins = np.arange(-6, 4) - assert_array_equal(digitize(x,bins,True), np.arange(11)) + assert_array_equal(digitize(x, bins, True), np.arange(11)) def test_right_open_reverse(self): x = np.arange(5, -6, -1) @@ -598,10 +699,10 @@ class TestHistogram(TestCase): def test_one_bin(self): # Ticket 632 hist, edges = histogram([1, 2, 3, 4], [1, 2]) - assert_array_equal(hist, [2, ]) + assert_array_equal(hist, [2,]) assert_array_equal(edges, [1, 2]) assert_raises(ValueError, histogram, [1, 2], bins=0) - h, e = histogram([1,2], bins=1) + h, e = histogram([1, 2], bins=1) assert_equal(h, np.array([2])) assert_allclose(e, np.array([1., 2.])) @@ -630,7 +731,7 @@ class TestHistogram(TestCase): # Check with non-constant bin widths v = np.arange(10) - bins = [0,1,3,6,10] + bins = [0, 1, 3, 6, 10] a, b = histogram(v, bins, density=True) assert_array_equal(a, .1) assert_equal(np.sum(a*diff(b)), 1) @@ -638,13 +739,13 @@ class TestHistogram(TestCase): # Variale bin widths are especially useful to deal with # infinities. v = np.arange(10) - bins = [0,1,3,6,np.inf] + bins = [0, 1, 3, 6, np.inf] a, b = histogram(v, bins, density=True) - assert_array_equal(a, [.1,.1,.1,0.]) + assert_array_equal(a, [.1, .1, .1, 0.]) # Taken from a bug report from N. Becker on the numpy-discussion # mailing list Aug. 6, 2010. - counts, dmy = np.histogram([1,2,3,4], [0.5,1.5,np.inf], density=True) + counts, dmy = np.histogram([1, 2, 3, 4], [0.5, 1.5, np.inf], density=True) assert_equal(counts, [.25, 0]) def test_outliers(self): @@ -709,12 +810,12 @@ class TestHistogram(TestCase): assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4) # Check weights with non-uniform bin widths - a,b = histogram(np.arange(9), [0,1,3,6,10], \ - weights=[2,1,1,1,1,1,1,1,1], density=True) + a, b = histogram(np.arange(9), [0, 1, 3, 6, 10], \ + weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True) assert_almost_equal(a, [.2, .1, .1, .075]) def test_empty(self): - a, b = histogram([], bins=([0,1])) + a, b = histogram([], bins=([0, 1])) assert_array_equal(a, np.array([0])) assert_array_equal(b, np.array([0, 1])) @@ -792,7 +893,7 @@ class TestHistogramdd(TestCase): assert_array_equal(edges[0], np.array([-0.5, 0. , 0.5])) def test_empty(self): - a, b = histogramdd([[], []], bins=([0,1], [0,1])) + a, b = histogramdd([[], []], bins=([0, 1], [0, 1])) assert_array_max_ulp(a, np.array([[ 0.]])) a, b = np.histogramdd([[], [], []], bins=2) assert_array_max_ulp(a, np.zeros((2, 2, 2))) @@ -1011,7 +1112,7 @@ class TestCorrCoef(TestCase): class TestCov(TestCase): def test_basic(self): x = np.array([[0, 2], [1, 1], [2, 0]]).T - assert_allclose(np.cov(x), np.array([[ 1.,-1.], [-1.,1.]])) + assert_allclose(np.cov(x), np.array([[ 1., -1.], [-1., 1.]])) def test_empty(self): assert_equal(cov(np.array([])).size, 0) @@ -1162,7 +1263,7 @@ class TestBincount(TestCase): def test_empty(self): x = np.array([], dtype=int) y = np.bincount(x) - assert_array_equal(x,y) + assert_array_equal(x, y) def test_empty_with_minlength(self): x = np.array([], dtype=int) @@ -1182,10 +1283,10 @@ class TestInterp(TestCase): assert_almost_equal(np.interp(x0, x, y), x0) def test_right_left_behavior(self): - assert_equal(interp([-1, 0, 1], [0], [1]), [1,1,1]) - assert_equal(interp([-1, 0, 1], [0], [1], left=0), [0,1,1]) - assert_equal(interp([-1, 0, 1], [0], [1], right=0), [1,1,0]) - assert_equal(interp([-1, 0, 1], [0], [1], left=0, right=0), [0,1,0]) + assert_equal(interp([-1, 0, 1], [0], [1]), [1, 1, 1]) + assert_equal(interp([-1, 0, 1], [0], [1], left=0), [0, 1, 1]) + assert_equal(interp([-1, 0, 1], [0], [1], right=0), [1, 1, 0]) + assert_equal(interp([-1, 0, 1], [0], [1], left=0, right=0), [0, 1, 0]) def test_scalar_interpolation_point(self): x = np.linspace(0, 1, 5) @@ -1255,10 +1356,10 @@ class TestAdd_newdoc_ufunc(TestCase): def test_ufunc_arg(self): assert_raises(TypeError, add_newdoc_ufunc, 2, "blah") - assert_raises(ValueError, add_newdoc_ufunc,np.add, "blah") + assert_raises(ValueError, add_newdoc_ufunc, np.add, "blah") def test_string_arg(self): - assert_raises(TypeError, add_newdoc_ufunc,np.add, 3) + assert_raises(TypeError, add_newdoc_ufunc, np.add, 3) |