Diffstat (limited to 'numpy/lib')
51 files changed, 5944 insertions, 3436 deletions
diff --git a/numpy/lib/__init__.py b/numpy/lib/__init__.py index 8c420b0c3..1d65db55e 100644 --- a/numpy/lib/__init__.py +++ b/numpy/lib/__init__.py @@ -21,7 +21,7 @@ from .utils import * from .arraysetops import * from .npyio import * from .financial import * -from .arrayterator import * +from .arrayterator import Arrayterator from .arraypad import * from ._version import * @@ -41,6 +41,6 @@ __all__ += npyio.__all__ __all__ += financial.__all__ __all__ += nanfunctions.__all__ -from numpy.testing import Tester -test = Tester().test -bench = Tester().bench +from numpy.testing.nosetester import _numpy_tester +test = _numpy_tester().test +bench = _numpy_tester().bench diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py index 338c8b331..c528de608 100644 --- a/numpy/lib/_datasource.py +++ b/numpy/lib/_datasource.py @@ -6,7 +6,7 @@ low-level details. Through datasource, a researcher can obtain and use a file with one function call, regardless of location of the file. DataSource is meant to augment standard python libraries, not replace them. -It should work seemlessly with standard file IO operations and the os +It should work seamlessly with standard file IO operations and the os module. DataSource files can originate locally or remotely: @@ -317,7 +317,7 @@ class DataSource (object): return a path to that local file. The search will include possible compressed versions of the file - and return the first occurence found. + and return the first occurrence found. """ diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py index 9108b2e4c..dfdc38b72 100644 --- a/numpy/lib/_iotools.py +++ b/numpy/lib/_iotools.py @@ -27,6 +27,7 @@ else: _bytes_to_complex = complex _bytes_to_name = str + def _is_string_like(obj): """ Check whether obj behaves like a string. @@ -37,6 +38,7 @@ def _is_string_like(obj): return False return True + def _is_bytes_like(obj): """ Check whether obj behaves like a bytes object. @@ -158,7 +160,7 @@ class LineSplitter(object): delimiter : str, int, or sequence of ints, optional If a string, character used to delimit consecutive fields. If an integer or a sequence of integers, width(s) of each field. - comment : str, optional + comments : str, optional Character used to mark the beginning of a comment. Default is '#'. autostrip : bool, optional Whether to strip each individual field. Default is True. @@ -269,7 +271,7 @@ class NameValidator(object): deletechars : str, optional A string combining invalid characters that must be deleted from the names. - casesensitive : {True, False, 'upper', 'lower'}, optional + case_sensitive : {True, False, 'upper', 'lower'}, optional * If True, field names are case-sensitive. * If False or 'upper', field names are converted to upper case. * If 'lower', field names are converted to lower case. @@ -318,12 +320,13 @@ class NameValidator(object): # Process the case option ..... if (case_sensitive is None) or (case_sensitive is True): self.case_converter = lambda x: x - elif (case_sensitive is False) or ('u' in case_sensitive): + elif (case_sensitive is False) or case_sensitive.startswith('u'): self.case_converter = lambda x: x.upper() - elif 'l' in case_sensitive: + elif case_sensitive.startswith('l'): self.case_converter = lambda x: x.lower() else: - self.case_converter = lambda x: x + msg = 'unrecognized case_sensitive value %s.' 
% case_sensitive + raise ValueError(msg) # self.replace_space = replace_space @@ -338,7 +341,7 @@ class NameValidator(object): defaultfmt : str, optional Default format string, used if validating a given string reduces its length to zero. - nboutput : integer, optional + nbfields : integer, optional Final number of validated names, used to expand or shrink the initial list of names. @@ -445,6 +448,7 @@ class ConverterError(Exception): """ pass + class ConverterLockError(ConverterError): """ Exception raised when an attempt is made to upgrade a locked converter. @@ -452,6 +456,7 @@ class ConverterLockError(ConverterError): """ pass + class ConversionWarning(UserWarning): """ Warning issued when a string converter has a problem. @@ -513,12 +518,19 @@ class StringConverter(object): """ # _mapper = [(nx.bool_, str2bool, False), - (nx.integer, int, -1), - (nx.floating, float, nx.nan), - (complex, _bytes_to_complex, nx.nan + 0j), - (nx.string_, bytes, asbytes('???'))] + (nx.integer, int, -1)] + + # On 32-bit systems, we need to make sure that we explicitly include + # nx.int64 since ns.integer is nx.int32. + if nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize: + _mapper.append((nx.int64, int, -1)) + + _mapper.extend([(nx.floating, float, nx.nan), + (complex, _bytes_to_complex, nx.nan + 0j), + (nx.longdouble, nx.longdouble, nx.nan), + (nx.string_, bytes, asbytes('???'))]) + (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper) - # @classmethod def _getdtype(cls, val): @@ -632,6 +644,18 @@ class StringConverter(object): else: self.default = default break + # if a converter for the specific dtype is available use that + last_func = func + for (i, (deftype, func, default_def)) in enumerate(self._mapper): + if dtype.type == deftype: + _status = i + last_func = func + if default is None: + self.default = default_def + else: + self.default = default + break + func = last_func if _status == -1: # We never found a match in the _mapper... _status = 0 @@ -672,7 +696,22 @@ class StringConverter(object): def _strict_call(self, value): try: - return self.func(value) + + # We check if we can convert the value using the current function + new_value = self.func(value) + + # In addition to having to check whether func can convert the + # value, we also have to make sure that we don't get overflow + # errors for integers. + if self.func is int: + try: + np.array(value, dtype=self.type) + except OverflowError: + raise ValueError + + # We're still here so we can now return the new value + return new_value + except ValueError: if value.strip() in self.missing_values: if not self._status: @@ -708,7 +747,7 @@ class StringConverter(object): """ self._checked = True try: - self._strict_call(value) + return self._strict_call(value) except ValueError: # Raise an exception if we locked the converter... if self._locked: @@ -728,7 +767,7 @@ class StringConverter(object): self.default = self._initial_default else: self.default = default - self.upgrade(value) + return self.upgrade(value) def iterupgrade(self, value): self._checked = True diff --git a/numpy/lib/_version.py b/numpy/lib/_version.py index 54b9c1dc7..0019c5607 100644 --- a/numpy/lib/_version.py +++ b/numpy/lib/_version.py @@ -1,4 +1,4 @@ -"""Utility to compare (Numpy) version strings. +"""Utility to compare (NumPy) version strings. The NumpyVersion class allows properly comparing numpy version strings. 
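An illustrative doctest-style sketch of the comparison semantics described above (assumes ``numpy.lib.NumpyVersion``, public since 1.9.0; not part of this patch):

>>> from numpy.lib import NumpyVersion
>>> NumpyVersion('1.10.0') > '1.9.2'
True
>>> NumpyVersion('1.7.0.dev-6acvda54') < '1.7.0'
True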
The LooseVersion and StrictVersion classes that distutils provides don't @@ -18,7 +18,7 @@ __all__ = ['NumpyVersion'] class NumpyVersion(): """Parse and compare numpy version strings. - Numpy has the following versioning scheme (numbers given are examples; they + NumPy has the following versioning scheme (numbers given are examples; they can be > 9) in principle): - Released version: '1.8.0', '1.8.1', etc. @@ -40,7 +40,7 @@ class NumpyVersion(): Parameters ---------- vstring : str - Numpy version string (``np.__version__``). + NumPy version string (``np.__version__``). Examples -------- diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py index bbfdce794..15e3ed957 100644 --- a/numpy/lib/arraypad.py +++ b/numpy/lib/arraypad.py @@ -6,7 +6,6 @@ of an n-dimensional array. from __future__ import division, absolute_import, print_function import numpy as np -from numpy.compat import long __all__ = ['pad'] @@ -47,7 +46,7 @@ def _arange_ndarray(arr, shape, axis, reverse=False): """ initshape = tuple(1 if i != axis else shape[axis] - for (i, x) in enumerate(arr.shape)) + for (i, x) in enumerate(arr.shape)) if not reverse: padarr = np.arange(1, shape[axis] + 1) else: @@ -779,7 +778,7 @@ def _pad_ref(arr, pad_amt, method, axis=-1): Notes ----- This algorithm does not pad with repetition, i.e. the edges are not - repeated in the reflection. For that behavior, use `method='symmetric'`. + repeated in the reflection. For that behavior, use `mode='symmetric'`. The modes 'reflect', 'symmetric', and 'wrap' must be padded with a single function, lest the indexing tricks in non-integer multiples of the @@ -864,7 +863,7 @@ def _pad_sym(arr, pad_amt, method, axis=-1): Notes ----- This algorithm DOES pad with repetition, i.e. the edges are repeated. - For a method that does not repeat edges, use `method='reflect'`. + For padding without repeated edges, use `mode='reflect'`. The modes 'reflect', 'symmetric', and 'wrap' must be padded with a single function, lest the indexing tricks in non-integer multiples of the @@ -987,7 +986,7 @@ def _pad_wrap(arr, pad_amt, axis=-1): return np.concatenate((wrap_chunk1, arr, wrap_chunk2), axis=axis) -def _normalize_shape(narray, shape): +def _normalize_shape(ndarray, shape, cast_to_int=True): """ Private function which does some checks and normalizes the possibly much simpler representations of 'pad_width', 'stat_length', @@ -997,52 +996,54 @@ def _normalize_shape(narray, shape): ---------- narray : ndarray Input ndarray - shape : {sequence, int}, optional - The width of padding (pad_width) or the number of elements on the - edge of the narray used for statistics (stat_length). + shape : {sequence, array_like, float, int}, optional + The width of padding (pad_width), the number of elements on the + edge of the narray used for statistics (stat_length), the constant + value(s) to use when filling padded regions (constant_values), or the + endpoint target(s) for linear ramps (end_values). ((before_1, after_1), ... (before_N, after_N)) unique number of elements for each axis where `N` is rank of `narray`. ((before, after),) yields same before and after constants for each axis. - (constant,) or int is a shortcut for before = after = constant for + (constant,) or val is a shortcut for before = after = constant for all axes. + cast_to_int : bool, optional + Controls if values in ``shape`` will be rounded and cast to int + before being returned. Returns ------- - _normalize_shape : tuple of tuples - int => ((int, int), (int, int), ...) - [[int1, int2], [int3, int4], ...] 
=> ((int1, int2), (int3, int4), ...) - ((int1, int2), (int3, int4), ...) => no change - [[int1, int2], ] => ((int1, int2), (int1, int2), ...) - ((int1, int2), ) => ((int1, int2), (int1, int2), ...) - [[int , ], ] => ((int, int), (int, int), ...) - ((int , ), ) => ((int, int), (int, int), ...) + normalized_shape : tuple of tuples + val => ((val, val), (val, val), ...) + [[val1, val2], [val3, val4], ...] => ((val1, val2), (val3, val4), ...) + ((val1, val2), (val3, val4), ...) => no change + [[val1, val2], ] => ((val1, val2), (val1, val2), ...) + ((val1, val2), ) => ((val1, val2), (val1, val2), ...) + [[val , ], ] => ((val, val), (val, val), ...) + ((val , ), ) => ((val, val), (val, val), ...) """ - normshp = None - shapelen = len(np.shape(narray)) - if (isinstance(shape, int)) or shape is None: - normshp = ((shape, shape), ) * shapelen - elif (isinstance(shape, (tuple, list)) - and isinstance(shape[0], (tuple, list)) - and len(shape) == shapelen): - normshp = shape - for i in normshp: - if len(i) != 2: - fmt = "Unable to create correctly shaped tuple from %s" - raise ValueError(fmt % (normshp,)) - elif (isinstance(shape, (tuple, list)) - and isinstance(shape[0], (int, float, long)) - and len(shape) == 1): - normshp = ((shape[0], shape[0]), ) * shapelen - elif (isinstance(shape, (tuple, list)) - and isinstance(shape[0], (int, float, long)) - and len(shape) == 2): - normshp = (shape, ) * shapelen - if normshp is None: + ndims = ndarray.ndim + + # Shortcut shape=None + if shape is None: + return ((None, None), ) * ndims + + # Convert any input `info` to a NumPy array + shape_arr = np.asarray(shape) + + try: + shape_arr = np.broadcast_to(shape_arr, (ndims, 2)) + except ValueError: fmt = "Unable to create correctly shaped tuple from %s" raise ValueError(fmt % (shape,)) - return normshp + + # Cast if necessary + if cast_to_int is True: + shape_arr = np.round(shape_arr).astype(int) + + # Convert list of lists to tuple of tuples + return tuple(tuple(axis) for axis in shape_arr.tolist()) def _validate_lengths(narray, number_elements): @@ -1090,7 +1091,7 @@ def _validate_lengths(narray, number_elements): # Public functions -def pad(array, pad_width, mode=None, **kwargs): +def pad(array, pad_width, mode, **kwargs): """ Pads an array. @@ -1098,14 +1099,14 @@ def pad(array, pad_width, mode=None, **kwargs): ---------- array : array_like of rank N Input array - pad_width : {sequence, int} + pad_width : {sequence, array_like, int} Number of values padded to the edges of each axis. ((before_1, after_1), ... (before_N, after_N)) unique pad widths for each axis. ((before, after),) yields same before and after pad for each axis. (pad,) or int is a shortcut for before = after = pad width for all axes. - mode : {str, function} + mode : str or function One of the following string values or a user supplied function. 'constant' @@ -1140,7 +1141,7 @@ def pad(array, pad_width, mode=None, **kwargs): end values are used to pad the beginning. <function> Padding function, see Notes. - stat_length : {sequence, int}, optional + stat_length : sequence or int, optional Used in 'maximum', 'mean', 'median', and 'minimum'. Number of values at edge of each axis used to calculate the statistic value. @@ -1154,7 +1155,7 @@ def pad(array, pad_width, mode=None, **kwargs): length for all axes. Default is ``None``, to use the entire axis. - constant_values : {sequence, int}, optional + constant_values : sequence or int, optional Used in 'constant'. The values to set the padded values for each axis. 
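Worth a concrete sketch at this point: with ``_normalize_shape`` rebuilt on ``np.broadcast_to``, any value broadcastable to ``(ndim, 2)`` is accepted, and ``constant_values``/``end_values`` are no longer rounded to int (``cast_to_int=False``). A hedged doctest-style illustration through the public ``np.lib.pad``, assuming the patched behavior:

>>> x = np.array([1.0, 2.0, 3.0])
>>> np.lib.pad(x, (1, 2), 'constant', constant_values=(-0.5, 0.5))
array([-0.5,  1. ,  2. ,  3. ,  0.5,  0.5])
>>> np.lib.pad(x, 1, 'constant', constant_values=9.5)
array([ 9.5,  1. ,  2. ,  3. ,  9.5])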
@@ -1168,7 +1169,7 @@ def pad(array, pad_width, mode=None, **kwargs): all axes. Default is 0. - end_values : {sequence, int}, optional + end_values : sequence or int, optional Used in 'linear_ramp'. The values used for the ending value of the linear_ramp and that will form the edge of the padded array. @@ -1182,7 +1183,7 @@ def pad(array, pad_width, mode=None, **kwargs): all axes. Default is 0. - reflect_type : str {'even', 'odd'}, optional + reflect_type : {'even', 'odd'}, optional Used in 'reflect', and 'symmetric'. The 'even' style is the default with an unaltered reflection around the edge value. For the 'odd' style, the extented part of the array is created by @@ -1227,13 +1228,13 @@ def pad(array, pad_width, mode=None, **kwargs): Examples -------- >>> a = [1, 2, 3, 4, 5] - >>> np.lib.pad(a, (2,3), 'constant', constant_values=(4,6)) + >>> np.lib.pad(a, (2,3), 'constant', constant_values=(4, 6)) array([4, 4, 1, 2, 3, 4, 5, 6, 6, 6]) - >>> np.lib.pad(a, (2,3), 'edge') + >>> np.lib.pad(a, (2, 3), 'edge') array([1, 1, 1, 2, 3, 4, 5, 5, 5, 5]) - >>> np.lib.pad(a, (2,3), 'linear_ramp', end_values=(5,-4)) + >>> np.lib.pad(a, (2, 3), 'linear_ramp', end_values=(5, -4)) array([ 5, 3, 1, 2, 3, 4, 5, 2, -1, -4]) >>> np.lib.pad(a, (2,), 'maximum') @@ -1245,7 +1246,7 @@ def pad(array, pad_width, mode=None, **kwargs): >>> np.lib.pad(a, (2,), 'median') array([3, 3, 1, 2, 3, 4, 5, 3, 3]) - >>> a = [[1,2], [3,4]] + >>> a = [[1, 2], [3, 4]] >>> np.lib.pad(a, ((3, 2), (2, 3)), 'minimum') array([[1, 1, 1, 2, 1, 1, 1], [1, 1, 1, 2, 1, 1, 1], @@ -1256,19 +1257,19 @@ def pad(array, pad_width, mode=None, **kwargs): [1, 1, 1, 2, 1, 1, 1]]) >>> a = [1, 2, 3, 4, 5] - >>> np.lib.pad(a, (2,3), 'reflect') + >>> np.lib.pad(a, (2, 3), 'reflect') array([3, 2, 1, 2, 3, 4, 5, 4, 3, 2]) - >>> np.lib.pad(a, (2,3), 'reflect', reflect_type='odd') + >>> np.lib.pad(a, (2, 3), 'reflect', reflect_type='odd') array([-1, 0, 1, 2, 3, 4, 5, 6, 7, 8]) - >>> np.lib.pad(a, (2,3), 'symmetric') + >>> np.lib.pad(a, (2, 3), 'symmetric') array([2, 1, 1, 2, 3, 4, 5, 5, 4, 3]) - >>> np.lib.pad(a, (2,3), 'symmetric', reflect_type='odd') + >>> np.lib.pad(a, (2, 3), 'symmetric', reflect_type='odd') array([0, 1, 1, 2, 3, 4, 5, 5, 6, 7]) - >>> np.lib.pad(a, (2,3), 'wrap') + >>> np.lib.pad(a, (2, 3), 'wrap') array([4, 5, 1, 2, 3, 4, 5, 1, 2, 3]) >>> def padwithtens(vector, pad_width, iaxis, kwargs): @@ -1277,7 +1278,7 @@ def pad(array, pad_width, mode=None, **kwargs): ... return vector >>> a = np.arange(6) - >>> a = a.reshape((2,3)) + >>> a = a.reshape((2, 3)) >>> np.lib.pad(a, 2, padwithtens) array([[10, 10, 10, 10, 10, 10, 10], @@ -1287,6 +1288,8 @@ def pad(array, pad_width, mode=None, **kwargs): [10, 10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10, 10]]) """ + if not np.asarray(pad_width).dtype.kind == 'i': + raise TypeError('`pad_width` must be of integral type.') narray = np.array(array) pad_width = _validate_lengths(narray, pad_width) @@ -1311,7 +1314,7 @@ def pad(array, pad_width, mode=None, **kwargs): 'reflect_type': 'even', } - if isinstance(mode, str): + if isinstance(mode, np.compat.basestring): # Make sure have allowed kwargs appropriate for mode for key in kwargs: if key not in allowedkwargs[mode]: @@ -1327,10 +1330,8 @@ def pad(array, pad_width, mode=None, **kwargs): if i == 'stat_length': kwargs[i] = _validate_lengths(narray, kwargs[i]) if i in ['end_values', 'constant_values']: - kwargs[i] = _normalize_shape(narray, kwargs[i]) - elif mode is None: - raise ValueError('Keyword "mode" must be a function or one of %s.' 
% - (list(allowedkwargs.keys()),)) + kwargs[i] = _normalize_shape(narray, kwargs[i], + cast_to_int=False) else: # Drop back to old, slower np.apply_along_axis mode for user-supplied # vector function @@ -1420,7 +1421,6 @@ def pad(array, pad_width, mode=None, **kwargs): method = kwargs['reflect_type'] safe_pad = newmat.shape[axis] - 1 while ((pad_before > safe_pad) or (pad_after > safe_pad)): - offset = 0 pad_iter_b = min(safe_pad, safe_pad * (pad_before // safe_pad)) pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad)) @@ -1428,10 +1428,6 @@ def pad(array, pad_width, mode=None, **kwargs): pad_iter_a), method, axis) pad_before -= pad_iter_b pad_after -= pad_iter_a - if pad_iter_b > 0: - offset += 1 - if pad_iter_a > 0: - offset += 1 safe_pad += pad_iter_b + pad_iter_a newmat = _pad_ref(newmat, (pad_before, pad_after), method, axis) diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index d3b6119f4..836f4583f 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -78,29 +78,46 @@ def ediff1d(ary, to_end=None, to_begin=None): array([ 1, 2, -3, 5, 18]) """ - ary = np.asanyarray(ary).flat - ed = ary[1:] - ary[:-1] - arrays = [ed] - if to_begin is not None: - arrays.insert(0, to_begin) - if to_end is not None: - arrays.append(to_end) + # force a 1d array + ary = np.asanyarray(ary).ravel() - if len(arrays) != 1: - # We'll save ourselves a copy of a potentially large array in - # the common case where neither to_begin or to_end was given. - ed = np.hstack(arrays) + # fast track default case + if to_begin is None and to_end is None: + return ary[1:] - ary[:-1] + + if to_begin is None: + l_begin = 0 + else: + to_begin = np.asanyarray(to_begin).ravel() + l_begin = len(to_begin) + + if to_end is None: + l_end = 0 + else: + to_end = np.asanyarray(to_end).ravel() + l_end = len(to_end) + + # do the calculation in place and copy to_begin and to_end + l_diff = max(len(ary) - 1, 0) + result = np.empty(l_diff + l_begin + l_end, dtype=ary.dtype) + result = ary.__array_wrap__(result) + if l_begin > 0: + result[:l_begin] = to_begin + if l_end > 0: + result[l_begin + l_diff:] = to_end + np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff]) + return result - return ed def unique(ar, return_index=False, return_inverse=False, return_counts=False): """ Find the unique elements of an array. - Returns the sorted unique elements of an array. There are two optional + Returns the sorted unique elements of an array. There are three optional outputs in addition to the unique elements: the indices of the input array - that give the unique values, and the indices of the unique array that - reconstruct the input array. + that give the unique values, the indices of the unique array that + reconstruct the input array, and the number of times each unique value + comes up in the input array. Parameters ---------- @@ -113,10 +130,11 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False): If True, also return the indices of the unique array that can be used to reconstruct `ar`. return_counts : bool, optional - .. versionadded:: 1.9.0 If True, also return the number of times each unique value comes up in `ar`. + .. versionadded:: 1.9.0 + Returns ------- unique : ndarray @@ -128,10 +146,11 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False): The indices to reconstruct the (flattened) original array from the unique array. Only provided if `return_inverse` is True. unique_counts : ndarray, optional - .. 
versionadded:: 1.9.0 The number of times each of the unique values comes up in the original array. Only provided if `return_counts` is True. + .. versionadded:: 1.9.0 + See Also -------- numpy.lib.arraysetops : Module with a number of other functions for @@ -392,12 +411,13 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): else: bool_ar = (sar[1:] == sar[:-1]) flag = np.concatenate((bool_ar, [invert])) - indx = order.argsort(kind='mergesort')[:len(ar1)] + ret = np.empty(ar.shape, dtype=bool) + ret[order] = flag if assume_unique: - return flag[indx] + return ret[:len(ar1)] else: - return flag[indx][rev_idx] + return ret[rev_idx] def union1d(ar1, ar2): """ @@ -468,11 +488,9 @@ def setdiff1d(ar1, ar2, assume_unique=False): array([1, 2]) """ - if not assume_unique: + if assume_unique: + ar1 = np.asarray(ar1).ravel() + else: ar1 = unique(ar1) ar2 = unique(ar2) - aux = in1d(ar1, ar2, assume_unique=True) - if aux.size == 0: - return aux - else: - return np.asarray(ar1)[aux == 0] + return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)] diff --git a/numpy/lib/arrayterator.py b/numpy/lib/arrayterator.py index d9839feeb..fb52ada86 100644 --- a/numpy/lib/arrayterator.py +++ b/numpy/lib/arrayterator.py @@ -69,9 +69,8 @@ class Arrayterator(object): Examples -------- - >>> import numpy as np >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6) - >>> a_itor = np.lib.arrayterator.Arrayterator(a, 2) + >>> a_itor = np.lib.Arrayterator(a, 2) >>> a_itor.shape (3, 4, 5, 6) @@ -81,7 +80,7 @@ class Arrayterator(object): >>> for subarr in a_itor: ... if not subarr.all(): - ... print subarr, subarr.shape + ... print(subarr, subarr.shape) ... [[[[0 1]]]] (1, 1, 1, 2) @@ -149,17 +148,17 @@ class Arrayterator(object): See Also -------- - `Arrayterator` + Arrayterator flatiter Examples -------- >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6) - >>> a_itor = np.lib.arrayterator.Arrayterator(a, 2) + >>> a_itor = np.lib.Arrayterator(a, 2) >>> for subarr in a_itor.flat: ... if not subarr: - ... print subarr, type(subarr) + ... print(subarr, type(subarr)) ... 
0 <type 'numpy.int32'> @@ -182,7 +181,7 @@ class Arrayterator(object): def __iter__(self): # Skip arrays with degenerate dimensions if [dim for dim in self.shape if dim <= 0]: - raise StopIteration + return start = self.start[:] stop = self.stop[:] @@ -223,4 +222,4 @@ class Arrayterator(object): start[i] = self.start[i] start[i-1] += self.step[i-1] if start[0] >= self.stop[0]: - raise StopIteration + return diff --git a/numpy/lib/bento.info b/numpy/lib/bento.info deleted file mode 100644 index 9f4fa6f0f..000000000 --- a/numpy/lib/bento.info +++ /dev/null @@ -1,6 +0,0 @@ -HookFile: bscript - -Library: - Extension: _compiled_base - Sources: - src/_compiled_base.c diff --git a/numpy/lib/bscript b/numpy/lib/bscript deleted file mode 100644 index 61debfe41..000000000 --- a/numpy/lib/bscript +++ /dev/null @@ -1,11 +0,0 @@ -from bento.commands import hooks - -@hooks.pre_build -def build(context): - context.tweak_extension("_compiled_base", - includes=["../core/include", "../core/include/numpy", "../core", - "../core/src/private"], - defines=['_FILE_OFFSET_BITS=64', - '_LARGEFILE_SOURCE=1', - '_LARGEFILE64_SOURCE=1'] - ) diff --git a/numpy/lib/financial.py b/numpy/lib/financial.py index 5b96e5b8e..95942da16 100644 --- a/numpy/lib/financial.py +++ b/numpy/lib/financial.py @@ -148,7 +148,7 @@ def pmt(rate, nper, pv, fv=0, when='end'): Number of compounding periods pv : array_like Present value - fv : array_like (optional) + fv : array_like, optional Future value (default = 0) when : {{'begin', 1}, {'end', 0}}, {string, int} When payments are due ('begin' (1) or 'end' (0)) @@ -207,12 +207,13 @@ def pmt(rate, nper, pv, fv=0, when='end'): """ when = _convert_when(when) - (rate, nper, pv, fv, when) = map(np.asarray, [rate, nper, pv, fv, when]) - temp = (1+rate)**nper - miter = np.broadcast(rate, nper, pv, fv, when) - zer = np.zeros(miter.shape) - fact = np.where(rate == zer, nper + zer, - (1 + rate*when)*(temp - 1)/rate + zer) + (rate, nper, pv, fv, when) = map(np.array, [rate, nper, pv, fv, when]) + temp = (1 + rate)**nper + mask = (rate == 0.0) + masked_rate = np.where(mask, 1.0, rate) + z = np.zeros(np.broadcast(masked_rate, nper, pv, fv, when).shape) + fact = np.where(mask != z, nper + z, + (1 + masked_rate*when)*(temp - 1)/masked_rate + z) return -(fv + pv*temp) / fact def nper(rate, pmt, pv, fv=0, when='end'): @@ -247,7 +248,7 @@ def nper(rate, pmt, pv, fv=0, when='end'): If you only had $150/month to pay towards the loan, how long would it take to pay-off a loan of $8,000 at 7% annual interest? - >>> print round(np.nper(0.07/12, -150, 8000), 5) + >>> print(round(np.nper(0.07/12, -150, 8000), 5)) 64.07335 So, over 64 months would be required to pay off the loan. @@ -347,7 +348,7 @@ def ipmt(rate, per, nper, pv, fv=0.0, when='end'): >>> for payment in per: ... index = payment - 1 ... principal = principal + ppmt[index] - ... print fmt.format(payment, ppmt[index], ipmt[index], principal) + ... print(fmt.format(payment, ppmt[index], ipmt[index], principal)) 1 -200.58 -17.17 2299.42 2 -201.96 -15.79 2097.46 3 -203.35 -14.40 1894.11 @@ -651,7 +652,7 @@ def irr(values): """ res = np.roots(values[::-1]) mask = (res.imag == 0) & (res.real > 0) - if res.size == 0: + if not mask.any(): return np.nan res = res[mask].real # NPV(rate) = 0 can have more than one solution so we return diff --git a/numpy/lib/format.py b/numpy/lib/format.py index b93f86ca3..633aee675 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -35,7 +35,7 @@ Capabilities - Is straightforward to reverse engineer. 
Datasets often live longer than the programs that created them. A competent developer should be - able to create a solution in his preferred programming language to + able to create a solution in their preferred programming language to read most ``.npy`` files that he has been given without much documentation. @@ -128,6 +128,19 @@ Consumers can figure out the number of bytes by multiplying the number of elements given by the shape (noting that ``shape=()`` means there is 1 element) by ``dtype.itemsize``. +Format Version 2.0 +------------------ + +The version 1.0 format only allowed the array header to have a total size of +65535 bytes. This can be exceeded by structured arrays with a large number of +columns. The version 2.0 format extends the header size to 4 GiB. +`numpy.save` will automatically save in 2.0 format if the data requires it, +else it will always use the more compatible 1.0 format. + +The description of the fourth element of the header therefore has become: +"The next 4 bytes form a little-endian unsigned int: the length of the header +data HEADER_LEN." + Notes ----- The ``.npy`` format, including reasons for creating it and a comparison of @@ -252,8 +265,7 @@ def header_data_from_array_1_0(array): This has the appropriate entries for writing its string representation to the header of the file. """ - d = {} - d['shape'] = array.shape + d = {'shape': array.shape} if array.flags.c_contiguous: d['fortran_order'] = False elif array.flags.f_contiguous: @@ -301,21 +313,19 @@ def _write_array_header(fp, d, version=None): header = header + ' '*topad + '\n' header = asbytes(_filter_header(header)) - if len(header) >= (256*256) and version == (1, 0): - raise ValueError("header does not fit inside %s bytes required by the" - " 1.0 format" % (256*256)) - if len(header) < (256*256): - header_len_str = struct.pack('<H', len(header)) + hlen = len(header) + if hlen < 256*256 and version in (None, (1, 0)): version = (1, 0) - elif len(header) < (2**32): - header_len_str = struct.pack('<I', len(header)) + header_prefix = magic(1, 0) + struct.pack('<H', hlen) + elif hlen < 2**32 and version in (None, (2, 0)): version = (2, 0) + header_prefix = magic(2, 0) + struct.pack('<I', hlen) else: - raise ValueError("header does not fit inside 4 GiB required by " - "the 2.0 format") + msg = "Header length %s too big for version=%s" + msg %= (hlen, version) + raise ValueError(msg) - fp.write(magic(*version)) - fp.write(header_len_str) + fp.write(header_prefix) fp.write(header) return version @@ -376,7 +386,7 @@ def read_array_header_1_0(fp): If the data is invalid. """ - _read_array_header(fp, version=(1, 0)) + return _read_array_header(fp, version=(1, 0)) def read_array_header_2_0(fp): """ @@ -409,7 +419,7 @@ def read_array_header_2_0(fp): If the data is invalid. """ - _read_array_header(fp, version=(2, 0)) + return _read_array_header(fp, version=(2, 0)) def _filter_header(s): @@ -504,7 +514,7 @@ def _read_array_header(fp, version): return d['shape'], d['fortran_order'], dtype -def write_array(fp, array, version=None): +def write_array(fp, array, version=None, allow_pickle=True, pickle_kwargs=None): """ Write an array to an NPY file, including a header. @@ -522,11 +532,18 @@ def write_array(fp, array, version=None): version : (int, int) or None, optional The version number of the format. None means use the oldest supported version that is able to store the data. Default: None + allow_pickle : bool, optional + Whether to allow writing pickled data. 
Default: True + pickle_kwargs : dict, optional + Additional keyword arguments to pass to pickle.dump, excluding + 'protocol'. These are only useful when pickling objects in object + arrays on Python 3 to Python 2 compatible format. Raises ------ ValueError - If the array cannot be persisted. + If the array cannot be persisted. This includes the case of + allow_pickle=False and array being an object array. Various other errors If the array contains Python objects as part of its dtype, the process of pickling them may raise various errors if the objects @@ -539,16 +556,24 @@ def write_array(fp, array, version=None): # this warning can be removed when 1.9 has aged enough if version != (2, 0) and used_ver == (2, 0): warnings.warn("Stored array in format 2.0. It can only be" - "read by NumPy >= 1.9", UserWarning) + "read by NumPy >= 1.9", UserWarning, stacklevel=2) - # Set buffer size to 16 MiB to hide the Python loop overhead. - buffersize = max(16 * 1024 ** 2 // array.itemsize, 1) + if array.itemsize == 0: + buffersize = 0 + else: + # Set buffer size to 16 MiB to hide the Python loop overhead. + buffersize = max(16 * 1024 ** 2 // array.itemsize, 1) if array.dtype.hasobject: # We contain Python objects so we cannot write out the data # directly. Instead, we will pickle it out with version 2 of the # pickle protocol. - pickle.dump(array, fp, protocol=2) + if not allow_pickle: + raise ValueError("Object arrays cannot be saved when " + "allow_pickle=False") + if pickle_kwargs is None: + pickle_kwargs = {} + pickle.dump(array, fp, protocol=2, **pickle_kwargs) elif array.flags.f_contiguous and not array.flags.c_contiguous: if isfileobj(fp): array.T.tofile(fp) @@ -567,7 +592,7 @@ def write_array(fp, array, version=None): fp.write(chunk.tobytes('C')) -def read_array(fp): +def read_array(fp, allow_pickle=True, pickle_kwargs=None): """ Read an array from an NPY file. @@ -576,6 +601,12 @@ def read_array(fp): fp : file_like object If this is not a real file object, then this may take extra memory and time. + allow_pickle : bool, optional + Whether to allow reading pickled data. Default: True + pickle_kwargs : dict + Additional keyword arguments to pass to pickle.load. These are only + useful when loading object arrays saved on Python 2 when using + Python 3. Returns ------- @@ -585,7 +616,8 @@ def read_array(fp): Raises ------ ValueError - If the data is invalid. + If the data is invalid, or allow_pickle=False and the file contains + an object array. """ version = read_magic(fp) @@ -594,12 +626,25 @@ def read_array(fp): if len(shape) == 0: count = 1 else: - count = numpy.multiply.reduce(shape) + count = numpy.multiply.reduce(shape, dtype=numpy.int64) # Now read the actual data. if dtype.hasobject: # The array contained Python objects. We need to unpickle the data. - array = pickle.load(fp) + if not allow_pickle: + raise ValueError("Object arrays cannot be loaded when " + "allow_pickle=False") + if pickle_kwargs is None: + pickle_kwargs = {} + try: + array = pickle.load(fp, **pickle_kwargs) + except UnicodeError as err: + if sys.version_info[0] >= 3: + # Friendlier error message + raise UnicodeError("Unpickling a python object failed: %r\n" + "You may need to pass the encoding= option " + "to numpy.load" % (err,)) + raise else: if isfileobj(fp): # We can use the fast fromfile() function. @@ -613,15 +658,21 @@ def read_array(fp): # of the read. In non-chunked case count < max_read_count, so # only one read is performed. 
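The ``allow_pickle`` plumbing is easiest to see end to end. A sketch via the public ``np.save``/``np.load`` wrappers, assuming they forward ``allow_pickle`` to ``write_array``/``read_array`` as this series intends:

>>> a = np.array([{'key': 1}], dtype=object)
>>> np.save('obj.npy', a)  # object array, so the payload is pickled
>>> np.load('obj.npy', allow_pickle=False)
Traceback (most recent call last):
    ...
ValueError: Object arrays cannot be loaded when allow_pickle=False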
- max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dtype.itemsize) + # Use np.ndarray instead of np.empty since the latter does + # not correctly instantiate zero-width string dtypes; see + # https://github.com/numpy/numpy/pull/6430 + array = numpy.ndarray(count, dtype=dtype) + + if dtype.itemsize > 0: + # If dtype.itemsize == 0 then there's nothing more to read + max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, dtype.itemsize) - array = numpy.empty(count, dtype=dtype) - for i in range(0, count, max_read_count): - read_count = min(max_read_count, count - i) - read_size = int(read_count * dtype.itemsize) - data = _read_bytes(fp, read_size, "array data") - array[i:i+read_count] = numpy.frombuffer(data, dtype=dtype, - count=read_count) + for i in range(0, count, max_read_count): + read_count = min(max_read_count, count - i) + read_size = int(read_count * dtype.itemsize) + data = _read_bytes(fp, read_size, "array data") + array[i:i+read_count] = numpy.frombuffer(data, dtype=dtype, + count=read_count) if fortran_order: array.shape = shape[::-1] @@ -708,7 +759,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None, # this warning can be removed when 1.9 has aged enough if version != (2, 0) and used_ver == (2, 0): warnings.warn("Stored array in format 2.0. It can only be" - "read by NumPy >= 1.9", UserWarning) + "read by NumPy >= 1.9", UserWarning, stacklevel=2) offset = fp.tell() finally: fp.close() diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 29de703d8..4172c26b5 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -1,42 +1,49 @@ from __future__ import division, absolute_import, print_function -import warnings -import sys import collections import operator +import re +import sys +import warnings import numpy as np import numpy.core.numeric as _nx -from numpy.core import linspace, atleast_1d, atleast_2d +from numpy.core import linspace, atleast_1d, atleast_2d, transpose from numpy.core.numeric import ( ones, zeros, arange, concatenate, array, asarray, asanyarray, empty, empty_like, ndarray, around, floor, ceil, take, dot, where, intp, - integer, isscalar + integer, isscalar, absolute ) from numpy.core.umath import ( pi, multiply, add, arctan2, frompyfunc, cos, less_equal, sqrt, sin, mod, exp, log10 ) from numpy.core.fromnumeric import ( - ravel, nonzero, sort, partition, mean + ravel, nonzero, sort, partition, mean, any, sum ) from numpy.core.numerictypes import typecodes, number from numpy.lib.twodim_base import diag from .utils import deprecate -from ._compiled_base import _insert, add_docstring -from ._compiled_base import digitize, bincount, interp as compiled_interp -from ._compiled_base import add_newdoc_ufunc +from numpy.core.multiarray import ( + _insert, add_docstring, digitize, bincount, + interp as compiled_interp, interp_complex as compiled_interp_complex + ) +from numpy.core.umath import _add_newdoc_ufunc as add_newdoc_ufunc from numpy.compat import long +from numpy.compat.py3k import basestring -# Force range to be a generator, for np.delete's usage. if sys.version_info[0] < 3: + # Force range to be a generator, for np.delete's usage. 
range = xrange + import __builtin__ as builtins +else: + import builtins __all__ = [ 'select', 'piecewise', 'trim_zeros', 'copy', 'iterable', 'percentile', - 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp', - 'extract', 'place', 'vectorize', 'asarray_chkfinite', 'average', + 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp', 'flip', + 'rot90', 'extract', 'place', 'vectorize', 'asarray_chkfinite', 'average', 'histogram', 'histogramdd', 'bincount', 'digitize', 'cov', 'corrcoef', 'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett', 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring', @@ -44,6 +51,164 @@ __all__ = [ ] +def rot90(m, k=1, axes=(0,1)): + """ + Rotate an array by 90 degrees in the plane specified by axes. + + Rotation direction is from the first towards the second axis. + + .. versionadded:: 1.12.0 + + Parameters + ---------- + m : array_like + Array of two or more dimensions. + k : integer + Number of times the array is rotated by 90 degrees. + axes: (2,) array_like + The array is rotated in the plane defined by the axes. + Axes must be different. + + Returns + ------- + y : ndarray + A rotated view of `m`. + + See Also + -------- + flip : Reverse the order of elements in an array along the given axis. + fliplr : Flip an array horizontally. + flipud : Flip an array vertically. + + Notes + ----- + rot90(m, k=1, axes=(1,0)) is the reverse of rot90(m, k=1, axes=(0,1)) + rot90(m, k=1, axes=(1,0)) is equivalent to rot90(m, k=-1, axes=(0,1)) + + Examples + -------- + >>> m = np.array([[1,2],[3,4]], int) + >>> m + array([[1, 2], + [3, 4]]) + >>> np.rot90(m) + array([[2, 4], + [1, 3]]) + >>> np.rot90(m, 2) + array([[4, 3], + [2, 1]]) + >>> m = np.arange(8).reshape((2,2,2)) + >>> np.rot90(m, 1, (1,2)) + array([[[1, 3], + [0, 2]], + + [[5, 7], + [4, 6]]]) + + """ + axes = tuple(axes) + if len(axes) != 2: + raise ValueError("len(axes) must be 2.") + + m = asanyarray(m) + + if axes[0] == axes[1] or absolute(axes[0] - axes[1]) == m.ndim: + raise ValueError("Axes must be different.") + + if (axes[0] >= m.ndim or axes[0] < -m.ndim + or axes[1] >= m.ndim or axes[1] < -m.ndim): + raise ValueError("Axes={} out of range for array of ndim={}." + .format(axes, m.ndim)) + + k %= 4 + + if k == 0: + return m[:] + if k == 2: + return flip(flip(m, axes[0]), axes[1]) + + axes_list = arange(0, m.ndim) + axes_list[axes[0]], axes_list[axes[1]] = axes_list[axes[1]], axes_list[axes[0]] + + if k == 1: + return transpose(flip(m,axes[1]), axes_list) + else: + # k == 3 + return flip(transpose(m, axes_list), axes[1]) + + +def flip(m, axis): + """ + Reverse the order of elements in an array along the given axis. + + The shape of the array is preserved, but the elements are reordered. + + .. versionadded:: 1.12.0 + + Parameters + ---------- + m : array_like + Input array. + axis : integer + Axis in array, which entries are reversed. + + + Returns + ------- + out : array_like + A view of `m` with the entries of axis reversed. Since a view is + returned, this operation is done in constant time. + + See Also + -------- + flipud : Flip an array vertically (axis=0). + fliplr : Flip an array horizontally (axis=1). + + Notes + ----- + flip(m, 0) is equivalent to flipud(m). + flip(m, 1) is equivalent to fliplr(m). + flip(m, n) corresponds to ``m[...,::-1,...]`` with ``::-1`` at position n. 
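The three equivalences stated in the Notes are cheap to verify. A doctest-style sketch, assuming the new function is re-exported as ``np.flip`` (as the ``__all__`` change above implies):

>>> A = np.arange(8).reshape((2, 2, 2))
>>> np.all(np.flip(A, 0) == np.flipud(A))
True
>>> np.all(np.flip(A, 1) == np.fliplr(A))
True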
+ + Examples + -------- + >>> A = np.arange(8).reshape((2,2,2)) + >>> A + array([[[0, 1], + [2, 3]], + + [[4, 5], + [6, 7]]]) + + >>> flip(A, 0) + array([[[4, 5], + [6, 7]], + + [[0, 1], + [2, 3]]]) + + >>> flip(A, 1) + array([[[2, 3], + [0, 1]], + + [[6, 7], + [4, 5]]]) + + >>> A = np.random.randn(3,4,5) + >>> np.all(flip(A,2) == A[:,:,::-1,...]) + True + """ + if not hasattr(m, 'ndim'): + m = asarray(m) + indexer = [slice(None)] * m.ndim + try: + indexer[axis] = slice(None, None, -1) + except IndexError: + raise ValueError("axis=%i is invalid for the %i-dimensional input array" + % (axis, m.ndim)) + return m[tuple(indexer)] + + def iterable(y): """ Check whether or not an object can be iterated over. @@ -55,71 +220,307 @@ def iterable(y): Returns ------- - b : {0, 1} - Return 1 if the object has an iterator method or is a sequence, - and 0 otherwise. + b : bool + Return ``True`` if the object has an iterator method or is a + sequence and ``False`` otherwise. Examples -------- >>> np.iterable([1, 2, 3]) - 1 + True >>> np.iterable(2) - 0 + False """ try: iter(y) - except: - return 0 - return 1 + except TypeError: + return False + return True + + +def _hist_bin_sqrt(x): + """ + Square root histogram bin estimator. + + Bin width is inversely proportional to the data size. Used by many + programs for its simplicity. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + return x.ptp() / np.sqrt(x.size) + + +def _hist_bin_sturges(x): + """ + Sturges histogram bin estimator. + + A very simplistic estimator based on the assumption of normality of + the data. This estimator has poor performance for non-normal data, + which becomes especially obvious for large data sets. The estimate + depends only on size of the data. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + return x.ptp() / (np.log2(x.size) + 1.0) + + +def _hist_bin_rice(x): + """ + Rice histogram bin estimator. + + Another simple estimator with no normality assumption. It has better + performance for large data than Sturges, but tends to overestimate + the number of bins. The number of bins is proportional to the cube + root of data size (asymptotically optimal). The estimate depends + only on size of the data. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + return x.ptp() / (2.0 * x.size ** (1.0 / 3)) + + +def _hist_bin_scott(x): + """ + Scott histogram bin estimator. + + The binwidth is proportional to the standard deviation of the data + and inversely proportional to the cube root of data size + (asymptotically optimal). + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + return (24.0 * np.pi**0.5 / x.size)**(1.0 / 3.0) * np.std(x) + + +def _hist_bin_doane(x): + """ + Doane's histogram bin estimator. + + Improved version of Sturges' formula which works better for + non-normal data. 
See + http://stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + if x.size > 2: + sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3))) + sigma = np.std(x) + if sigma > 0.0: + # These three operations add up to + # g1 = np.mean(((x - np.mean(x)) / sigma)**3) + # but use only one temp array instead of three + temp = x - np.mean(x) + np.true_divide(temp, sigma, temp) + np.power(temp, 3, temp) + g1 = np.mean(temp) + return x.ptp() / (1.0 + np.log2(x.size) + + np.log2(1.0 + np.absolute(g1) / sg1)) + return 0.0 + + +def _hist_bin_fd(x): + """ + The Freedman-Diaconis histogram bin estimator. + + The Freedman-Diaconis rule uses interquartile range (IQR) to + estimate binwidth. It is considered a variation of the Scott rule + with more robustness as the IQR is less affected by outliers than + the standard deviation. However, the IQR depends on fewer points + than the standard deviation, so it is less accurate, especially for + long tailed distributions. + + If the IQR is 0, this function returns 1 for the number of bins. + Binwidth is inversely proportional to the cube root of data size + (asymptotically optimal). + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + iqr = np.subtract(*np.percentile(x, [75, 25])) + return 2.0 * iqr * x.size ** (-1.0 / 3.0) + + +def _hist_bin_auto(x): + """ + Histogram bin estimator that uses the minimum width of the + Freedman-Diaconis and Sturges estimators. + + The FD estimator is usually the most robust method, but its width + estimate tends to be too large for small `x`. The Sturges estimator + is quite good for small (<1000) datasets and is the default in the R + language. This method gives good off the shelf behaviour. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + + See Also + -------- + _hist_bin_fd, _hist_bin_sturges + """ + # There is no need to check for zero here. If ptp is, so is IQR and + # vice versa. Either both are zero or neither one is. + return min(_hist_bin_fd(x), _hist_bin_sturges(x)) + + +# Private dict initialized at module load time +_hist_bin_selectors = {'auto': _hist_bin_auto, + 'doane': _hist_bin_doane, + 'fd': _hist_bin_fd, + 'rice': _hist_bin_rice, + 'scott': _hist_bin_scott, + 'sqrt': _hist_bin_sqrt, + 'sturges': _hist_bin_sturges} def histogram(a, bins=10, range=None, normed=False, weights=None, density=None): - """ + r""" Compute the histogram of a set of data. Parameters ---------- a : array_like Input data. The histogram is computed over the flattened array. - bins : int or sequence of scalars, optional + bins : int or sequence of scalars or str, optional If `bins` is an int, it defines the number of equal-width - bins in the given range (10, by default). If `bins` is a sequence, - it defines the bin edges, including the rightmost edge, allowing - for non-uniform bin widths. + bins in the given range (10, by default). If `bins` is a + sequence, it defines the bin edges, including the rightmost + edge, allowing for non-uniform bin widths. + + .. 
versionadded:: 1.11.0 + + If `bins` is a string from the list below, `histogram` will use + the method chosen to calculate the optimal bin width and + consequently the number of bins (see `Notes` for more detail on + the estimators) from the data that falls within the requested + range. While the bin width will be optimal for the actual data + in the range, the number of bins will be computed to fill the + entire range, including the empty portions. For visualisation, + using the 'auto' option is suggested. Weighted data is not + supported for automated bin size selection. + + 'auto' + Maximum of the 'sturges' and 'fd' estimators. Provides good + all around performance. + + 'fd' (Freedman Diaconis Estimator) + Robust (resilient to outliers) estimator that takes into + account data variability and data size. + + 'doane' + An improved version of Sturges' estimator that works better + with non-normal datasets. + + 'scott' + Less robust estimator that that takes into account data + variability and data size. + + 'rice' + Estimator does not take variability into account, only data + size. Commonly overestimates number of bins required. + + 'sturges' + R's default method, only accounts for data size. Only + optimal for gaussian data and underestimates number of bins + for large non-gaussian datasets. + + 'sqrt' + Square root (of data size) estimator, used by Excel and + other programs for its speed and simplicity. + range : (float, float), optional The lower and upper range of the bins. If not provided, range is simply ``(a.min(), a.max())``. Values outside the range are - ignored. + ignored. The first element of the range must be less than or + equal to the second. `range` affects the automatic bin + computation as well. While bin width is computed to be optimal + based on the actual data within `range`, the bin count will fill + the entire range including portions containing no data. normed : bool, optional - This keyword is deprecated in Numpy 1.6 due to confusing/buggy - behavior. It will be removed in Numpy 2.0. Use the density keyword - instead. - If False, the result will contain the number of samples - in each bin. If True, the result is the value of the - probability *density* function at the bin, normalized such that - the *integral* over the range is 1. Note that this latter behavior is - known to be buggy with unequal bin widths; use `density` instead. + This keyword is deprecated in NumPy 1.6.0 due to confusing/buggy + behavior. It will be removed in NumPy 2.0.0. Use the ``density`` + keyword instead. If ``False``, the result will contain the + number of samples in each bin. If ``True``, the result is the + value of the probability *density* function at the bin, + normalized such that the *integral* over the range is 1. Note + that this latter behavior is known to be buggy with unequal bin + widths; use ``density`` instead. weights : array_like, optional - An array of weights, of the same shape as `a`. Each value in `a` - only contributes its associated weight towards the bin count - (instead of 1). If `normed` is True, the weights are normalized, - so that the integral of the density over the range remains 1 + An array of weights, of the same shape as `a`. Each value in + `a` only contributes its associated weight towards the bin count + (instead of 1). If `density` is True, the weights are + normalized, so that the integral of the density over the range + remains 1. density : bool, optional - If False, the result will contain the number of samples - in each bin. 
If True, the result is the value of the + If ``False``, the result will contain the number of samples in + each bin. If ``True``, the result is the value of the probability *density* function at the bin, normalized such that the *integral* over the range is 1. Note that the sum of the histogram values will not be equal to 1 unless bins of unity width are chosen; it is not a probability *mass* function. - Overrides the `normed` keyword if given. + + Overrides the ``normed`` keyword if given. Returns ------- hist : array - The values of the histogram. See `normed` and `weights` for a + The values of the histogram. See `density` and `weights` for a description of the possible semantics. bin_edges : array of dtype float Return the bin edges ``(length(hist)+1)``. @@ -131,14 +532,84 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, Notes ----- - All but the last (righthand-most) bin is half-open. In other words, if - `bins` is:: + All but the last (righthand-most) bin is half-open. In other words, + if `bins` is:: [1, 2, 3, 4] - then the first bin is ``[1, 2)`` (including 1, but excluding 2) and the - second ``[2, 3)``. The last bin, however, is ``[3, 4]``, which *includes* - 4. + then the first bin is ``[1, 2)`` (including 1, but excluding 2) and + the second ``[2, 3)``. The last bin, however, is ``[3, 4]``, which + *includes* 4. + + .. versionadded:: 1.11.0 + + The methods to estimate the optimal number of bins are well founded + in literature, and are inspired by the choices R provides for + histogram visualisation. Note that having the number of bins + proportional to :math:`n^{1/3}` is asymptotically optimal, which is + why it appears in most estimators. These are simply plug-in methods + that give good starting points for number of bins. In the equations + below, :math:`h` is the binwidth and :math:`n_h` is the number of + bins. All estimators that compute bin counts are recast to bin width + using the `ptp` of the data. The final bin count is obtained from + ``np.round(np.ceil(range / h))`. + + 'Auto' (maximum of the 'Sturges' and 'FD' estimators) + A compromise to get a good value. For small datasets the Sturges + value will usually be chosen, while larger datasets will usually + default to FD. Avoids the overly conservative behaviour of FD + and Sturges for small and large datasets respectively. + Switchover point is usually :math:`a.size \approx 1000`. + + 'FD' (Freedman Diaconis Estimator) + .. math:: h = 2 \frac{IQR}{n^{1/3}} + + The binwidth is proportional to the interquartile range (IQR) + and inversely proportional to cube root of a.size. Can be too + conservative for small datasets, but is quite good for large + datasets. The IQR is very robust to outliers. + + 'Scott' + .. math:: h = \sigma \sqrt[3]{\frac{24 * \sqrt{\pi}}{n}} + + The binwidth is proportional to the standard deviation of the + data and inversely proportional to cube root of ``x.size``. Can + be too conservative for small datasets, but is quite good for + large datasets. The standard deviation is not very robust to + outliers. Values are very similar to the Freedman-Diaconis + estimator in the absence of outliers. + + 'Rice' + .. math:: n_h = 2n^{1/3} + + The number of bins is only proportional to cube root of + ``a.size``. It tends to overestimate the number of bins and it + does not take into account data variability. + + 'Sturges' + .. math:: n_h = \log _{2}n+1 + + The number of bins is the base 2 log of ``a.size``. 
This + estimator assumes normality of data and is too conservative for + larger, non-normal datasets. This is the default method in R's + ``hist`` method. + + 'Doane' + .. math:: n_h = 1 + \log_{2}(n) + + \log_{2}(1 + \frac{|g_1|}{\sigma_{g_1}}) + + g_1 = mean[(\frac{x - \mu}{\sigma})^3] + + \sigma_{g_1} = \sqrt{\frac{6(n - 2)}{(n + 1)(n + 3)}} + + An improved version of Sturges' formula that produces better + estimates for non-normal datasets. This estimator attempts to + account for the skew of the data. + + 'Sqrt' + .. math:: n_h = \sqrt n + The simplest and fastest estimator. Only takes into account the + data size. Examples -------- @@ -158,8 +629,20 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, >>> np.sum(hist*np.diff(bin_edges)) 1.0 - """ + .. versionadded:: 1.11.0 + Automated Bin Selection Methods example, using 2 peak random data + with 2000 points: + + >>> import matplotlib.pyplot as plt + >>> rng = np.random.RandomState(10) # deterministic random data + >>> a = np.hstack((rng.normal(size=1000), + ... rng.normal(loc=5, scale=2, size=1000))) + >>> plt.hist(a, bins='auto') # plt.hist passes it's arguments to np.histogram + >>> plt.title("Histogram with 'auto' bins") + >>> plt.show() + + """ a = asarray(a) if weights is not None: weights = asarray(weights) @@ -169,60 +652,159 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, weights = weights.ravel() a = a.ravel() - if (range is not None): - mn, mx = range - if (mn > mx): - raise AttributeError( - 'max must be larger than min in range parameter.') + # Do not modify the original value of range so we can check for `None` + if range is None: + if a.size == 0: + # handle empty arrays. Can't determine range, so use 0-1. + mn, mx = 0.0, 1.0 + else: + mn, mx = a.min() + 0.0, a.max() + 0.0 + else: + mn, mx = [mi + 0.0 for mi in range] + if mn > mx: + raise ValueError( + 'max must be larger than min in range parameter.') + if not np.all(np.isfinite([mn, mx])): + raise ValueError( + 'range parameter must be finite.') + if mn == mx: + mn -= 0.5 + mx += 0.5 + + if isinstance(bins, basestring): + # if `bins` is a string for an automatic method, + # this will replace it with the number of bins calculated + if bins not in _hist_bin_selectors: + raise ValueError("{0} not a valid estimator for bins".format(bins)) + if weights is not None: + raise TypeError("Automated estimation of the number of " + "bins is not supported for weighted data") + # Make a reference to `a` + b = a + # Update the reference if the range needs truncation + if range is not None: + keep = (a >= mn) + keep &= (a <= mx) + if not np.logical_and.reduce(keep): + b = a[keep] + + if b.size == 0: + bins = 1 + else: + # Do not call selectors on empty arrays + width = _hist_bin_selectors[bins](b) + if width: + bins = int(np.ceil((mx - mn) / width)) + else: + # Width can be zero for some estimators, e.g. FD when + # the IQR of the data is zero. + bins = 1 + + # Histogram is an integer or a float array depending on the weights. + if weights is None: + ntype = np.dtype(np.intp) + else: + ntype = weights.dtype + + # We set a block size, as this allows us to iterate over chunks when + # computing histograms, to minimize memory usage. + BLOCK = 65536 if not iterable(bins): if np.isscalar(bins) and bins < 1: raise ValueError( '`bins` should be a positive integer.') - if range is None: - if a.size == 0: - # handle empty arrays. Can't determine range, so use 0-1. 
- range = (0, 1) + # At this point, if the weights are not integer, floating point, or + # complex, we have to use the slow algorithm. + if weights is not None and not (np.can_cast(weights.dtype, np.double) or + np.can_cast(weights.dtype, np.complex)): + bins = linspace(mn, mx, bins + 1, endpoint=True) + + if not iterable(bins): + # We now convert values of a to bin indices, under the assumption of + # equal bin widths (which is valid here). + + # Initialize empty histogram + n = np.zeros(bins, ntype) + # Pre-compute histogram scaling factor + norm = bins / (mx - mn) + + # Compute the bin edges for potential correction. + bin_edges = linspace(mn, mx, bins + 1, endpoint=True) + + # We iterate over blocks here for two reasons: the first is that for + # large arrays, it is actually faster (for example for a 10^8 array it + # is 2x as fast) and it results in a memory footprint 3x lower in the + # limit of large arrays. + for i in arange(0, len(a), BLOCK): + tmp_a = a[i:i+BLOCK] + if weights is None: + tmp_w = None else: - range = (a.min(), a.max()) - mn, mx = [mi + 0.0 for mi in range] - if mn == mx: - mn -= 0.5 - mx += 0.5 - bins = linspace(mn, mx, bins + 1, endpoint=True) + tmp_w = weights[i:i + BLOCK] + + # Only include values in the right range + keep = (tmp_a >= mn) + keep &= (tmp_a <= mx) + if not np.logical_and.reduce(keep): + tmp_a = tmp_a[keep] + if tmp_w is not None: + tmp_w = tmp_w[keep] + tmp_a_data = tmp_a.astype(float) + tmp_a = tmp_a_data - mn + tmp_a *= norm + + # Compute the bin indices, and for values that lie exactly on mx we + # need to subtract one + indices = tmp_a.astype(np.intp) + indices[indices == bins] -= 1 + + # The index computation is not guaranteed to give exactly + # consistent results within ~1 ULP of the bin edges. + decrement = tmp_a_data < bin_edges[indices] + indices[decrement] -= 1 + # The last bin includes the right edge. The other bins do not. + increment = (tmp_a_data >= bin_edges[indices + 1]) & (indices != bins - 1) + indices[increment] += 1 + + # We now compute the histogram using bincount + if ntype.kind == 'c': + n.real += np.bincount(indices, weights=tmp_w.real, minlength=bins) + n.imag += np.bincount(indices, weights=tmp_w.imag, minlength=bins) + else: + n += np.bincount(indices, weights=tmp_w, minlength=bins).astype(ntype) + + # Rename the bin edges for return. + bins = bin_edges else: bins = asarray(bins) if (np.diff(bins) < 0).any(): - raise AttributeError( + raise ValueError( 'bins must increase monotonically.') - # Histogram is an integer or a float array depending on the weights. 
- if weights is None: - ntype = int - else: - ntype = weights.dtype - n = np.zeros(bins.shape, ntype) + # Initialize empty histogram + n = np.zeros(bins.shape, ntype) - block = 65536 - if weights is None: - for i in arange(0, len(a), block): - sa = sort(a[i:i+block]) - n += np.r_[sa.searchsorted(bins[:-1], 'left'), - sa.searchsorted(bins[-1], 'right')] - else: - zero = array(0, dtype=ntype) - for i in arange(0, len(a), block): - tmp_a = a[i:i+block] - tmp_w = weights[i:i+block] - sorting_index = np.argsort(tmp_a) - sa = tmp_a[sorting_index] - sw = tmp_w[sorting_index] - cw = np.concatenate(([zero, ], sw.cumsum())) - bin_index = np.r_[sa.searchsorted(bins[:-1], 'left'), - sa.searchsorted(bins[-1], 'right')] - n += cw[bin_index] - - n = np.diff(n) + if weights is None: + for i in arange(0, len(a), BLOCK): + sa = sort(a[i:i+BLOCK]) + n += np.r_[sa.searchsorted(bins[:-1], 'left'), + sa.searchsorted(bins[-1], 'right')] + else: + zero = array(0, dtype=ntype) + for i in arange(0, len(a), BLOCK): + tmp_a = a[i:i+BLOCK] + tmp_w = weights[i:i+BLOCK] + sorting_index = np.argsort(tmp_a) + sa = tmp_a[sorting_index] + sw = tmp_w[sorting_index] + cw = np.concatenate(([zero, ], sw.cumsum())) + bin_index = np.r_[sa.searchsorted(bins[:-1], 'left'), + sa.searchsorted(bins[-1], 'right')] + n += cw[bin_index] + + + n = np.diff(n) if density is not None: if density: @@ -231,7 +813,7 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, else: return n, bins else: - # deprecated, buggy behavior. Remove for Numpy 2.0 + # deprecated, buggy behavior. Remove for NumPy 2.0.0 if normed: db = array(np.diff(bins), float) return n/(n*db).sum(), bins @@ -263,7 +845,7 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): normed : bool, optional If False, returns the number of samples in each bin. If True, returns the bin density ``bin_count / sample_count / bin_volume``. - weights : array_like (N,), optional + weights : (N,) array_like, optional An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`. Weights are normalized to 1 if normed is True. If normed is False, the values of the returned histogram are equal to the sum of the @@ -308,7 +890,7 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): try: M = len(bins) if M != D: - raise AttributeError( + raise ValueError( 'The dimension of bins must be equal to the dimension of the ' ' sample x.') except TypeError: @@ -326,6 +908,9 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): smin = atleast_1d(array(sample.min(0), float)) smax = atleast_1d(array(sample.max(0), float)) else: + if not np.all(np.isfinite(range)): + raise ValueError( + 'range parameter must be finite.') smin = zeros(D) smax = zeros(D) for i in arange(D): @@ -461,7 +1046,7 @@ def average(a, axis=None, weights=None, returned=False): Returns ------- - average, [sum_of_weights] : {array_type, double} + average, [sum_of_weights] : array_type or double Return the average along the specified axis. When returned is `True`, return a tuple with the average as the first element and the sum of the weights as the second element. The return type is `Float` @@ -507,6 +1092,17 @@ def average(a, axis=None, weights=None, returned=False): TypeError: Axis must be specified when shapes of a and weights differ. 
""" + # 3/19/2016 1.12.0: + # replace the next few lines with "a = np.asanyarray(a)" + if (type(a) not in (np.ndarray, np.matrix) and + issubclass(type(a), np.ndarray)): + warnings.warn("np.average currently does not preserve subclasses, but " + "will do so in the future to match the behavior of most " + "other numpy functions such as np.mean. In particular, " + "this means calls which returned a scalar may return a " + "0-d subclass object instead.", + FutureWarning, stacklevel=2) + if not isinstance(a, np.matrix): a = np.asarray(a) @@ -514,8 +1110,13 @@ def average(a, axis=None, weights=None, returned=False): avg = a.mean(axis) scl = avg.dtype.type(a.size/avg.size) else: - a = a + 0.0 - wgt = np.asarray(weights) + wgt = np.asanyarray(weights) + + if issubclass(a.dtype.type, (np.integer, np.bool_)): + result_dtype = np.result_type(a.dtype, wgt.dtype, 'f8') + else: + result_dtype = np.result_type(a.dtype, wgt.dtype) + # Sanity checks if a.shape != wgt.shape: if axis is None: @@ -530,25 +1131,26 @@ def average(a, axis=None, weights=None, returned=False): "Length of weights not compatible with specified axis.") # setup wgt to broadcast along axis - wgt = np.array(wgt, copy=0, ndmin=a.ndim).swapaxes(-1, axis) + wgt = np.broadcast_to(wgt, (a.ndim-1)*(1,) + wgt.shape) + wgt = wgt.swapaxes(-1, axis) - scl = wgt.sum(axis=axis, dtype=np.result_type(a.dtype, wgt.dtype)) + scl = wgt.sum(axis=axis, dtype=result_dtype) if (scl == 0.0).any(): raise ZeroDivisionError( "Weights sum to zero, can't be normalized") - avg = np.multiply(a, wgt).sum(axis)/scl + avg = np.multiply(a, wgt, dtype=result_dtype).sum(axis)/scl if returned: - scl = np.multiply(avg, 0) + scl + if scl.shape != avg.shape: + scl = np.broadcast_to(scl, avg.shape).copy() return avg, scl else: return avg def asarray_chkfinite(a, dtype=None, order=None): - """ - Convert the input to an array, checking for NaNs or Infs. + """Convert the input to an array, checking for NaNs or Infs. Parameters ---------- @@ -559,8 +1161,9 @@ def asarray_chkfinite(a, dtype=None, order=None): dtype : data-type, optional By default, the data-type is inferred from the input data. order : {'C', 'F'}, optional - Whether to use row-major ('C') or column-major ('FORTRAN') memory - representation. Defaults to 'C'. + Whether to use row-major (C-style) or + column-major (Fortran-style) memory representation. + Defaults to 'C'. Returns ------- @@ -601,7 +1204,7 @@ def asarray_chkfinite(a, dtype=None, order=None): >>> try: ... np.asarray_chkfinite(a) ... except ValueError: - ... print 'ValueError' + ... print('ValueError') ... ValueError @@ -648,8 +1251,8 @@ def piecewise(x, condlist, funclist, *args, **kw): kw : dict, optional Keyword arguments used in calling `piecewise` are passed to the functions upon execution, i.e., if called - ``piecewise(..., ..., lambda=1)``, then each function is called as - ``f(x, lambda=1)``. + ``piecewise(..., ..., alpha=1)``, then each function is called as + ``f(x, alpha=1)``. Returns ------- @@ -712,11 +1315,16 @@ def piecewise(x, condlist, funclist, *args, **kw): condlist = condlist.T if n == n2 - 1: # compute the "otherwise" condition. 
totlist = np.logical_or.reduce(condlist, axis=0) - condlist = np.vstack([condlist, ~totlist]) + # Only able to stack vertically if the array is 1d or less + if x.ndim <= 1: + condlist = np.vstack([condlist, ~totlist]) + else: + condlist = [asarray(c, dtype=bool) for c in condlist] + totlist = condlist[0] + for k in range(1, n): + totlist |= condlist[k] + condlist.append(~totlist) n += 1 - if (n != n2): - raise ValueError( - "function list and condition list must be the same") y = zeros(x.shape, x.dtype) for k in range(n): @@ -776,9 +1384,10 @@ def select(condlist, choicelist, default=0): # Now that the dtype is known, handle the deprecated select([], []) case if len(condlist) == 0: + # 2014-02-24, 1.9 warnings.warn("select with an empty condition list is not possible" "and will be deprecated", - DeprecationWarning) + DeprecationWarning, stacklevel=2) return np.asarray(default)[()] choicelist = [np.asarray(choice) for choice in choicelist] @@ -789,7 +1398,7 @@ def select(condlist, choicelist, default=0): dtype = np.result_type(*choicelist) # Convert conditions to arrays and broadcast conditions and choices - # as the shape is needed for the result. Doing it seperatly optimizes + # as the shape is needed for the result. Doing it separately optimizes # for example when all choices are scalars. condlist = np.broadcast_arrays(*condlist) choicelist = np.broadcast_arrays(*choicelist) @@ -809,10 +1418,11 @@ def select(condlist, choicelist, default=0): 'invalid entry in choicelist: should be boolean ndarray') if deprecated_ints: + # 2014-02-24, 1.9 msg = "select condlists containing integer ndarrays is deprecated " \ "and will be removed in the future. Use `.astype(bool)` to " \ "convert to bools." - warnings.warn(msg, DeprecationWarning) + warnings.warn(msg, DeprecationWarning, stacklevel=2) if choicelist[0].ndim == 0: # This may be common, so avoid the call. @@ -885,9 +1495,9 @@ def copy(a, order='K'): def gradient(f, *varargs, **kwargs): """ Return the gradient of an N-dimensional array. - + The gradient is computed using second order accurate central differences - in the interior and either first differences or second order accurate + in the interior and either first differences or second order accurate one-sided (forward or backwards) differences at the boundaries. The returned gradient hence has the same shape as the input array. Parameters ---------- f : array_like An N-dimensional array containing samples of a scalar function. - varargs : list of scalar, optional + varargs : scalar or list of scalar, optional N scalars specifying the sample distances for each dimension, i.e. `dx`, `dy`, `dz`, ... Default distance: 1. + A single scalar specifies the sample distance for all dimensions. + If `axis` is given, the number of varargs must equal the number of axes. edge_order : {1, 2}, optional - Gradient is calculated using N\ :sup:`th` order accurate differences + Gradient is calculated using N-th order accurate differences at the boundaries. Default: 1. - + .. versionadded:: 1.9.1 + axis : None or int or tuple of ints, optional + Gradient is calculated only along the given axis or axes. + The default (axis = None) is to calculate the gradient for all the axes of the input array. + axis may be negative, in which case it counts from the last to the first axis. + + .. versionadded:: 1.11.0 + Returns ------- - gradient : ndarray - N arrays of the same shape as `f` giving the derivative of `f` with - respect to each dimension.
+ gradient : ndarray or list of ndarray + A set of ndarrays (or a single ndarray if there is only one dimension) + corresponding to the derivatives of f with respect to each dimension. + Each derivative has the same shape as f. Examples -------- @@ -918,29 +1538,57 @@ def gradient(f, *varargs, **kwargs): >>> np.gradient(x, 2) array([ 0.5 , 0.75, 1.25, 1.75, 2.25, 2.5 ]) + For two dimensional arrays, the return will be two arrays ordered by + axis. In this example the first array stands for the gradient in + rows and the second one in columns direction: + >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=np.float)) [array([[ 2., 2., -1.], [ 2., 2., -1.]]), array([[ 1. , 2.5, 4. ], [ 1. , 1. , 1. ]])] >>> x = np.array([0, 1, 2, 3, 4]) - >>> dx = np.gradient(x) >>> y = x**2 - >>> np.gradient(y, dx, edge_order=2) + >>> np.gradient(y, edge_order=2) array([-0., 2., 4., 6., 8.]) + + The axis keyword can be used to specify a subset of axes along which the gradient is calculated: + >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=np.float), axis=0) + array([[ 2., 2., -1.], + [ 2., 2., -1.]]) """ f = np.asanyarray(f) N = len(f.shape) # number of dimensions + + axes = kwargs.pop('axis', None) + if axes is None: + axes = tuple(range(N)) + # check axes to have correct type and no duplicate entries + if isinstance(axes, int): + axes = (axes,) + if not isinstance(axes, tuple): + raise TypeError("A tuple of integers or a single integer is required") + + # normalize axis values: + axes = tuple(x + N if x < 0 else x for x in axes) + if max(axes) >= N or min(axes) < 0: + raise ValueError("'axis' entry is out of bounds") + + if len(set(axes)) != len(axes): + raise ValueError("duplicate value in 'axis'") + n = len(varargs) if n == 0: dx = [1.0]*N elif n == 1: dx = [varargs[0]]*N - elif n == N: + elif n == len(axes): dx = list(varargs) else: raise SyntaxError( "invalid number of arguments") + if any([not np.isscalar(dxi) for dxi in dx]): + raise ValueError("distances must be scalars") edge_order = kwargs.pop('edge_order', 1) if kwargs: @@ -974,13 +1622,13 @@ def gradient(f, *varargs, **kwargs): # Convert datetime64 data into ints. Make dummy variable `y` # that is a view of ints if the data is datetime64, otherwise - # just set y equal to the the array `f`. + # just set y equal to the array `f`. if f.dtype.char in ["M", "m"]: y = f.view('int64') else: y = f - for axis in range(N): + for i, axis in enumerate(axes): if y.shape[axis] < 2: raise ValueError( @@ -1036,7 +1684,7 @@ def gradient(f, *varargs, **kwargs): out[slice1] = (3.0*y[slice2] - 4.0*y[slice3] + y[slice4])/2.0 # divide by step size - out /= dx[axis] + out /= dx[i] outvals.append(out) # reset the slice object in this dimension to ":" @@ -1045,7 +1693,7 @@ def gradient(f, *varargs, **kwargs): slice3[axis] = slice(None) slice4[axis] = slice(None) - if N == 1: + if len(axes) == 1: return outvals[0] else: return outvals @@ -1053,10 +1701,10 @@ def diff(a, n=1, axis=-1): """ - Calculate the n-th order discrete difference along given axis. + Calculate the n-th discrete difference along given axis. - The first order difference is given by ``out[n] = a[n+1] - a[n]`` along - the given axis, higher order differences are calculated by using `diff` + The first difference is given by ``out[n] = a[n+1] - a[n]`` along + the given axis, higher differences are calculated by using `diff` recursively. Parameters ---------- @@ -1071,7 +1719,7 @@ def diff(a, n=1, axis=-1): Returns ------- diff : ndarray - The `n` order differences.
The shape of the output is the same as `a` + The n-th differences. The shape of the output is the same as `a` except along `axis` where the dimension is smaller by `n`. See Also @@ -1130,24 +1778,25 @@ def interp(x, xp, fp, left=None, right=None, period=None): `period` is not specified. Otherwise, `xp` is internally sorted after normalizing the periodic boundaries with ``xp = xp % period``. - fp : 1-D sequence of floats + fp : 1-D sequence of float or complex The y-coordinates of the data points, same length as `xp`. - left : float, optional + left : optional float or complex corresponding to fp Value to return for `x < xp[0]`, default is `fp[0]`. - right : float, optional + right : optional float or complex corresponding to fp Value to return for `x > xp[-1]`, default is `fp[-1]`. period : None or float, optional - .. versionadded:: 1.10.0 A period for the x-coordinates. This parameter allows the proper interpolation of angular x-coordinates. Parameters `left` and `right` are ignored if `period` is specified. + .. versionadded:: 1.10.0 + Returns ------- - y : {float, ndarray} + y : float or complex (corresponding to fp) or ndarray The interpolated values, same shape as `x`. Raises @@ -1198,14 +1847,31 @@ def interp(x, xp, fp, left=None, right=None, period=None): >>> np.interp(x, xp, fp, period=360) array([7.5, 5., 8.75, 6.25, 3., 3.25, 3.5, 3.75]) + Complex interpolation + >>> x = [1.5, 4.0] + >>> xp = [2,3,5] + >>> fp = [1.0j, 0, 2+3j] + >>> np.interp(x, xp, fp) + array([ 0.+1.j , 1.+1.5j]) + """ + + fp = np.asarray(fp) + + if np.iscomplexobj(fp): + interp_func = compiled_interp_complex + input_dtype = np.complex128 + else: + interp_func = compiled_interp + input_dtype = np.float64 + if period is None: if isinstance(x, (float, int, number)): - return compiled_interp([x], xp, fp, left, right).item() + return interp_func([x], xp, fp, left, right).item() elif isinstance(x, np.ndarray) and x.ndim == 0: - return compiled_interp([x], xp, fp, left, right).item() + return interp_func([x], xp, fp, left, right).item() else: - return compiled_interp(x, xp, fp, left, right) + return interp_func(x, xp, fp, left, right) else: if period == 0: raise ValueError("period must be a non-zero value") @@ -1218,7 +1884,8 @@ def interp(x, xp, fp, left=None, right=None, period=None): x = [x] x = np.asarray(x, dtype=np.float64) xp = np.asarray(xp, dtype=np.float64) - fp = np.asarray(fp, dtype=np.float64) + fp = np.asarray(fp, dtype=input_dtype) + if xp.ndim != 1 or fp.ndim != 1: raise ValueError("Data points must be 1-D sequences") if xp.shape[0] != fp.shape[0]: @@ -1231,11 +1898,11 @@ def interp(x, xp, fp, left=None, right=None, period=None): fp = fp[asort_xp] xp = np.concatenate((xp[-1:]-period, xp, xp[0:1]+period)) fp = np.concatenate((fp[-1:], fp, fp[0:1])) + if return_array: - return compiled_interp(x, xp, fp, left, right) + return interp_func(x, xp, fp, left, right) else: - return compiled_interp(x, xp, fp, left, right).item() - + return interp_func(x, xp, fp, left, right).item() def angle(z, deg=0): """ @@ -1250,7 +1917,7 @@ def angle(z, deg=0): Returns ------- - angle : {ndarray, scalar} + angle : ndarray or scalar The counterclockwise angle from the positive real axis on the complex plane, with dtype as numpy.float64. @@ -1507,14 +2174,15 @@ def place(arr, mask, vals): Parameters ---------- - arr : array_like + arr : ndarray Array to put data into. mask : array_like Boolean mask array. Must have the same size as `a`. vals : 1-D sequence Values to put into `a`. 
Only the first N elements are used, where N is the number of True values in `mask`. If `vals` is smaller - than N it will be repeated. + than N, it will be repeated, and if elements of `a` are to be masked, + this sequence must be non-empty. See Also -------- @@ -1529,6 +2197,10 @@ def place(arr, mask, vals): [44, 55, 44]]) """ + if not isinstance(arr, np.ndarray): + raise TypeError("argument 1 must be numpy.ndarray, " + "not {name}".format(name=type(arr).__name__)) + return _insert(arr, mask, vals) @@ -1574,17 +2246,126 @@ def disp(mesg, device=None, linefeed=True): return +# See http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html +_DIMENSION_NAME = r'\w+' +_CORE_DIMENSION_LIST = '(?:{0:}(?:,{0:})*)?'.format(_DIMENSION_NAME) +_ARGUMENT = r'\({}\)'.format(_CORE_DIMENSION_LIST) +_ARGUMENT_LIST = '{0:}(?:,{0:})*'.format(_ARGUMENT) +_SIGNATURE = '^{0:}->{0:}$'.format(_ARGUMENT_LIST) + + +def _parse_gufunc_signature(signature): + """ + Parse string signatures for a generalized universal function. + + Arguments + --------- + signature : string + Generalized universal function signature, e.g., ``(m,n),(n,p)->(m,p)`` + for ``np.matmul``. + + Returns + ------- + Tuple of input and output core dimensions parsed from the signature, each + of the form List[Tuple[str, ...]]. + """ + if not re.match(_SIGNATURE, signature): + raise ValueError( + 'not a valid gufunc signature: {}'.format(signature)) + return tuple([tuple(re.findall(_DIMENSION_NAME, arg)) + for arg in re.findall(_ARGUMENT, arg_list)] + for arg_list in signature.split('->')) + + +def _update_dim_sizes(dim_sizes, arg, core_dims): + """ + Incrementally check and update core dimension sizes for a single argument. + + Arguments + --------- + dim_sizes : Dict[str, int] + Sizes of existing core dimensions. Will be updated in-place. + arg : ndarray + Argument to examine. + core_dims : Tuple[str, ...] + Core dimensions for this argument. + """ + if not core_dims: + return + + num_core_dims = len(core_dims) + if arg.ndim < num_core_dims: + raise ValueError( + '%d-dimensional argument does not have enough ' + 'dimensions for all core dimensions %r' + % (arg.ndim, core_dims)) + + core_shape = arg.shape[-num_core_dims:] + for dim, size in zip(core_dims, core_shape): + if dim in dim_sizes: + if size != dim_sizes[dim]: + raise ValueError( + 'inconsistent size for core dimension %r: %r vs %r' + % (dim, size, dim_sizes[dim])) + else: + dim_sizes[dim] = size + + +def _parse_input_dimensions(args, input_core_dims): + """ + Parse broadcast and core dimensions for vectorize with a signature. + + Arguments + --------- + args : Tuple[ndarray, ...] + Tuple of input arguments to examine. + input_core_dims : List[Tuple[str, ...]] + List of core dimensions corresponding to each input. + + Returns + ------- + broadcast_shape : Tuple[int, ...] + Common shape to broadcast all non-core dimensions to. + dim_sizes : Dict[str, int] + Common sizes for named core dimensions. 
+ """ + broadcast_args = [] + dim_sizes = {} + for arg, core_dims in zip(args, input_core_dims): + _update_dim_sizes(dim_sizes, arg, core_dims) + ndim = arg.ndim - len(core_dims) + dummy_array = np.lib.stride_tricks.as_strided(0, arg.shape[:ndim]) + broadcast_args.append(dummy_array) + broadcast_shape = np.lib.stride_tricks._broadcast_shape(*broadcast_args) + return broadcast_shape, dim_sizes + + +def _calculate_shapes(broadcast_shape, dim_sizes, list_of_core_dims): + """Helper for calculating broadcast shapes with core dimensions.""" + return [broadcast_shape + tuple(dim_sizes[dim] for dim in core_dims) + for core_dims in list_of_core_dims] + + +def _create_arrays(broadcast_shape, dim_sizes, list_of_core_dims, dtypes): + """Helper for creating output arrays in vectorize.""" + shapes = _calculate_shapes(broadcast_shape, dim_sizes, list_of_core_dims) + arrays = tuple(np.empty(shape, dtype=dtype) + for shape, dtype in zip(shapes, dtypes)) + return arrays + + class vectorize(object): """ - vectorize(pyfunc, otypes='', doc=None, excluded=None, cache=False) + vectorize(pyfunc, otypes=None, doc=None, excluded=None, cache=False, + signature=None) Generalized function class. - Define a vectorized function which takes a nested sequence - of objects or numpy arrays as inputs and returns a - numpy array as output. The vectorized function evaluates `pyfunc` over - successive tuples of the input arrays like the python map function, - except it uses the broadcasting rules of numpy. + Define a vectorized function which takes a nested sequence of objects or + numpy arrays as inputs and returns an single or tuple of numpy array as + output. The vectorized function evaluates `pyfunc` over successive tuples + of the input arrays like the python map function, except it uses the + broadcasting rules of numpy. The data type of the output of `vectorized` is determined by calling the function with the first element of the input. This can be avoided @@ -1614,6 +2395,15 @@ class vectorize(object): .. versionadded:: 1.7.0 + signature : string, optional + Generalized universal function signature, e.g., ``(m,n),(n)->(m)`` for + vectorized matrix-vector multiplication. If provided, ``pyfunc`` will + be called with (and expected to return) arrays with shapes given by the + size of corresponding core dimensions. By default, ``pyfunc`` is + assumed to take scalars as input and output. + + .. versionadded:: 1.12.0 + Returns ------- vectorized : callable @@ -1633,7 +2423,7 @@ class vectorize(object): array([3, 4, 1, 2]) The docstring is taken from the input function to `vectorize` unless it - is specified + is specified: >>> vfunc.__doc__ 'Return a-b if a>b, otherwise return a+b' @@ -1642,7 +2432,7 @@ class vectorize(object): 'Vectorized `myfunc`' The output type is determined by evaluating the first element of the input, - unless it is specified + unless it is specified: >>> out = vfunc([1, 2, 3, 4], 2) >>> type(out[0]) @@ -1672,6 +2462,29 @@ class vectorize(object): >>> vpolyval([1, 2, 3], x=[0, 1]) array([3, 6]) + The `signature` argument allows for vectorizing functions that act on + non-scalar arrays of fixed length. For example, you can use it for a + vectorized calculation of Pearson correlation coefficient and its p-value: + + >>> import scipy.stats + >>> pearsonr = np.vectorize(scipy.stats.pearsonr, + ... 
signature='(n),(n)->(),()') + >>> pearsonr([[0, 1, 2, 3]], [[1, 2, 3, 4], [4, 3, 2, 1]]) + (array([ 1., -1.]), array([ 0., 0.])) + + Or for a vectorized convolution: + + >>> convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)') + >>> convolve(np.eye(4), [1, 2, 1]) + array([[ 1., 2., 1., 0., 0., 0.], + [ 0., 1., 2., 1., 0., 0.], + [ 0., 0., 1., 2., 1., 0.], + [ 0., 0., 0., 1., 2., 1.]]) + + See Also + -------- + frompyfunc : Takes an arbitrary Python function and returns a ufunc + Notes ----- The `vectorize` function is provided primarily for convenience, not for @@ -1687,12 +2500,17 @@ class vectorize(object): The new keyword argument interface and `excluded` argument support further degrades performance. + References + ---------- + .. [1] NumPy Reference, section `Generalized Universal Function API + <http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_. """ - def __init__(self, pyfunc, otypes='', doc=None, excluded=None, - cache=False): + def __init__(self, pyfunc, otypes=None, doc=None, excluded=None, + cache=False, signature=None): self.pyfunc = pyfunc self.cache = cache + self.signature = signature self._ufunc = None # Caching to improve default performance if doc is None: @@ -1701,22 +2519,25 @@ class vectorize(object): self.__doc__ = doc if isinstance(otypes, str): - self.otypes = otypes - for char in self.otypes: + for char in otypes: if char not in typecodes['All']: - raise ValueError( - "Invalid otype specified: %s" % (char,)) + raise ValueError("Invalid otype specified: %s" % (char,)) elif iterable(otypes): - self.otypes = ''.join([_nx.dtype(x).char for x in otypes]) - else: - raise ValueError( - "Invalid otype specification") + otypes = ''.join([_nx.dtype(x).char for x in otypes]) + elif otypes is not None: + raise ValueError("Invalid otype specification") + self.otypes = otypes # Excluded variable support if excluded is None: excluded = set() self.excluded = set(excluded) + if signature is not None: + self._in_and_out_core_dims = _parse_gufunc_signature(signature) + else: + self._in_and_out_core_dims = None + def __call__(self, *args, **kwargs): """ Return arrays with the results of `pyfunc` broadcast (vectorized) over @@ -1753,7 +2574,7 @@ class vectorize(object): if not args: raise ValueError('args can not be empty') - if self.otypes: + if self.otypes is not None: otypes = self.otypes nout = len(otypes) @@ -1769,7 +2590,12 @@ class vectorize(object): # the subsequent call when the ufunc is evaluated. 
# Assumes that ufunc first evaluates the 0th elements in the input # arrays (the input values are not checked to ensure this) - inputs = [asarray(_a).flat[0] for _a in args] + args = [asarray(arg) for arg in args] + if builtins.any(arg.size == 0 for arg in args): + raise ValueError('cannot call `vectorize` on size 0 inputs ' + 'unless `otypes` is set') + + inputs = [arg.flat[0] for arg in args] outputs = func(*inputs) # Performance note: profiling indicates that -- for simple @@ -1805,29 +2631,94 @@ class vectorize(object): def _vectorize_call(self, func, args): """Vectorized call to `func` over positional `args`.""" - if not args: - _res = func() + if self.signature is not None: + res = self._vectorize_call_with_signature(func, args) + elif not args: + res = func() else: ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args) # Convert args to object arrays first - inputs = [array(_a, copy=False, subok=True, dtype=object) - for _a in args] + inputs = [array(a, copy=False, subok=True, dtype=object) + for a in args] outputs = ufunc(*inputs) if ufunc.nout == 1: - _res = array(outputs, - copy=False, subok=True, dtype=otypes[0]) + res = array(outputs, copy=False, subok=True, dtype=otypes[0]) else: - _res = tuple([array(_x, copy=False, subok=True, dtype=_t) - for _x, _t in zip(outputs, otypes)]) - return _res + res = tuple([array(x, copy=False, subok=True, dtype=t) + for x, t in zip(outputs, otypes)]) + return res + + def _vectorize_call_with_signature(self, func, args): + """Vectorized call over positional arguments with a signature.""" + input_core_dims, output_core_dims = self._in_and_out_core_dims + + if len(args) != len(input_core_dims): + raise TypeError('wrong number of positional arguments: ' + 'expected %r, got %r' + % (len(input_core_dims), len(args))) + args = tuple(asanyarray(arg) for arg in args) + + broadcast_shape, dim_sizes = _parse_input_dimensions( + args, input_core_dims) + input_shapes = _calculate_shapes(broadcast_shape, dim_sizes, + input_core_dims) + args = [np.broadcast_to(arg, shape, subok=True) + for arg, shape in zip(args, input_shapes)] + outputs = None + otypes = self.otypes + nout = len(output_core_dims) -def cov(m, y=None, rowvar=1, bias=0, ddof=None): + for index in np.ndindex(*broadcast_shape): + results = func(*(arg[index] for arg in args)) + + n_results = len(results) if isinstance(results, tuple) else 1 + + if nout != n_results: + raise ValueError( + 'wrong number of outputs from pyfunc: expected %r, got %r' + % (nout, n_results)) + + if nout == 1: + results = (results,) + + if outputs is None: + for result, core_dims in zip(results, output_core_dims): + _update_dim_sizes(dim_sizes, result, core_dims) + + if otypes is None: + otypes = [asarray(result).dtype for result in results] + + outputs = _create_arrays(broadcast_shape, dim_sizes, + output_core_dims, otypes) + + for output, result in zip(outputs, results): + output[index] = result + + if outputs is None: + # did not call the function even once + if otypes is None: + raise ValueError('cannot call `vectorize` on size 0 inputs ' + 'unless `otypes` is set') + if builtins.any(dim not in dim_sizes + for dims in output_core_dims + for dim in dims): + raise ValueError('cannot call `vectorize` with a signature ' + 'including new output dimensions on size 0 ' + 'inputs') + outputs = _create_arrays(broadcast_shape, dim_sizes, + output_core_dims, otypes) + + return outputs[0] if nout == 1 else outputs + + +def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, + aweights=None): """ - Estimate 
a covariance matrix, given data. + Estimate a covariance matrix, given data and weights. Covariance indicates the level to which two variables vary together. If we examine N-dimensional samples, :math:`X = [x_1, x_2, ... x_N]^T`, @@ -1835,6 +2726,8 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None): :math:`x_i` and :math:`x_j`. The element :math:`C_{ii}` is the variance of :math:`x_i`. + See the notes for an outline of the algorithm. + Parameters ---------- m : array_like @@ -1842,23 +2735,38 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None): Each row of `m` represents a variable, and each column a single observation of all those variables. Also see `rowvar` below. y : array_like, optional - An additional set of variables and observations. `y` has the same - form as that of `m`. - rowvar : int, optional - If `rowvar` is non-zero (default), then each row represents a + An additional set of variables and observations. `y` has the same form + as that of `m`. + rowvar : bool, optional + If `rowvar` is True (default), then each row represents a variable, with observations in the columns. Otherwise, the relationship is transposed: each column represents a variable, while the rows contain observations. - bias : int, optional - Default normalization is by ``(N - 1)``, where ``N`` is the number of - observations given (unbiased estimate). If `bias` is 1, then - normalization is by ``N``. These values can be overridden by using - the keyword ``ddof`` in numpy versions >= 1.5. + bias : bool, optional + Default normalization (False) is by ``(N - 1)``, where ``N`` is the + number of observations given (unbiased estimate). If `bias` is True, then + normalization is by ``N``. These values can be overridden by using the + keyword ``ddof`` in numpy versions >= 1.5. ddof : int, optional + If not ``None`` the default value implied by `bias` is overridden. + Note that ``ddof=1`` will return the unbiased estimate, even if both + `fweights` and `aweights` are specified, and ``ddof=0`` will return + the simple average. See the notes for the details. The default value + is ``None``. + .. versionadded:: 1.5 - If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is - the number of observations; this overrides the value implied by - ``bias``. The default value is ``None``. + fweights : array_like, int, optional + 1-D array of integer frequency weights; the number of times each + observation vector should be repeated. + + .. versionadded:: 1.10 + aweights : array_like, optional + 1-D array of observation vector weights. These relative weights are + typically large for observations considered "important" and smaller for + observations considered less "important". If ``ddof=0`` the array of + weights can be used to assign probabilities to observation vectors. + + .. versionadded:: 1.10 Returns ------- @@ -1869,6 +2777,22 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None): -------- corrcoef : Normalized covariance matrix + Notes + ----- + Assume that the observations are in the columns of the observation + array `m` and let ``f = fweights`` and ``a = aweights`` for brevity. The + steps to compute the weighted covariance are as follows:: + + >>> w = f * a + >>> v1 = np.sum(w) + >>> v2 = np.sum(w * a) + >>> m -= np.sum(m * w, axis=1, keepdims=True) / v1 + >>> cov = np.dot(m * w, m.T) * v1 / (v1**2 - ddof * v2) + + Note that when ``a == 1``, the normalization factor + ``v1 / (v1**2 - ddof * v2)`` goes over to ``1 / (np.sum(f) - ddof)`` + as it should.
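For illustration, the weighted algorithm sketched in the notes can be checked numerically against ``np.cov`` itself. This is a doctest-style sketch added for this write-up, not part of the patch; the sample data and the names ``f`` and ``aw`` are ours, and ``ddof`` is left at its default of 1:

>>> m = np.array([[0., 1., 2.], [2., 1., 0.]])
>>> f = np.array([1, 2, 1])        # fweights: integer repeat counts
>>> aw = np.array([1., 1., 2.])    # aweights: relative observation weights
>>> w = f * aw
>>> v1 = np.sum(w)
>>> v2 = np.sum(w * aw)
>>> X = m - np.sum(m * w, axis=1, keepdims=True) / v1   # weighted de-meaning
>>> manual = np.dot(X * w, X.T) * v1 / (v1**2 - 1 * v2) # ddof = 1
>>> np.allclose(manual, np.cov(m, fweights=f, aweights=aw))
True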
+ Examples -------- Consider two variables, :math:`x_0` and :math:`x_1`, which @@ -1894,13 +2818,13 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None): >>> x = [-2.1, -1, 4.3] >>> y = [3, 1.1, 0.12] >>> X = np.vstack((x,y)) - >>> print np.cov(X) + >>> print(np.cov(X)) [[ 11.71 -4.286 ] [ -4.286 2.14413333]] - >>> print np.cov(x, y) + >>> print(np.cov(x, y)) [[ 11.71 -4.286 ] [ -4.286 2.14413333]] - >>> print np.cov(x) + >>> print(np.cov(x)) 11.71 """ @@ -1911,98 +2835,175 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None): # Handles complex arrays too m = np.asarray(m) + if m.ndim > 2: + raise ValueError("m has more than 2 dimensions") + if y is None: dtype = np.result_type(m, np.float64) else: y = np.asarray(y) + if y.ndim > 2: + raise ValueError("y has more than 2 dimensions") dtype = np.result_type(m, y, np.float64) - X = array(m, ndmin=2, dtype=dtype) - if X.shape[0] == 1: - rowvar = 1 - if rowvar: - N = X.shape[1] - axis = 0 - else: - N = X.shape[0] - axis = 1 + X = array(m, ndmin=2, dtype=dtype) + if rowvar == 0 and X.shape[0] != 1: + X = X.T + if X.shape[0] == 0: + return np.array([]).reshape(0, 0) + if y is not None: + y = array(y, copy=False, ndmin=2, dtype=dtype) + if rowvar == 0 and y.shape[0] != 1: + y = y.T + X = np.vstack((X, y)) - # check ddof if ddof is None: if bias == 0: ddof = 1 else: ddof = 0 - fact = float(N - ddof) + + # Get the product of frequencies and weights + w = None + if fweights is not None: + fweights = np.asarray(fweights, dtype=np.float) + if not np.all(fweights == np.around(fweights)): + raise TypeError( + "fweights must be integer") + if fweights.ndim > 1: + raise RuntimeError( + "cannot handle multidimensional fweights") + if fweights.shape[0] != X.shape[1]: + raise RuntimeError( + "incompatible numbers of samples and fweights") + if any(fweights < 0): + raise ValueError( + "fweights cannot be negative") + w = fweights + if aweights is not None: + aweights = np.asarray(aweights, dtype=np.float) + if aweights.ndim > 1: + raise RuntimeError( + "cannot handle multidimensional aweights") + if aweights.shape[0] != X.shape[1]: + raise RuntimeError( + "incompatible numbers of samples and aweights") + if any(aweights < 0): + raise ValueError( + "aweights cannot be negative") + if w is None: + w = aweights + else: + w *= aweights + + avg, w_sum = average(X, axis=1, weights=w, returned=True) + w_sum = w_sum[0] + + # Determine the normalization + if w is None: + fact = X.shape[1] - ddof + elif ddof == 0: + fact = w_sum + elif aweights is None: + fact = w_sum - ddof + else: + fact = w_sum - ddof*sum(w*aweights)/w_sum + if fact <= 0: - warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning) + warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning, stacklevel=2) fact = 0.0 - if y is not None: - y = array(y, copy=False, ndmin=2, dtype=dtype) - X = concatenate((X, y), axis) - - X -= X.mean(axis=1-axis, keepdims=True) - if not rowvar: - return (dot(X.T, X.conj()) / fact).squeeze() + X -= avg[:, None] + if w is None: + X_T = X.T else: - return (dot(X, X.T.conj()) / fact).squeeze() + X_T = (X*w).T + c = dot(X, X_T.conj()) + c *= 1. / np.float64(fact) + return c.squeeze() -def corrcoef(x, y=None, rowvar=1, bias=0, ddof=None): +def corrcoef(x, y=None, rowvar=1, bias=np._NoValue, ddof=np._NoValue): """ - Return correlation coefficients. + Return Pearson product-moment correlation coefficients. Please refer to the documentation for `cov` for more detail. 
The - relationship between the correlation coefficient matrix, `P`, and the + relationship between the correlation coefficient matrix, `R`, and the covariance matrix, `C`, is - .. math:: P_{ij} = \\frac{ C_{ij} } { \\sqrt{ C_{ii} * C_{jj} } } + .. math:: R_{ij} = \\frac{ C_{ij} } { \\sqrt{ C_{ii} * C_{jj} } } - The values of `P` are between -1 and 1, inclusive. + The values of `R` are between -1 and 1, inclusive. Parameters ---------- x : array_like A 1-D or 2-D array containing multiple variables and observations. - Each row of `m` represents a variable, and each column a single + Each row of `x` represents a variable, and each column a single observation of all those variables. Also see `rowvar` below. y : array_like, optional An additional set of variables and observations. `y` has the same - shape as `m`. + shape as `x`. rowvar : int, optional If `rowvar` is non-zero (default), then each row represents a variable, with observations in the columns. Otherwise, the relationship is transposed: each column represents a variable, while the rows contain observations. - bias : int, optional - Default normalization is by ``(N - 1)``, where ``N`` is the number of - observations (unbiased estimate). If `bias` is 1, then - normalization is by ``N``. These values can be overridden by using - the keyword ``ddof`` in numpy versions >= 1.5. - ddof : {None, int}, optional - .. versionadded:: 1.5 - If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is - the number of observations; this overrides the value implied by - ``bias``. The default value is ``None``. + bias : _NoValue, optional + Has no effect, do not use. + + .. deprecated:: 1.10.0 + ddof : _NoValue, optional + Has no effect, do not use. + + .. deprecated:: 1.10.0 Returns ------- - out : ndarray + R : ndarray The correlation coefficient matrix of the variables. See Also -------- cov : Covariance matrix + Notes + ----- + Due to floating point rounding the resulting array may not be Hermitian, + the diagonal elements may not be 1, and the elements may not satisfy the + inequality abs(a) <= 1. The real and imaginary parts are clipped to the + interval [-1, 1] in an attempt to improve on that situation but is not + much help in the complex case. + + This function accepts but discards arguments `bias` and `ddof`. This is + for backwards compatibility with previous versions of this function. These + arguments had no effect on the return values of the function and can be + safely ignored in this and previous versions of numpy. + """ - c = cov(x, y, rowvar, bias, ddof) + if bias is not np._NoValue or ddof is not np._NoValue: + # 2015-03-15, 1.10 + warnings.warn('bias and ddof have no effect and are deprecated', + DeprecationWarning, stacklevel=2) + c = cov(x, y, rowvar) try: d = diag(c) - except ValueError: # scalar covariance + except ValueError: + # scalar covariance # nan if incorrect value (nan, inf, 0), 1 otherwise return c / c - return c / sqrt(multiply.outer(d, d)) + stddev = sqrt(d.real) + c /= stddev[:, None] + c /= stddev[None, :] + + # Clip real and imaginary parts to [-1, 1]. This does not guarantee + # abs(a[i,j]) <= 1 for complex arrays, but is the best we can do without + # excessive work. + np.clip(c.real, -1, 1, out=c.real) + if np.iscomplexobj(c): + np.clip(c.imag, -1, 1, out=c.imag) + + return c def blackman(M): @@ -2160,7 +3161,6 @@ def bartlett(M): .. [5] W.H. Press, B.P. Flannery, S.A. Teukolsky, and W.T. Vetterling, "Numerical Recipes", Cambridge University Press, 1986, page 429. 
- Examples -------- >>> np.bartlett(12) array([ 0. , 0.18181818, 0.36363636, 0.54545455, 0.72727273, 0.90909091, @@ -2239,7 +3239,7 @@ def hanning(M): .. math:: w(n) = 0.5 - 0.5cos\left(\frac{2\pi{n}}{M-1}\right) \qquad 0 \leq n \leq M-1 - The Hanning was named for Julius van Hann, an Austrian meteorologist. + The Hanning was named for Julius von Hann, an Austrian meteorologist. It is also known as the Cosine Bell. Some authors prefer that it be called a Hann window, to help avoid confusion with the very similar Hamming window. @@ -2805,9 +3805,9 @@ def _ureduce(a, func, **kwargs): a : array_like Input array or object that can be converted to an array. func : callable - Reduction function Kapable of receiving an axis argument. + Reduction function capable of receiving a single axis argument. It is called with `a` as first argument followed by `kwargs`. - kwargs : keyword arguments + kwargs : keyword arguments additional keyword arguments to pass to `func`. Returns @@ -2862,22 +3862,22 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False): ---------- a : array_like Input array or object that can be converted to an array. - axis : int or sequence of int, optional - Axis along which the medians are computed. The default (axis=None) + axis : {int, sequence of int, None}, optional + Axis or axes along which the medians are computed. The default is to compute the median along a flattened version of the array. A sequence of axes is supported since version 1.9.0. out : ndarray, optional - Alternative output array in which to place the result. It must have - the same shape and buffer length as the expected output, but the - type (of the output) will be cast if necessary. + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. overwrite_input : bool, optional - If True, then allow use of memory of input array (a) for + If True, then allow use of memory of input array `a` for calculations. The input array will be modified by the call to - median. This will save memory when you do not need to preserve the - contents of the input array. Treat the input as undefined, but it - will probably be fully or partially sorted. Default is False. Note - that, if `overwrite_input` is True and the input is not already an - ndarray, an error will be raised. + `median`. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. If `overwrite_input` is ``True`` and `a` is not already an + `ndarray`, an error will be raised. keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, @@ -2885,15 +3885,14 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False): .. versionadded:: 1.9.0 - Returns ------- median : ndarray - A new array holding the result (unless `out` is specified, in which - case that array is returned instead). If the input contains - integers, or floats of smaller precision than 64, then the output - data-type is float64. Otherwise, the output data-type is the same - as that of the input. + A new array holding the result. If the input contains integers + or floats smaller than ``float64``, then the output data-type is + ``np.float64``. Otherwise, the data-type of the output is the + same as that of the input. If `out` is specified, that array is + returned instead.
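To make those return semantics concrete, a short doctest-style sketch added here (ours, not part of the patch): integer input is averaged in ``float64``, and a supplied ``out`` array is the object that comes back:

>>> a = np.array([[10, 7, 4], [3, 2, 1]])
>>> np.median(a, axis=0)
array([ 6.5,  4.5,  2.5])
>>> out = np.zeros(3)
>>> np.median(a, axis=0, out=out) is out
True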
See Also -------- @@ -2901,10 +3900,10 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False): Notes ----- - Given a vector V of length N, the median of V is the middle value of - a sorted copy of V, ``V_sorted`` - i.e., ``V_sorted[(N-1)/2]``, when N is - odd. When N is even, it is the average of the two middle values of - ``V_sorted``. + Given a vector ``V`` of length ``N``, the median of ``V`` is the + middle value of a sorted copy of ``V``, ``V_sorted`` - i.e., + ``V_sorted[(N-1)/2]``, when ``N`` is odd, and the average of the + two middle values of ``V_sorted`` when ``N`` is even. Examples -------- @@ -2945,41 +3944,37 @@ def _median(a, axis=None, out=None, overwrite_input=False): # can't reasonably be implemented in terms of percentile as we have to # call mean to not break astropy a = np.asanyarray(a) - if axis is not None and axis >= a.ndim: - raise IndexError( - "axis %d out of bounds (%d)" % (axis, a.ndim)) + + # Set the partition indexes + if axis is None: + sz = a.size + else: + sz = a.shape[axis] + if sz % 2 == 0: + szh = sz // 2 + kth = [szh - 1, szh] + else: + kth = [(sz - 1) // 2] + # Check if the array contains any nan's + if np.issubdtype(a.dtype, np.inexact): + kth.append(-1) if overwrite_input: if axis is None: part = a.ravel() - sz = part.size - if sz % 2 == 0: - szh = sz // 2 - part.partition((szh - 1, szh)) - else: - part.partition((sz - 1) // 2) + part.partition(kth) else: - sz = a.shape[axis] - if sz % 2 == 0: - szh = sz // 2 - a.partition((szh - 1, szh), axis=axis) - else: - a.partition((sz - 1) // 2, axis=axis) + a.partition(kth, axis=axis) part = a else: - if axis is None: - sz = a.size - else: - sz = a.shape[axis] - if sz % 2 == 0: - part = partition(a, ((sz // 2) - 1, sz // 2), axis=axis) - else: - part = partition(a, (sz - 1) // 2, axis=axis) + part = partition(a, kth, axis=axis) + if part.shape == (): # make 0-D arrays work return part.item() if axis is None: axis = 0 + indexer = [slice(None)] * part.ndim index = part.shape[axis] // 2 if part.shape[axis] % 2 == 1: @@ -2987,9 +3982,33 @@ def _median(a, axis=None, out=None, overwrite_input=False): indexer[axis] = slice(index, index+1) else: indexer[axis] = slice(index-1, index+1) - # Use mean in odd and even case to coerce data type - # and check, use out array. - return mean(part[indexer], axis=axis, out=out) + + # Check if the array contains any nan's + if np.issubdtype(a.dtype, np.inexact) and sz > 0: + # warn and return nans like mean would + rout = mean(part[indexer], axis=axis, out=out) + part = np.rollaxis(part, axis, part.ndim) + n = np.isnan(part[..., -1]) + if rout.ndim == 0: + if n == True: + warnings.warn("Invalid value encountered in median", + RuntimeWarning, stacklevel=3) + if out is not None: + out[...] = a.dtype.type(np.nan) + rout = out + else: + rout = a.dtype.type(np.nan) + elif np.count_nonzero(n.ravel()) > 0: + warnings.warn("Invalid value encountered in median for" + + " %d results" % np.count_nonzero(n.ravel()), + RuntimeWarning, stacklevel=3) + rout[n] = np.nan + return rout + else: + # if there are no nans + # Use mean in odd and even case to coerce data type + # and check, use out array. + return mean(part[indexer], axis=axis, out=out) def percentile(a, q, axis=None, out=None, @@ -2997,73 +4016,79 @@ """ Compute the qth percentile of the data along the specified axis. - Returns the qth percentile of the array elements. + Returns the qth percentile(s) of the array elements.
Parameters ---------- a : array_like Input array or object that can be converted to an array. q : float in range of [0,100] (or sequence of floats) - Percentile to compute which must be between 0 and 100 inclusive. - axis : int or sequence of int, optional - Axis along which the percentiles are computed. The default (None) - is to compute the percentiles along a flattened version of the array. - A sequence of axes is supported since version 1.9.0. + Percentile to compute, which must be between 0 and 100 inclusive. + axis : {int, sequence of int, None}, optional + Axis or axes along which the percentiles are computed. The + default is to compute the percentile(s) along a flattened + version of the array. A sequence of axes is supported since + version 1.9.0. out : ndarray, optional Alternative output array in which to place the result. It must have the same shape and buffer length as the expected output, but the type (of the output) will be cast if necessary. overwrite_input : bool, optional - If True, then allow use of memory of input array `a` for + If True, then allow use of memory of input array `a` for calculations. The input array will be modified by the call to - percentile. This will save memory when you do not need to preserve - the contents of the input array. In this case you should not make - any assumptions about the content of the passed in array `a` after - this function completes -- treat it as undefined. Default is False. - Note that, if the `a` input is not already an array this parameter - will have no effect, `a` will be converted to an array internally - regardless of the value of this parameter. + `percentile`. This will save memory when you do not need to + preserve the contents of the input array. In this case you + should not make any assumptions about the contents of the input + `a` after this function completes -- treat it as undefined. + Default is False. If `a` is not already an array, this parameter + will have no effect as `a` will be converted to an array + internally regardless of the value of this parameter. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - This optional parameter specifies the interpolation method to use, - when the desired quantile lies between two data points `i` and `j`: - * linear: `i + (j - i) * fraction`, where `fraction` is the - fractional part of the index surrounded by `i` and `j`. - * lower: `i`. - * higher: `j`. - * nearest: `i` or `j` whichever is nearest. - * midpoint: (`i` + `j`) / 2. + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` + is the fractional part of the index surrounded by ``i`` + and ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. .. versionadded:: 1.9.0 keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. .. versionadded:: 1.9.0 Returns ------- percentile : scalar or ndarray - If a single percentile `q` is given and axis=None a scalar is - returned.
If multiple percentiles `q` are given an array holding - the result is returned. The results are listed in the first axis. - (If `out` is specified, in which case that array is returned - instead). If the input contains integers, or floats of smaller - precision than 64, then the output data-type is float64. Otherwise, - the output data-type is the same as that of the input. + If `q` is a single percentile and `axis=None`, then the result + is a scalar. If multiple percentiles are given, the first axis of + the result corresponds to the percentiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. See Also -------- - mean, median + mean, median, nanpercentile Notes ----- - Given a vector V of length N, the q-th percentile of V is the q-th ranked - value in a sorted copy of V. The values and distances of the two - nearest neighbors as well as the `interpolation` parameter will - determine the percentile if the normalized ranking does not match q - exactly. This function is the same as the median if ``q=50``, the same - as the minimum if ``q=0`` and the same as the maximum if ``q=100``. + Given a vector ``V`` of length ``N``, the ``q``-th percentile of + ``V`` is the value ``q/100`` of the way from the minimum to the + maximum in a sorted copy of ``V``. The values and distances of + the two nearest neighbors as well as the `interpolation` parameter + will determine the percentile if the normalized ranking does not + match the location of ``q`` exactly. This function is the same as + the median if ``q=50``, the same as the minimum if ``q=0`` and the + same as the maximum if ``q=100``.
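As a concrete illustration of the interpolation options just described (a doctest-style sketch added here, not part of the patch): for ``V = [1, 2, 3, 4]`` and ``q=75`` the normalized rank is ``0.75 * (4 - 1) = 2.25``, so the two nearest neighbors are ``i = 3`` and ``j = 4``:

>>> x = np.array([1, 2, 3, 4])
>>> np.percentile(x, 75)                           # 'linear' (default)
3.25
>>> np.percentile(x, 75, interpolation='lower')
3
>>> np.percentile(x, 75, interpolation='higher')
4
>>> np.percentile(x, 75, interpolation='nearest')
3
>>> np.percentile(x, 75, interpolation='midpoint')
3.5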
Examples -------- @@ -3072,28 +4097,26 @@ def percentile(a, q, axis=None, out=None, array([[10, 7, 4], [ 3, 2, 1]]) >>> np.percentile(a, 50) - array([ 3.5]) + 3.5 >>> np.percentile(a, 50, axis=0) array([[ 6.5, 4.5, 2.5]]) >>> np.percentile(a, 50, axis=1) + array([ 7., 2.]) + >>> np.percentile(a, 50, axis=1, keepdims=True) array([[ 7.], [ 2.]]) >>> m = np.percentile(a, 50, axis=0) >>> out = np.zeros_like(m) - >>> np.percentile(a, 50, axis=0, out=m) + >>> np.percentile(a, 50, axis=0, out=out) array([[ 6.5, 4.5, 2.5]]) >>> m array([[ 6.5, 4.5, 2.5]]) >>> b = a.copy() >>> np.percentile(b, 50, axis=1, overwrite_input=True) - array([[ 7.], - [ 2.]]) - >>> assert not np.all(a==b) - >>> b = a.copy() - >>> np.percentile(b, 50, axis=None, overwrite_input=True) - array([ 3.5]) + array([ 7., 2.]) + >>> assert not np.all(a == b) """ q = array(q, dtype=np.float64, copy=True) @@ -3155,7 +4178,7 @@ def _percentile(a, q, axis=None, out=None, elif interpolation == 'higher': indices = ceil(indices).astype(intp) elif interpolation == 'midpoint': - indices = floor(indices) + 0.5 + indices = 0.5 * (floor(indices) + ceil(indices)) elif interpolation == 'nearest': indices = around(indices).astype(intp) elif interpolation == 'linear': @@ -3165,20 +4188,36 @@ def _percentile(a, q, axis=None, out=None, "interpolation can only be 'linear', 'lower' 'higher', " "'midpoint', or 'nearest'") + n = np.array(False, dtype=bool) # check for nan's flag if indices.dtype == intp: # take the points along axis + # Check if the array contains any nan's + if np.issubdtype(a.dtype, np.inexact): + indices = concatenate((indices, [-1])) + ap.partition(indices, axis=axis) # ensure axis with qth is first ap = np.rollaxis(ap, axis, 0) axis = 0 + # Check if the array contains any nan's + if np.issubdtype(a.dtype, np.inexact): + indices = indices[:-1] + n = np.isnan(ap[-1:, ...]) + if zerod: indices = indices[0] r = take(ap, indices, axis=axis, out=out) + + else: # weight the points above and below the indices indices_below = floor(indices).astype(intp) indices_above = indices_below + 1 indices_above[indices_above > Nx - 1] = Nx - 1 + # Check if the array contains any nan's + if np.issubdtype(a.dtype, np.inexact): + indices_above = concatenate((indices_above, [-1])) + weights_above = indices - indices_below weights_below = 1.0 - weights_above @@ -3188,6 +4227,18 @@ def _percentile(a, q, axis=None, out=None, weights_above.shape = weights_shape ap.partition(concatenate((indices_below, indices_above)), axis=axis) + + # ensure axis with qth is first + ap = np.rollaxis(ap, axis, 0) + weights_below = np.rollaxis(weights_below, axis, 0) + weights_above = np.rollaxis(weights_above, axis, 0) + axis = 0 + + # Check if the array contains any nan's + if np.issubdtype(a.dtype, np.inexact): + indices_above = indices_above[:-1] + n = np.isnan(ap[-1:, ...]) + x1 = take(ap, indices_below, axis=axis) * weights_below x2 = take(ap, indices_above, axis=axis) * weights_above @@ -3204,6 +4255,24 @@ def _percentile(a, q, axis=None, out=None, else: r = add(x1, x2) + if np.any(n): + warnings.warn("Invalid value encountered in percentile", + RuntimeWarning, stacklevel=3) + if zerod: + if ap.ndim == 1: + if out is not None: + out[...] 
= a.dtype.type(np.nan) + r = out + else: + r = a.dtype.type(np.nan) + else: + r[..., n.squeeze(0)] = a.dtype.type(np.nan) + else: + if r.ndim == 1: + r[:] = a.dtype.type(np.nan) + else: + r[..., n.repeat(q.size, 0)] = a.dtype.type(np.nan) + return r @@ -3218,11 +4287,13 @@ def trapz(y, x=None, dx=1.0, axis=-1): y : array_like Input array to integrate. x : array_like, optional - If `x` is None, then spacing between all `y` elements is `dx`. + The sample points corresponding to the `y` values. If `x` is None, + the sample points are assumed to be evenly spaced `dx` apart. The + default is None. dx : scalar, optional - If `x` is None, spacing given by `dx` is assumed. Default is 1. + The spacing between sample points when `x` is None. The default is 1. axis : int, optional - Specify the axis. + The axis along which to integrate. Returns ------- @@ -3297,7 +4368,8 @@ def trapz(y, x=None, dx=1.0, axis=-1): #always succeed def add_newdoc(place, obj, doc): - """Adds documentation to obj which is in module place. + """ + Adds documentation to obj which is in module place. If doc is a string add it to obj as a docstring @@ -3315,7 +4387,7 @@ def add_newdoc(place, obj, doc): in new-style classes or built-in functions. Because this routine never raises an error the caller must check manually that the docstrings were changed. - """ + """ try: new = getattr(__import__(place, globals(), {}, [obj]), obj) if isinstance(doc, str): @@ -3535,19 +4607,21 @@ def delete(arr, obj, axis=None): arr = asarray(arr) ndim = arr.ndim + arrorder = 'F' if arr.flags.fnc else 'C' if axis is None: if ndim != 1: arr = arr.ravel() ndim = arr.ndim axis = ndim - 1 if ndim == 0: + # 2013-09-24, 1.9 warnings.warn( "in the future the special handling of scalars will be removed " - "from delete and raise an error", DeprecationWarning) + "from delete and raise an error", DeprecationWarning, stacklevel=2) if wrap: return wrap(arr) else: - return arr.copy() + return arr.copy(order=arrorder) slobj = [slice(None)]*ndim N = arr.shape[axis] @@ -3560,9 +4634,9 @@ def delete(arr, obj, axis=None): if numtodel <= 0: if wrap: - return wrap(arr.copy()) + return wrap(arr.copy(order=arrorder)) else: - return arr.copy() + return arr.copy(order=arrorder) # Invert if step is negative: if step < 0: @@ -3571,7 +4645,7 @@ def delete(arr, obj, axis=None): stop = xr[0] + 1 newshape[axis] -= numtodel - new = empty(newshape, arr.dtype, arr.flags.fnc) + new = empty(newshape, arr.dtype, arrorder) # copy initial chunk if start == 0: pass @@ -3610,7 +4684,7 @@ def delete(arr, obj, axis=None): if obj.dtype == bool: warnings.warn( "in the future insert will treat boolean arrays and array-likes " - "as boolean index instead of casting it to integer", FutureWarning) + "as boolean index instead of casting it to integer", FutureWarning, stacklevel=2) obj = obj.astype(intp) if isinstance(_obj, (int, long, integer)): # optimization for a single value @@ -3622,7 +4696,7 @@ def delete(arr, obj, axis=None): if (obj < 0): obj += N newshape[axis] -= 1 - new = empty(newshape, arr.dtype, arr.flags.fnc) + new = empty(newshape, arr.dtype, arrorder) slobj[axis] = slice(None, obj) new[slobj] = arr[slobj] slobj[axis] = slice(obj, None) @@ -3635,25 +4709,27 @@ def delete(arr, obj, axis=None): if not np.can_cast(obj, intp, 'same_kind'): # obj.size = 1 special case always failed and would just # give superfluous warnings. 
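    # Editorial aside (not part of this changeset): the branch below is what
    # a call with float indices hits, e.g.
    #     np.delete(np.arange(5), np.array([1.0, 3.0]))
    # today this casts the indices to intp and emits a DeprecationWarning;
    # once the deprecation period ends it is meant to raise an error instead.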
+ # 2013-09-24, 1.9 warnings.warn( "using a non-integer array as obj in delete will result in an " - "error in the future", DeprecationWarning) + "error in the future", DeprecationWarning, stacklevel=2) obj = obj.astype(intp) keep = ones(N, dtype=bool) # Test if there are out of bound indices, this is deprecated inside_bounds = (obj < N) & (obj >= -N) if not inside_bounds.all(): + # 2013-09-24, 1.9 warnings.warn( "in the future out of bounds indices will raise an error " "instead of being ignored by `numpy.delete`.", - DeprecationWarning) + DeprecationWarning, stacklevel=2) obj = obj[inside_bounds] positive_indices = obj >= 0 if not positive_indices.all(): warnings.warn( "in the future negative indices will not be ignored by " - "`numpy.delete`.", FutureWarning) + "`numpy.delete`.", FutureWarning, stacklevel=2) obj = obj[positive_indices] keep[obj, ] = False @@ -3702,7 +4778,7 @@ def insert(arr, obj, values, axis=None): See Also -------- append : Append elements at the end of an array. - concatenate : Join a sequence of arrays together. + concatenate : Join a sequence of arrays along an existing axis. delete : Delete elements from an array. Notes @@ -3726,6 +4802,7 @@ def insert(arr, obj, values, axis=None): [3, 5, 3]]) Difference between sequence and scalars: + >>> np.insert(a, [1], [[1],[2],[3]], axis=1) array([[1, 1, 1], [2, 2, 2], @@ -3762,6 +4839,7 @@ def insert(arr, obj, values, axis=None): arr = asarray(arr) ndim = arr.ndim + arrorder = 'F' if arr.flags.fnc else 'C' if axis is None: if ndim != 1: arr = arr.ravel() @@ -3775,10 +4853,11 @@ def insert(arr, obj, values, axis=None): if (axis < 0): axis += ndim if (ndim == 0): + # 2013-09-24, 1.9 warnings.warn( "in the future the special handling of scalars will be removed " - "from insert and raise an error", DeprecationWarning) - arr = arr.copy() + "from insert and raise an error", DeprecationWarning, stacklevel=2) + arr = arr.copy(order=arrorder) arr[...] 
= values if wrap: return wrap(arr) @@ -3799,7 +4878,7 @@ def insert(arr, obj, values, axis=None): warnings.warn( "in the future insert will treat boolean arrays and " "array-likes as a boolean index instead of casting it to " - "integer", FutureWarning) + "integer", FutureWarning, stacklevel=2) indices = indices.astype(intp) # Code after warning period: #if obj.ndim != 1: @@ -3829,7 +4908,7 @@ def insert(arr, obj, values, axis=None): values = np.rollaxis(values, 0, (axis % values.ndim) + 1) numnew = values.shape[axis] newshape[axis] += numnew - new = empty(newshape, arr.dtype, arr.flags.fnc) + new = empty(newshape, arr.dtype, arrorder) slobj[axis] = slice(None, index) new[slobj] = arr[slobj] slobj[axis] = slice(index, index+numnew) @@ -3846,9 +4925,10 @@ def insert(arr, obj, values, axis=None): indices = indices.astype(intp) if not np.can_cast(indices, intp, 'same_kind'): + # 2013-09-24, 1.9 warnings.warn( "using a non-integer array as obj in insert will result in an " - "error in the future", DeprecationWarning) + "error in the future", DeprecationWarning, stacklevel=2) indices = indices.astype(intp) indices[indices < 0] += N @@ -3861,7 +4941,7 @@ def insert(arr, obj, values, axis=None): old_mask = ones(newshape[axis], dtype=bool) old_mask[indices] = False - new = empty(newshape, arr.dtype, arr.flags.fnc) + new = empty(newshape, arr.dtype, arrorder) slobj2 = [slice(None)]*ndim slobj[axis] = indices slobj2[axis] = old_mask diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py index f83024961..a0875a25f 100644 --- a/numpy/lib/index_tricks.py +++ b/numpy/lib/index_tricks.py @@ -7,12 +7,12 @@ import numpy.core.numeric as _nx from numpy.core.numeric import ( asarray, ScalarType, array, alltrue, cumprod, arange ) -from numpy.core.numerictypes import find_common_type +from numpy.core.numerictypes import find_common_type, issubdtype from . import function_base import numpy.matrixlib as matrix from .function_base import diff -from numpy.lib._compiled_base import ravel_multi_index, unravel_index +from numpy.core.multiarray import ravel_multi_index, unravel_index from numpy.lib.stride_tricks import as_strided makemat = matrix.matrix @@ -71,17 +71,17 @@ def ix_(*args): """ out = [] nd = len(args) - baseshape = [1]*nd - for k in range(nd): - new = _nx.asarray(args[k]) - if (new.ndim != 1): + for k, new in enumerate(args): + new = asarray(new) + if new.ndim != 1: raise ValueError("Cross index must be 1 dimensional") - if issubclass(new.dtype.type, _nx.bool_): - new = new.nonzero()[0] - baseshape[k] = len(new) - new = new.reshape(tuple(baseshape)) + if new.size == 0: + # Explicitly type empty arrays to avoid float default + new = new.astype(_nx.intp) + if issubdtype(new.dtype, _nx.bool_): + new, = new.nonzero() + new = new.reshape((1,)*k + (new.size,) + (1,)*(nd-k-1)) out.append(new) - baseshape[k] = 1 return tuple(out) class nd_grid(object): @@ -404,7 +404,7 @@ class RClass(AxisConcatenator): See Also -------- - concatenate : Join a sequence of arrays together. + concatenate : Join a sequence of arrays along an existing axis. c_ : Translates slice objects to concatenation along the second axis. Examples @@ -480,7 +480,7 @@ class ndenumerate(object): Parameters ---------- - a : ndarray + arr : ndarray Input array. See Also @@ -491,7 +491,7 @@ class ndenumerate(object): -------- >>> a = np.array([[1, 2], [3, 4]]) >>> for index, x in np.ndenumerate(a): - ... print index, x + ... 
print(index, x) (0, 0) 1 (0, 1) 2 (1, 0) 3 @@ -542,7 +542,7 @@ class ndindex(object): Examples -------- >>> for index in np.ndindex(3, 2, 1): - ... print index + ... print(index) (0, 0, 0) (0, 1, 0) (1, 0, 0) @@ -681,7 +681,7 @@ def fill_diagonal(a, val, wrap=False): wrap : bool For tall matrices in NumPy version up to 1.6.2, the diagonal "wrapped" after N columns. You can have this behavior - with this option. This affect only tall matrices. + with this option. This affects only tall matrices. See also -------- @@ -724,7 +724,9 @@ def fill_diagonal(a, val, wrap=False): [0, 0, 0], [0, 0, 4]]) - # tall matrices no wrap + The wrap option affects only tall matrices: + + >>> # tall matrices no wrap >>> a = np.zeros((5, 3),int) >>> fill_diagonal(a, 4) >>> a @@ -734,7 +736,7 @@ def fill_diagonal(a, val, wrap=False): [0, 0, 0], [0, 0, 0]]) - # tall matrices wrap + >>> # tall matrices wrap >>> a = np.zeros((5, 3),int) >>> fill_diagonal(a, 4, wrap=True) >>> a @@ -744,7 +746,7 @@ def fill_diagonal(a, val, wrap=False): [0, 0, 0], [4, 0, 0]]) - # wide matrices + >>> # wide matrices >>> a = np.zeros((3, 5),int) >>> fill_diagonal(a, 4, wrap=True) >>> a diff --git a/numpy/lib/info.py b/numpy/lib/info.py index 3fbbab769..141df2ace 100644 --- a/numpy/lib/info.py +++ b/numpy/lib/info.py @@ -67,13 +67,14 @@ Shape Manipulation ------------------ ================ =================== squeeze Return a with length-one dimensions removed. -atleast_1d Force arrays to be > 1D -atleast_2d Force arrays to be > 2D -atleast_3d Force arrays to be > 3D +atleast_1d Force arrays to be >= 1D +atleast_2d Force arrays to be >= 2D +atleast_3d Force arrays to be >= 3D vstack Stack arrays vertically (row on row) hstack Stack arrays horizontally (column on column) column_stack Stack 1D arrays as columns into 2D array dstack Stack arrays depthwise (along third dimension) +stack Stack arrays along a new axis split Divide array into a list of sub-arrays hsplit Split into columns vsplit Split into rows @@ -108,6 +109,12 @@ polydiv Divide polynomials polyval Evaluate polynomial at given argument ================ =================== +Iterators +--------- +================ =================== +Arrayterator A buffered iterator for big arrays. +================ =================== + Import Tricks ------------- ================ =================== diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index 7260a35b8..c024055ba 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -9,9 +9,14 @@ Functions - `nanargmin` -- index of minimum non-NaN value - `nanargmax` -- index of maximum non-NaN value - `nansum` -- sum of non-NaN values +- `nanprod` -- product of non-NaN values +- `nancumsum` -- cumulative sum of non-NaN values +- `nancumprod` -- cumulative product of non-NaN values - `nanmean` -- mean of non-NaN values - `nanvar` -- variance of non-NaN values - `nanstd` -- standard deviation of non-NaN values +- `nanmedian` -- median of non-NaN values +- `nanpercentile` -- qth percentile of non-NaN values """ from __future__ import division, absolute_import, print_function @@ -20,9 +25,11 @@ import warnings import numpy as np from numpy.lib.function_base import _ureduce as _ureduce + __all__ = [ 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', - 'nanmedian', 'nanpercentile', 'nanvar', 'nanstd' + 'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod', + 'nancumsum', 'nancumprod' ] @@ -123,7 +130,7 @@ def _divide_by_count(a, b, out=None): in place. 
If `a` is a numpy scalar, the division preserves its type. """ - with np.errstate(invalid='ignore'): + with np.errstate(invalid='ignore', divide='ignore'): if isinstance(a, np.ndarray): if out is None: return np.divide(a, b, out=a, casting='unsafe') @@ -138,7 +145,7 @@ def _divide_by_count(a, b, out=None): return np.divide(a, b, out=out, casting='unsafe') -def nanmin(a, axis=None, out=None, keepdims=False): +def nanmin(a, axis=None, out=None, keepdims=np._NoValue): """ Return minimum of an array or minimum along an axis, ignoring any NaNs. When all-NaN slices are encountered a ``RuntimeWarning`` is raised and @@ -160,9 +167,14 @@ def nanmin(a, axis=None, out=None, keepdims=False): .. versionadded:: 1.8.0 keepdims : bool, optional - If this is set to True, the axes which are reduced are left in the - result as dimensions with size one. With this option, the result - will broadcast correctly against the original `a`. + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + If the value is anything but the default, then + `keepdims` will be passed through to the `min` method + of sub-classes of `ndarray`. If the sub-class's method + does not implement `keepdims`, an exception will be raised. .. versionadded:: 1.8.0 @@ -192,7 +204,7 @@ def nanmin(a, axis=None, out=None, keepdims=False): Notes ----- - Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic (IEEE 754). This means that Not a Number is not equivalent to infinity. Positive infinity is treated as a very large number and negative infinity is treated as a very small (i.e. negative) number. @@ -217,27 +229,30 @@ def nanmin(a, axis=None, out=None, keepdims=False): -inf """ + kwargs = {} + if keepdims is not np._NoValue: + kwargs['keepdims'] = keepdims if not isinstance(a, np.ndarray) or type(a) is np.ndarray: # Fast, but not safe for subclasses of ndarray - res = np.fmin.reduce(a, axis=axis, out=out, keepdims=keepdims) + res = np.fmin.reduce(a, axis=axis, out=out, **kwargs) if np.isnan(res).any(): - warnings.warn("All-NaN axis encountered", RuntimeWarning) + warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2) else: # Slow, but safe for subclasses of ndarray a, mask = _replace_nan(a, +np.inf) - res = np.amin(a, axis=axis, out=out, keepdims=keepdims) + res = np.amin(a, axis=axis, out=out, **kwargs) if mask is None: return res # Check for all-NaN axis - mask = np.all(mask, axis=axis, keepdims=keepdims) + mask = np.all(mask, axis=axis, **kwargs) if np.any(mask): res = _copyto(res, np.nan, mask) - warnings.warn("All-NaN axis encountered", RuntimeWarning) + warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2) return res -def nanmax(a, axis=None, out=None, keepdims=False): +def nanmax(a, axis=None, out=None, keepdims=np._NoValue): """ Return the maximum of an array or maximum along an axis, ignoring any NaNs. When all-NaN slices are encountered a ``RuntimeWarning`` is @@ -259,9 +274,14 @@ def nanmax(a, axis=None, out=None, keepdims=False): .. versionadded:: 1.8.0 keepdims : bool, optional - If this is set to True, the axes which are reduced are left in the - result as dimensions with size one. With this option, the result - will broadcast correctly against the original `a`. + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one.
With this option, + the result will broadcast correctly against the original `a`. + + If the value is anything but the default, then + `keepdims` will be passed through to the `max` method + of sub-classes of `ndarray`. If the sub-class's method + does not implement `keepdims`, an exception will be raised. .. versionadded:: 1.8.0 @@ -291,7 +311,7 @@ def nanmax(a, axis=None, out=None, keepdims=False): Notes ----- - Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic (IEEE 754). This means that Not a Number is not equivalent to infinity. Positive infinity is treated as a very large number and negative infinity is treated as a very small (i.e. negative) number. @@ -316,23 +336,26 @@ def nanmax(a, axis=None, out=None, keepdims=False): inf """ + kwargs = {} + if keepdims is not np._NoValue: + kwargs['keepdims'] = keepdims if not isinstance(a, np.ndarray) or type(a) is np.ndarray: # Fast, but not safe for subclasses of ndarray - res = np.fmax.reduce(a, axis=axis, out=out, keepdims=keepdims) + res = np.fmax.reduce(a, axis=axis, out=out, **kwargs) if np.isnan(res).any(): - warnings.warn("All-NaN slice encountered", RuntimeWarning) + warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2) else: # Slow, but safe for subclasses of ndarray a, mask = _replace_nan(a, -np.inf) - res = np.amax(a, axis=axis, out=out, keepdims=keepdims) + res = np.amax(a, axis=axis, out=out, **kwargs) if mask is None: return res # Check for all-NaN axis - mask = np.all(mask, axis=axis, keepdims=keepdims) + mask = np.all(mask, axis=axis, **kwargs) if np.any(mask): res = _copyto(res, np.nan, mask) - warnings.warn("All-NaN axis encountered", RuntimeWarning) + warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2) return res @@ -425,12 +448,12 @@ def nanargmax(a, axis=None): return res -def nansum(a, axis=None, dtype=None, out=None, keepdims=0): +def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ Return the sum of array elements over a given axis treating Not a Numbers (NaNs) as zero. - In Numpy versions <= 1.8 Nan is returned for slices that are all-NaN or + In NumPy versions <= 1.8.0 NaN is returned for slices that are all-NaN or empty. In later versions zero is returned. Parameters ---------- @@ -459,15 +482,25 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=0): .. versionadded:: 1.8.0 keepdims : bool, optional - If True, the axes which are reduced are left in the result as - dimensions with size one. With this option, the result will - broadcast correctly against the original `arr`. + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + If the value is anything but the default, then + `keepdims` will be passed through to the `mean` or `sum` methods + of sub-classes of `ndarray`. If the sub-class's method + does not implement `keepdims`, an exception will be raised. .. versionadded:: 1.8.0 Returns ------- - y : ndarray or numpy scalar + nansum : ndarray + A new array holding the result is returned unless `out` is + specified, in which case it is returned. The result has the same + size as `a`, and the same shape as `a` if `axis` is not None + or `a` is a 1-d array.
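(Editorial sketch, not part of this changeset, assuming a plain ndarray input: the keepdims behaviour documented above is what lets a reduced result broadcast back against the input.)

    import numpy as np

    a = np.array([[1.0, np.nan], [3.0, 4.0]])
    row_sums = np.nansum(a, axis=1, keepdims=True)  # shape (2, 1); NaN counts as 0
    normalized = a / row_sums                       # broadcasts thanks to keepdims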
See Also -------- @@ -480,11 +513,6 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=0): If both positive and negative infinity are present, the sum will be Not A Number (NaN). - Numpy integer arithmetic is modular. If the size of a sum exceeds the - size of an integer accumulator, its value will wrap around and the - result will be incorrect. Specifying ``dtype=double`` can alleviate - that problem. - Examples -------- >>> np.nansum(1) @@ -510,7 +538,199 @@ return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) -def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): +def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): + """ + Return the product of array elements over a given axis treating Not a + Numbers (NaNs) as ones. + + One is returned for slices that are all-NaN or empty. + + .. versionadded:: 1.10.0 + + Parameters + ---------- + a : array_like + Array containing numbers whose product is desired. If `a` is not an + array, a conversion is attempted. + axis : int, optional + Axis along which the product is computed. The default is to compute + the product of the flattened array. + dtype : data-type, optional + The type of the returned array and of the accumulator in which the + elements are multiplied. By default, the dtype of `a` is used. An + exception is when `a` has an integer type with less precision than + the platform (u)intp. In that case, the default will be either + (u)int32 or (u)int64 depending on whether the platform is 32 or 64 + bits. For inexact inputs, dtype must be inexact. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``. If provided, it must have the same shape as the + expected output, but the type will be cast if necessary. See + `doc.ufuncs` for details. The casting of NaN to integer can yield + unexpected results. + keepdims : bool, optional + If True, the axes which are reduced are left in the result as + dimensions with size one. With this option, the result will + broadcast correctly against the original `a`. + + Returns + ------- + nanprod : ndarray + A new array holding the result is returned unless `out` is + specified, in which case it is returned. + + See Also + -------- + numpy.prod : Product across array propagating NaNs. + isnan : Show which elements are NaN. + + Examples + -------- + >>> np.nanprod(1) + 1 + >>> np.nanprod([1]) + 1 + >>> np.nanprod([1, np.nan]) + 1.0 + >>> a = np.array([[1, 2], [3, np.nan]]) + >>> np.nanprod(a) + 6.0 + >>> np.nanprod(a, axis=0) + array([ 3., 2.]) + + """ + a, mask = _replace_nan(a, 1) + return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + + +def nancumsum(a, axis=None, dtype=None, out=None): + """ + Return the cumulative sum of array elements over a given axis treating Not a + Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are + encountered and leading NaNs are replaced by zeros. + + Zeros are returned for slices that are all-NaN or empty. + + .. versionadded:: 1.12.0 + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + Axis along which the cumulative sum is computed. The default + (None) is to compute the cumsum over the flattened array. + dtype : dtype, optional + Type of the returned array and of the accumulator in which the + elements are summed.
If `dtype` is not specified, it defaults + to the dtype of `a`, unless `a` has an integer dtype with a + precision less than that of the default platform integer. In + that case, the default platform integer is used. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output + but the type will be cast if necessary. See `doc.ufuncs` + (Section "Output arguments") for more details. + + Returns + ------- + nancumsum : ndarray + A new array holding the result is returned unless `out` is + specified, in which case it is returned. The result has the same + size as `a`, and the same shape as `a` if `axis` is not None + or `a` is a 1-d array. + + See Also + -------- + numpy.cumsum : Cumulative sum across array propagating NaNs. + isnan : Show which elements are NaN. + + Examples + -------- + >>> np.nancumsum(1) + array([1]) + >>> np.nancumsum([1]) + array([1]) + >>> np.nancumsum([1, np.nan]) + array([ 1., 1.]) + >>> a = np.array([[1, 2], [3, np.nan]]) + >>> np.nancumsum(a) + array([ 1., 3., 6., 6.]) + >>> np.nancumsum(a, axis=0) + array([[ 1., 2.], + [ 4., 2.]]) + >>> np.nancumsum(a, axis=1) + array([[ 1., 3.], + [ 3., 3.]]) + + """ + a, mask = _replace_nan(a, 0) + return np.cumsum(a, axis=axis, dtype=dtype, out=out) + + +def nancumprod(a, axis=None, dtype=None, out=None): + """ + Return the cumulative product of array elements over a given axis treating Not a + Numbers (NaNs) as one. The cumulative product does not change when NaNs are + encountered and leading NaNs are replaced by ones. + + Ones are returned for slices that are all-NaN or empty. + + .. versionadded:: 1.12.0 + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + Axis along which the cumulative product is computed. By default + the input is flattened. + dtype : dtype, optional + Type of the returned array, as well as of the accumulator in which + the elements are multiplied. If *dtype* is not specified, it + defaults to the dtype of `a`, unless `a` has an integer dtype with + a precision less than that of the default platform integer. In + that case, the default platform integer is used instead. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output + but the type of the resulting values will be cast if necessary. + + Returns + ------- + nancumprod : ndarray + A new array holding the result is returned unless `out` is + specified, in which case it is returned. + + See Also + -------- + numpy.cumprod : Cumulative product across array propagating NaNs. + isnan : Show which elements are NaN. + + Examples + -------- + >>> np.nancumprod(1) + array([1]) + >>> np.nancumprod([1]) + array([1]) + >>> np.nancumprod([1, np.nan]) + array([ 1., 1.]) + >>> a = np.array([[1, 2], [3, np.nan]]) + >>> np.nancumprod(a) + array([ 1., 2., 6., 6.]) + >>> np.nancumprod(a, axis=0) + array([[ 1., 2.], + [ 3., 2.]]) + >>> np.nancumprod(a, axis=1) + array([[ 1., 2.], + [ 3., 3.]]) + + """ + a, mask = _replace_nan(a, 1) + return np.cumprod(a, axis=axis, dtype=dtype, out=out) + + +def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ Compute the arithmetic mean along the specified axis, ignoring NaNs. @@ -540,9 +760,14 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): expected output, but the type will be cast if necessary. See `doc.ufuncs` for details.
keepdims : bool, optional - If this is set to True, the axes which are reduced are left in the - result as dimensions with size one. With this option, the result - will broadcast correctly against the original `arr`. + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + If the value is anything but the default, then + `keepdims` will be passed through to the `mean` or `sum` methods + of sub-classes of `ndarray`. If the sub-class's method + does not implement `keepdims`, an exception will be raised. Returns ------- @@ -590,16 +815,13 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): if out is not None and not issubclass(out.dtype.type, np.inexact): raise TypeError("If a is inexact, then out must be inexact") - # The warning context speeds things up. - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims) - tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) - avg = _divide_by_count(tot, cnt, out=out) + cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims) + tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + avg = _divide_by_count(tot, cnt, out=out) isbad = (cnt == 0) if isbad.any(): - warnings.warn("Mean of empty slice", RuntimeWarning) + warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2) # NaN is the only possible bad value, so no further # action is needed to handle bad results. return avg @@ -613,7 +835,7 @@ def _nanmedian1d(arr1d, overwrite_input=False): c = np.isnan(arr1d) s = np.where(c)[0] if s.size == arr1d.size: - warnings.warn("All-NaN slice encountered", RuntimeWarning) + warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=3) return np.nan elif s.size == 0: return np.median(arr1d, overwrite_input=overwrite_input) @@ -654,22 +876,25 @@ def _nanmedian(a, axis=None, out=None, overwrite_input=False): out[...] = result return result + def _nanmedian_small(a, axis=None, out=None, overwrite_input=False): """ - sort + indexing median, faster for small medians along multiple dimensions - due to the high overhead of apply_along_axis + sort + indexing median, faster for small medians along multiple + dimensions due to the high overhead of apply_along_axis + see nanmedian for parameter usage """ a = np.ma.masked_array(a, np.isnan(a)) m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input) for i in range(np.count_nonzero(m.mask.ravel())): - warnings.warn("All-NaN slice encountered", RuntimeWarning) + warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=3) if out is not None: out[...] = m.filled(np.nan) return out return m.filled(np.nan) -def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False): + +def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue): """ Compute the median along the specified axis, while ignoring NaNs. @@ -681,36 +906,41 @@ ---------- a : array_like Input array or object that can be converted to an array. - axis : int, optional - Axis along which the medians are computed. The default (axis=None) + axis : {int, sequence of int, None}, optional + Axis or axes along which the medians are computed. The default + is to compute the median along a flattened version of the array.
A sequence of axes is supported since version 1.9.0. out : ndarray, optional - Alternative output array in which to place the result. It must have - the same shape and buffer length as the expected output, but the - type (of the output) will be cast if necessary. + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. overwrite_input : bool, optional - If True, then allow use of memory of input array (a) for + If True, then allow use of memory of input array `a` for calculations. The input array will be modified by the call to - median. This will save memory when you do not need to preserve + `median`. This will save memory when you do not need to preserve the contents of the input array. Treat the input as undefined, but it will probably be fully or partially sorted. Default is - False. Note that, if `overwrite_input` is True and the input - is not already an ndarray, an error will be raised. + False. If `overwrite_input` is ``True`` and `a` is not already an + `ndarray`, an error will be raised. keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. - + the result will broadcast correctly against the original `a`. + If this is anything but the default value, it will be passed + through (in the special case of an empty array) to the + `mean` function of the underlying array. If the array is + a sub-class and `mean` does not have the kwarg `keepdims`, this + will raise a RuntimeError. + Returns ------- median : ndarray - A new array holding the result. If the input contains integers, or - floats of smaller precision than 64, then the output data-type is - float64. Otherwise, the output data-type is the same as that of the - input. + A new array holding the result. If the input contains integers + or floats smaller than ``float64``, then the output data-type is + ``np.float64``. Otherwise, the data-type of the output is the + same as that of the input. If `out` is specified, that array is + returned instead. See Also -------- @@ -718,10 +948,10 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False): Notes ----- - Given a vector V of length N, the median of V is the middle value of - a sorted copy of V, ``V_sorted`` - i.e., ``V_sorted[(N-1)/2]``, when N is - odd. When N is even, it is the average of the two middle values of - ``V_sorted``. + Given a vector ``V`` of length ``N``, the median of ``V`` is the + middle value of a sorted copy of ``V``, ``V_sorted`` - i.e., + ``V_sorted[(N-1)/2]``, when ``N`` is odd and the average of the two + middle values of ``V_sorted`` when ``N`` is even. Examples -------- @@ -756,30 +986,34 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False): r, k = _ureduce(a, func=_nanmedian, axis=axis, out=out, overwrite_input=overwrite_input) - if keepdims: + if keepdims and keepdims is not np._NoValue: return r.reshape(k) else: return r def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear', keepdims=False): + interpolation='linear', keepdims=np._NoValue): """ - Compute the qth percentile of the data along the specified axis, while - ignoring nan values. + Compute the qth percentile of the data along the specified axis, + while ignoring nan values. - Returns the qth percentile of the array elements.
+ Returns the qth percentile(s) of the array elements. + + .. versionadded:: 1.9.0 Parameters ---------- a : array_like Input array or object that can be converted to an array. q : float in range of [0,100] (or sequence of floats) - Percentile to compute which must be between 0 and 100 inclusive. - axis : int or sequence of int, optional - Axis along which the percentiles are computed. The default (None) - is to compute the percentiles along a flattened version of the array. - A sequence of axes is supported since version 1.9.0. + Percentile to compute, which must be between 0 and 100 + inclusive. + axis : {int, sequence of int, None}, optional + Axis or axes along which the percentiles are computed. The + default is to compute the percentile(s) along a flattened + version of the array. A sequence of axes is supported since + version 1.9.0. out : ndarray, optional Alternative output array in which to place the result. It must have the same shape and buffer length as the expected output, @@ -787,39 +1021,46 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, overwrite_input : bool, optional If True, then allow use of memory of input array `a` for calculations. The input array will be modified by the call to - percentile. This will save memory when you do not need to preserve - the contents of the input array. In this case you should not make - any assumptions about the content of the passed in array `a` after - this function completes -- treat it as undefined. Default is False. - Note that, if the `a` input is not already an array this parameter - will have no effect, `a` will be converted to an array internally - regardless of the value of this parameter. + `percentile`. This will save memory when you do not need to + preserve the contents of the input array. In this case you + should not make any assumptions about the contents of the input + `a` after this function completes -- treat it as undefined. + Default is False. If `a` is not already an array, this parameter + will have no effect as `a` will be converted to an array + internally regardless of the value of this parameter. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - This optional parameter specifies the interpolation method to use, - when the desired quantile lies between two data points `i` and `j`: - * linear: `i + (j - i) * fraction`, where `fraction` is the - fractional part of the index surrounded by `i` and `j`. - * lower: `i`. - * higher: `j`. - * nearest: `i` or `j` whichever is nearest. - * midpoint: (`i` + `j`) / 2. - + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` is + the fractional part of the index surrounded by ``i`` and + ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + If this is anything but the default value it will be passed + through (in the special case of an empty array) to the + `mean` function of the underlying array. 
If the array is + a sub-class and `mean` does not have the kwarg `keepdims`, this + will raise a RuntimeError. Returns ------- - nanpercentile : scalar or ndarray - If a single percentile `q` is given and axis=None a scalar is - returned. If multiple percentiles `q` are given an array holding - the result is returned. The results are listed in the first axis. - (If `out` is specified, in which case that array is returned - instead). If the input contains integers, or floats of smaller - precision than 64, then the output data-type is float64. Otherwise, - the output data-type is the same as that of the input. + percentile : scalar or ndarray + If `q` is a single percentile and `axis=None`, then the result + is a scalar. If multiple percentiles are given, first axis of + the result corresponds to the percentiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. See Also -------- @@ -827,12 +1068,14 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, Notes ----- - Given a vector V of length N, the q-th percentile of V is the q-th ranked - value in a sorted copy of V. The values and distances of the two - nearest neighbors as well as the `interpolation` parameter will - determine the percentile if the normalized ranking does not match q - exactly. This function is the same as the median if ``q=50``, the same - as the minimum if ``q=0``and the same as the maximum if ``q=100``. + Given a vector ``V`` of length ``N``, the ``q``-th percentile of + ``V`` is the value ``q/100`` of the way from the minimum to the + maximum in a sorted copy of ``V``. The values and distances of + the two nearest neighbors as well as the `interpolation` parameter + will determine the percentile if the normalized ranking does not + match the location of ``q`` exactly. This function is the same as + the median if ``q=50``, the same as the minimum if ``q=0`` and the + same as the maximum if ``q=100``. Examples -------- @@ -846,24 +1089,21 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, >>> np.nanpercentile(a, 50) 3.5 >>> np.nanpercentile(a, 50, axis=0) - array([[ 6.5, 4.5, 2.5]]) - >>> np.nanpercentile(a, 50, axis=1) + array([ 6.5, 2., 2.5]) + >>> np.nanpercentile(a, 50, axis=1, keepdims=True) array([[ 7.], [ 2.]]) >>> m = np.nanpercentile(a, 50, axis=0) >>> out = np.zeros_like(m) - >>> np.nanpercentile(a, 50, axis=0, out=m) - array([[ 6.5, 4.5, 2.5]]) + >>> np.nanpercentile(a, 50, axis=0, out=out) + array([ 6.5, 2., 2.5]) >>> m - array([[ 6.5, 4.5, 2.5]]) + array([ 6.5, 2.
, 2.5]) + >>> b = a.copy() >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True) - array([[ 7.], - [ 2.]]) + array([ 7., 2.]) >>> assert not np.all(a==b) - >>> b = a.copy() - >>> np.nanpercentile(b, 50, axis=None, overwrite_input=True) - array([ 3.5]) """ @@ -877,7 +1117,7 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, r, k = _ureduce(a, func=_nanpercentile, q=q, axis=axis, out=out, overwrite_input=overwrite_input, interpolation=interpolation) - if keepdims: + if keepdims and keepdims is not np._NoValue: if q.ndim == 0: return r.reshape(k) else: @@ -887,7 +1127,7 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False, - interpolation='linear', keepdims=False): + interpolation='linear'): """ Private function that doesn't support extended axis or keepdims. These methods are extended to this function using _ureduce @@ -900,6 +1140,11 @@ def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False, else: result = np.apply_along_axis(_nanpercentile1d, axis, a, q, overwrite_input, interpolation) + # apply_along_axis fills in collapsed axis with results. + # Move that axis to the beginning to match percentile's + # convention. + if q.ndim != 0: + result = np.rollaxis(result, axis) if out is not None: out[...] = result @@ -908,15 +1153,19 @@ def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False, def _nanpercentile1d(arr1d, q, overwrite_input=False, interpolation='linear'): """ - Private function for rank 1 arrays. Compute percentile ignoring NaNs. - See nanpercentile for parameter usage + Private function for rank 1 arrays. Compute percentile ignoring + NaNs. + See nanpercentile for parameter usage """ c = np.isnan(arr1d) s = np.where(c)[0] if s.size == arr1d.size: - warnings.warn("All-NaN slice encountered", RuntimeWarning) - return np.nan + warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=3) + if q.ndim == 0: + return np.nan + else: + return np.nan * np.ones((len(q),)) elif s.size == 0: return np.percentile(arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation) @@ -934,7 +1183,7 @@ def _nanpercentile1d(arr1d, q, overwrite_input=False, interpolation='linear'): interpolation=interpolation) -def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): +def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): """ Compute the variance along the specified axis, while ignoring NaNs. @@ -970,7 +1219,8 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. + the result will broadcast correctly against the original `a`. + Returns ------- @@ -1009,6 +1259,9 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): below). Specifying a higher-accuracy accumulator using the ``dtype`` keyword can alleviate this issue. 
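(Editorial sketch, not part of this changeset, illustrating the ddof divisor and the higher-precision accumulator mentioned in the Notes:)

    import numpy as np

    a = np.array([[1.0, np.nan], [3.0, 4.0]])
    np.nanvar(a)              # divisor is the non-NaN count, 3
    np.nanvar(a, ddof=1)      # divisor is 3 - 1 = 2 (sample variance)
    # a float64 accumulator tames round-off error for float32 input
    np.nanvar(a.astype(np.float32), dtype=np.float64)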
+ For this function to work on sub-classes of ndarray, they must define + `sum` with the kwarg `keepdims`. + Examples -------- >>> a = np.array([[1, np.nan], [3, 4]]) @@ -1032,40 +1285,46 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): if out is not None and not issubclass(out.dtype.type, np.inexact): raise TypeError("If a is inexact, then out must be inexact") - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - - # Compute mean - cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=True) - avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=True) - avg = _divide_by_count(avg, cnt) - - # Compute squared deviation from mean. - np.subtract(arr, avg, out=arr, casting='unsafe') - arr = _copyto(arr, 0, mask) - if issubclass(arr.dtype.type, np.complexfloating): - sqr = np.multiply(arr, arr.conj(), out=arr).real - else: - sqr = np.multiply(arr, arr, out=arr) + # Compute mean + if type(arr) is np.matrix: + _keepdims = np._NoValue + else: + _keepdims = True + # We need to special case matrix for reverse compatibility: the sums + # below must be called with keepdims=True for this to work. matrix + # now raises an error when keepdims is passed, but it drops the kwarg + # precisely because its reductions always behave as if keepdims=True, + # which is why passing it used to work by serendipity. + cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims) + avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=_keepdims) + avg = _divide_by_count(avg, cnt) + + # Compute squared deviation from mean. + np.subtract(arr, avg, out=arr, casting='unsafe') + arr = _copyto(arr, 0, mask) + if issubclass(arr.dtype.type, np.complexfloating): + sqr = np.multiply(arr, arr.conj(), out=arr).real + else: + sqr = np.multiply(arr, arr, out=arr) - # Compute variance. - var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) - if var.ndim < cnt.ndim: - # Subclasses of ndarray may ignore keepdims, so check here. - cnt = cnt.squeeze(axis) - dof = cnt - ddof - var = _divide_by_count(var, dof) + # Compute variance. + var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + if var.ndim < cnt.ndim: + # Subclasses of ndarray may ignore keepdims, so check here. + cnt = cnt.squeeze(axis) + dof = cnt - ddof + var = _divide_by_count(var, dof) isbad = (dof <= 0) if np.any(isbad): - warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning) + warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning, stacklevel=2) # NaN, inf, or negative numbers are all possible bad # values, so explicitly replace them with NaN. var = _copyto(var, np.nan, isbad) return var -def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): +def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): """ Compute the standard deviation along the specified axis, while ignoring NaNs. @@ -1099,10 +1358,16 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): Means Delta Degrees of Freedom. The divisor used in calculations is ``N - ddof``, where ``N`` represents the number of non-NaN elements. By default `ddof` is zero. + keepdims : bool, optional If this is set to True, the axes which are reduced are left in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. + the result will broadcast correctly against the original `a`. + + If this value is anything but the default, it is passed through + as-is to the relevant functions of the sub-classes.
If these + functions do not have a `keepdims` kwarg, a RuntimeError will + be raised. Returns ------- diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index cbef1a6e2..05010a2d0 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -6,20 +6,20 @@ import re import itertools import warnings import weakref -from operator import itemgetter +from operator import itemgetter, index as opindex import numpy as np from . import format from ._datasource import DataSource -from ._compiled_base import packbits, unpackbits +from numpy.core.multiarray import packbits, unpackbits from ._iotools import ( LineSplitter, NameValidator, StringConverter, ConverterError, - ConverterLockError, ConversionWarning, _is_string_like, has_nested_fields, - flatten_dtype, easy_dtype, _bytes_to_name + ConverterLockError, ConversionWarning, _is_string_like, + has_nested_fields, flatten_dtype, easy_dtype, _bytes_to_name ) from numpy.compat import ( - asbytes, asstr, asbytes_nested, bytes, basestring, unicode + asbytes, asstr, asbytes_nested, bytes, basestring, unicode, is_pathlib_path ) if sys.version_info[0] >= 3: @@ -37,52 +37,6 @@ __all__ = [ ] -def seek_gzip_factory(f): - """Use this factory to produce the class so that we can do a lazy - import on gzip. - - """ - import gzip - - class GzipFile(gzip.GzipFile): - - def seek(self, offset, whence=0): - # figure out new position (we can only seek forwards) - if whence == 1: - offset = self.offset + offset - - if whence not in [0, 1]: - raise IOError("Illegal argument") - - if offset < self.offset: - # for negative seek, rewind and do positive seek - self.rewind() - count = offset - self.offset - for i in range(count // 1024): - self.read(1024) - self.read(count % 1024) - - def tell(self): - return self.offset - - if isinstance(f, str): - f = GzipFile(f) - elif isinstance(f, gzip.GzipFile): - # cast to our GzipFile if its already a gzip.GzipFile - - try: - name = f.name - except AttributeError: - # Backward compatibility for <= 2.5 - name = f.filename - mode = f.mode - - f = GzipFile(fileobj=f.fileobj, filename=name) - f.mode = mode - - return f - - class BagObj(object): """ BagObj(obj) @@ -122,20 +76,29 @@ class BagObj(object): return object.__getattribute__(self, '_obj')[key] except KeyError: raise AttributeError(key) - + def __dir__(self): """ Enables dir(bagobj) to list the files in an NpzFile. - + This also enables tab-completion in an interpreter or IPython. """ return object.__getattribute__(self, '_obj').keys() -def zipfile_factory(*args, **kwargs): +def zipfile_factory(file, *args, **kwargs): + """ + Create a ZipFile. + + Allows for Zip64, and the `file` argument can accept file, str, or + pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile + constructor. + """ + if is_pathlib_path(file): + file = str(file) import zipfile kwargs['allowZip64'] = True - return zipfile.ZipFile(*args, **kwargs) + return zipfile.ZipFile(file, *args, **kwargs) class NpzFile(object): @@ -164,6 +127,12 @@ class NpzFile(object): f : BagObj instance An object on which attribute can be performed as an alternative to getitem access on the `NpzFile` instance itself. + allow_pickle : bool, optional + Allow loading pickled data. Default: True + pickle_kwargs : dict, optional + Additional keyword arguments to pass on to pickle.load. + These are only useful when loading object arrays saved on + Python 2 when using Python 3. 
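(Editorial sketch, not part of this changeset; the file names are made up. The new allow_pickle switch in practice:)

    import numpy as np

    np.save('plain.npy', np.arange(3))        # numeric data, no pickling involved
    np.load('plain.npy', allow_pickle=False)  # fine: array([0, 1, 2])

    obj = np.array([{'a': 1}], dtype=object)  # object arrays go through pickle
    np.save('objects.npy', obj)
    # np.load('objects.npy', allow_pickle=False) raises ValueError, since
    # unpickling can execute arbitrary code; the default allow_pickle=True
    # is needed to read it back.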
Parameters ---------- @@ -195,12 +164,15 @@ class NpzFile(object): """ - def __init__(self, fid, own_fid=False): + def __init__(self, fid, own_fid=False, allow_pickle=True, + pickle_kwargs=None): # Import is postponed to here since zipfile depends on gzip, an # optional component of the so-called standard library. _zip = zipfile_factory(fid) self._files = _zip.namelist() self.files = [] + self.allow_pickle = allow_pickle + self.pickle_kwargs = pickle_kwargs for x in self._files: if x.endswith('.npy'): self.files.append(x[:-4]) @@ -256,7 +228,9 @@ class NpzFile(object): bytes.close() if magic == format.MAGIC_PREFIX: bytes = self.zip.open(key) - return format.read_array(bytes) + return format.read_array(bytes, + allow_pickle=self.allow_pickle, + pickle_kwargs=self.pickle_kwargs) else: return self.zip.read(key) else: @@ -289,13 +263,14 @@ class NpzFile(object): return self.files.__contains__(key) -def load(file, mmap_mode=None): +def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, + encoding='ASCII'): """ Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files. Parameters ---------- - file : file-like object or string + file : file-like object, string, or pathlib.Path The file to read. File-like objects must support the ``seek()`` and ``read()`` methods. Pickled files require that the file-like object support the ``readline()`` method as well. @@ -306,6 +281,23 @@ def load(file, mmap_mode=None): and sliced like any ndarray. Memory mapping is especially useful for accessing small fragments of large files without reading the entire file into memory. + allow_pickle : bool, optional + Allow loading pickled object arrays stored in npy files. Reasons for + disallowing pickles include security, as loading pickled data can + execute arbitrary code. If pickles are disallowed, loading object + arrays will fail. + Default: True + fix_imports : bool, optional + Only useful when loading Python 2 generated pickled files on Python 3, + which includes npy/npz files containing object arrays. If `fix_imports` + is True, pickle will try to map the old Python 2 names to the new names + used in Python 3. + encoding : str, optional + What encoding to use when reading Python 2 strings. Only useful when + loading Python 2 generated pickled files on Python 3, which includes + npy/npz files containing object arrays. Values other than 'latin1', + 'ASCII', and 'bytes' are not allowed, as they can corrupt numerical + data. Default: 'ASCII' Returns ------- @@ -317,11 +309,14 @@ def load(file, mmap_mode=None): ------ IOError If the input file does not exist or cannot be read. + ValueError + The file contains an object array, but allow_pickle=False given. See Also -------- save, savez, savez_compressed, loadtxt memmap : Create a memory-map to an array stored in a file on disk. + lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file. Notes ----- @@ -370,39 +365,65 @@ def load(file, mmap_mode=None): memmap([4, 5, 6]) """ - import gzip - own_fid = False if isinstance(file, basestring): fid = open(file, "rb") own_fid = True - elif isinstance(file, gzip.GzipFile): - fid = seek_gzip_factory(file) + elif is_pathlib_path(file): + fid = file.open("rb") + own_fid = True else: fid = file + if encoding not in ('ASCII', 'latin1', 'bytes'): + # The 'encoding' value for pickle also affects what encoding + # the serialized binary data of NumPy arrays is loaded + # in. Pickle does not pass on the encoding information to + # NumPy. 
The unpickling code in numpy.core.multiarray is + # written to assume that unicode data appearing where binary + # should be is in 'latin1'. 'bytes' is also safe, as is 'ASCII'. + # + # Other encoding values can corrupt binary data, and we + # purposefully disallow them. For the same reason, the errors= + # argument is not exposed, as values other than 'strict' + # can similarly silently corrupt numerical data. + raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'") + + if sys.version_info[0] >= 3: + pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports) + else: + # Nothing to do on Python 2 + pickle_kwargs = {} + try: # Code to distinguish from NumPy binary files and pickles. _ZIP_PREFIX = asbytes('PK\x03\x04') N = len(format.MAGIC_PREFIX) magic = fid.read(N) - fid.seek(-N, 1) # back-up + # If the file size is less than N, we need to make sure not + # to seek past the beginning of the file + fid.seek(-min(N, len(magic)), 1) # back-up if magic.startswith(_ZIP_PREFIX): # zip-file (assume .npz) # Transfer file ownership to NpzFile tmp = own_fid own_fid = False - return NpzFile(fid, own_fid=tmp) + return NpzFile(fid, own_fid=tmp, allow_pickle=allow_pickle, + pickle_kwargs=pickle_kwargs) elif magic == format.MAGIC_PREFIX: # .npy file if mmap_mode: return format.open_memmap(file, mode=mmap_mode) else: - return format.read_array(fid) + return format.read_array(fid, allow_pickle=allow_pickle, + pickle_kwargs=pickle_kwargs) else: # Try a pickle + if not allow_pickle: + raise ValueError("Cannot load file containing pickled data " + "when allow_pickle=False") try: - return pickle.load(fid) + return pickle.load(fid, **pickle_kwargs) except: raise IOError( "Failed to interpret file %s as a pickle" % repr(file)) @@ -411,17 +432,30 @@ fid.close() -def save(file, arr): +def save(file, arr, allow_pickle=True, fix_imports=True): """ Save an array to a binary file in NumPy ``.npy`` format. Parameters ---------- - file : file or str + file : file, str, or pathlib.Path File or filename to which the data is saved. If file is a file-object, - then the filename is unchanged. If file is a string, a ``.npy`` + then the filename is unchanged. If file is a string or Path, a ``.npy`` extension will be appended to the file name if it does not already have one. + allow_pickle : bool, optional + Allow saving object arrays using Python pickles. Reasons for disallowing + pickles include security (loading pickled data can execute arbitrary + code) and portability (pickled objects may not be loadable on different + Python installations, for example if the stored objects require libraries + that are not available, and not all pickled data is compatible between + Python 2 and Python 3). + Default: True + fix_imports : bool, optional + Only useful in forcing objects in object arrays on Python 3 to be + pickled in a Python 2 compatible way. If `fix_imports` is True, pickle + will try to map the new Python 3 names to the old module names used in + Python 2, so that the pickle data stream is readable with Python 2. arr : array_like Array data to be saved. @@ -432,7 +466,9 @@ Notes ----- - For a description of the ``.npy`` format, see `format`.
+ For a description of the ``.npy`` format, see the module docstring + of `numpy.lib.format` or the NumPy Enhancement Proposal + http://docs.scipy.org/doc/numpy/neps/npy-format.html Examples -------- @@ -453,12 +489,24 @@ def save(file, arr): file = file + '.npy' fid = open(file, "wb") own_fid = True + elif is_pathlib_path(file): + if not file.name.endswith('.npy'): + file = file.parent / (file.name + '.npy') + fid = file.open("wb") + own_fid = True else: fid = file + if sys.version_info[0] >= 3: + pickle_kwargs = dict(fix_imports=fix_imports) + else: + # Nothing to do on Python 2 + pickle_kwargs = None + try: arr = np.asanyarray(arr) - format.write_array(fid, arr) + format.write_array(fid, arr, allow_pickle=allow_pickle, + pickle_kwargs=pickle_kwargs) finally: if own_fid: fid.close() @@ -477,8 +525,9 @@ def savez(file, *args, **kwds): ---------- file : str or file Either the file name (string) or an open file (file-like object) - where the data will be saved. If file is a string, the ``.npz`` - extension will be appended to the file name if it is not already there. + where the data will be saved. If file is a string or a Path, the + ``.npz`` extension will be appended to the file name if it is not + already there. args : Arguments, optional Arrays to save to the file. Since it is not possible for Python to know the names of the arrays outside `savez`, the arrays will be saved @@ -503,7 +552,9 @@ def savez(file, *args, **kwds): The ``.npz`` file format is a zipped archive of files named after the variables they contain. The archive is not compressed and each file in the archive contains one variable in ``.npy`` format. For a - description of the ``.npy`` format, see `format`. + description of the ``.npy`` format, see `numpy.lib.format` or the + NumPy Enhancement Proposal + http://docs.scipy.org/doc/numpy/neps/npy-format.html When opening the saved ``.npz`` file with `load` a `NpzFile` object is returned. This is a dictionary-like object which can be queried for @@ -568,7 +619,7 @@ def savez_compressed(file, *args, **kwds): _savez(file, args, kwds, True) -def _savez(file, args, kwds, compress): +def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): # Import is postponed to here since zipfile depends on gzip, an optional # component of the so-called standard library. import zipfile @@ -578,6 +629,9 @@ def _savez(file, args, kwds, compress): if isinstance(file, basestring): if not file.endswith('.npz'): file = file + '.npz' + elif is_pathlib_path(file): + if not file.name.endswith('.npz'): + file = file.parent / (file.name + '.npz') namedict = kwds for i, val in enumerate(args): @@ -595,17 +649,25 @@ def _savez(file, args, kwds, compress): zipf = zipfile_factory(file, mode="w", compression=compression) # Stage arrays in a temporary file on disk, before writing to zip. - fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy') + + # Since target file might be big enough to exceed capacity of a global + # temporary directory, create temp file side-by-side with the target file. 
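    # Editorial aside (not part of this changeset): with file = '/data/big.npz'
    # the staging chunks are now created next to the target, e.g.
    # '/data/big.npz<random>-numpy.npy', rather than under
    # tempfile.gettempdir(), so a small /tmp no longer caps the archive size.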
+ file_dir, file_prefix = os.path.split(file) if _is_string_like(file) else (None, 'tmp') + fd, tmpfile = tempfile.mkstemp(prefix=file_prefix, dir=file_dir, suffix='-numpy.npy') os.close(fd) try: for key, val in namedict.items(): fname = key + '.npy' fid = open(tmpfile, 'wb') try: - format.write_array(fid, np.asanyarray(val)) + format.write_array(fid, np.asanyarray(val), + allow_pickle=allow_pickle, + pickle_kwargs=pickle_kwargs) fid.close() fid = None zipf.write(tmpfile, arcname=fname) + except IOError as exc: + raise IOError("Failed to write to %s: %s" % (tmpfile, exc)) finally: if fid: fid.close() @@ -617,6 +679,13 @@ def _getconv(dtype): """ Find the correct dtype converter. Adapted from matplotlib """ + + def floatconv(x): + x = x.lower() + if b'0x' in x: + return float.fromhex(asstr(x)) + return float(x) + typ = dtype.type if issubclass(typ, np.bool_): return lambda x: bool(int(x)) @@ -626,10 +695,12 @@ def _getconv(dtype): return np.int64 if issubclass(typ, np.integer): return lambda x: int(float(x)) + elif issubclass(typ, np.longdouble): + return np.longdouble elif issubclass(typ, np.floating): - return float + return floatconv elif issubclass(typ, np.complex): - return complex + return lambda x: complex(asstr(x)) elif issubclass(typ, np.bytes_): return bytes else: @@ -646,18 +717,19 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, Parameters ---------- - fname : file or str + fname : file, str, or pathlib.Path File, filename, or generator to read. If the filename extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note that generators should return byte strings for Python 3k. dtype : data-type, optional Data-type of the resulting array; default: float. If this is a - record data-type, the resulting array will be 1-dimensional, and + structured data-type, the resulting array will be 1-dimensional, and each row will be interpreted as an element of the array. In this case, the number of columns used must match the number of fields in the data-type. - comments : str, optional - The character used to indicate the start of a comment; + comments : str or sequence, optional + The characters or list of characters used to indicate the start of a + comment; default: '#'. delimiter : str, optional The string used to separate values. By default, this is any @@ -670,13 +742,21 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, ``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None. skiprows : int, optional Skip the first `skiprows` lines; default: 0. - usecols : sequence, optional - Which columns to read, with 0 being the first. For example, - ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns. + + usecols : int or sequence, optional + Which columns to read, with 0 being the first. For example, + ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns. The default, None, results in all columns being read. + + .. versionadded:: 1.11.0 + + Also when a single column has to be read it is possible to use + an integer instead of a tuple. E.g. ``usecols = 3`` reads the + fourth column the same way as ``usecols = (3,)`` would. + unpack : bool, optional If True, the returned array is transposed, so that arguments may be - unpacked using ``x, y, z = loadtxt(...)``. When used with a record + unpacked using ``x, y, z = loadtxt(...)``. When used with a structured data-type, arrays are returned for each field. Default is False.
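Two of the ``loadtxt`` changes above are easiest to see together: ``floatconv`` falls back to ``float.fromhex`` for ``0x``-prefixed tokens (1.10.0), and ``usecols`` now accepts a bare integer (1.11.0). A short sketch with made-up data, assuming a NumPy carrying this patch:

    from io import BytesIO
    import numpy as np

    # Hex literals, as produced by Python's float.hex(), now parse.
    print(np.loadtxt(BytesIO(b"0x1.8p+1 2.0\n0x1p-1 5.0")))
    # [[ 3.   2. ]
    #  [ 0.5  5. ]]

    # A single column can be requested with an int instead of a 1-tuple.
    print(np.loadtxt(BytesIO(b"1 2 3\n4 5 6"), usecols=2))
    # [ 3.  6.]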
ndmin : int, optional The returned array will have at least `ndmin` dimensions. @@ -702,9 +782,14 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, `genfromtxt` function provides more sophisticated handling of, e.g., lines with missing values. + .. versionadded:: 1.10.0 + + The strings produced by the Python float.hex method can be used as + input for floats. + Examples -------- - >>> from StringIO import StringIO # StringIO behaves like a file object + >>> from io import StringIO # StringIO behaves like a file object >>> c = StringIO("0 1\\n2 3") >>> np.loadtxt(c) array([[ 0., 1.], @@ -725,19 +810,47 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, """ # Type conversions for Py3 convenience - comments = asbytes(comments) + if comments is not None: + if isinstance(comments, (basestring, bytes)): + comments = [asbytes(comments)] + else: + comments = [asbytes(comment) for comment in comments] + + # Compile regex for comments beforehand + comments = (re.escape(comment) for comment in comments) + regex_comments = re.compile(asbytes('|').join(comments)) user_converters = converters if delimiter is not None: delimiter = asbytes(delimiter) + if usecols is not None: - usecols = list(usecols) + # Allow usecols to be a single int or a sequence of ints + try: + usecols_as_list = list(usecols) + except TypeError: + usecols_as_list = [usecols] + for col_idx in usecols_as_list: + try: + opindex(col_idx) + except TypeError as e: + e.args = ( + "usecols must be an int or a sequence of ints but " + "it contains at least one element of type %s" % + type(col_idx), + ) + raise + # Fall back to existing code + usecols = usecols_as_list fown = False try: + if is_pathlib_path(fname): + fname = str(fname) if _is_string_like(fname): fown = True if fname.endswith('.gz'): - fh = iter(seek_gzip_factory(fname)) + import gzip + fh = iter(gzip.GzipFile(fname)) elif fname.endswith('.bz2'): import bz2 fh = iter(bz2.BZ2File(fname)) @@ -797,8 +910,16 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, return tuple(ret) def split_line(line): - """Chop off comments, strip, and split at delimiter.""" - line = asbytes(line).split(comments)[0].strip(asbytes('\r\n')) + """Chop off comments, strip, and split at delimiter. + + Note that although the file is opened as text, this function + returns bytes. + + """ + line = asbytes(line) + if comments is not None: + line = regex_comments.split(asbytes(line), maxsplit=1)[0] + line = line.strip(asbytes('\r\n')) if line: return line.split(delimiter) else: @@ -824,7 +945,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, # End of lines reached first_line = '' first_vals = [] - warnings.warn('loadtxt: Empty input file: "%s"' % fname) + warnings.warn('loadtxt: Empty input file: "%s"' % fname, stacklevel=2) N = len(usecols or first_vals) dtype_types, packing = flatten_dtype(dtype) @@ -921,7 +1042,7 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted like `' (%s+%sj)' % (fmt, fmt)` b) a full string specifying every real and imaginary part, e.g. - `' %.4e %+.4j %.4e %+.4j %.4e %+.4j'` for 3 columns + `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns c) a list of specifiers, one per column - in this case, the real and imaginary part must have separate specifiers, e.g. 
`['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns @@ -1020,6 +1141,8 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', delimiter = asstr(delimiter) own_fh = False + if is_pathlib_path(fname): + fname = str(fname) if _is_string_like(fname): own_fh = True if fname.endswith('.gz'): @@ -1088,7 +1211,12 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', fh.write(asbytes(format % tuple(row2) + newline)) else: for row in X: - fh.write(asbytes(format % tuple(row) + newline)) + try: + fh.write(asbytes(format % tuple(row) + newline)) + except TypeError: + raise TypeError("Mismatch between array dtype ('%s') and " + "format specifier ('%s')" + % (str(X.dtype), format)) if len(footer) > 0: footer = footer.replace('\n', '\n' + comments) fh.write(asbytes(comments + footer + newline)) @@ -1186,12 +1314,12 @@ def fromregex(file, regexp, dtype): def genfromtxt(fname, dtype=float, comments='#', delimiter=None, - skiprows=0, skip_header=0, skip_footer=0, converters=None, - missing='', missing_values=None, filling_values=None, - usecols=None, names=None, - excludelist=None, deletechars=None, replace_space='_', - autostrip=False, case_sensitive=True, defaultfmt="f%i", - unpack=None, usemask=False, loose=True, invalid_raise=True): + skip_header=0, skip_footer=0, converters=None, + missing_values=None, filling_values=None, usecols=None, + names=None, excludelist=None, deletechars=None, + replace_space='_', autostrip=False, case_sensitive=True, + defaultfmt="f%i", unpack=None, usemask=False, loose=True, + invalid_raise=True, max_rows=None): """ Load data from a text file, with missing values handled as specified. @@ -1200,10 +1328,11 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, Parameters ---------- - fname : file or str - File, filename, or generator to read. If the filename extension is - `.gz` or `.bz2`, the file is first decompressed. Note that - generators must return byte strings in Python 3k. + fname : file, str, pathlib.Path, list of str, generator + File, filename, list, or generator to read. If the filename + extension is `.gz` or `.bz2`, the file is first decompressed. Note + that generators must return byte strings in Python 3k. The strings + in a list or produced by a generator are treated as lines. dtype : dtype, optional Data type of the resulting array. If None, the dtypes will be determined by the contents of each @@ -1215,9 +1344,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, The string used to separate values. By default, any consecutive whitespaces act as delimiter. An integer or sequence of integers can also be provided as width(s) of each field. - skip_rows : int, optional - `skip_rows` was deprecated in numpy 1.5, and will be removed in - numpy 2.0. Please use `skip_header` instead. + skiprows : int, optional + `skiprows` was removed in numpy 1.10. Please use `skip_header` instead. skip_header : int, optional The number of lines to skip at the beginning of the file. skip_footer : int, optional @@ -1227,8 +1355,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, The converters can also be used to provide a default value for missing data: ``converters = {3: lambda s: float(s or 0)}``. missing : variable, optional - `missing` was deprecated in numpy 1.5, and will be removed in - numpy 2.0. Please use `missing_values` instead. + `missing` was removed in numpy 1.10. Please use `missing_values` + instead. 
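With the deprecated ``missing`` keyword gone, missing entries are declared through ``missing_values`` and replaced through ``filling_values``; a minimal sketch, data illustrative:

    from io import BytesIO
    import numpy as np

    data = BytesIO(b"1,N/A,3\n4,5,N/A")
    out = np.genfromtxt(data, delimiter=',',
                        missing_values='N/A', filling_values=-1)
    print(out)
    # [[ 1. -1.  3.]
    #  [ 4.  5. -1.]]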
missing_values : variable, optional The set of strings corresponding to missing data. filling_values : variable, optional @@ -1272,6 +1400,12 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, If True, an exception is raised if an inconsistency is detected in the number of columns. If False, a warning is emitted and the offending lines are skipped. + max_rows : int, optional + The maximum number of rows to read. Must not be used with skip_footer + at the same time. If given, the value must be at least 1. Default is + to read the entire file. + + .. versionadded:: 1.10.0 Returns ------- @@ -1295,12 +1429,12 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, References ---------- - .. [1] Numpy User Guide, section `I/O with Numpy + .. [1] NumPy User Guide, section `I/O with NumPy <http://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_. Examples --------- - >>> from StringIO import StringIO + >>> from io import StringIO >>> import numpy as np Comma delimited file with mixed dtype @@ -1340,13 +1474,19 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')]) """ + if max_rows is not None: + if skip_footer: + raise ValueError( + "The keywords 'skip_footer' and 'max_rows' can not be " + "specified at the same time.") + if max_rows < 1: + raise ValueError("'max_rows' must be at least 1.") + # Py3 data conversions to bytes, for convenience if comments is not None: comments = asbytes(comments) if isinstance(delimiter, unicode): delimiter = asbytes(delimiter) - if isinstance(missing, unicode): - missing = asbytes(missing) if isinstance(missing_values, (unicode, list, tuple)): missing_values = asbytes_nested(missing_values) @@ -1363,6 +1503,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Initialize the filehandle, the LineSplitter and the NameValidator own_fhd = False try: + if is_pathlib_path(fname): + fname = str(fname) if isinstance(fname, basestring): if sys.version_info[0] == 2: fhd = iter(np.lib._datasource.open(fname, 'rbU')) @@ -1373,8 +1515,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, fhd = iter(fname) except TypeError: raise TypeError( - "fname must be a string, filehandle, or generator. " - "(got %s instead)" % type(fname)) + "fname must be a string, filehandle, list of strings, " + "or generator. Got %s instead." % type(fname)) split_line = LineSplitter(delimiter=delimiter, comments=comments, autostrip=autostrip)._handyman @@ -1383,13 +1525,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, case_sensitive=case_sensitive, replace_space=replace_space) - # Get the first valid lines after the first skiprows ones .. - if skiprows: - warnings.warn( - "The use of `skiprows` is deprecated, it will be removed in " - "numpy 2.0.\nPlease use `skip_header` instead.", - DeprecationWarning) - skip_header = skiprows # Skip the first `skip_header` rows for i in range(skip_header): next(fhd) @@ -1408,7 +1543,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # return an empty array if the datafile is empty first_line = asbytes('') first_values = [] - warnings.warn('genfromtxt: Empty input file: "%s"' % fname) + warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) # Should we take the first values as names ? 
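The ``max_rows`` checks above make the keyword a hard cap on parsed rows and mutually exclusive with ``skip_footer``; a quick sketch:

    from io import BytesIO
    import numpy as np

    data = BytesIO(b"1 2\n3 4\n5 6\n7 8")
    print(np.genfromtxt(data, max_rows=2))
    # [[ 1.  2.]
    #  [ 3.  4.]]

    # Passing skip_footer as well raises the ValueError coded above.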
if names is True: @@ -1438,7 +1573,11 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, names = validate_names(names) # Get the dtype if dtype is not None: - dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names) + dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names, + excludelist=excludelist, + deletechars=deletechars, + case_sensitive=case_sensitive, + replace_space=replace_space) # Make sure the names is a list (for 2.5) if names is not None: names = list(names) @@ -1514,16 +1653,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, for entry in missing_values: entry.extend([str(user_missing_values)]) - # Process the deprecated `missing` - if missing != asbytes(''): - warnings.warn( - "The use of `missing` is deprecated, it will be removed in " - "Numpy 2.0.\nPlease use `missing_values` instead.", - DeprecationWarning) - values = [str(_) for _ in missing.split(asbytes(","))] - for entry in missing_values: - entry.extend(values) - # Process the filling_values ............................... # Rename the input for convenience user_filling_values = filling_values @@ -1634,8 +1763,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Skip an empty line if nbvalues == 0: continue - # Select only the columns we need if usecols: + # Select only the columns we need try: values = [values[_] for _ in usecols] except IndexError: @@ -1648,7 +1777,10 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, append_to_rows(tuple(values)) if usemask: append_to_masks(tuple([v.strip() in m - for (v, m) in zip(values, missing_values)])) + for (v, m) in zip(values, + missing_values)])) + if len(rows) == max_rows: + break if own_fhd: fhd.close() @@ -1696,7 +1828,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, raise ValueError(errmsg) # Issue a warning ? else: - warnings.warn(errmsg, ConversionWarning) + warnings.warn(errmsg, ConversionWarning, stacklevel=2) # Strip the last skip_footer data if skip_footer > 0: diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py index 2b867e244..281d79ec5 100644 --- a/numpy/lib/polynomial.py +++ b/numpy/lib/polynomial.py @@ -15,7 +15,7 @@ import numpy.core.numeric as NX from numpy.core import (isscalar, abs, finfo, atleast_1d, hstack, dot, array, ones) from numpy.lib.twodim_base import diag, vander -from numpy.lib.function_base import trim_zeros, sort_complex +from numpy.lib.function_base import trim_zeros from numpy.lib.type_check import iscomplex, real, imag, mintypecode from numpy.linalg import eigvals, lstsq, inv @@ -61,7 +61,7 @@ def poly(seq_of_zeros): See Also -------- - polyval : Evaluate a polynomial at a point. + polyval : Compute polynomial values. roots : Return the roots of a polynomial. polyfit : Least squares polynomial fit. poly1d : A one-dimensional polynomial class. @@ -145,11 +145,7 @@ def poly(seq_of_zeros): if issubclass(a.dtype.type, NX.complexfloating): # if complex roots are all complex conjugates, the roots are real. roots = NX.asarray(seq_of_zeros, complex) - pos_roots = sort_complex(NX.compress(roots.imag > 0, roots)) - neg_roots = NX.conjugate(sort_complex( - NX.compress(roots.imag < 0, roots))) - if (len(pos_roots) == len(neg_roots) and - NX.alltrue(neg_roots == pos_roots)): + if NX.all(NX.sort(roots) == NX.sort(roots.conjugate())): a = a.real.copy() return a @@ -171,7 +167,7 @@ def roots(p): Returns ------- out : ndarray - An array containing the complex roots of the polynomial. + An array containing the roots of the polynomial. 
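The simplified conjugate-pair test in ``poly`` above keeps the documented behaviour that roots occurring in conjugate pairs yield real coefficients; a small sanity sketch:

    import numpy as np

    r = np.array([1 + 2j, 1 - 2j, 3.0])
    c = np.poly(r)
    print(c.dtype, c)            # float64 [  1.  -5.  11. -15.]
    print(np.sort(np.roots(c)))  # recovers r, up to ordering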
Raises ------ @@ -182,7 +178,7 @@ def roots(p): -------- poly : Find the coefficients of a polynomial with a given sequence of roots. - polyval : Evaluate a polynomial at a point. + polyval : Compute polynomial values. polyfit : Least squares polynomial fit. poly1d : A one-dimensional polynomial class. @@ -253,12 +249,12 @@ def polyint(p, m=1, k=None): Parameters ---------- - p : {array_like, poly1d} + p : array_like or poly1d Polynomial to differentiate. A sequence is interpreted as polynomial coefficients, see `poly1d`. m : int, optional Order of the antiderivative. (Default: 1) - k : {None, list of `m` scalars, scalar}, optional + k : list of `m` scalars or scalar, optional Integration constants. They are given in the order of integration: those corresponding to highest-order terms come first. @@ -427,18 +423,19 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): default) just the coefficients are returned, when True diagnostic information from the singular value decomposition is also returned. w : array_like, shape (M,), optional - weights to apply to the y-coordinates of the sample points. + Weights to apply to the y-coordinates of the sample points. For + Gaussian uncertainties, use 1/sigma (not 1/sigma**2). cov : bool, optional Return the estimate and the covariance matrix of the estimate If full is True, then cov is not returned. Returns ------- - p : ndarray, shape (M,) or (M, K) + p : ndarray, shape (deg + 1,) or (deg + 1, K) Polynomial coefficients, highest power first. If `y` was 2-D, the coefficients for `k`-th data set are in ``p[:,k]``. - residuals, rank, singular_values, rcond : + residuals, rank, singular_values, rcond Present only if `full` = True. Residuals of the least-squares fit, the effective rank of the scaled Vandermonde coefficient matrix, its singular values, and the specified value of `rcond`. For more @@ -465,7 +462,7 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): See Also -------- - polyval : Computes polynomial values. + polyval : Compute polynomial values. linalg.lstsq : Computes a least-squares fit. scipy.interpolate.UnivariateSpline : Computes spline fits. @@ -591,7 +588,7 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): # warn on rank reduction, which indicates an ill conditioned matrix if rank != order and not full: msg = "Polyfit may be poorly conditioned" - warnings.warn(msg, RankWarning) + warnings.warn(msg, RankWarning, stacklevel=2) if full: return c, resids, rank, s, rcond @@ -602,6 +599,9 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): # it is included here because the covariance of Multivariate Student-T # (which is implied by a Bayesian uncertainty analysis) includes it. # Plus, it gives a slightly more conservative estimate of uncertainty. + if len(x) <= order + 2: + raise ValueError("the number of data points must exceed order + 2 " + "for a Bayesian estimate of the covariance matrix") fac = resids / (len(x) - order - 2.0) if y.ndim == 1: return c, Vbase * fac @@ -630,7 +630,7 @@ def polyval(p, x): to zero) from highest degree to the constant term, or an instance of poly1d. x : array_like or poly1d object - A number, a 1D array of numbers, or an instance of poly1d, "at" + A number, an array of numbers, or an instance of poly1d, at which to evaluate `p`.
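The clarified ``w`` docstring above matters in practice: weights enter the least-squares system linearly, so for Gaussian errors pass 1/sigma rather than 1/sigma**2. A sketch with synthetic data (all values illustrative):

    import numpy as np

    rng = np.random.RandomState(0)
    x = np.linspace(0.0, 1.0, 10)
    sigma = np.full(x.shape, 0.1)
    y = 2.0 * x + 1.0 + rng.normal(0.0, 0.1, x.size)

    c, V = np.polyfit(x, y, 1, w=1.0 / sigma, cov=True)
    print(c)                     # roughly [ 2.  1.]
    print(np.sqrt(np.diag(V)))   # 1-sigma coefficient uncertainties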
Returns @@ -714,12 +714,12 @@ def polyadd(a1, a2): >>> p1 = np.poly1d([1, 2]) >>> p2 = np.poly1d([9, 5, 4]) - >>> print p1 + >>> print(p1) 1 x + 2 - >>> print p2 + >>> print(p2) 2 9 x + 5 x + 4 - >>> print np.polyadd(p1, p2) + >>> print(np.polyadd(p1, p2)) 2 9 x + 6 x + 6 @@ -825,13 +825,13 @@ def polymul(a1, a2): >>> p1 = np.poly1d([1, 2, 3]) >>> p2 = np.poly1d([9, 5, 1]) - >>> print p1 + >>> print(p1) 2 1 x + 2 x + 3 - >>> print p2 + >>> print(p2) 2 9 x + 5 x + 1 - >>> print np.polymul(p1, p2) + >>> print(np.polymul(p1, p2)) 4 3 2 9 x + 23 x + 38 x + 17 x + 3 @@ -965,7 +965,7 @@ class poly1d(object): Construct the polynomial :math:`x^2 + 2x + 3`: >>> p = np.poly1d([1, 2, 3]) - >>> print np.poly1d(p) + >>> print(np.poly1d(p)) 2 1 x + 2 x + 3 @@ -1021,7 +1021,7 @@ class poly1d(object): using the `variable` parameter: >>> p = np.poly1d([1,2,3], variable='z') - >>> print p + >>> print(p) 2 1 z + 2 z + 3 diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index a61b1749b..4ae1079d2 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -268,7 +268,7 @@ def izip_records(seqarrays, fill_value=None, flatten=True): Parameters ---------- - seqarray : sequence of arrays + seqarrays : sequence of arrays Sequence of arrays. fill_value : {None, integer} Value used to pad shorter iterables. @@ -683,7 +683,7 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, Parameters ---------- - seqarrays : array or sequence + arrays : array or sequence Sequence of input arrays. defaults : dictionary, optional Dictionary mapping field names to the corresponding default values. diff --git a/numpy/lib/setup.py b/numpy/lib/setup.py index 62d1dfbb8..d342410b8 100644 --- a/numpy/lib/setup.py +++ b/numpy/lib/setup.py @@ -1,20 +1,10 @@ from __future__ import division, print_function -from os.path import join - def configuration(parent_package='',top_path=None): from numpy.distutils.misc_util import Configuration config = Configuration('lib', parent_package, top_path) - - config.add_include_dirs(join('..', 'core', 'include')) - - config.add_extension('_compiled_base', - sources=[join('src', '_compiled_base.c')] - ) - config.add_data_dir('tests') - return config if __name__ == '__main__': diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py index 70fa3ab03..e580690d1 100644 --- a/numpy/lib/shape_base.py +++ b/numpy/lib/shape_base.py @@ -35,7 +35,7 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs): Input array. args : any Additional arguments to `func1d`. - kwargs: any + kwargs : any Additional named arguments to `func1d`. .. 
versionadded:: 1.9.0 @@ -74,7 +74,7 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs): [2, 5, 6]]) """ - arr = asarray(arr) + arr = asanyarray(arr) nd = arr.ndim if axis < 0: axis += nd @@ -109,11 +109,13 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs): k += 1 return outarr else: + res = asanyarray(res) Ntot = product(outshape) holdshape = outshape outshape = list(arr.shape) - outshape[axis] = len(res) - outarr = zeros(outshape, asarray(res).dtype) + outshape[axis] = res.size + outarr = zeros(outshape, res.dtype) + outarr = res.__array_wrap__(outarr) outarr[tuple(i.tolist())] = res k = 1 while k < Ntot: @@ -128,6 +130,8 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs): res = func1d(arr[tuple(i.tolist())], *args, **kwargs) outarr[tuple(i.tolist())] = res k += 1 + if res.shape == (): + outarr = outarr.squeeze(axis) return outarr @@ -325,6 +329,10 @@ def dstack(tup): This is a simple way to stack 2D arrays (images) into a single 3D array for processing. + This function continues to be supported for backward compatibility, but + you should prefer ``np.concatenate`` or ``np.stack``. The ``np.stack`` + function was added in NumPy 1.10. + Parameters ---------- tup : sequence of arrays @@ -338,9 +346,10 @@ def dstack(tup): See Also -------- + stack : Join a sequence of arrays along a new axis. vstack : Stack along first axis. hstack : Stack along second axis. - concatenate : Join arrays. + concatenate : Join a sequence of arrays along an existing axis. dsplit : Split array along third axis. Notes @@ -420,18 +429,9 @@ def array_split(ary, indices_or_sections, axis=0): end = div_points[i + 1] sub_arys.append(_nx.swapaxes(sary[st:end], axis, 0)) - # This "kludge" was introduced here to replace arrays shaped (0, 10) - # or similar with an array shaped (0,). - # There seems no need for this, so give a FutureWarning to remove later. - if sub_arys[-1].size == 0 and sub_arys[-1].ndim != 1: - warnings.warn("in the future np.array_split will retain the shape of " - "arrays with a zero size, instead of replacing them by " - "`array([])`, which always has a shape of (0,).", - FutureWarning) - sub_arys = _replace_zero_by_x_arrays(sub_arys) - return sub_arys + def split(ary,indices_or_sections,axis=0): """ Split an array into multiple sub-arrays. @@ -477,7 +477,8 @@ def split(ary,indices_or_sections,axis=0): hsplit : Split array into multiple sub-arrays horizontally (column-wise). vsplit : Split array into multiple sub-arrays vertically (row wise). dsplit : Split array into multiple sub-arrays along the 3rd axis (depth). - concatenate : Join arrays together. + concatenate : Join a sequence of arrays along an existing axis. + stack : Join a sequence of arrays along a new axis. hstack : Stack arrays in sequence horizontally (column wise). vstack : Stack arrays in sequence vertically (row wise). dstack : Stack arrays in sequence depth wise (along third dimension). @@ -711,7 +712,7 @@ def kron(a, b): Notes ----- - The function assumes that the number of dimenensions of `a` and `b` + The function assumes that the number of dimensions of `a` and `b` are the same, if necessary prepending the smallest with ones. If `a.shape = (r0,r1,..,rN)` and `b.shape = (s0,s1,...,sN)`, the Kronecker product has shape `(r0*s0, r1*s1, ..., rN*SN)`. @@ -806,6 +807,9 @@ def tile(A, reps): Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as (1, 1, 2, 2). 
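The `reps` promotion just described, and the broadcasting alternative recommended in the note below, in a two-step sketch:

    import numpy as np

    a = np.zeros((2, 3, 4, 5))
    # reps (2, 2) is promoted to (1, 1, 2, 2), so only the
    # last two axes are repeated:
    print(np.tile(a, (2, 2)).shape)   # (2, 3, 8, 10)

    # For read-only repetition, broadcasting avoids the copy entirely:
    print(np.broadcast_to([1, 2, 3], (4, 3)))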
+ Note : Although tile may be used for broadcasting, it is strongly + recommended to use numpy's broadcasting operations and functions. + Parameters ---------- A : array_like @@ -821,6 +825,7 @@ def tile(A, reps): See Also -------- repeat : Repeat elements of an array. + broadcast_to : Broadcast an array to a new shape Examples -------- @@ -844,22 +849,33 @@ def tile(A, reps): [1, 2], [3, 4]]) + >>> c = np.array([1,2,3,4]) + >>> np.tile(c,(4,1)) + array([[1, 2, 3, 4], + [1, 2, 3, 4], + [1, 2, 3, 4], + [1, 2, 3, 4]]) """ try: tup = tuple(reps) except TypeError: tup = (reps,) d = len(tup) - c = _nx.array(A, copy=False, subok=True, ndmin=d) - shape = list(c.shape) - n = max(c.size, 1) + if all(x == 1 for x in tup) and isinstance(A, _nx.ndarray): + # Fixes the problem that the function does not make a copy if A is a + # numpy array and the repetitions are 1 in all dimensions + return _nx.array(A, copy=True, subok=True, ndmin=d) + else: + # Note that no copy of zero-sized arrays is made. However since they + # have no data there is no risk of an inadvertent overwrite. + c = _nx.array(A, copy=False, subok=True, ndmin=d) if (d < c.ndim): tup = (1,)*(c.ndim-d) + tup - for i, nrep in enumerate(tup): - if nrep != 1: - c = c.reshape(-1, n).repeat(nrep, 0) - dim_in = shape[i] - dim_out = dim_in*nrep - shape[i] = dim_out - n //= max(dim_in, 1) - return c.reshape(shape) + shape_out = tuple(s*t for s, t in zip(c.shape, tup)) + n = c.size + if n > 0: + for dim_in, nrep in zip(c.shape, tup): + if nrep != 1: + c = c.reshape(-1, n).repeat(nrep, 0) + n //= dim_in + return c.reshape(shape_out) diff --git a/numpy/lib/src/_compiled_base.c b/numpy/lib/src/_compiled_base.c deleted file mode 100644 index daf96a823..000000000 --- a/numpy/lib/src/_compiled_base.c +++ /dev/null @@ -1,1678 +0,0 @@ -#define NPY_NO_DEPRECATED_API NPY_API_VERSION -#include "Python.h" -#include "structmember.h" -#include "numpy/arrayobject.h" -#include "numpy/npy_3kcompat.h" -#include "npy_config.h" -#include "numpy/ufuncobject.h" -#include "string.h" - - -/* - * Returns -1 if the array is monotonic decreasing, - * +1 if the array is monotonic increasing, - * and 0 if the array is not monotonic. - */ -static int -check_array_monotonic(const double *a, npy_int lena) -{ - npy_intp i; - double next; - double last = a[0]; - - /* Skip repeated values at the beginning of the array */ - for (i = 1; (i < lena) && (a[i] == last); i++); - - if (i == lena) { - /* all bin edges hold the same value */ - return 1; - } - - next = a[i]; - if (last < next) { - /* Possibly monotonic increasing */ - for (i += 1; i < lena; i++) { - last = next; - next = a[i]; - if (last > next) { - return 0; - } - } - return 1; - } - else { - /* last > next, possibly monotonic decreasing */ - for (i += 1; i < lena; i++) { - last = next; - next = a[i]; - if (last < next) { - return 0; - } - } - return -1; - } -} - -/* Find the minimum and maximum of an integer array */ -static void -minmax(const npy_intp *data, npy_intp data_len, npy_intp *mn, npy_intp *mx) -{ - npy_intp min = *data; - npy_intp max = *data; - - while (--data_len) { - const npy_intp val = *(++data); - if (val < min) { - min = val; - } - else if (val > max) { - max = val; - } - } - - *mn = min; - *mx = max; -} - -/* - * arr_bincount is registered as bincount. - * - * bincount accepts one, two or three arguments. The first is an array of - * non-negative integers The second, if present, is an array of weights, - * which must be promotable to double. Call these arguments list and - * weight. 
Both must be one-dimensional with len(weight) == len(list). If - * weight is not present then bincount(list)[i] is the number of occurrences - * of i in list. If weight is present then bincount(self,list, weight)[i] - * is the sum of all weight[j] where list [j] == i. Self is not used. - * The third argument, if present, is a minimum length desired for the - * output array. - */ -static PyObject * -arr_bincount(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) -{ - PyArray_Descr *type; - PyObject *list = NULL, *weight=Py_None, *mlength=Py_None; - PyArrayObject *lst=NULL, *ans=NULL, *wts=NULL; - npy_intp *numbers, *ians, len , mx, mn, ans_size, minlength; - int i; - double *weights , *dans; - static char *kwlist[] = {"list", "weights", "minlength", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO", - kwlist, &list, &weight, &mlength)) { - goto fail; - } - - lst = (PyArrayObject *)PyArray_ContiguousFromAny(list, NPY_INTP, 1, 1); - if (lst == NULL) { - goto fail; - } - len = PyArray_SIZE(lst); - type = PyArray_DescrFromType(NPY_INTP); - - if (mlength == Py_None) { - minlength = 0; - } - else { - minlength = PyArray_PyIntAsIntp(mlength); - if (minlength <= 0) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "minlength must be positive"); - } - goto fail; - } - } - - /* handle empty list */ - if (len == 0) { - if (!(ans = (PyArrayObject *)PyArray_Zeros(1, &minlength, type, 0))){ - goto fail; - } - Py_DECREF(lst); - return (PyObject *)ans; - } - - numbers = (npy_intp *) PyArray_DATA(lst); - minmax(numbers, len, &mn, &mx); - if (mn < 0) { - PyErr_SetString(PyExc_ValueError, - "The first argument of bincount must be non-negative"); - goto fail; - } - ans_size = mx + 1; - if (mlength != Py_None) { - if (ans_size < minlength) { - ans_size = minlength; - } - } - if (weight == Py_None) { - ans = (PyArrayObject *)PyArray_Zeros(1, &ans_size, type, 0); - if (ans == NULL) { - goto fail; - } - ians = (npy_intp *)(PyArray_DATA(ans)); - NPY_BEGIN_ALLOW_THREADS; - for (i = 0; i < len; i++) - ians [numbers [i]] += 1; - NPY_END_ALLOW_THREADS; - Py_DECREF(lst); - } - else { - wts = (PyArrayObject *)PyArray_ContiguousFromAny( - weight, NPY_DOUBLE, 1, 1); - if (wts == NULL) { - goto fail; - } - weights = (double *)PyArray_DATA (wts); - if (PyArray_SIZE(wts) != len) { - PyErr_SetString(PyExc_ValueError, - "The weights and list don't have the same length."); - goto fail; - } - type = PyArray_DescrFromType(NPY_DOUBLE); - ans = (PyArrayObject *)PyArray_Zeros(1, &ans_size, type, 0); - if (ans == NULL) { - goto fail; - } - dans = (double *)PyArray_DATA(ans); - NPY_BEGIN_ALLOW_THREADS; - for (i = 0; i < len; i++) { - dans[numbers[i]] += weights[i]; - } - NPY_END_ALLOW_THREADS; - Py_DECREF(lst); - Py_DECREF(wts); - } - return (PyObject *)ans; - -fail: - Py_XDECREF(lst); - Py_XDECREF(wts); - Py_XDECREF(ans); - return NULL; -} - -/* - * digitize(x, bins, right=False) returns an array of integers the same length - * as x. The values i returned are such that bins[i - 1] <= x < bins[i] if - * bins is monotonically increasing, or bins[i - 1] > x >= bins[i] if bins - * is monotonically decreasing. Beyond the bounds of bins, returns either - * i = 0 or i = len(bins) as appropriate. 
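This compiled module is dropped from numpy.lib (its extension is no longer built, per the setup.py hunk above), but the ``np.bincount`` and ``np.digitize`` semantics spelled out in these comments are unchanged at the Python level. A quick sketch, including the ``right=`` handling described just below:

    import numpy as np

    x = np.array([0, 1, 1, 3])
    print(np.bincount(x))                    # [1 2 0 1]
    print(np.bincount(x, minlength=6))       # [1 2 0 1 0 0]
    w = np.array([0.5, 1.0, 2.0, 4.0])
    print(np.bincount(x, weights=w))         # [ 0.5  3.   0.   4. ]

    bins = np.array([0.0, 1.0, 2.5, 4.0])    # monotonically increasing
    v = np.array([-0.5, 0.2, 2.5, 9.0])
    print(np.digitize(v, bins))              # [0 1 3 4]
    print(np.digitize(v, bins, right=True))  # [0 1 2 4]
    print(np.digitize(v, bins[::-1]))        # decreasing bins: [4 3 1 0]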
If right == True the comparison - * is bins [i - 1] < x <= bins[i] or bins [i - 1] >= x > bins[i] - */ -static PyObject * -arr_digitize(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) -{ - PyObject *obj_x = NULL; - PyObject *obj_bins = NULL; - PyArrayObject *arr_x = NULL; - PyArrayObject *arr_bins = NULL; - PyObject *ret = NULL; - npy_intp len_bins; - int monotonic, right = 0; - NPY_BEGIN_THREADS_DEF - - static char *kwlist[] = {"x", "bins", "right", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|i", kwlist, - &obj_x, &obj_bins, &right)) { - goto fail; - } - - /* PyArray_SearchSorted will make `x` contiguous even if we don't */ - arr_x = (PyArrayObject *)PyArray_FROMANY(obj_x, NPY_DOUBLE, 0, 0, - NPY_ARRAY_CARRAY_RO); - if (arr_x == NULL) { - goto fail; - } - - /* TODO: `bins` could be strided, needs change to check_array_monotonic */ - arr_bins = (PyArrayObject *)PyArray_FROMANY(obj_bins, NPY_DOUBLE, 1, 1, - NPY_ARRAY_CARRAY_RO); - if (arr_bins == NULL) { - goto fail; - } - - len_bins = PyArray_SIZE(arr_bins); - if (len_bins == 0) { - PyErr_SetString(PyExc_ValueError, "bins must have non-zero length"); - goto fail; - } - - NPY_BEGIN_THREADS_THRESHOLDED(len_bins) - monotonic = check_array_monotonic((const double *)PyArray_DATA(arr_bins), - len_bins); - NPY_END_THREADS - - if (monotonic == 0) { - PyErr_SetString(PyExc_ValueError, - "bins must be monotonically increasing or decreasing"); - goto fail; - } - - /* PyArray_SearchSorted needs an increasing array */ - if (monotonic == - 1) { - PyArrayObject *arr_tmp = NULL; - npy_intp shape = PyArray_DIM(arr_bins, 0); - npy_intp stride = -PyArray_STRIDE(arr_bins, 0); - void *data = (void *)(PyArray_BYTES(arr_bins) - stride * (shape - 1)); - - arr_tmp = (PyArrayObject *)PyArray_New(&PyArray_Type, 1, &shape, - NPY_DOUBLE, &stride, data, 0, - PyArray_FLAGS(arr_bins), NULL); - if (!arr_tmp) { - goto fail; - } - - if (PyArray_SetBaseObject(arr_tmp, (PyObject *)arr_bins) < 0) { - - Py_DECREF(arr_tmp); - goto fail; - } - arr_bins = arr_tmp; - } - - ret = PyArray_SearchSorted(arr_bins, (PyObject *)arr_x, - right ? NPY_SEARCHLEFT : NPY_SEARCHRIGHT, NULL); - if (!ret) { - goto fail; - } - - /* If bins is decreasing, ret has bins from end, not start */ - if (monotonic == -1) { - npy_intp *ret_data = - (npy_intp *)PyArray_DATA((PyArrayObject *)ret); - npy_intp len_ret = PyArray_SIZE((PyArrayObject *)ret); - - NPY_BEGIN_THREADS_THRESHOLDED(len_ret) - while (len_ret--) { - *ret_data = len_bins - *ret_data; - ret_data++; - } - NPY_END_THREADS - } - - fail: - Py_DECREF(arr_x); - Py_DECREF(arr_bins); - return ret; -} - -static char arr_insert__doc__[] = "Insert vals sequentially into equivalent 1-d positions indicated by mask."; - -/* - * Insert values from an input array into an output array, at positions - * indicated by a mask. If the arrays are of dtype object (indicated by - * the objarray flag), take care of reference counting. - * - * This function implements the copying logic of arr_insert() defined - * below. - */ -static void -arr_insert_loop(char *mptr, char *vptr, char *input_data, char *zero, - char *avals_data, int melsize, int delsize, int objarray, - int totmask, int numvals, int nd, npy_intp *instrides, - npy_intp *inshape) -{ - int mindx, rem_indx, indx, i, copied; - - /* - * Walk through mask array, when non-zero is encountered - * copy next value in the vals array to the input array. - * If we get through the value array, repeat it as necessary. 
- */ - copied = 0; - for (mindx = 0; mindx < totmask; mindx++) { - if (memcmp(mptr,zero,melsize) != 0) { - /* compute indx into input array */ - rem_indx = mindx; - indx = 0; - for (i = nd - 1; i > 0; --i) { - indx += (rem_indx % inshape[i]) * instrides[i]; - rem_indx /= inshape[i]; - } - indx += rem_indx * instrides[0]; - /* fprintf(stderr, "mindx = %d, indx=%d\n", mindx, indx); */ - /* Copy value element over to input array */ - memcpy(input_data+indx,vptr,delsize); - if (objarray) { - Py_INCREF(*((PyObject **)vptr)); - } - vptr += delsize; - copied += 1; - /* If we move past value data. Reset */ - if (copied >= numvals) { - vptr = avals_data; - } - } - mptr += melsize; - } -} - -/* - * Returns input array with values inserted sequentially into places - * indicated by the mask - */ -static PyObject * -arr_insert(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwdict) -{ - PyObject *mask = NULL, *vals = NULL; - PyArrayObject *ainput = NULL, *amask = NULL, *avals = NULL, *tmp = NULL; - int numvals, totmask, sameshape; - char *input_data, *mptr, *vptr, *zero = NULL; - int melsize, delsize, nd, objarray, k; - npy_intp *instrides, *inshape; - - static char *kwlist[] = {"input", "mask", "vals", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "O&OO", kwlist, - PyArray_Converter, &ainput, - &mask, &vals)) { - goto fail; - } - - amask = (PyArrayObject *)PyArray_FROM_OF(mask, NPY_ARRAY_CARRAY); - if (amask == NULL) { - goto fail; - } - /* Cast an object array */ - if (PyArray_DESCR(amask)->type_num == NPY_OBJECT) { - tmp = (PyArrayObject *)PyArray_Cast(amask, NPY_INTP); - if (tmp == NULL) { - goto fail; - } - Py_DECREF(amask); - amask = tmp; - } - - sameshape = 1; - if (PyArray_NDIM(amask) == PyArray_NDIM(ainput)) { - for (k = 0; k < PyArray_NDIM(amask); k++) { - if (PyArray_DIMS(amask)[k] != PyArray_DIMS(ainput)[k]) { - sameshape = 0; - } - } - } - else { - /* Test to see if amask is 1d */ - if (PyArray_NDIM(amask) != 1) { - sameshape = 0; - } - else if ((PyArray_SIZE(ainput)) != PyArray_SIZE(amask)) { - sameshape = 0; - } - } - if (!sameshape) { - PyErr_SetString(PyExc_TypeError, - "mask array must be 1-d or same shape as input array"); - goto fail; - } - - avals = (PyArrayObject *)PyArray_FromObject(vals, - PyArray_DESCR(ainput)->type_num, 0, 1); - if (avals == NULL) { - goto fail; - } - numvals = PyArray_SIZE(avals); - nd = PyArray_NDIM(ainput); - input_data = PyArray_DATA(ainput); - mptr = PyArray_DATA(amask); - melsize = PyArray_DESCR(amask)->elsize; - vptr = PyArray_DATA(avals); - delsize = PyArray_DESCR(avals)->elsize; - zero = PyArray_Zero(amask); - if (zero == NULL) { - goto fail; - } - objarray = (PyArray_DESCR(ainput)->type_num == NPY_OBJECT); - - /* Handle zero-dimensional case separately */ - if (nd == 0) { - if (memcmp(mptr,zero,melsize) != 0) { - /* Copy value element over to input array */ - memcpy(input_data,vptr,delsize); - if (objarray) { - Py_INCREF(*((PyObject **)vptr)); - } - } - Py_DECREF(amask); - Py_DECREF(avals); - PyDataMem_FREE(zero); - Py_DECREF(ainput); - Py_INCREF(Py_None); - return Py_None; - } - - totmask = (int) PyArray_SIZE(amask); - instrides = PyArray_STRIDES(ainput); - inshape = PyArray_DIMS(ainput); - if (objarray) { - /* object array, need to refcount, can't release the GIL */ - arr_insert_loop(mptr, vptr, input_data, zero, PyArray_DATA(avals), - melsize, delsize, objarray, totmask, numvals, nd, - instrides, inshape); - } - else { - /* No increfs take place in arr_insert_loop, so release the GIL */ - NPY_BEGIN_ALLOW_THREADS; - 
arr_insert_loop(mptr, vptr, input_data, zero, PyArray_DATA(avals), - melsize, delsize, objarray, totmask, numvals, nd, - instrides, inshape); - NPY_END_ALLOW_THREADS; - } - - Py_DECREF(amask); - Py_DECREF(avals); - PyDataMem_FREE(zero); - Py_DECREF(ainput); - Py_INCREF(Py_None); - return Py_None; - -fail: - PyDataMem_FREE(zero); - Py_XDECREF(ainput); - Py_XDECREF(amask); - Py_XDECREF(avals); - return NULL; -} - -/** @brief Use bisection on a sorted array to find first entry > key. - * - * Use bisection to find an index i s.t. arr[i] <= key < arr[i + 1]. If there is - * no such i the error returns are: - * key < arr[0] -- -1 - * key == arr[len - 1] -- len - 1 - * key > arr[len - 1] -- len - * The array is assumed contiguous and sorted in ascending order. - * - * @param key key value. - * @param arr contiguous sorted array to be searched. - * @param len length of the array. - * @return index - */ -static npy_intp -binary_search(double key, double arr [], npy_intp len) -{ - npy_intp imin = 0; - npy_intp imax = len; - - if (key > arr[len - 1]) { - return len; - } - while (imin < imax) { - npy_intp imid = imin + ((imax - imin) >> 1); - if (key >= arr[imid]) { - imin = imid + 1; - } - else { - imax = imid; - } - } - return imin - 1; -} - -static PyObject * -arr_interp(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwdict) -{ - - PyObject *fp, *xp, *x; - PyObject *left = NULL, *right = NULL; - PyArrayObject *afp = NULL, *axp = NULL, *ax = NULL, *af = NULL; - npy_intp i, lenx, lenxp; - double lval, rval; - double *dy, *dx, *dz, *dres, *slopes; - - static char *kwlist[] = {"x", "xp", "fp", "left", "right", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "OOO|OO", kwlist, - &x, &xp, &fp, &left, &right)) { - return NULL; - } - - afp = (PyArrayObject *)PyArray_ContiguousFromAny(fp, NPY_DOUBLE, 1, 1); - if (afp == NULL) { - return NULL; - } - axp = (PyArrayObject *)PyArray_ContiguousFromAny(xp, NPY_DOUBLE, 1, 1); - if (axp == NULL) { - goto fail; - } - ax = (PyArrayObject *)PyArray_ContiguousFromAny(x, NPY_DOUBLE, 1, 0); - if (ax == NULL) { - goto fail; - } - lenxp = PyArray_DIMS(axp)[0]; - if (lenxp == 0) { - PyErr_SetString(PyExc_ValueError, - "array of sample points is empty"); - goto fail; - } - if (PyArray_DIMS(afp)[0] != lenxp) { - PyErr_SetString(PyExc_ValueError, - "fp and xp are not of the same length."); - goto fail; - } - - af = (PyArrayObject *)PyArray_SimpleNew(PyArray_NDIM(ax), - PyArray_DIMS(ax), NPY_DOUBLE); - if (af == NULL) { - goto fail; - } - lenx = PyArray_SIZE(ax); - - dy = (double *)PyArray_DATA(afp); - dx = (double *)PyArray_DATA(axp); - dz = (double *)PyArray_DATA(ax); - dres = (double *)PyArray_DATA(af); - - /* Get left and right fill values. */ - if ((left == NULL) || (left == Py_None)) { - lval = dy[0]; - } - else { - lval = PyFloat_AsDouble(left); - if ((lval == -1) && PyErr_Occurred()) { - goto fail; - } - } - if ((right == NULL) || (right == Py_None)) { - rval = dy[lenxp-1]; - } - else { - rval = PyFloat_AsDouble(right); - if ((rval == -1) && PyErr_Occurred()) { - goto fail; - } - } - - /* only pre-calculate slopes if there are relatively few of them. */ - if (lenxp <= lenx) { - slopes = (double *) PyArray_malloc((lenxp - 1)*sizeof(double)); - if (! 
slopes) { - goto fail; - } - NPY_BEGIN_ALLOW_THREADS; - for (i = 0; i < lenxp - 1; i++) { - slopes[i] = (dy[i + 1] - dy[i])/(dx[i + 1] - dx[i]); - } - for (i = 0; i < lenx; i++) { - const double x = dz[i]; - npy_intp j; - - if (npy_isnan(x)) { - dres[i] = x; - continue; - } - - j = binary_search(x, dx, lenxp); - if (j == -1) { - dres[i] = lval; - } - else if (j == lenxp - 1) { - dres[i] = dy[j]; - } - else if (j == lenxp) { - dres[i] = rval; - } - else { - dres[i] = slopes[j]*(x - dx[j]) + dy[j]; - } - } - NPY_END_ALLOW_THREADS; - PyArray_free(slopes); - } - else { - NPY_BEGIN_ALLOW_THREADS; - for (i = 0; i < lenx; i++) { - const double x = dz[i]; - npy_intp j; - - if (npy_isnan(x)) { - dres[i] = x; - continue; - } - - j = binary_search(x, dx, lenxp); - if (j == -1) { - dres[i] = lval; - } - else if (j == lenxp - 1) { - dres[i] = dy[j]; - } - else if (j == lenxp) { - dres[i] = rval; - } - else { - const double slope = (dy[j + 1] - dy[j])/(dx[j + 1] - dx[j]); - dres[i] = slope*(x - dx[j]) + dy[j]; - } - } - NPY_END_ALLOW_THREADS; - } - - Py_DECREF(afp); - Py_DECREF(axp); - Py_DECREF(ax); - return (PyObject *)af; - -fail: - Py_XDECREF(afp); - Py_XDECREF(axp); - Py_XDECREF(ax); - Py_XDECREF(af); - return NULL; -} - -/* - * Converts a Python sequence into 'count' PyArrayObjects - * - * seq - Input Python object, usually a tuple but any sequence works. - * op - Where the arrays are placed. - * count - How many arrays there should be (errors if it doesn't match). - * paramname - The name of the parameter that produced 'seq'. - */ -static int sequence_to_arrays(PyObject *seq, - PyArrayObject **op, int count, - char *paramname) -{ - int i; - - if (!PySequence_Check(seq) || PySequence_Size(seq) != count) { - PyErr_Format(PyExc_ValueError, - "parameter %s must be a sequence of length %d", - paramname, count); - return -1; - } - - for (i = 0; i < count; ++i) { - PyObject *item = PySequence_GetItem(seq, i); - if (item == NULL) { - while (--i >= 0) { - Py_DECREF(op[i]); - op[i] = NULL; - } - return -1; - } - - op[i] = (PyArrayObject *)PyArray_FromAny(item, NULL, 0, 0, 0, NULL); - if (op[i] == NULL) { - while (--i >= 0) { - Py_DECREF(op[i]); - op[i] = NULL; - } - Py_DECREF(item); - return -1; - } - - Py_DECREF(item); - } - - return 0; -} - -/* Inner loop for unravel_index */ -static int -ravel_multi_index_loop(int ravel_ndim, npy_intp *ravel_dims, - npy_intp *ravel_strides, - npy_intp count, - NPY_CLIPMODE *modes, - char **coords, npy_intp *coords_strides) -{ - int i; - char invalid; - npy_intp j, m; - - NPY_BEGIN_ALLOW_THREADS; - invalid = 0; - while (count--) { - npy_intp raveled = 0; - for (i = 0; i < ravel_ndim; ++i) { - m = ravel_dims[i]; - j = *(npy_intp *)coords[i]; - switch (modes[i]) { - case NPY_RAISE: - if (j < 0 || j >= m) { - invalid = 1; - goto end_while; - } - break; - case NPY_WRAP: - if (j < 0) { - j += m; - if (j < 0) { - j = j % m; - if (j != 0) { - j += m; - } - } - } - else if (j >= m) { - j -= m; - if (j >= m) { - j = j % m; - } - } - break; - case NPY_CLIP: - if (j < 0) { - j = 0; - } - else if (j >= m) { - j = m - 1; - } - break; - - } - raveled += j * ravel_strides[i]; - - coords[i] += coords_strides[i]; - } - *(npy_intp *)coords[ravel_ndim] = raveled; - coords[ravel_ndim] += coords_strides[ravel_ndim]; - } -end_while: - NPY_END_ALLOW_THREADS; - if (invalid) { - PyErr_SetString(PyExc_ValueError, - "invalid entry in coordinates array"); - return NPY_FAIL; - } - return NPY_SUCCEED; -} - -/* ravel_multi_index implementation - see add_newdocs.py */ -static PyObject * 
-arr_ravel_multi_index(PyObject *self, PyObject *args, PyObject *kwds) -{ - int i, s; - PyObject *mode0=NULL, *coords0=NULL; - PyArrayObject *ret = NULL; - PyArray_Dims dimensions={0,0}; - npy_intp ravel_strides[NPY_MAXDIMS]; - NPY_ORDER order = NPY_CORDER; - NPY_CLIPMODE modes[NPY_MAXDIMS]; - - PyArrayObject *op[NPY_MAXARGS]; - PyArray_Descr *dtype[NPY_MAXARGS]; - npy_uint32 op_flags[NPY_MAXARGS]; - - NpyIter *iter = NULL; - - char *kwlist[] = {"multi_index", "dims", "mode", "order", NULL}; - - memset(op, 0, sizeof(op)); - dtype[0] = NULL; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO&|OO&:ravel_multi_index", kwlist, - &coords0, - PyArray_IntpConverter, &dimensions, - &mode0, - PyArray_OrderConverter, &order)) { - goto fail; - } - - if (dimensions.len+1 > NPY_MAXARGS) { - PyErr_SetString(PyExc_ValueError, - "too many dimensions passed to ravel_multi_index"); - goto fail; - } - - if (!PyArray_ConvertClipmodeSequence(mode0, modes, dimensions.len)) { - goto fail; - } - - switch (order) { - case NPY_CORDER: - s = 1; - for (i = dimensions.len-1; i >= 0; --i) { - ravel_strides[i] = s; - s *= dimensions.ptr[i]; - } - break; - case NPY_FORTRANORDER: - s = 1; - for (i = 0; i < dimensions.len; ++i) { - ravel_strides[i] = s; - s *= dimensions.ptr[i]; - } - break; - default: - PyErr_SetString(PyExc_ValueError, - "only 'C' or 'F' order is permitted"); - goto fail; - } - - /* Get the multi_index into op */ - if (sequence_to_arrays(coords0, op, dimensions.len, "multi_index") < 0) { - goto fail; - } - - - for (i = 0; i < dimensions.len; ++i) { - op_flags[i] = NPY_ITER_READONLY| - NPY_ITER_ALIGNED; - } - op_flags[dimensions.len] = NPY_ITER_WRITEONLY| - NPY_ITER_ALIGNED| - NPY_ITER_ALLOCATE; - dtype[0] = PyArray_DescrFromType(NPY_INTP); - for (i = 1; i <= dimensions.len; ++i) { - dtype[i] = dtype[0]; - } - - iter = NpyIter_MultiNew(dimensions.len+1, op, NPY_ITER_BUFFERED| - NPY_ITER_EXTERNAL_LOOP| - NPY_ITER_ZEROSIZE_OK, - NPY_KEEPORDER, - NPY_SAME_KIND_CASTING, - op_flags, dtype); - if (iter == NULL) { - goto fail; - } - - if (NpyIter_GetIterSize(iter) != 0) { - NpyIter_IterNextFunc *iternext; - char **dataptr; - npy_intp *strides; - npy_intp *countptr; - - iternext = NpyIter_GetIterNext(iter, NULL); - if (iternext == NULL) { - goto fail; - } - dataptr = NpyIter_GetDataPtrArray(iter); - strides = NpyIter_GetInnerStrideArray(iter); - countptr = NpyIter_GetInnerLoopSizePtr(iter); - - do { - if (ravel_multi_index_loop(dimensions.len, dimensions.ptr, - ravel_strides, *countptr, modes, - dataptr, strides) != NPY_SUCCEED) { - goto fail; - } - } while(iternext(iter)); - } - - ret = NpyIter_GetOperandArray(iter)[dimensions.len]; - Py_INCREF(ret); - - Py_DECREF(dtype[0]); - for (i = 0; i < dimensions.len; ++i) { - Py_XDECREF(op[i]); - } - PyDimMem_FREE(dimensions.ptr); - NpyIter_Deallocate(iter); - return PyArray_Return(ret); - -fail: - Py_XDECREF(dtype[0]); - for (i = 0; i < dimensions.len; ++i) { - Py_XDECREF(op[i]); - } - PyDimMem_FREE(dimensions.ptr); - NpyIter_Deallocate(iter); - return NULL; -} - -/* C-order inner loop for unravel_index */ -static int -unravel_index_loop_corder(int unravel_ndim, npy_intp *unravel_dims, - npy_intp unravel_size, npy_intp count, - char *indices, npy_intp indices_stride, - npy_intp *coords) -{ - int i; - char invalid; - npy_intp val; - - NPY_BEGIN_ALLOW_THREADS; - invalid = 0; - while (count--) { - val = *(npy_intp *)indices; - if (val < 0 || val >= unravel_size) { - invalid = 1; - break; - } - for (i = unravel_ndim-1; i >= 0; --i) { - coords[i] = val % unravel_dims[i]; 
- val /= unravel_dims[i]; - } - coords += unravel_ndim; - indices += indices_stride; - } - NPY_END_ALLOW_THREADS; - if (invalid) { - PyErr_SetString(PyExc_ValueError, - "invalid entry in index array"); - return NPY_FAIL; - } - return NPY_SUCCEED; -} - -/* Fortran-order inner loop for unravel_index */ -static int -unravel_index_loop_forder(int unravel_ndim, npy_intp *unravel_dims, - npy_intp unravel_size, npy_intp count, - char *indices, npy_intp indices_stride, - npy_intp *coords) -{ - int i; - char invalid; - npy_intp val; - - NPY_BEGIN_ALLOW_THREADS; - invalid = 0; - while (count--) { - val = *(npy_intp *)indices; - if (val < 0 || val >= unravel_size) { - invalid = 1; - break; - } - for (i = 0; i < unravel_ndim; ++i) { - *coords++ = val % unravel_dims[i]; - val /= unravel_dims[i]; - } - indices += indices_stride; - } - NPY_END_ALLOW_THREADS; - if (invalid) { - PyErr_SetString(PyExc_ValueError, - "invalid entry in index array"); - return NPY_FAIL; - } - return NPY_SUCCEED; -} - -/* unravel_index implementation - see add_newdocs.py */ -static PyObject * -arr_unravel_index(PyObject *self, PyObject *args, PyObject *kwds) -{ - PyObject *indices0 = NULL, *ret_tuple = NULL; - PyArrayObject *ret_arr = NULL; - PyArrayObject *indices = NULL; - PyArray_Descr *dtype = NULL; - PyArray_Dims dimensions={0,0}; - NPY_ORDER order = NPY_CORDER; - npy_intp unravel_size; - - NpyIter *iter = NULL; - int i, ret_ndim; - npy_intp ret_dims[NPY_MAXDIMS], ret_strides[NPY_MAXDIMS]; - - char *kwlist[] = {"indices", "dims", "order", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&|O&:unravel_index", - kwlist, - &indices0, - PyArray_IntpConverter, &dimensions, - PyArray_OrderConverter, &order)) { - goto fail; - } - - if (dimensions.len == 0) { - PyErr_SetString(PyExc_ValueError, - "dims must have at least one value"); - goto fail; - } - - unravel_size = PyArray_MultiplyList(dimensions.ptr, dimensions.len); - - if (!PyArray_Check(indices0)) { - indices = (PyArrayObject*)PyArray_FromAny(indices0, - NULL, 0, 0, 0, NULL); - if (indices == NULL) { - goto fail; - } - } - else { - indices = (PyArrayObject *)indices0; - Py_INCREF(indices); - } - - dtype = PyArray_DescrFromType(NPY_INTP); - if (dtype == NULL) { - goto fail; - } - - iter = NpyIter_New(indices, NPY_ITER_READONLY| - NPY_ITER_ALIGNED| - NPY_ITER_BUFFERED| - NPY_ITER_ZEROSIZE_OK| - NPY_ITER_DONT_NEGATE_STRIDES| - NPY_ITER_MULTI_INDEX, - NPY_KEEPORDER, NPY_SAME_KIND_CASTING, - dtype); - if (iter == NULL) { - goto fail; - } - - /* - * Create the return array with a layout compatible with the indices - * and with a dimension added to the end for the multi-index - */ - ret_ndim = PyArray_NDIM(indices) + 1; - if (NpyIter_GetShape(iter, ret_dims) != NPY_SUCCEED) { - goto fail; - } - ret_dims[ret_ndim-1] = dimensions.len; - if (NpyIter_CreateCompatibleStrides(iter, - dimensions.len*sizeof(npy_intp), ret_strides) != NPY_SUCCEED) { - goto fail; - } - ret_strides[ret_ndim-1] = sizeof(npy_intp); - - /* Remove the multi-index and inner loop */ - if (NpyIter_RemoveMultiIndex(iter) != NPY_SUCCEED) { - goto fail; - } - if (NpyIter_EnableExternalLoop(iter) != NPY_SUCCEED) { - goto fail; - } - - ret_arr = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, - ret_ndim, ret_dims, ret_strides, NULL, 0, NULL); - dtype = NULL; - if (ret_arr == NULL) { - goto fail; - } - - if (order == NPY_CORDER) { - if (NpyIter_GetIterSize(iter) != 0) { - NpyIter_IterNextFunc *iternext; - char **dataptr; - npy_intp *strides; - npy_intp *countptr, count; - npy_intp *coordsptr = 
(npy_intp *)PyArray_DATA(ret_arr); - - iternext = NpyIter_GetIterNext(iter, NULL); - if (iternext == NULL) { - goto fail; - } - dataptr = NpyIter_GetDataPtrArray(iter); - strides = NpyIter_GetInnerStrideArray(iter); - countptr = NpyIter_GetInnerLoopSizePtr(iter); - - do { - count = *countptr; - if (unravel_index_loop_corder(dimensions.len, dimensions.ptr, - unravel_size, count, *dataptr, *strides, - coordsptr) != NPY_SUCCEED) { - goto fail; - } - coordsptr += count*dimensions.len; - } while(iternext(iter)); - } - } - else if (order == NPY_FORTRANORDER) { - if (NpyIter_GetIterSize(iter) != 0) { - NpyIter_IterNextFunc *iternext; - char **dataptr; - npy_intp *strides; - npy_intp *countptr, count; - npy_intp *coordsptr = (npy_intp *)PyArray_DATA(ret_arr); - - iternext = NpyIter_GetIterNext(iter, NULL); - if (iternext == NULL) { - goto fail; - } - dataptr = NpyIter_GetDataPtrArray(iter); - strides = NpyIter_GetInnerStrideArray(iter); - countptr = NpyIter_GetInnerLoopSizePtr(iter); - - do { - count = *countptr; - if (unravel_index_loop_forder(dimensions.len, dimensions.ptr, - unravel_size, count, *dataptr, *strides, - coordsptr) != NPY_SUCCEED) { - goto fail; - } - coordsptr += count*dimensions.len; - } while(iternext(iter)); - } - } - else { - PyErr_SetString(PyExc_ValueError, - "only 'C' or 'F' order is permitted"); - goto fail; - } - - /* Now make a tuple of views, one per index */ - ret_tuple = PyTuple_New(dimensions.len); - if (ret_tuple == NULL) { - goto fail; - } - for (i = 0; i < dimensions.len; ++i) { - PyArrayObject *view; - - view = (PyArrayObject *)PyArray_New(&PyArray_Type, ret_ndim-1, - ret_dims, NPY_INTP, - ret_strides, - PyArray_BYTES(ret_arr) + i*sizeof(npy_intp), - 0, 0, NULL); - if (view == NULL) { - goto fail; - } - Py_INCREF(ret_arr); - if (PyArray_SetBaseObject(view, (PyObject *)ret_arr) < 0) { - Py_DECREF(view); - goto fail; - } - PyTuple_SET_ITEM(ret_tuple, i, PyArray_Return(view)); - } - - Py_DECREF(ret_arr); - Py_XDECREF(indices); - PyDimMem_FREE(dimensions.ptr); - NpyIter_Deallocate(iter); - - return ret_tuple; - -fail: - Py_XDECREF(ret_tuple); - Py_XDECREF(ret_arr); - Py_XDECREF(dtype); - Py_XDECREF(indices); - PyDimMem_FREE(dimensions.ptr); - NpyIter_Deallocate(iter); - return NULL; -} - - -static PyTypeObject *PyMemberDescr_TypePtr = NULL; -static PyTypeObject *PyGetSetDescr_TypePtr = NULL; -static PyTypeObject *PyMethodDescr_TypePtr = NULL; - -/* Can only be called if doc is currently NULL */ -static PyObject * -arr_add_docstring(PyObject *NPY_UNUSED(dummy), PyObject *args) -{ - PyObject *obj; - PyObject *str; - char *docstr; - static char *msg = "already has a docstring"; - - /* Don't add docstrings */ - if (Py_OptimizeFlag > 1) { - Py_INCREF(Py_None); - return Py_None; - } -#if defined(NPY_PY3K) - if (!PyArg_ParseTuple(args, "OO!", &obj, &PyUnicode_Type, &str)) { - return NULL; - } - - docstr = PyBytes_AS_STRING(PyUnicode_AsUTF8String(str)); -#else - if (!PyArg_ParseTuple(args, "OO!", &obj, &PyString_Type, &str)) { - return NULL; - } - - docstr = PyString_AS_STRING(str); -#endif - -#define _TESTDOC1(typebase) (Py_TYPE(obj) == &Py##typebase##_Type) -#define _TESTDOC2(typebase) (Py_TYPE(obj) == Py##typebase##_TypePtr) -#define _ADDDOC(typebase, doc, name) do { \ - Py##typebase##Object *new = (Py##typebase##Object *)obj; \ - if (!(doc)) { \ - doc = docstr; \ - } \ - else { \ - PyErr_Format(PyExc_RuntimeError, "%s method %s", name, msg); \ - return NULL; \ - } \ - } while (0) - - if (_TESTDOC1(CFunction)) { - _ADDDOC(CFunction, new->m_ml->ml_doc, new->m_ml->ml_name); 
- } - else if (_TESTDOC1(Type)) { - _ADDDOC(Type, new->tp_doc, new->tp_name); - } - else if (_TESTDOC2(MemberDescr)) { - _ADDDOC(MemberDescr, new->d_member->doc, new->d_member->name); - } - else if (_TESTDOC2(GetSetDescr)) { - _ADDDOC(GetSetDescr, new->d_getset->doc, new->d_getset->name); - } - else if (_TESTDOC2(MethodDescr)) { - _ADDDOC(MethodDescr, new->d_method->ml_doc, new->d_method->ml_name); - } - else { - PyObject *doc_attr; - - doc_attr = PyObject_GetAttrString(obj, "__doc__"); - if (doc_attr != NULL && doc_attr != Py_None) { - PyErr_Format(PyExc_RuntimeError, "object %s", msg); - return NULL; - } - Py_XDECREF(doc_attr); - - if (PyObject_SetAttrString(obj, "__doc__", str) < 0) { - PyErr_SetString(PyExc_TypeError, - "Cannot set a docstring for that object"); - return NULL; - } - Py_INCREF(Py_None); - return Py_None; - } - -#undef _TESTDOC1 -#undef _TESTDOC2 -#undef _ADDDOC - - Py_INCREF(str); - Py_INCREF(Py_None); - return Py_None; -} - - -/* docstring in numpy.add_newdocs.py */ -static PyObject * -add_newdoc_ufunc(PyObject *NPY_UNUSED(dummy), PyObject *args) -{ - PyUFuncObject *ufunc; - PyObject *str; - char *docstr, *newdocstr; - -#if defined(NPY_PY3K) - if (!PyArg_ParseTuple(args, "O!O!", &PyUFunc_Type, &ufunc, - &PyUnicode_Type, &str)) { - return NULL; - } - docstr = PyBytes_AS_STRING(PyUnicode_AsUTF8String(str)); -#else - if (!PyArg_ParseTuple(args, "O!O!", &PyUFunc_Type, &ufunc, - &PyString_Type, &str)) { - return NULL; - } - docstr = PyString_AS_STRING(str); -#endif - - if (NULL != ufunc->doc) { - PyErr_SetString(PyExc_ValueError, - "Cannot change docstring of ufunc with non-NULL docstring"); - return NULL; - } - - /* - * This introduces a memory leak, as the memory allocated for the doc - * will not be freed even if the ufunc itself is deleted. In practice - * this should not be a problem since the user would have to - * repeatedly create, document, and throw away ufuncs. - */ - newdocstr = malloc(strlen(docstr) + 1); - strcpy(newdocstr, docstr); - ufunc->doc = newdocstr; - - Py_INCREF(Py_None); - return Py_None; -} - -/* PACKBITS - * - * This function packs binary (0 or 1) 1-bit per pixel arrays - * into contiguous bytes. - * - */ - -static void -_packbits( void *In, - int element_size, /* in bytes */ - npy_intp in_N, - npy_intp in_stride, - void *Out, - npy_intp out_N, - npy_intp out_stride -) -{ - char build; - int i, index; - npy_intp out_Nm1; - int maxi, remain, nonzero, j; - char *outptr,*inptr; - NPY_BEGIN_THREADS_DEF; - - NPY_BEGIN_THREADS_THRESHOLDED(out_N); - - outptr = Out; /* pointer to output buffer */ - inptr = In; /* pointer to input buffer */ - - /* - * Loop through the elements of In - * Determine whether or not it is nonzero. - * Yes: set corresponding bit (and adjust build value) - * No: move on - * Every 8th value, set the value of build and increment the outptr - */ - - remain = in_N % 8; /* uneven bits */ - if (remain == 0) { - remain = 8; - } - out_Nm1 = out_N - 1; - for (index = 0; index < out_N; index++) { - build = 0; - maxi = (index != out_Nm1 ?
8 : remain); - for (i = 0; i < maxi; i++) { - build <<= 1; - nonzero = 0; - for (j = 0; j < element_size; j++) { - nonzero += (*(inptr++) != 0); - } - inptr += (in_stride - element_size); - build += (nonzero != 0); - } - if (index == out_Nm1) build <<= (8-remain); - /* printf("Here: %d %d %d %d\n",build,slice,index,maxi); */ - *outptr = build; - outptr += out_stride; - } - - NPY_END_THREADS; - return; -} - - -static void -_unpackbits(void *In, - int NPY_UNUSED(el_size), /* unused */ - npy_intp in_N, - npy_intp in_stride, - void *Out, - npy_intp NPY_UNUSED(out_N), - npy_intp out_stride - ) -{ - unsigned char mask; - int i, index; - char *inptr, *outptr; - NPY_BEGIN_THREADS_DEF; - - NPY_BEGIN_THREADS_THRESHOLDED(in_N); - - outptr = Out; - inptr = In; - for (index = 0; index < in_N; index++) { - mask = 128; - for (i = 0; i < 8; i++) { - *outptr = ((mask & (unsigned char)(*inptr)) != 0); - outptr += out_stride; - mask >>= 1; - } - inptr += in_stride; - } - - NPY_END_THREADS; - return; -} - -/* Fixme -- pack and unpack should be separate routines */ -static PyObject * -pack_or_unpack_bits(PyObject *input, int axis, int unpack) -{ - PyArrayObject *inp; - PyArrayObject *new = NULL; - PyArrayObject *out = NULL; - npy_intp outdims[NPY_MAXDIMS]; - int i; - void (*thefunc)(void *, int, npy_intp, npy_intp, void *, npy_intp, npy_intp); - PyArrayIterObject *it, *ot; - - inp = (PyArrayObject *)PyArray_FROM_O(input); - - if (inp == NULL) { - return NULL; - } - if (unpack) { - if (PyArray_TYPE(inp) != NPY_UBYTE) { - PyErr_SetString(PyExc_TypeError, - "Expected an input array of unsigned byte data type"); - goto fail; - } - } - else if (!PyArray_ISINTEGER(inp)) { - PyErr_SetString(PyExc_TypeError, - "Expected an input array of integer data type"); - goto fail; - } - - new = (PyArrayObject *)PyArray_CheckAxis(inp, &axis, 0); - Py_DECREF(inp); - if (new == NULL) { - return NULL; - } - /* Handle zero-dim array separately */ - if (PyArray_SIZE(new) == 0) { - return PyArray_Copy(new); - } - - if (PyArray_NDIM(new) == 0) { - if (unpack) { - /* Handle 0-d array by converting it to a 1-d array */ - PyArrayObject *temp; - PyArray_Dims newdim = {NULL, 1}; - npy_intp shape = 1; - - newdim.ptr = &shape; - temp = (PyArrayObject *)PyArray_Newshape(new, &newdim, NPY_CORDER); - if (temp == NULL) { - goto fail; - } - Py_DECREF(new); - new = temp; - } - else { - char *optr, *iptr; - out = (PyArrayObject *)PyArray_New(Py_TYPE(new), 0, NULL, NPY_UBYTE, - NULL, NULL, 0, 0, NULL); - if (out == NULL) { - goto fail; - } - optr = PyArray_DATA(out); - iptr = PyArray_DATA(new); - *optr = 0; - for (i = 0; i<PyArray_ITEMSIZE(new); i++) { - if (*iptr != 0) { - *optr = 1; - break; - } - iptr++; - } - goto finish; - } - } - - - /* Setup output shape */ - for (i=0; i<PyArray_NDIM(new); i++) { - outdims[i] = PyArray_DIM(new, i); - } - - if (unpack) { - /* Multiply axis dimension by 8 */ - outdims[axis] <<= 3; - thefunc = _unpackbits; - } - else { - /* - * Divide axis dimension by 8 - * 8 -> 1, 9 -> 2, 16 -> 2, 17 -> 3 etc.. 
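 * (Editorial illustration, not part of this patch: from Python,
 * np.packbits([1, 0, 1, 1, 0, 0, 1, 0, 1]) returns
 * array([178, 128], dtype=uint8) -- the first eight bits pack to
 * 0b10110010 == 178 and the lone ninth bit is left-aligned in a final
 * byte, 0b10000000 == 128, so nine bits need
 * ((9 - 1) >> 3) + 1 == 2 output bytes, per the formula below.)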
- */ - outdims[axis] = ((outdims[axis] - 1) >> 3) + 1; - thefunc = _packbits; - } - - /* Create output array */ - out = (PyArrayObject *)PyArray_New(Py_TYPE(new), - PyArray_NDIM(new), outdims, NPY_UBYTE, - NULL, NULL, 0, PyArray_ISFORTRAN(new), NULL); - if (out == NULL) { - goto fail; - } - /* Setup iterators to iterate over all but given axis */ - it = (PyArrayIterObject *)PyArray_IterAllButAxis((PyObject *)new, &axis); - ot = (PyArrayIterObject *)PyArray_IterAllButAxis((PyObject *)out, &axis); - if (it == NULL || ot == NULL) { - Py_XDECREF(it); - Py_XDECREF(ot); - goto fail; - } - - while(PyArray_ITER_NOTDONE(it)) { - thefunc(PyArray_ITER_DATA(it), PyArray_ITEMSIZE(new), - PyArray_DIM(new, axis), PyArray_STRIDE(new, axis), - PyArray_ITER_DATA(ot), PyArray_DIM(out, axis), - PyArray_STRIDE(out, axis)); - PyArray_ITER_NEXT(it); - PyArray_ITER_NEXT(ot); - } - Py_DECREF(it); - Py_DECREF(ot); - -finish: - Py_DECREF(new); - return (PyObject *)out; - -fail: - Py_XDECREF(new); - Py_XDECREF(out); - return NULL; -} - - -static PyObject * -io_pack(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) -{ - PyObject *obj; - int axis = NPY_MAXDIMS; - static char *kwlist[] = {"in", "axis", NULL}; - - if (!PyArg_ParseTupleAndKeywords( args, kwds, "O|O&" , kwlist, - &obj, PyArray_AxisConverter, &axis)) { - return NULL; - } - return pack_or_unpack_bits(obj, axis, 0); -} - -static PyObject * -io_unpack(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) -{ - PyObject *obj; - int axis = NPY_MAXDIMS; - static char *kwlist[] = {"in", "axis", NULL}; - - if (!PyArg_ParseTupleAndKeywords( args, kwds, "O|O&" , kwlist, - &obj, PyArray_AxisConverter, &axis)) { - return NULL; - } - return pack_or_unpack_bits(obj, axis, 1); -} - -/* The docstrings for many of these methods are in add_newdocs.py. 
*/ -static struct PyMethodDef methods[] = { - {"_insert", (PyCFunction)arr_insert, - METH_VARARGS | METH_KEYWORDS, arr_insert__doc__}, - {"bincount", (PyCFunction)arr_bincount, - METH_VARARGS | METH_KEYWORDS, NULL}, - {"digitize", (PyCFunction)arr_digitize, - METH_VARARGS | METH_KEYWORDS, NULL}, - {"interp", (PyCFunction)arr_interp, - METH_VARARGS | METH_KEYWORDS, NULL}, - {"ravel_multi_index", (PyCFunction)arr_ravel_multi_index, - METH_VARARGS | METH_KEYWORDS, NULL}, - {"unravel_index", (PyCFunction)arr_unravel_index, - METH_VARARGS | METH_KEYWORDS, NULL}, - {"add_docstring", (PyCFunction)arr_add_docstring, - METH_VARARGS, NULL}, - {"add_newdoc_ufunc", (PyCFunction)add_newdoc_ufunc, - METH_VARARGS, NULL}, - {"packbits", (PyCFunction)io_pack, - METH_VARARGS | METH_KEYWORDS, NULL}, - {"unpackbits", (PyCFunction)io_unpack, - METH_VARARGS | METH_KEYWORDS, NULL}, - {NULL, NULL, 0, NULL} /* sentinel */ -}; - -static void -define_types(void) -{ - PyObject *tp_dict; - PyObject *myobj; - - tp_dict = PyArrayDescr_Type.tp_dict; - /* Get "subdescr" */ - myobj = PyDict_GetItemString(tp_dict, "fields"); - if (myobj == NULL) { - return; - } - PyGetSetDescr_TypePtr = Py_TYPE(myobj); - myobj = PyDict_GetItemString(tp_dict, "alignment"); - if (myobj == NULL) { - return; - } - PyMemberDescr_TypePtr = Py_TYPE(myobj); - myobj = PyDict_GetItemString(tp_dict, "newbyteorder"); - if (myobj == NULL) { - return; - } - PyMethodDescr_TypePtr = Py_TYPE(myobj); - return; -} - -#if defined(NPY_PY3K) -static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - "_compiled_base", - NULL, - -1, - methods, - NULL, - NULL, - NULL, - NULL -}; -#endif - -#if defined(NPY_PY3K) -#define RETVAL m -PyMODINIT_FUNC PyInit__compiled_base(void) -#else -#define RETVAL -PyMODINIT_FUNC -init_compiled_base(void) -#endif -{ - PyObject *m, *d; - -#if defined(NPY_PY3K) - m = PyModule_Create(&moduledef); -#else - m = Py_InitModule("_compiled_base", methods); -#endif - if (!m) { - return RETVAL; - } - - /* Import the array objects */ - import_array(); - import_umath(); - - /* Add some symbolic constants to the module */ - d = PyModule_GetDict(m); - - /* - * PyExc_Exception should catch all the standard errors that are - * now raised instead of the string exception "numpy.lib.error". - * This is for backward compatibility with existing code. - */ - PyDict_SetItemString(d, "error", PyExc_Exception); - - - /* define PyGetSetDescr_Type and PyMemberDescr_Type */ - define_types(); - - return RETVAL; -} diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py index b81307a65..f390cf49b 100644 --- a/numpy/lib/stride_tricks.py +++ b/numpy/lib/stride_tricks.py @@ -9,7 +9,8 @@ from __future__ import division, absolute_import, print_function import numpy as np -__all__ = ['broadcast_arrays'] +__all__ = ['broadcast_to', 'broadcast_arrays'] + class DummyArray(object): """Dummy object that just exists to hang __array_interface__ dictionaries @@ -20,8 +21,75 @@ class DummyArray(object): self.__array_interface__ = interface self.base = base -def as_strided(x, shape=None, strides=None, subok=False): - """ Make an ndarray from the given array with the given shape and strides. + +def _maybe_view_as_subclass(original_array, new_array): + if type(original_array) is not type(new_array): + # if input was an ndarray subclass and subclasses were OK, + # then view the result as that subclass. 
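    # (Editorial note: ndarray.view(type=...) re-wraps the same buffer as
    # the subclass without copying; the __array_finalize__ hook invoked
    # below then lets the subclass propagate its extra attributes, just as
    # an ordinary slicing view would.)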
+ new_array = new_array.view(type=type(original_array)) + # Since we have done something akin to a view from original_array, we + # should let the subclass finalize (if it has it implemented, i.e., is + # not None). + if new_array.__array_finalize__: + new_array.__array_finalize__(original_array) + return new_array + + +def as_strided(x, shape=None, strides=None, subok=False, writeable=True): + """ + Create a view into the array with the given shape and strides. + + .. warning:: This function has to be used with extreme care, see notes. + + Parameters + ---------- + x : ndarray + Array to create a new view into. + shape : sequence of int, optional + The shape of the new array. Defaults to ``x.shape``. + strides : sequence of int, optional + The strides of the new array. Defaults to ``x.strides``. + subok : bool, optional + .. versionadded:: 1.10 + + If True, subclasses are preserved. + writeable : bool, optional + .. versionadded:: 1.12 + + If set to False, the returned array will always be readonly. + Otherwise it will be writable if the original array was. It + is advisable to set this to False if possible (see Notes). + + Returns + ------- + view : ndarray + + See also + -------- + broadcast_to : broadcast an array to a given shape. + reshape : reshape an array. + + Notes + ----- + ``as_strided`` creates a view into the array given the exact strides + and shape. This means it manipulates the internal data structure of + ndarray and, if done incorrectly, the array elements can point to + invalid memory and can corrupt results or crash your program. + It is advisable to always use the original ``x.strides`` when + calculating new strides to avoid reliance on a contiguous memory + layout. + + Furthermore, arrays created with this function often contain + self-overlapping memory, so that two elements can refer to the same + memory location. + Vectorized write operations on such arrays will typically be + unpredictable. They may even give different results for small, large, + or transposed arrays. + Since writing to these arrays has to be tested and done with great + care, you may want to use ``writeable=False`` to avoid accidental write + operations. + + For these reasons it is advisable to avoid ``as_strided`` when + possible. """ # first convert input to array, possibly keeping subclass x = np.array(x, copy=False, subok=subok) @@ -30,19 +98,100 @@ def as_strided(x, shape=None, strides=None, subok=False): interface['shape'] = tuple(shape) if strides is not None: interface['strides'] = tuple(strides) + array = np.asarray(DummyArray(interface, base=x)) - # Make sure dtype is correct in case of custom dtype - if array.dtype.kind == 'V': + + if array.dtype.fields is None and x.dtype.fields is not None: + # This should only happen if x.dtype is [('', 'Vx')] array.dtype = x.dtype - if type(x) is not type(array): - # if input was an ndarray subclass and subclasses were OK, - # then view the result as that subclass. - array = array.view(type=type(x)) - # Since we have done something akin to a view from x, we should let - # the subclass finalize (if it has it implemented, i.e., is not None).
- if array.__array_finalize__: - array.__array_finalize__(x) - return array + + view = _maybe_view_as_subclass(x, array) + + if view.flags.writeable and not writeable: + view.flags.writeable = False + + return view + + +def _broadcast_to(array, shape, subok, readonly): + shape = tuple(shape) if np.iterable(shape) else (shape,) + array = np.array(array, copy=False, subok=subok) + if not shape and array.shape: + raise ValueError('cannot broadcast a non-scalar to a scalar array') + if any(size < 0 for size in shape): + raise ValueError('all elements of broadcast shape must be non-' + 'negative') + needs_writeable = not readonly and array.flags.writeable + extras = ['reduce_ok'] if needs_writeable else [] + op_flag = 'readwrite' if needs_writeable else 'readonly' + broadcast = np.nditer( + (array,), flags=['multi_index', 'refs_ok', 'zerosize_ok'] + extras, + op_flags=[op_flag], itershape=shape, order='C').itviews[0] + result = _maybe_view_as_subclass(array, broadcast) + if needs_writeable and not result.flags.writeable: + result.flags.writeable = True + return result + + +def broadcast_to(array, shape, subok=False): + """Broadcast an array to a new shape. + + Parameters + ---------- + array : array_like + The array to broadcast. + shape : tuple + The shape of the desired array. + subok : bool, optional + If True, then sub-classes will be passed-through, otherwise + the returned array will be forced to be a base-class array (default). + + Returns + ------- + broadcast : array + A readonly view on the original array with the given shape. It is + typically not contiguous. Furthermore, more than one element of a + broadcasted array may refer to a single memory location. + + Raises + ------ + ValueError + If the array is not compatible with the new shape according to NumPy's + broadcasting rules. + + Notes + ----- + .. versionadded:: 1.10.0 + + Examples + -------- + >>> x = np.array([1, 2, 3]) + >>> np.broadcast_to(x, (3, 3)) + array([[1, 2, 3], + [1, 2, 3], + [1, 2, 3]]) + """ + return _broadcast_to(array, shape, subok=subok, readonly=True) + + +def _broadcast_shape(*args): + """Returns the shape of the arrays that would result from broadcasting the + supplied arrays against each other. + """ + if not args: + raise ValueError('must provide at least one argument') + # use the old-iterator because np.nditer does not handle size 0 arrays + # consistently + b = np.broadcast(*args[:32]) + # unfortunately, it cannot handle 32 or more arguments directly + for pos in range(32, len(args), 31): + # ironically, np.broadcast does not properly handle np.broadcast + # objects (it treats them as scalars) + # use broadcasting to avoid allocating the full array + b = broadcast_to(0, b.shape) + b = np.broadcast(b, *args[pos:(pos + 31)]) + return b.shape + def broadcast_arrays(*args, **kwargs): """ @@ -87,55 +236,24 @@ def broadcast_arrays(*args, **kwargs): [3, 3, 3]])] """ + # nditer is not used here to avoid the limit of 32 arrays. 
+ # Otherwise, something like the following one-liner would suffice: + # return np.nditer(args, flags=['multi_index', 'zerosize_ok'], + # order='C').itviews + subok = kwargs.pop('subok', False) if kwargs: raise TypeError('broadcast_arrays() got an unexpected keyword ' - 'argument {}'.format(kwargs.pop())) + 'argument {!r}'.format(list(kwargs.keys())[0])) args = [np.array(_m, copy=False, subok=subok) for _m in args] - shapes = [x.shape for x in args] - if len(set(shapes)) == 1: + + shape = _broadcast_shape(*args) + + if all(array.shape == shape for array in args): # Common case where nothing needs to be broadcasted. return args - shapes = [list(s) for s in shapes] - strides = [list(x.strides) for x in args] - nds = [len(s) for s in shapes] - biggest = max(nds) - # Go through each array and prepend dimensions of length 1 to each of - # the shapes in order to make the number of dimensions equal. - for i in range(len(args)): - diff = biggest - nds[i] - if diff > 0: - shapes[i] = [1] * diff + shapes[i] - strides[i] = [0] * diff + strides[i] - # Check each dimension for compatibility. A dimension length of 1 is - # accepted as compatible with any other length. - common_shape = [] - for axis in range(biggest): - lengths = [s[axis] for s in shapes] - unique = set(lengths + [1]) - if len(unique) > 2: - # There must be at least two non-1 lengths for this axis. - raise ValueError("shape mismatch: two or more arrays have " - "incompatible dimensions on axis %r." % (axis,)) - elif len(unique) == 2: - # There is exactly one non-1 length. The common shape will take - # this value. - unique.remove(1) - new_length = unique.pop() - common_shape.append(new_length) - # For each array, if this axis is being broadcasted from a - # length of 1, then set its stride to 0 so that it repeats its - # data. - for i in range(len(args)): - if shapes[i][axis] == 1: - shapes[i][axis] = new_length - strides[i][axis] = 0 - else: - # Every array has a length of 1 on this axis. Strides can be - # left alone as nothing is broadcasted. - common_shape.append(1) - - # Construct the new arrays. - broadcasted = [as_strided(x, shape=sh, strides=st, subok=subok) - for (x, sh, st) in zip(args, shapes, strides)] - return broadcasted + + # TODO: consider making the results of broadcast_arrays readonly to match + # broadcast_to. This will require a deprecation cycle.
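    # (Editorial sketch, not part of this patch: the chunked loop in
    # _broadcast_shape can be reproduced with only public APIs, e.g.
    #
    #     args = [np.zeros((1, 3))] * 40 + [np.zeros((7, 1))]
    #     b = np.broadcast(*args[:32])        # np.broadcast caps at 32
    #     for pos in range(32, len(args), 31):
    #         b = np.broadcast(np.broadcast_to(0, b.shape),
    #                          *args[pos:pos + 31])
    #     b.shape                             # -> (7, 3)
    #
    # where broadcast_to(0, b.shape) stands in for the partial result
    # without allocating it.)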
+ return [_broadcast_to(array, shape, subok=subok, readonly=False) + for array in args] diff --git a/numpy/lib/tests/data/py2-objarr.npy b/numpy/lib/tests/data/py2-objarr.npy Binary files differnew file mode 100644 index 000000000..12936c92d --- /dev/null +++ b/numpy/lib/tests/data/py2-objarr.npy diff --git a/numpy/lib/tests/data/py2-objarr.npz b/numpy/lib/tests/data/py2-objarr.npz Binary files differnew file mode 100644 index 000000000..68a3b53a1 --- /dev/null +++ b/numpy/lib/tests/data/py2-objarr.npz diff --git a/numpy/lib/tests/data/py3-objarr.npy b/numpy/lib/tests/data/py3-objarr.npy Binary files differnew file mode 100644 index 000000000..6776074b4 --- /dev/null +++ b/numpy/lib/tests/data/py3-objarr.npy diff --git a/numpy/lib/tests/data/py3-objarr.npz b/numpy/lib/tests/data/py3-objarr.npz Binary files differnew file mode 100644 index 000000000..05eac0b76 --- /dev/null +++ b/numpy/lib/tests/data/py3-objarr.npz diff --git a/numpy/lib/tests/test__datasource.py b/numpy/lib/tests/test__datasource.py index 090f71f67..f4bece352 100644 --- a/numpy/lib/tests/test__datasource.py +++ b/numpy/lib/tests/test__datasource.py @@ -7,7 +7,7 @@ from shutil import rmtree from numpy.compat import asbytes from numpy.testing import ( - run_module_suite, TestCase, assert_ + run_module_suite, TestCase, assert_, SkipTest ) import numpy.lib._datasource as datasource @@ -137,8 +137,7 @@ class TestDataSourceOpen(TestCase): import gzip except ImportError: # We don't have the gzip capabilities to test. - import nose - raise nose.SkipTest + raise SkipTest # Test datasource's internal file_opener for Gzip files. filepath = os.path.join(self.tmpdir, 'foobar.txt.gz') fp = gzip.open(filepath, 'w') @@ -154,8 +153,7 @@ class TestDataSourceOpen(TestCase): import bz2 except ImportError: # We don't have the bz2 capabilities to test. - import nose - raise nose.SkipTest + raise SkipTest # Test datasource's internal file_opener for BZip2 files. 
filepath = os.path.join(self.tmpdir, 'foobar.txt.bz2') fp = bz2.BZ2File(filepath, 'w') diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py index 4db19382a..e0a917a21 100644 --- a/numpy/lib/tests/test__iotools.py +++ b/numpy/lib/tests/test__iotools.py @@ -7,7 +7,8 @@ from datetime import date import numpy as np from numpy.compat import asbytes, asbytes_nested from numpy.testing import ( - run_module_suite, TestCase, assert_, assert_equal + run_module_suite, TestCase, assert_, assert_equal, assert_allclose, + assert_raises ) from numpy.lib._iotools import ( LineSplitter, NameValidator, StringConverter, @@ -76,7 +77,7 @@ class TestLineSplitter(TestCase): test = LineSplitter((6, 6, 9))(strg) assert_equal(test, asbytes_nested(['1', '3 4', '5 6'])) -#------------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- class TestNameValidator(TestCase): @@ -93,6 +94,9 @@ class TestNameValidator(TestCase): test = NameValidator(case_sensitive='lower').validate(names) assert_equal(test, ['a', 'a_1', 'b', 'c']) + # check exceptions + assert_raises(ValueError, NameValidator, case_sensitive='foobar') + def test_excludelist(self): "Test excludelist" names = ['dates', 'data', 'Other Data', 'mask'] @@ -127,7 +131,7 @@ class TestNameValidator(TestCase): assert_(validator(namelist) is None) assert_equal(validator(namelist, nbfields=3), ['f0', 'f1', 'f2']) -#------------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- def _bytes_to_date(s): @@ -148,15 +152,33 @@ class TestStringConverter(TestCase): def test_upgrade(self): "Tests the upgrade method." + converter = StringConverter() assert_equal(converter._status, 0) - converter.upgrade(asbytes('0')) + + # test int + assert_equal(converter.upgrade(asbytes('0')), 0) assert_equal(converter._status, 1) - converter.upgrade(asbytes('0.')) - assert_equal(converter._status, 2) - converter.upgrade(asbytes('0j')) - assert_equal(converter._status, 3) - converter.upgrade(asbytes('a')) + + # On systems where integer defaults to 32-bit, the statuses will be + # offset by one, so we check for this here. 
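        # (Editorial note: with the extended _mapper the 64-bit status
        # order is bool -> int -> float -> complex -> longdouble -> bytes,
        # i.e. statuses 0..5; on 32-bit platforms an extra int64 entry
        # follows int, shifting every later status up by one, which is
        # what status_offset below accounts for.)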
+ import numpy.core.numeric as nx + status_offset = int(nx.dtype(nx.integer).itemsize < nx.dtype(nx.int64).itemsize) + + # test int > 2**32 + assert_equal(converter.upgrade(asbytes('17179869184')), 17179869184) + assert_equal(converter._status, 1 + status_offset) + + # test float + assert_allclose(converter.upgrade(asbytes('0.')), 0.0) + assert_equal(converter._status, 2 + status_offset) + + # test complex + assert_equal(converter.upgrade(asbytes('0j')), complex('0j')) + assert_equal(converter._status, 3 + status_offset) + + # test str + assert_equal(converter.upgrade(asbytes('a')), asbytes('a')) assert_equal(converter._status, len(converter._mapper) - 1) def test_missing(self): diff --git a/numpy/lib/tests/test__version.py b/numpy/lib/tests/test__version.py index bbafe68eb..993c9d507 100644 --- a/numpy/lib/tests/test__version.py +++ b/numpy/lib/tests/test__version.py @@ -48,6 +48,19 @@ def test_dev_a_b_rc_mixed(): assert_(NumpyVersion('1.9.0a2.dev-6acvda54') < '1.9.0a2') +def test_dev0_version(): + assert_(NumpyVersion('1.9.0.dev0+Unknown') < '1.9.0') + for ver in ['1.9.0', '1.9.0a1', '1.9.0b2', '1.9.0b2.dev0+ffffffff']: + assert_(NumpyVersion('1.9.0.dev0+f16acvda') < ver) + + assert_(NumpyVersion('1.9.0.dev0+f16acvda') == '1.9.0.dev0+11111111') + + +def test_dev0_a_b_rc_mixed(): + assert_(NumpyVersion('1.9.0a2.dev0+f16acvda') == '1.9.0a2.dev0+11111111') + assert_(NumpyVersion('1.9.0a2.dev0+6acvda54') < '1.9.0a2') + + def test_raises(): for ver in ['1.9', '1,9.0', '1.7.x']: assert_raises(ValueError, NumpyVersion, ver) diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py index f8ba8643a..d037962e6 100644 --- a/numpy/lib/tests/test_arraypad.py +++ b/numpy/lib/tests/test_arraypad.py @@ -1,14 +1,57 @@ -"""Tests for the pad functions. +"""Tests for the array padding functions. 
""" from __future__ import division, absolute_import, print_function -from numpy.testing import TestCase, run_module_suite, assert_array_equal -from numpy.testing import assert_raises, assert_array_almost_equal import numpy as np +from numpy.testing import (assert_array_equal, assert_raises, assert_allclose, + TestCase) from numpy.lib import pad +class TestConditionalShortcuts(TestCase): + def test_zero_padding_shortcuts(self): + test = np.arange(120).reshape(4, 5, 6) + pad_amt = [(0, 0) for axis in test.shape] + modes = ['constant', + 'edge', + 'linear_ramp', + 'maximum', + 'mean', + 'median', + 'minimum', + 'reflect', + 'symmetric', + 'wrap', + ] + for mode in modes: + assert_array_equal(test, pad(test, pad_amt, mode=mode)) + + def test_shallow_statistic_range(self): + test = np.arange(120).reshape(4, 5, 6) + pad_amt = [(1, 1) for axis in test.shape] + modes = ['maximum', + 'mean', + 'median', + 'minimum', + ] + for mode in modes: + assert_array_equal(pad(test, pad_amt, mode='edge'), + pad(test, pad_amt, mode=mode, stat_length=1)) + + def test_clip_statistic_range(self): + test = np.arange(30).reshape(5, 6) + pad_amt = [(3, 3) for axis in test.shape] + modes = ['maximum', + 'mean', + 'median', + 'minimum', + ] + for mode in modes: + assert_array_equal(pad(test, pad_amt, mode=mode), + pad(test, pad_amt, mode=mode, stat_length=30)) + + class TestStatistic(TestCase): def test_check_mean_stat_length(self): a = np.arange(100).astype('f') @@ -82,6 +125,30 @@ class TestStatistic(TestCase): ) assert_array_equal(a, b) + def test_check_maximum_stat_length(self): + a = np.arange(100) + 1 + a = pad(a, (25, 20), 'maximum', stat_length=10) + b = np.array( + [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, + + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, + + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 100, 100, 100, 100] + ) + assert_array_equal(a, b) + def test_check_minimum_1(self): a = np.arange(100) a = pad(a, (25, 20), 'minimum') @@ -130,6 +197,30 @@ class TestStatistic(TestCase): ) assert_array_equal(a, b) + def test_check_minimum_stat_length(self): + a = np.arange(100) + 1 + a = pad(a, (25, 20), 'minimum', stat_length=10) + b = np.array( + [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, + + 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, + 91, 91, 91, 91, 91, 91, 91, 91, 91, 91] + ) + assert_array_equal(a, b) + def test_check_median(self): a = np.arange(100).astype('f') a = pad(a, (25, 20), 'median') @@ -182,6 +273,32 @@ class TestStatistic(TestCase): ) assert_array_equal(a, b) + def test_check_median_stat_length(self): + a = np.arange(100).astype('f') + a[1] = 2. + a[97] = 96. 
+ a = pad(a, (25, 20), 'median', stat_length=(3, 5)) + b = np.array( + [ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., + 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., + 2., 2., 2., 2., 2., + + 0., 2., 2., 3., 4., 5., 6., 7., 8., 9., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., + 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., + 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., + 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., + 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., + 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., + 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., + 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., + 90., 91., 92., 93., 94., 95., 96., 96., 98., 99., + + 96., 96., 96., 96., 96., 96., 96., 96., 96., 96., + 96., 96., 96., 96., 96., 96., 96., 96., 96., 96.] + ) + assert_array_equal(a, b) + def test_check_mean_shape_one(self): a = [[4, 5, 6]] a = pad(a, (5, 7), 'mean', stat_length=2) @@ -254,6 +371,125 @@ class TestConstant(TestCase): ) assert_array_equal(a, b) + def test_check_constant_zeros(self): + a = np.arange(100) + a = pad(a, (25, 20), 'constant') + b = np.array( + [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ) + assert_array_equal(a, b) + + def test_check_constant_float(self): + # If input array is int, but constant_values are float, the dtype of + # the array to be padded is kept + arr = np.arange(30).reshape(5, 6) + test = pad(arr, (1, 2), mode='constant', + constant_values=1.1) + expected = np.array( + [[ 1, 1, 1, 1, 1, 1, 1, 1, 1], + + [ 1, 0, 1, 2, 3, 4, 5, 1, 1], + [ 1, 6, 7, 8, 9, 10, 11, 1, 1], + [ 1, 12, 13, 14, 15, 16, 17, 1, 1], + [ 1, 18, 19, 20, 21, 22, 23, 1, 1], + [ 1, 24, 25, 26, 27, 28, 29, 1, 1], + + [ 1, 1, 1, 1, 1, 1, 1, 1, 1], + [ 1, 1, 1, 1, 1, 1, 1, 1, 1]] + ) + assert_allclose(test, expected) + + def test_check_constant_float2(self): + # If input array is float, and constant_values are float, the dtype of + # the array to be padded is kept - here retaining the float constants + arr = np.arange(30).reshape(5, 6) + arr_float = arr.astype(np.float64) + test = pad(arr_float, ((1, 2), (1, 2)), mode='constant', + constant_values=1.1) + expected = np.array( + [[ 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1], + + [ 1.1, 0. , 1. , 2. , 3. , 4. , 5. , 1.1, 1.1], + [ 1.1, 6. , 7. , 8. , 9. , 10. , 11. , 1.1, 1.1], + [ 1.1, 12. , 13. , 14. , 15. , 16. , 17. , 1.1, 1.1], + [ 1.1, 18. , 19. , 20. , 21. , 22. , 23. , 1.1, 1.1], + [ 1.1, 24. , 25. , 26. , 27. , 28. , 29. 
, 1.1, 1.1], + + [ 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1], + [ 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1]] + ) + assert_allclose(test, expected) + + def test_check_constant_float3(self): + a = np.arange(100, dtype=float) + a = pad(a, (25, 20), 'constant', constant_values=(-1.1, -1.2)) + b = np.array( + [-1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, + -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, + -1.1, -1.1, -1.1, -1.1, -1.1, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, + -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2, -1.2] + ) + assert_allclose(a, b) + + def test_check_constant_odd_pad_amount(self): + arr = np.arange(30).reshape(5, 6) + test = pad(arr, ((1,), (2,)), mode='constant', + constant_values=3) + expected = np.array( + [[ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], + + [ 3, 3, 0, 1, 2, 3, 4, 5, 3, 3], + [ 3, 3, 6, 7, 8, 9, 10, 11, 3, 3], + [ 3, 3, 12, 13, 14, 15, 16, 17, 3, 3], + [ 3, 3, 18, 19, 20, 21, 22, 23, 3, 3], + [ 3, 3, 24, 25, 26, 27, 28, 29, 3, 3], + + [ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]] + ) + assert_allclose(test, expected) + + def test_check_constant_pad_2d(self): + arr = np.arange(4).reshape(2, 2) + test = np.lib.pad(arr, ((1, 2), (1, 3)), mode='constant', + constant_values=((1, 2), (3, 4))) + expected = np.array( + [[3, 1, 1, 4, 4, 4], + [3, 0, 1, 4, 4, 4], + [3, 2, 3, 4, 4, 4], + [3, 2, 2, 4, 4, 4], + [3, 2, 2, 4, 4, 4]] + ) + assert_allclose(test, expected) + class TestLinearRamp(TestCase): def test_check_simple(self): @@ -278,7 +514,21 @@ class TestLinearRamp(TestCase): 94.3, 89.6, 84.9, 80.2, 75.5, 70.8, 66.1, 61.4, 56.7, 52.0, 47.3, 42.6, 37.9, 33.2, 28.5, 23.8, 19.1, 14.4, 9.7, 5.] 
) - assert_array_almost_equal(a, b, decimal=5) + assert_allclose(a, b, rtol=1e-5, atol=1e-5) + + def test_check_2d(self): + arr = np.arange(20).reshape(4, 5).astype(np.float64) + test = pad(arr, (2, 2), mode='linear_ramp', end_values=(0, 0)) + expected = np.array( + [[0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0.5, 1., 1.5, 2., 1., 0.], + [0., 0., 0., 1., 2., 3., 4., 2., 0.], + [0., 2.5, 5., 6., 7., 8., 9., 4.5, 0.], + [0., 5., 10., 11., 12., 13., 14., 7., 0.], + [0., 7.5, 15., 16., 17., 18., 19., 9.5, 0.], + [0., 3.75, 7.5, 8., 8.5, 9., 9.5, 4.75, 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0.]]) + assert_allclose(test, expected) class TestReflect(TestCase): @@ -306,6 +556,30 @@ class TestReflect(TestCase): ) assert_array_equal(a, b) + def test_check_odd_method(self): + a = np.arange(100) + a = pad(a, (25, 20), 'reflect', reflect_type='odd') + b = np.array( + [-25, -24, -23, -22, -21, -20, -19, -18, -17, -16, + -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, + -5, -4, -3, -2, -1, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119] + ) + assert_array_equal(a, b) + def test_check_large_pad(self): a = [[4, 5, 6], [6, 7, 8]] a = pad(a, (5, 7), 'reflect') @@ -367,6 +641,140 @@ class TestReflect(TestCase): assert_array_equal(a, b) +class TestSymmetric(TestCase): + def test_check_simple(self): + a = np.arange(100) + a = pad(a, (25, 20), 'symmetric') + b = np.array( + [24, 23, 22, 21, 20, 19, 18, 17, 16, 15, + 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, + 4, 3, 2, 1, 0, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, + 89, 88, 87, 86, 85, 84, 83, 82, 81, 80] + ) + assert_array_equal(a, b) + + def test_check_odd_method(self): + a = np.arange(100) + a = pad(a, (25, 20), 'symmetric', reflect_type='odd') + b = np.array( + [-24, -23, -22, -21, -20, -19, -18, -17, -16, -15, + -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, + -4, -3, -2, -1, 0, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + + 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, + 109, 110, 111, 112, 113, 114, 115, 116, 117, 118] + ) + assert_array_equal(a, b) + + def test_check_large_pad(self): + a = [[4, 5, 6], [6, 7, 8]] + a = pad(a, (5, 7), 'symmetric') + b = np.array( + [[5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [7, 8, 8, 7, 
6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [7, 8, 8, 7, 6, 6, 7, 8, 8, 7, 6, 6, 7, 8, 8], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6]] + ) + + assert_array_equal(a, b) + + def test_check_large_pad_odd(self): + a = [[4, 5, 6], [6, 7, 8]] + a = pad(a, (5, 7), 'symmetric', reflect_type='odd') + b = np.array( + [[-3, -2, -2, -1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6], + [-3, -2, -2, -1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6], + [-1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8], + [-1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8], + [ 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10], + + [ 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10], + [ 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12], + + [ 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12], + [ 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14, 14], + [ 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14, 14], + [ 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 16], + [ 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 16], + [ 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 16, 17, 18, 18], + [ 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 16, 17, 18, 18]] + ) + assert_array_equal(a, b) + + def test_check_shape(self): + a = [[4, 5, 6]] + a = pad(a, (5, 7), 'symmetric') + b = np.array( + [[5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], + [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6]] + ) + assert_array_equal(a, b) + + def test_check_01(self): + a = pad([1, 2, 3], 2, 'symmetric') + b = np.array([2, 1, 1, 2, 3, 3, 2]) + assert_array_equal(a, b) + + def test_check_02(self): + a = pad([1, 2, 3], 3, 'symmetric') + b = np.array([3, 2, 1, 1, 2, 3, 3, 2, 1]) + assert_array_equal(a, b) + + def test_check_03(self): + a = pad([1, 2, 3], 6, 'symmetric') + b = np.array([1, 2, 3, 3, 2, 1, 1, 2, 3, 3, 2, 1, 1, 2, 3]) + assert_array_equal(a, b) + + class TestWrap(TestCase): def test_check_simple(self): a = np.arange(100) @@ -506,6 +914,24 @@ class TestEdge(TestCase): ) assert_array_equal(a, b) + def test_check_width_shape_1_2(self): + # Check a pad_width of the form ((1, 2),). + # Regression test for issue gh-7808. 
+ a = np.array([1, 2, 3]) + padded = pad(a, ((1, 2),), 'edge') + expected = np.array([1, 1, 2, 3, 3, 3]) + assert_array_equal(padded, expected) + + a = np.array([[1, 2, 3], [4, 5, 6]]) + padded = pad(a, ((1, 2),), 'edge') + expected = pad(a, ((1, 2), (1, 2)), 'edge') + assert_array_equal(padded, expected) + + a = np.arange(24).reshape(2, 3, 4) + padded = pad(a, ((1, 2),), 'edge') + expected = pad(a, ((1, 2), (1, 2), (1, 2)), 'edge') + assert_array_equal(padded, expected) + class TestZeroPadWidth(TestCase): def test_zero_pad_width(self): @@ -515,6 +941,60 @@ class TestZeroPadWidth(TestCase): assert_array_equal(arr, pad(arr, pad_width, mode='constant')) +class TestLegacyVectorFunction(TestCase): + def test_legacy_vector_functionality(self): + def _padwithtens(vector, pad_width, iaxis, kwargs): + vector[:pad_width[0]] = 10 + vector[-pad_width[1]:] = 10 + return vector + + a = np.arange(6).reshape(2, 3) + a = pad(a, 2, _padwithtens) + b = np.array( + [[10, 10, 10, 10, 10, 10, 10], + [10, 10, 10, 10, 10, 10, 10], + + [10, 10, 0, 1, 2, 10, 10], + [10, 10, 3, 4, 5, 10, 10], + + [10, 10, 10, 10, 10, 10, 10], + [10, 10, 10, 10, 10, 10, 10]] + ) + assert_array_equal(a, b) + + +class TestNdarrayPadWidth(TestCase): + def test_check_simple(self): + a = np.arange(12) + a = np.reshape(a, (4, 3)) + a = pad(a, np.array(((2, 3), (3, 2))), 'edge') + b = np.array( + [[0, 0, 0, 0, 1, 2, 2, 2], + [0, 0, 0, 0, 1, 2, 2, 2], + + [0, 0, 0, 0, 1, 2, 2, 2], + [3, 3, 3, 3, 4, 5, 5, 5], + [6, 6, 6, 6, 7, 8, 8, 8], + [9, 9, 9, 9, 10, 11, 11, 11], + + [9, 9, 9, 9, 10, 11, 11, 11], + [9, 9, 9, 9, 10, 11, 11, 11], + [9, 9, 9, 9, 10, 11, 11, 11]] + ) + assert_array_equal(a, b) + + +class TestUnicodeInput(TestCase): + def test_unicode_mode(self): + try: + constant_mode = unicode('constant') + except NameError: + constant_mode = 'constant' + a = np.pad([1], 2, mode=constant_mode) + b = np.array([0, 0, 1, 0, 0]) + assert_array_equal(a, b) + + class ValueError1(TestCase): def test_check_simple(self): arr = np.arange(30) @@ -539,22 +1019,71 @@ class ValueError1(TestCase): class ValueError2(TestCase): - def test_check_simple(self): + def test_check_negative_pad_amount(self): arr = np.arange(30) arr = np.reshape(arr, (6, 5)) kwargs = dict(mode='mean', stat_length=(3, )) - assert_raises(ValueError, pad, arr, ((2, 3, 4), (3, 2)), + assert_raises(ValueError, pad, arr, ((-2, 3), (3, 2)), **kwargs) class ValueError3(TestCase): - def test_check_simple(self): + def test_check_kwarg_not_allowed(self): + arr = np.arange(30).reshape(5, 6) + assert_raises(ValueError, pad, arr, 4, mode='mean', + reflect_type='odd') + + def test_mode_not_set(self): + arr = np.arange(30).reshape(5, 6) + assert_raises(TypeError, pad, arr, 4) + + def test_malformed_pad_amount(self): + arr = np.arange(30).reshape(5, 6) + assert_raises(ValueError, pad, arr, (4, 5, 6, 7), mode='constant') + + def test_malformed_pad_amount2(self): + arr = np.arange(30).reshape(5, 6) + assert_raises(ValueError, pad, arr, ((3, 4, 5), (0, 1, 2)), + mode='constant') + + def test_pad_too_many_axes(self): + arr = np.arange(30).reshape(5, 6) + + # Attempt to pad using a 3D array equivalent + bad_shape = (((3,), (4,), (5,)), ((0,), (1,), (2,))) + assert_raises(ValueError, pad, arr, bad_shape, + mode='constant') + + +class TypeError1(TestCase): + def test_float(self): + arr = np.arange(30) + assert_raises(TypeError, pad, arr, ((-2.1, 3), (3, 2))) + assert_raises(TypeError, pad, arr, np.array(((-2.1, 3), (3, 2)))) + + def test_str(self): + arr = np.arange(30) + assert_raises(TypeError, pad, 
arr, 'foo') + assert_raises(TypeError, pad, arr, np.array('foo')) + + def test_object(self): + class FooBar(object): + pass + arr = np.arange(30) + assert_raises(TypeError, pad, arr, FooBar()) + + def test_complex(self): + arr = np.arange(30) + assert_raises(TypeError, pad, arr, complex(1, -1)) + assert_raises(TypeError, pad, arr, np.array(complex(1, -1))) + + def test_check_wrong_pad_amount(self): arr = np.arange(30) arr = np.reshape(arr, (6, 5)) kwargs = dict(mode='mean', stat_length=(3, )) - assert_raises(ValueError, pad, arr, ((-2, 3), (3, 2)), + assert_raises(TypeError, pad, arr, ((2, 3, 4), (3, 2)), **kwargs) if __name__ == "__main__": - run_module_suite() + np.testing.run_module_suite() diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index 39196f4bc..75918fbee 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -5,7 +5,7 @@ from __future__ import division, absolute_import, print_function import numpy as np from numpy.testing import ( - run_module_suite, TestCase, assert_array_equal + run_module_suite, TestCase, assert_array_equal, assert_equal ) from numpy.lib.arraysetops import ( ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, in1d @@ -169,6 +169,14 @@ class TestSetOps(TestCase): assert_array_equal([-1, 0], ediff1d(zero_elem, to_begin=-1, to_end=0)) assert_array_equal([], ediff1d(one_elem)) assert_array_equal([1], ediff1d(two_elem)) + assert_array_equal([7,1,9], ediff1d(two_elem, to_begin=7, to_end=9)) + assert_array_equal([5,6,1,7,8], ediff1d(two_elem, to_begin=[5,6], to_end=[7,8])) + assert_array_equal([1,9], ediff1d(two_elem, to_end=9)) + assert_array_equal([1,7,8], ediff1d(two_elem, to_end=[7,8])) + assert_array_equal([7,1], ediff1d(two_elem, to_begin=7)) + assert_array_equal([5,6,1], ediff1d(two_elem, to_begin=[5,6])) + assert(isinstance(ediff1d(np.matrix(1)), np.matrix)) + assert(isinstance(ediff1d(np.matrix(1), to_begin=1), np.matrix)) def test_in1d(self): # we use two different sizes for the b array here to test the @@ -286,6 +294,8 @@ class TestSetOps(TestCase): assert_array_equal(c, ec) assert_array_equal([], setdiff1d([], [])) + a = np.array((), np.uint32) + assert_equal(setdiff1d(a, []).dtype, np.uint32) def test_setdiff1d_char_array(self): a = np.array(['a', 'b', 'c']) diff --git a/numpy/lib/tests/test_financial.py b/numpy/lib/tests/test_financial.py index a4b9cfe2e..cc8ba55e5 100644 --- a/numpy/lib/tests/test_financial.py +++ b/numpy/lib/tests/test_financial.py @@ -2,7 +2,8 @@ from __future__ import division, absolute_import, print_function import numpy as np from numpy.testing import ( - run_module_suite, TestCase, assert_, assert_almost_equal + run_module_suite, TestCase, assert_, assert_almost_equal, + assert_allclose, assert_equal ) @@ -13,35 +14,42 @@ class TestFinancial(TestCase): def test_irr(self): v = [-150000, 15000, 25000, 35000, 45000, 60000] - assert_almost_equal(np.irr(v), - 0.0524, 2) + assert_almost_equal(np.irr(v), 0.0524, 2) v = [-100, 0, 0, 74] - assert_almost_equal(np.irr(v), - -0.0955, 2) + assert_almost_equal(np.irr(v), -0.0955, 2) v = [-100, 39, 59, 55, 20] - assert_almost_equal(np.irr(v), - 0.28095, 2) + assert_almost_equal(np.irr(v), 0.28095, 2) v = [-100, 100, 0, -7] - assert_almost_equal(np.irr(v), - -0.0833, 2) + assert_almost_equal(np.irr(v), -0.0833, 2) v = [-100, 100, 0, 7] - assert_almost_equal(np.irr(v), - 0.06206, 2) + assert_almost_equal(np.irr(v), 0.06206, 2) v = [-5, 10.5, 1, -8, 1] - assert_almost_equal(np.irr(v), - 0.0886, 2) + 
assert_almost_equal(np.irr(v), 0.0886, 2) + + # Test that if there is no solution then np.irr returns nan + # Fixes gh-6744 + v = [-1, -2, -3] + assert_equal(np.irr(v), np.nan) def test_pv(self): - assert_almost_equal(np.pv(0.07, 20, 12000, 0), - -127128.17, 2) + assert_almost_equal(np.pv(0.07, 20, 12000, 0), -127128.17, 2) def test_fv(self): - assert_almost_equal(np.fv(0.075, 20, -2000, 0, 0), - 86609.36, 2) + assert_almost_equal(np.fv(0.075, 20, -2000, 0, 0), 86609.36, 2) def test_pmt(self): - assert_almost_equal(np.pmt(0.08/12, 5*12, 15000), - -304.146, 3) + res = np.pmt(0.08/12, 5*12, 15000) + tgt = -304.145914 + assert_allclose(res, tgt) + # Test the edge case where rate == 0.0 + res = np.pmt(0.0, 5*12, 15000) + tgt = -250.0 + assert_allclose(res, tgt) + # Test the case where we use broadcast and + # the arguments passed in are arrays. + res = np.pmt([[0.0, 0.8],[0.3, 0.8]],[12, 3],[2000, 20000]) + tgt = np.array([[-166.66667, -19311.258],[-626.90814, -19311.258]]) + assert_allclose(res, tgt) def test_ppmt(self): np.round(np.ppmt(0.1/12, 1, 60, 55000), 2) == 710.25 diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py index ee77386bc..892b32a9c 100644 --- a/numpy/lib/tests/test_format.py +++ b/numpy/lib/tests/test_format.py @@ -112,7 +112,7 @@ Test the header writing. >>> for arr in basic_arrays + record_arrays: ... f = BytesIO() ... format.write_array_header_1_0(f, arr) # XXX: arr is not a dict, items gets called on it - ... print repr(f.getvalue()) + ... print(repr(f.getvalue())) ... "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n" "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n" @@ -284,10 +284,10 @@ import warnings from io import BytesIO import numpy as np -from numpy.compat import asbytes, asbytes_nested +from numpy.compat import asbytes, asbytes_nested, sixu from numpy.testing import ( run_module_suite, assert_, assert_array_equal, assert_raises, raises, - dec + dec, SkipTest ) from numpy.lib import format @@ -534,6 +534,87 @@ def test_python2_python3_interoperability(): assert_array_equal(data, np.ones(2)) +def test_pickle_python2_python3(): + # Test that loading object arrays saved on Python 2 works both on + # Python 2 and Python 3 and vice versa + data_dir = os.path.join(os.path.dirname(__file__), 'data') + + if sys.version_info[0] >= 3: + xrange = range + else: + import __builtin__ + xrange = __builtin__.xrange + + expected = np.array([None, xrange, sixu('\u512a\u826f'), + asbytes('\xe4\xb8\x8d\xe8\x89\xaf')], + dtype=object) + + for fname in ['py2-objarr.npy', 'py2-objarr.npz', + 'py3-objarr.npy', 'py3-objarr.npz']: + path = os.path.join(data_dir, fname) + + if (fname.endswith('.npz') and sys.version_info[0] == 2 and + sys.version_info[1] < 7): + # Reading object arrays directly from zipfile appears to fail + # on Py2.6, see cfae0143b4 + continue + + for encoding in ['bytes', 'latin1']: + if (sys.version_info[0] >= 3 and sys.version_info[1] < 4 and + encoding == 'bytes'): + # The bytes encoding is available starting from Python 3.4 + continue + + data_f = np.load(path, encoding=encoding) + if fname.endswith('.npz'): + data = data_f['x'] + data_f.close() + else: + data = data_f + + if sys.version_info[0] >= 3: + if encoding == 'latin1' and fname.startswith('py2'): + assert_(isinstance(data[3], str)) + assert_array_equal(data[:-1], expected[:-1]) + # mojibake occurs + assert_array_equal(data[-1].encode(encoding), expected[-1]) + else: + assert_(isinstance(data[3], bytes)) + assert_array_equal(data, expected) + else: + 
assert_array_equal(data, expected) + + if sys.version_info[0] >= 3: + if fname.startswith('py2'): + if fname.endswith('.npz'): + data = np.load(path) + assert_raises(UnicodeError, data.__getitem__, 'x') + data.close() + data = np.load(path, fix_imports=False, encoding='latin1') + assert_raises(ImportError, data.__getitem__, 'x') + data.close() + else: + assert_raises(UnicodeError, np.load, path) + assert_raises(ImportError, np.load, path, + encoding='latin1', fix_imports=False) + + +def test_pickle_disallow(): + data_dir = os.path.join(os.path.dirname(__file__), 'data') + + path = os.path.join(data_dir, 'py2-objarr.npy') + assert_raises(ValueError, np.load, path, + allow_pickle=False, encoding='latin1') + + path = os.path.join(data_dir, 'py2-objarr.npz') + f = np.load(path, allow_pickle=False, encoding='latin1') + assert_raises(ValueError, f.__getitem__, 'x') + + path = os.path.join(tempdir, 'pickle-disabled.npy') + assert_raises(ValueError, np.save, path, np.array([None], dtype=object), + allow_pickle=False) + + def test_version_2_0(): f = BytesIO() # requires more than 2 bytes for header @@ -554,6 +635,7 @@ def test_version_2_0(): assert_raises(ValueError, format.write_array, f, d, (1, 0)) +@dec.slow def test_version_2_0_memmap(): # requires more than 2 bytes for header dt = [(("%d" % i) * 100, float) for i in range(500)] @@ -629,6 +711,26 @@ malformed_magic = asbytes_nested([ '', ]) +def test_read_magic(): + s1 = BytesIO() + s2 = BytesIO() + + arr = np.ones((3, 6), dtype=float) + + format.write_array(s1, arr, version=(1, 0)) + format.write_array(s2, arr, version=(2, 0)) + + s1.seek(0) + s2.seek(0) + + version1 = format.read_magic(s1) + version2 = format.read_magic(s2) + + assert_(version1 == (1, 0)) + assert_(version2 == (2, 0)) + + assert_(s1.tell() == format.MAGIC_LEN) + assert_(s2.tell() == format.MAGIC_LEN) def test_read_magic_bad_magic(): for magic in malformed_magic: @@ -659,6 +761,30 @@ def test_large_header(): assert_raises(ValueError, format.write_array_header_1_0, s, d) +def test_read_array_header_1_0(): + s = BytesIO() + + arr = np.ones((3, 6), dtype=float) + format.write_array(s, arr, version=(1, 0)) + + s.seek(format.MAGIC_LEN) + shape, fortran, dtype = format.read_array_header_1_0(s) + + assert_((shape, fortran, dtype) == ((3, 6), False, float)) + + +def test_read_array_header_2_0(): + s = BytesIO() + + arr = np.ones((3, 6), dtype=float) + format.write_array(s, arr, version=(2, 0)) + + s.seek(format.MAGIC_LEN) + shape, fortran, dtype = format.read_array_header_2_0(s) + + assert_((shape, fortran, dtype) == ((3, 6), False, float)) + + def test_bad_header(): # header of length less than 2 should fail s = BytesIO() @@ -687,7 +813,6 @@ def test_bad_header(): def test_large_file_support(): - from nose import SkipTest if (sys.platform == 'win32' or sys.platform == 'cygwin'): raise SkipTest("Unknown if Windows has sparse filesystems") # try creating a large sparse file @@ -712,5 +837,26 @@ def test_large_file_support(): assert_array_equal(r, d) +@dec.slow +@dec.skipif(np.dtype(np.intp).itemsize < 8, "test requires 64-bit system") +def test_large_archive(): + # Regression test for saving arrays whose element count (the product of + # their dimensions) does not fit in an int32. See gh-7598 for details.
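    # (Editorial note: 2**30 * 2 == 2**31 uint8 elements, one past the
    # largest int32 value of 2**31 - 1, so the element count only fits in
    # a 64-bit integer; hence the skipif decorator above.)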
+ try: + a = np.empty((2**30, 2), dtype=np.uint8) + except MemoryError: + raise SkipTest("Could not create large file") + + fname = os.path.join(tempdir, "large_archive") + + with open(fname, "wb") as f: + np.savez(f, arr=a) + + with open(fname, "rb") as f: + new_a = np.load(f)["arr"] + + assert_(a.shape == new_a.shape) + + if __name__ == "__main__": run_module_suite() diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index cdc4285e8..f396e036b 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1,5 +1,6 @@ from __future__ import division, absolute_import, print_function +import operator import warnings import sys @@ -8,14 +9,170 @@ from numpy.testing import ( run_module_suite, TestCase, assert_, assert_equal, assert_array_equal, assert_almost_equal, assert_array_almost_equal, assert_raises, assert_allclose, assert_array_max_ulp, assert_warns, - assert_raises_regex, dec - ) + assert_raises_regex, dec, suppress_warnings +) +from numpy.testing.utils import HAS_REFCOUNT +import numpy.lib.function_base as nfb from numpy.random import rand -from numpy.lib import * +from numpy.lib import ( + add_newdoc_ufunc, angle, average, bartlett, blackman, corrcoef, cov, + delete, diff, digitize, extract, flipud, gradient, hamming, hanning, + histogram, histogramdd, i0, insert, interp, kaiser, meshgrid, msort, + piecewise, place, rot90, select, setxor1d, sinc, split, trapz, trim_zeros, + unwrap, unique, vectorize +) + from numpy.compat import long +def get_mat(n): + data = np.arange(n) + data = np.add.outer(data, data) + return data + + +class TestRot90(TestCase): + def test_basic(self): + self.assertRaises(ValueError, rot90, np.ones(4)) + assert_raises(ValueError, rot90, np.ones((2,2,2)), axes=(0,1,2)) + assert_raises(ValueError, rot90, np.ones((2,2)), axes=(0,2)) + assert_raises(ValueError, rot90, np.ones((2,2)), axes=(1,1)) + assert_raises(ValueError, rot90, np.ones((2,2,2)), axes=(-2,1)) + + a = [[0, 1, 2], + [3, 4, 5]] + b1 = [[2, 5], + [1, 4], + [0, 3]] + b2 = [[5, 4, 3], + [2, 1, 0]] + b3 = [[3, 0], + [4, 1], + [5, 2]] + b4 = [[0, 1, 2], + [3, 4, 5]] + + for k in range(-3, 13, 4): + assert_equal(rot90(a, k=k), b1) + for k in range(-2, 13, 4): + assert_equal(rot90(a, k=k), b2) + for k in range(-1, 13, 4): + assert_equal(rot90(a, k=k), b3) + for k in range(0, 13, 4): + assert_equal(rot90(a, k=k), b4) + + assert_equal(rot90(rot90(a, axes=(0,1)), axes=(1,0)), a) + assert_equal(rot90(a, k=1, axes=(1,0)), rot90(a, k=-1, axes=(0,1))) + + def test_axes(self): + a = np.ones((50, 40, 3)) + assert_equal(rot90(a).shape, (40, 50, 3)) + assert_equal(rot90(a, axes=(0,2)), rot90(a, axes=(0,-1))) + assert_equal(rot90(a, axes=(1,2)), rot90(a, axes=(-2,-1))) + + def test_rotation_axes(self): + a = np.arange(8).reshape((2,2,2)) + + a_rot90_01 = [[[2, 3], + [6, 7]], + [[0, 1], + [4, 5]]] + a_rot90_12 = [[[1, 3], + [0, 2]], + [[5, 7], + [4, 6]]] + a_rot90_20 = [[[4, 0], + [6, 2]], + [[5, 1], + [7, 3]]] + a_rot90_10 = [[[4, 5], + [0, 1]], + [[6, 7], + [2, 3]]] + + assert_equal(rot90(a, axes=(0, 1)), a_rot90_01) + assert_equal(rot90(a, axes=(1, 0)), a_rot90_10) + assert_equal(rot90(a, axes=(1, 2)), a_rot90_12) + + for k in range(1,5): + assert_equal(rot90(a, k=k, axes=(2, 0)), + rot90(a_rot90_20, k=k-1, axes=(2, 0))) + + +class TestFlip(TestCase): + + def test_axes(self): + self.assertRaises(ValueError, np.flip, np.ones(4), axis=1) + self.assertRaises(ValueError, np.flip, np.ones((4, 4)), axis=2) + self.assertRaises(ValueError, np.flip, 
np.ones((4, 4)), axis=-3) + + def test_basic_lr(self): + a = get_mat(4) + b = a[:, ::-1] + assert_equal(np.flip(a, 1), b) + a = [[0, 1, 2], + [3, 4, 5]] + b = [[2, 1, 0], + [5, 4, 3]] + assert_equal(np.flip(a, 1), b) + + def test_basic_ud(self): + a = get_mat(4) + b = a[::-1, :] + assert_equal(np.flip(a, 0), b) + a = [[0, 1, 2], + [3, 4, 5]] + b = [[3, 4, 5], + [0, 1, 2]] + assert_equal(np.flip(a, 0), b) + + def test_3d_swap_axis0(self): + a = np.array([[[0, 1], + [2, 3]], + [[4, 5], + [6, 7]]]) + + b = np.array([[[4, 5], + [6, 7]], + [[0, 1], + [2, 3]]]) + + assert_equal(np.flip(a, 0), b) + + def test_3d_swap_axis1(self): + a = np.array([[[0, 1], + [2, 3]], + [[4, 5], + [6, 7]]]) + + b = np.array([[[2, 3], + [0, 1]], + [[6, 7], + [4, 5]]]) + + assert_equal(np.flip(a, 1), b) + + def test_3d_swap_axis2(self): + a = np.array([[[0, 1], + [2, 3]], + [[4, 5], + [6, 7]]]) + + b = np.array([[[1, 0], + [3, 2]], + [[5, 4], + [7, 6]]]) + + assert_equal(np.flip(a, 2), b) + + def test_4d(self): + a = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5) + for i in range(a.ndim): + assert_equal(np.flip(a, i), np.flipud(a.swapaxes(0, i)).swapaxes(i, 0)) + + class TestAny(TestCase): + def test_basic(self): y1 = [0, 0, 1, 0] y2 = [0, 0, 0, 0] @@ -32,6 +189,7 @@ class TestAny(TestCase): class TestAll(TestCase): + def test_basic(self): y1 = [0, 1, 1, 0] y2 = [0, 0, 0, 0] @@ -49,6 +207,7 @@ class TestAll(TestCase): class TestCopy(TestCase): + def test_basic(self): a = np.array([[1, 2], [3, 4]]) a_copy = np.copy(a) @@ -76,6 +235,7 @@ class TestCopy(TestCase): class TestAverage(TestCase): + def test_basic(self): y1 = np.array([1, 2, 3]) assert_(average(y1, axis=0) == 2.) @@ -101,7 +261,7 @@ class TestAverage(TestCase): y = np.arange(10) w = np.arange(10) actual = average(y, weights=w) - desired = (np.arange(10) ** 2).sum()*1. / np.arange(10).sum() + desired = (np.arange(10) ** 2).sum() * 1. / np.arange(10).sum() assert_almost_equal(actual, desired) y1 = np.array([[1, 2, 3], [4, 5, 6]]) @@ -155,6 +315,33 @@ class TestAverage(TestCase): avg, scl = average(y, weights=w2, axis=1, returned=True) assert_array_equal(scl, np.array([1., 6.])) + def test_subclasses(self): + class subclass(np.ndarray): + pass + a = np.array([[1,2],[3,4]]).view(subclass) + w = np.array([[1,2],[3,4]]).view(subclass) + + with suppress_warnings() as sup: + # Note that the warning is spurious, because the test checks + # for weights while a is ignored. 
+ sup.filter(FutureWarning, "np.average currently does not preserve") + assert_equal(type(np.average(a, weights=w)), subclass) + + # also test matrices + a = np.matrix([[1,2],[3,4]]) + w = np.matrix([[1,2],[3,4]]) + + r = np.average(a, axis=0, weights=w) + assert_equal(type(r), np.matrix) + assert_equal(r, [[2.5, 10.0/3]]) + + def test_upcasting(self): + types = [('i4', 'i4', 'f8'), ('i4', 'f4', 'f8'), ('f4', 'i4', 'f8'), + ('f4', 'f4', 'f4'), ('f4', 'f8', 'f8')] + for at, wt, rt in types: + a = np.array([[1,2],[3,4]], dtype=at) + w = np.array([[1,2],[3,4]], dtype=wt) + assert_equal(np.average(a, weights=w).dtype, np.dtype(rt)) class TestSelect(TestCase): choices = [np.array([1, 2, 3]), @@ -229,6 +416,7 @@ class TestSelect(TestCase): class TestInsert(TestCase): + def test_basic(self): a = [1, 2, 3] assert_equal(insert(a, 0, 1), [1, 1, 2, 3]) @@ -242,11 +430,11 @@ class TestInsert(TestCase): assert_equal(insert(b, 0, b[0]), [0., 0., 1.]) assert_equal(insert(b, [], []), b) # Bools will be treated differently in the future: - #assert_equal(insert(a, np.array([True]*4), 9), [9,1,9,2,9,3,9]) + # assert_equal(insert(a, np.array([True]*4), 9), [9, 1, 9, 2, 9, 3, 9]) with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', FutureWarning) assert_equal( - insert(a, np.array([True]*4), 9), [1, 9, 9, 9, 9, 2, 3]) + insert(a, np.array([True] * 4), 9), [1, 9, 9, 9, 9, 2, 3]) assert_(w[0].category is FutureWarning) def test_multidim(self): @@ -271,25 +459,25 @@ class TestInsert(TestCase): a = np.arange(4).reshape(2, 2) assert_equal(insert(a[:, :1], 1, a[:, 1], axis=1), a) - assert_equal(insert(a[:1, :], 1, a[1, :], axis=0), a) + assert_equal(insert(a[:1,:], 1, a[1,:], axis=0), a) # negative axis value a = np.arange(24).reshape((2, 3, 4)) - assert_equal(insert(a, 1, a[:, :, 3], axis=-1), - insert(a, 1, a[:, :, 3], axis=2)) - assert_equal(insert(a, 1, a[:, 2, :], axis=-2), - insert(a, 1, a[:, 2, :], axis=1)) + assert_equal(insert(a, 1, a[:,:, 3], axis=-1), + insert(a, 1, a[:,:, 3], axis=2)) + assert_equal(insert(a, 1, a[:, 2,:], axis=-2), + insert(a, 1, a[:, 2,:], axis=1)) # invalid axis value assert_raises(IndexError, insert, a, 1, a[:, 2, :], axis=3) assert_raises(IndexError, insert, a, 1, a[:, 2, :], axis=-4) # negative axis value - a = np.arange(24).reshape((2,3,4)) - assert_equal(insert(a, 1, a[:,:,3], axis=-1), - insert(a, 1, a[:,:,3], axis=2)) - assert_equal(insert(a, 1, a[:,2,:], axis=-2), - insert(a, 1, a[:,2,:], axis=1)) + a = np.arange(24).reshape((2, 3, 4)) + assert_equal(insert(a, 1, a[:, :, 3], axis=-1), + insert(a, 1, a[:, :, 3], axis=2)) + assert_equal(insert(a, 1, a[:, 2, :], axis=-2), + insert(a, 1, a[:, 2, :], axis=1)) def test_0d(self): # This is an error in the future @@ -329,6 +517,7 @@ class TestInsert(TestCase): class TestAmax(TestCase): + def test_basic(self): a = [3, 4, 5, 10, -3, -5, 6.0] assert_equal(np.amax(a), 10.0) @@ -340,6 +529,7 @@ class TestAmax(TestCase): class TestAmin(TestCase): + def test_basic(self): a = [3, 4, 5, 10, -3, -5, 6.0] assert_equal(np.amin(a), -5.0) @@ -351,17 +541,19 @@ class TestAmin(TestCase): class TestPtp(TestCase): + def test_basic(self): - a = [3, 4, 5, 10, -3, -5, 6.0] - assert_equal(np.ptp(a, axis=0), 15.0) - b = [[3, 6.0, 9.0], - [4, 10.0, 5.0], - [8, 3.0, 2.0]] - assert_equal(np.ptp(b, axis=0), [5.0, 7.0, 7.0]) - assert_equal(np.ptp(b, axis=-1), [6.0, 6.0, 6.0]) + a = np.array([3, 4, 5, 10, -3, -5, 6.0]) + assert_equal(a.ptp(axis=0), 15.0) + b = np.array([[3, 6.0, 9.0], + [4, 10.0, 5.0], + [8, 3.0, 2.0]]) + 
assert_equal(b.ptp(axis=0), [5.0, 7.0, 7.0]) + assert_equal(b.ptp(axis=-1), [6.0, 6.0, 6.0]) class TestCumsum(TestCase): + def test_basic(self): ba = [1, 2, 10, 11, 6, 5, 4] ba2 = [[1, 2, 3, 4], [5, 6, 7, 9], [10, 3, 4, 5]] @@ -383,6 +575,7 @@ class TestCumsum(TestCase): class TestProd(TestCase): + def test_basic(self): ba = [1, 2, 10, 11, 6, 5, 4] ba2 = [[1, 2, 3, 4], [5, 6, 7, 9], [10, 3, 4, 5]] @@ -391,18 +584,18 @@ class TestProd(TestCase): a = np.array(ba, ctype) a2 = np.array(ba2, ctype) if ctype in ['1', 'b']: - self.assertRaises(ArithmeticError, prod, a) - self.assertRaises(ArithmeticError, prod, a2, 1) - self.assertRaises(ArithmeticError, prod, a) + self.assertRaises(ArithmeticError, np.prod, a) + self.assertRaises(ArithmeticError, np.prod, a2, 1) else: - assert_equal(np.prod(a, axis=0), 26400) - assert_array_equal(np.prod(a2, axis=0), + assert_equal(a.prod(axis=0), 26400) + assert_array_equal(a2.prod(axis=0), np.array([50, 36, 84, 180], ctype)) - assert_array_equal(np.prod(a2, axis=-1), + assert_array_equal(a2.prod(axis=-1), np.array([24, 1890, 600], ctype)) class TestCumprod(TestCase): + def test_basic(self): ba = [1, 2, 10, 11, 6, 5, 4] ba2 = [[1, 2, 3, 4], [5, 6, 7, 9], [10, 3, 4, 5]] @@ -411,9 +604,9 @@ class TestCumprod(TestCase): a = np.array(ba, ctype) a2 = np.array(ba2, ctype) if ctype in ['1', 'b']: - self.assertRaises(ArithmeticError, cumprod, a) - self.assertRaises(ArithmeticError, cumprod, a2, 1) - self.assertRaises(ArithmeticError, cumprod, a) + self.assertRaises(ArithmeticError, np.cumprod, a) + self.assertRaises(ArithmeticError, np.cumprod, a2, 1) + self.assertRaises(ArithmeticError, np.cumprod, a) else: assert_array_equal(np.cumprod(a, axis=-1), np.array([1, 2, 20, 220, @@ -429,6 +622,7 @@ class TestCumprod(TestCase): class TestDiff(TestCase): + def test_basic(self): x = [1, 4, 6, 7, 12] out = np.array([3, 2, 1, 5]) @@ -451,6 +645,7 @@ class TestDiff(TestCase): class TestDelete(TestCase): + def setUp(self): self.a = np.arange(5) self.nd_a = np.arange(5).repeat(2).reshape(1, 5, 2) @@ -465,8 +660,8 @@ class TestDelete(TestCase): indices = indices[(indices >= 0) & (indices < 5)] assert_array_equal(setxor1d(a_del, self.a[indices, ]), self.a, err_msg=msg) - xor = setxor1d(nd_a_del[0, :, 0], self.nd_a[0, indices, 0]) - assert_array_equal(xor, self.nd_a[0, :, 0], err_msg=msg) + xor = setxor1d(nd_a_del[0,:, 0], self.nd_a[0, indices, 0]) + assert_array_equal(xor, self.nd_a[0,:, 0], err_msg=msg) def test_slices(self): lims = [-6, -2, 0, 1, 2, 4, 5] @@ -513,8 +708,19 @@ class TestDelete(TestCase): assert_(isinstance(delete(a, slice(1, 2)), SubClass)) assert_(isinstance(delete(a, slice(1, -2)), SubClass)) + def test_array_order_preserve(self): + # See gh-7113 + k = np.arange(10).reshape(2, 5, order='F') + m = delete(k, slice(60, None), axis=1) + + # 'k' is Fortran ordered, and 'm' should have the + # same ordering as 'k' and NOT become C ordered + assert_equal(m.flags.c_contiguous, k.flags.c_contiguous) + assert_equal(m.flags.f_contiguous, k.flags.f_contiguous) + class TestGradient(TestCase): + def test_basic(self): v = [[1, 1], [3, 4]] x = np.array(v) @@ -529,6 +735,9 @@ class TestGradient(TestCase): assert_raises(SyntaxError, gradient, x, np.array([1., 1.]), np.array([1., 1.]), np.array([1., 1.])) + # disallow arrays as distances, see gh-6847 + assert_raises(ValueError, gradient, np.arange(5), np.ones(5)) + def test_masked(self): # Make sure that gradient supports subclasses like masked arrays x = np.ma.array([[1, 1], [3, 4]], @@ -579,8 +788,34 @@ class 
TestGradient(TestCase): num_error = np.abs((np.gradient(y, dx, edge_order=2) / analytical) - 1) assert_(np.all(num_error < 0.03) == True) + def test_specific_axes(self): + # Testing that gradient can work on a given axis only + v = [[1, 1], [3, 4]] + x = np.array(v) + dx = [np.array([[2., 3.], [2., 3.]]), + np.array([[0., 0.], [1., 1.]])] + assert_array_equal(gradient(x, axis=0), dx[0]) + assert_array_equal(gradient(x, axis=1), dx[1]) + assert_array_equal(gradient(x, axis=-1), dx[1]) + assert_array_equal(gradient(x, axis=(1, 0)), [dx[1], dx[0]]) + + # test axis=None which means all axes + assert_almost_equal(gradient(x, axis=None), [dx[0], dx[1]]) + # and is the same as no axis keyword given + assert_almost_equal(gradient(x, axis=None), gradient(x)) + + # test vararg order + assert_array_equal(gradient(x, 2, 3, axis=(1, 0)), [dx[1]/2.0, dx[0]/3.0]) + # test maximal number of varargs + assert_raises(SyntaxError, gradient, x, 1, 2, axis=1) + + assert_raises(ValueError, gradient, x, axis=3) + assert_raises(ValueError, gradient, x, axis=-3) + assert_raises(TypeError, gradient, x, axis=[1,]) + class TestAngle(TestCase): + def test_basic(self): x = [1 + 3j, np.sqrt(2) / 2.0 + 1j * np.sqrt(2) / 2, 1, 1j, -1, -1j, 1 - 3j, -1 + 3j] @@ -596,8 +831,12 @@ class TestAngle(TestCase): class TestTrimZeros(TestCase): - """ only testing for integer splits. + + """ + Only testing for integer splits. + """ + def test_basic(self): a = np.array([0, 0, 1, 2, 3, 4, 0]) res = trim_zeros(a) @@ -615,16 +854,34 @@ class TestTrimZeros(TestCase): class TestExtins(TestCase): + def test_basic(self): a = np.array([1, 3, 2, 1, 2, 3, 3]) b = extract(a > 1, a) assert_array_equal(b, [3, 2, 2, 3, 3]) def test_place(self): + # Make sure that non-np.ndarray objects + # raise an error instead of doing nothing + assert_raises(TypeError, place, [1, 2, 3], [True, False], [0, 1]) + a = np.array([1, 4, 3, 2, 5, 8, 7]) place(a, [0, 1, 0, 1, 0, 1, 0], [2, 4, 6]) assert_array_equal(a, [1, 2, 3, 4, 5, 6, 7]) + place(a, np.zeros(7), []) + assert_array_equal(a, np.arange(1, 8)) + + place(a, [1, 0, 1, 0, 1, 0, 1], [8, 9]) + assert_array_equal(a, [8, 2, 9, 4, 8, 6, 9]) + assert_raises_regex(ValueError, "Cannot insert from an empty array", + lambda: place(a, [0, 0, 0, 0, 0, 1, 0], [])) + + # See Issue #6974 + a = np.array(['12', '34']) + place(a, [0, 1], '9') + assert_array_equal(a, ['12', '9']) + def test_both(self): a = rand(10) mask = a > 0.5 @@ -636,12 +893,14 @@ class TestExtins(TestCase): class TestVectorize(TestCase): + def test_simple(self): def addsubtract(a, b): if a > b: return a - b else: return a + b + f = vectorize(addsubtract) r = f([0, 3, 6, 9], [1, 3, 5, 7]) assert_array_equal(r, [1, 6, 1, 2]) @@ -652,6 +911,7 @@ class TestVectorize(TestCase): return a - b else: return a + b + f = vectorize(addsubtract) r = f([0, 3, 6, 9], 5) assert_array_equal(r, [5, 8, 1, 4]) @@ -665,16 +925,16 @@ class TestVectorize(TestCase): def test_ufunc(self): import math f = vectorize(math.cos) - args = np.array([0, 0.5*np.pi, np.pi, 1.5*np.pi, 2*np.pi]) + args = np.array([0, 0.5 * np.pi, np.pi, 1.5 * np.pi, 2 * np.pi]) r1 = f(args) r2 = np.cos(args) - assert_array_equal(r1, r2) + assert_array_almost_equal(r1, r2) def test_keywords(self): - import math def foo(a, b=1): return a + b + f = vectorize(foo) args = np.array([1, 2, 3]) r1 = f(args) @@ -690,16 +950,16 @@ class TestVectorize(TestCase): # inspect the func_code. 
import random try: - f = vectorize(random.randrange) + vectorize(random.randrange) # Should succeed except: raise AssertionError() def test_keywords2_ticket_2100(self): - r"""Test kwarg support: enhancement ticket 2100""" - import math + # Test kwarg support: enhancement ticket 2100 def foo(a, b=1): return a + b + f = vectorize(foo) args = np.array([1, 2, 3]) r1 = f(a=args) @@ -712,13 +972,14 @@ class TestVectorize(TestCase): assert_array_equal(r1, r2) def test_keywords3_ticket_2100(self): - """Test excluded with mixed positional and kwargs: ticket 2100""" + # Test excluded with mixed positional and kwargs: ticket 2100 def mypolyval(x, p): _p = list(p) res = _p.pop(0) while _p: - res = res*x + _p.pop(0) + res = res * x + _p.pop(0) return res + vpolyval = np.vectorize(mypolyval, excluded=['p', 1]) ans = [3, 6] assert_array_equal(ans, vpolyval(x=[0, 1], p=[1, 2, 3])) @@ -726,49 +987,58 @@ class TestVectorize(TestCase): assert_array_equal(ans, vpolyval([0, 1], [1, 2, 3])) def test_keywords4_ticket_2100(self): - """Test vectorizing function with no positional args.""" + # Test vectorizing function with no positional args. @vectorize def f(**kw): res = 1.0 for _k in kw: res *= kw[_k] return res + assert_array_equal(f(a=[1, 2], b=[3, 4]), [3, 8]) def test_keywords5_ticket_2100(self): - """Test vectorizing function with no kwargs args.""" + # Test vectorizing function with no kwargs args. @vectorize def f(*v): return np.prod(v) + assert_array_equal(f([1, 2], [3, 4]), [3, 8]) def test_coverage1_ticket_2100(self): def foo(): return 1 + f = vectorize(foo) assert_array_equal(f(), 1) def test_assigning_docstring(self): def foo(x): + """Original documentation""" return x + + f = vectorize(foo) + assert_equal(f.__doc__, foo.__doc__) + doc = "Provided documentation" f = vectorize(foo, doc=doc) assert_equal(f.__doc__, doc) def test_UnboundMethod_ticket_1156(self): - """Regression test for issue 1156""" + # Regression test for issue 1156 class Foo: b = 2 def bar(self, a): - return a**self.b + return a ** self.b + assert_array_equal(vectorize(Foo().bar)(np.arange(9)), - np.arange(9)**2) + np.arange(9) ** 2) assert_array_equal(vectorize(Foo.bar)(Foo(), np.arange(9)), - np.arange(9)**2) + np.arange(9) ** 2) def test_execution_order_ticket_1487(self): - """Regression test for dependence on execution order: issue 1487""" + # Regression test for dependence on execution order: issue 1487 f1 = vectorize(lambda x: x) res1a = f1(np.arange(3)) res1b = f1(np.arange(0.1, 3)) @@ -779,24 +1049,23 @@ class TestVectorize(TestCase): assert_equal(res1b, res2b) def test_string_ticket_1892(self): - """Test vectorization over strings: issue 1892.""" + # Test vectorization over strings: issue 1892. f = np.vectorize(lambda x: x) - s = '0123456789'*10 + s = '0123456789' * 10 assert_equal(s, f(s)) - #z = f(np.array([s,s])) - #assert_array_equal([s,s], f(s)) def test_cache(self): - """Ensure that vectorized func called exactly once per argument.""" + # Ensure that vectorized func called exactly once per argument. 
_calls = [0] @vectorize def f(x): _calls[0] += 1 - return x**2 + return x ** 2 + f.cache = True x = np.arange(5) - assert_array_equal(f(x), x*x) + assert_array_equal(f(x), x * x) assert_equal(_calls[0], len(x)) def test_otypes(self): @@ -805,8 +1074,158 @@ class TestVectorize(TestCase): x = np.arange(5) assert_array_equal(f(x), x) + def test_parse_gufunc_signature(self): + assert_equal(nfb._parse_gufunc_signature('(x)->()'), ([('x',)], [()])) + assert_equal(nfb._parse_gufunc_signature('(x,y)->()'), + ([('x', 'y')], [()])) + assert_equal(nfb._parse_gufunc_signature('(x),(y)->()'), + ([('x',), ('y',)], [()])) + assert_equal(nfb._parse_gufunc_signature('(x)->(y)'), + ([('x',)], [('y',)])) + assert_equal(nfb._parse_gufunc_signature('(x)->(y),()'), + ([('x',)], [('y',), ()])) + assert_equal(nfb._parse_gufunc_signature('(),(a,b,c),(d)->(d,e)'), + ([(), ('a', 'b', 'c'), ('d',)], [('d', 'e')])) + with assert_raises(ValueError): + nfb._parse_gufunc_signature('(x)(y)->()') + with assert_raises(ValueError): + nfb._parse_gufunc_signature('(x),(y)->') + with assert_raises(ValueError): + nfb._parse_gufunc_signature('((x))->(x)') + + def test_signature_simple(self): + def addsubtract(a, b): + if a > b: + return a - b + else: + return a + b + + f = vectorize(addsubtract, signature='(),()->()') + r = f([0, 3, 6, 9], [1, 3, 5, 7]) + assert_array_equal(r, [1, 6, 1, 2]) + + def test_signature_mean_last(self): + def mean(a): + return a.mean() + + f = vectorize(mean, signature='(n)->()') + r = f([[1, 3], [2, 4]]) + assert_array_equal(r, [2, 3]) + + def test_signature_center(self): + def center(a): + return a - a.mean() + + f = vectorize(center, signature='(n)->(n)') + r = f([[1, 3], [2, 4]]) + assert_array_equal(r, [[-1, 1], [-1, 1]]) + + def test_signature_two_outputs(self): + f = vectorize(lambda x: (x, x), signature='()->(),()') + r = f([1, 2, 3]) + assert_(isinstance(r, tuple) and len(r) == 2) + assert_array_equal(r[0], [1, 2, 3]) + assert_array_equal(r[1], [1, 2, 3]) + + def test_signature_outer(self): + f = vectorize(np.outer, signature='(a),(b)->(a,b)') + r = f([1, 2], [1, 2, 3]) + assert_array_equal(r, [[1, 2, 3], [2, 4, 6]]) + + r = f([[[1, 2]]], [1, 2, 3]) + assert_array_equal(r, [[[[1, 2, 3], [2, 4, 6]]]]) + + r = f([[1, 0], [2, 0]], [1, 2, 3]) + assert_array_equal(r, [[[1, 2, 3], [0, 0, 0]], + [[2, 4, 6], [0, 0, 0]]]) + + r = f([1, 2], [[1, 2, 3], [0, 0, 0]]) + assert_array_equal(r, [[[1, 2, 3], [2, 4, 6]], + [[0, 0, 0], [0, 0, 0]]]) + + def test_signature_computed_size(self): + f = vectorize(lambda x: x[:-1], signature='(n)->(m)') + r = f([1, 2, 3]) + assert_array_equal(r, [1, 2]) + + r = f([[1, 2, 3], [2, 3, 4]]) + assert_array_equal(r, [[1, 2], [2, 3]]) + + def test_signature_excluded(self): + + def foo(a, b=1): + return a + b + + f = vectorize(foo, signature='()->()', excluded={'b'}) + assert_array_equal(f([1, 2, 3]), [2, 3, 4]) + assert_array_equal(f([1, 2, 3], b=0), [1, 2, 3]) + + def test_signature_otypes(self): + f = vectorize(lambda x: x, signature='(n)->(n)', otypes=['float64']) + r = f([1, 2, 3]) + assert_equal(r.dtype, np.dtype('float64')) + assert_array_equal(r, [1, 2, 3]) + + def test_signature_invalid_inputs(self): + f = vectorize(operator.add, signature='(n),(n)->(n)') + with assert_raises_regex(TypeError, 'wrong number of positional'): + f([1, 2]) + with assert_raises_regex( + ValueError, 'does not have enough dimensions'): + f(1, 2) + with assert_raises_regex( + ValueError, 'inconsistent size for core dimension'): + f([1, 2], [1, 2, 3]) + + f = vectorize(operator.add, 
signature='()->()') + with assert_raises_regex(TypeError, 'wrong number of positional'): + f(1, 2) + + def test_signature_invalid_outputs(self): + + f = vectorize(lambda x: x[:-1], signature='(n)->(n)') + with assert_raises_regex( + ValueError, 'inconsistent size for core dimension'): + f([1, 2, 3]) + + f = vectorize(lambda x: x, signature='()->(),()') + with assert_raises_regex(ValueError, 'wrong number of outputs'): + f(1) + + f = vectorize(lambda x: (x, x), signature='()->()') + with assert_raises_regex(ValueError, 'wrong number of outputs'): + f([1, 2]) + + def test_size_zero_output(self): + # see issue 5868 + f = np.vectorize(lambda x: x) + x = np.zeros([0, 5], dtype=int) + with assert_raises_regex(ValueError, 'otypes'): + f(x) + + f.otypes = 'i' + assert_array_equal(f(x), x) + + f = np.vectorize(lambda x: x, signature='()->()') + with assert_raises_regex(ValueError, 'otypes'): + f(x) + + f = np.vectorize(lambda x: x, signature='()->()', otypes='i') + assert_array_equal(f(x), x) + + f = np.vectorize(lambda x: x, signature='(n)->(n)', otypes='i') + assert_array_equal(f(x), x) + + f = np.vectorize(lambda x: x, signature='(n)->(n)') + assert_array_equal(f(x.T), x.T) + + f = np.vectorize(lambda x: [x], signature='()->(n)', otypes='i') + with assert_raises_regex(ValueError, 'new output dimensions'): + f(x) + class TestDigitize(TestCase): + def test_forward(self): x = np.arange(-6, 5) bins = np.arange(-5, 5) @@ -861,50 +1280,69 @@ class TestDigitize(TestCase): bins = [1, 1, 0, 1] assert_raises(ValueError, digitize, x, bins) + def test_casting_error(self): + x = [1, 2, 3 + 1.j] + bins = [1, 2, 3] + assert_raises(TypeError, digitize, x, bins) + x, bins = bins, x + assert_raises(TypeError, digitize, x, bins) + + def test_return_type(self): + # Functions returning indices should always return base ndarrays + class A(np.ndarray): + pass + a = np.arange(5).view(A) + b = np.arange(1, 3).view(A) + assert_(not isinstance(digitize(b, a, False), A)) + assert_(not isinstance(digitize(b, a, True), A)) + class TestUnwrap(TestCase): + def test_simple(self): - #check that unwrap removes jumps greather that 2*pi + # check that unwrap removes jumps greather that 2*pi assert_array_equal(unwrap([1, 1 + 2 * np.pi]), [1, 1]) - #check that unwrap maintans continuity + # check that unwrap maintans continuity assert_(np.all(diff(unwrap(rand(10) * 100)) < np.pi)) class TestFilterwindows(TestCase): + def test_hanning(self): - #check symmetry + # check symmetry w = hanning(10) assert_array_almost_equal(w, flipud(w), 7) - #check known value + # check known value assert_almost_equal(np.sum(w, axis=0), 4.500, 4) def test_hamming(self): - #check symmetry + # check symmetry w = hamming(10) assert_array_almost_equal(w, flipud(w), 7) - #check known value + # check known value assert_almost_equal(np.sum(w, axis=0), 4.9400, 4) def test_bartlett(self): - #check symmetry + # check symmetry w = bartlett(10) assert_array_almost_equal(w, flipud(w), 7) - #check known value + # check known value assert_almost_equal(np.sum(w, axis=0), 4.4444, 4) def test_blackman(self): - #check symmetry + # check symmetry w = blackman(10) assert_array_almost_equal(w, flipud(w), 7) - #check known value + # check known value assert_almost_equal(np.sum(w, axis=0), 3.7800, 4) class TestTrapz(TestCase): + def test_simple(self): x = np.arange(-10, 10, .1) - r = trapz(np.exp(-.5*x**2) / np.sqrt(2*np.pi), dx=0.1) - #check integral of normal equals 1 + r = trapz(np.exp(-.5 * x ** 2) / np.sqrt(2 * np.pi), dx=0.1) + # check integral of normal equals 1 
assert_almost_equal(r, 1, 7) def test_ndim(self): @@ -922,7 +1360,7 @@ class TestTrapz(TestCase): wz[0] /= 2 wz[-1] /= 2 - q = x[:, None, None] + y[None, :, None] + z[None, None, :] + q = x[:, None, None] + y[None,:, None] + z[None, None,:] qx = (q * wx[:, None, None]).sum(axis=0) qy = (q * wy[None, :, None]).sum(axis=1) @@ -931,9 +1369,9 @@ class TestTrapz(TestCase): # n-d `x` r = trapz(q, x=x[:, None, None], axis=0) assert_almost_equal(r, qx) - r = trapz(q, x=y[None, :, None], axis=1) + r = trapz(q, x=y[None,:, None], axis=1) assert_almost_equal(r, qy) - r = trapz(q, x=z[None, None, :], axis=2) + r = trapz(q, x=z[None, None,:], axis=2) assert_almost_equal(r, qz) # 1-d `x` @@ -945,8 +1383,8 @@ class TestTrapz(TestCase): assert_almost_equal(r, qz) def test_masked(self): - #Testing that masked arrays behave as if the function is 0 where - #masked + # Testing that masked arrays behave as if the function is 0 where + # masked x = np.arange(5) y = x * x mask = x == 2 @@ -961,7 +1399,7 @@ class TestTrapz(TestCase): assert_almost_equal(trapz(y, xm), r) def test_matrix(self): - #Test to make sure matrices give the same answer as ndarrays + # Test to make sure matrices give the same answer as ndarrays x = np.linspace(0, 5) y = x * x r = trapz(y, x) @@ -972,10 +1410,11 @@ class TestTrapz(TestCase): class TestSinc(TestCase): + def test_simple(self): assert_(sinc(0) == 1) w = sinc(np.linspace(-1, 1, 100)) - #check symmetry + # check symmetry assert_array_almost_equal(w, flipud(w), 7) def test_array_like(self): @@ -988,6 +1427,7 @@ class TestSinc(TestCase): class TestHistogram(TestCase): + def setUp(self): pass @@ -998,10 +1438,10 @@ class TestHistogram(TestCase): n = 100 v = rand(n) (a, b) = histogram(v) - #check if the sum of the bins equals the number of samples + # check if the sum of the bins equals the number of samples assert_equal(np.sum(a, axis=0), n) - #check that the bin counts are evenly spaced when the data is from a - # linear function + # check that the bin counts are evenly spaced when the data is from + # a linear function (a, b) = histogram(np.linspace(0, 10, 100)) assert_array_equal(a, 10) @@ -1023,7 +1463,8 @@ class TestHistogram(TestCase): area = np.sum(a * diff(b)) assert_almost_equal(area, 1) - # Check with non-constant bin widths (buggy but backwards compatible) + # Check with non-constant bin widths (buggy but backwards + # compatible) v = np.arange(10) bins = [0, 1, 5, 9, 10] a, b = histogram(v, bins, normed=True) @@ -1043,7 +1484,7 @@ class TestHistogram(TestCase): bins = [0, 1, 3, 6, 10] a, b = histogram(v, bins, density=True) assert_array_equal(a, .1) - assert_equal(np.sum(a*diff(b)), 1) + assert_equal(np.sum(a * diff(b)), 1) # Variale bin widths are especially useful to deal with # infinities. 
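For context on the density checks above (an illustrative sketch, not part of the patch): with density=True, np.histogram divides each bin count by the bin width and by the total sample count, so the result integrates to one even when the bins are unevenly spaced:

    import numpy as np

    v = np.arange(10)
    # Four uneven bins holding 1, 2, 3 and 4 of the ten samples.
    hist, edges = np.histogram(v, bins=[0, 1, 3, 6, 10], density=True)
    # count / (n_samples * width) is 0.1 for every bin here,
    # matching the assert_array_equal(a, .1) check above ...
    assert np.allclose(hist, 0.1)
    # ... and the densities integrate to one over the bin widths.
    assert np.isclose((hist * np.diff(edges)).sum(), 1.0)
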
@@ -1086,20 +1527,20 @@ class TestHistogram(TestCase): # Check the type of the returned histogram a = np.arange(10) + .5 h, b = histogram(a) - assert_(issubdtype(h.dtype, int)) + assert_(np.issubdtype(h.dtype, int)) h, b = histogram(a, normed=True) - assert_(issubdtype(h.dtype, float)) + assert_(np.issubdtype(h.dtype, float)) h, b = histogram(a, weights=np.ones(10, int)) - assert_(issubdtype(h.dtype, int)) + assert_(np.issubdtype(h.dtype, int)) h, b = histogram(a, weights=np.ones(10, float)) - assert_(issubdtype(h.dtype, float)) + assert_(np.issubdtype(h.dtype, float)) def test_f32_rounding(self): # gh-4799, check that the rounding of the edges works with float32 - x = np.array([276.318359 , -69.593948 , 21.329449], dtype=np.float32) + x = np.array([276.318359, -69.593948, 21.329449], dtype=np.float32) y = np.array([5005.689453, 4481.327637, 6010.369629], dtype=np.float32) counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100) assert_equal(counts_hist.sum(), 3.) @@ -1133,13 +1574,215 @@ class TestHistogram(TestCase): weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True) assert_almost_equal(a, [.2, .1, .1, .075]) + def test_exotic_weights(self): + + # Test the use of weights that are not integer or floats, but e.g. + # complex numbers or object types. + + # Complex weights + values = np.array([1.3, 2.5, 2.3]) + weights = np.array([1, -1, 2]) + 1j * np.array([2, 1, 2]) + + # Check with custom bins + wa, wb = histogram(values, bins=[0, 2, 3], weights=weights) + assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3])) + + # Check with even bins + wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights) + assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3])) + + # Decimal weights + from decimal import Decimal + values = np.array([1.3, 2.5, 2.3]) + weights = np.array([Decimal(1), Decimal(2), Decimal(3)]) + + # Check with custom bins + wa, wb = histogram(values, bins=[0, 2, 3], weights=weights) + assert_array_almost_equal(wa, [Decimal(1), Decimal(5)]) + + # Check with even bins + wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights) + assert_array_almost_equal(wa, [Decimal(1), Decimal(5)]) + + def test_no_side_effects(self): + # This is a regression test that ensures that values passed to + # ``histogram`` are unchanged. + values = np.array([1.3, 2.5, 2.3]) + np.histogram(values, range=[-10, 10], bins=100) + assert_array_almost_equal(values, [1.3, 2.5, 2.3]) + def test_empty(self): a, b = histogram([], bins=([0, 1])) assert_array_equal(a, np.array([0])) assert_array_equal(b, np.array([0, 1])) + def test_error_binnum_type (self): + # Tests if right Error is raised if bins argument is float + vals = np.linspace(0.0, 1.0, num=100) + histogram(vals, 5) + assert_raises(TypeError, histogram, vals, 2.4) + + def test_finite_range(self): + # Normal ranges should be fine + vals = np.linspace(0.0, 1.0, num=100) + histogram(vals, range=[0.25,0.75]) + assert_raises(ValueError, histogram, vals, range=[np.nan,0.75]) + assert_raises(ValueError, histogram, vals, range=[0.25,np.inf]) + + def test_bin_edge_cases(self): + # Ensure that floating-point computations correctly place edge cases. 
+ arr = np.array([337, 404, 739, 806, 1007, 1811, 2012]) + hist, edges = np.histogram(arr, bins=8296, range=(2, 2280)) + mask = hist > 0 + left_edges = edges[:-1][mask] + right_edges = edges[1:][mask] + for x, left, right in zip(arr, left_edges, right_edges): + self.assertGreaterEqual(x, left) + self.assertLess(x, right) + + def test_last_bin_inclusive_range(self): + arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.]) + hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5)) + self.assertEqual(hist[-1], 1) + + +class TestHistogramOptimBinNums(TestCase): + """ + Provide test coverage when using provided estimators for optimal number of + bins + """ + + def test_empty(self): + estimator_list = ['fd', 'scott', 'rice', 'sturges', + 'doane', 'sqrt', 'auto'] + # check it can deal with empty data + for estimator in estimator_list: + a, b = histogram([], bins=estimator) + assert_array_equal(a, np.array([0])) + assert_array_equal(b, np.array([0, 1])) + + def test_simple(self): + """ + Straightforward testing with a mixture of linspace data (for + consistency). All test values have been precomputed and the values + shouldn't change + """ + # Some basic sanity checking, with some fixed data. + # Checking for the correct number of bins + basic_test = {50: {'fd': 4, 'scott': 4, 'rice': 8, 'sturges': 7, + 'doane': 8, 'sqrt': 8, 'auto': 7}, + 500: {'fd': 8, 'scott': 8, 'rice': 16, 'sturges': 10, + 'doane': 12, 'sqrt': 23, 'auto': 10}, + 5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14, + 'doane': 17, 'sqrt': 71, 'auto': 17}} + + for testlen, expectedResults in basic_test.items(): + # Create some sort of non uniform data to test with + # (2 peak uniform mixture) + x1 = np.linspace(-10, -1, testlen // 5 * 2) + x2 = np.linspace(1, 10, testlen // 5 * 3) + x = np.concatenate((x1, x2)) + for estimator, numbins in expectedResults.items(): + a, b = np.histogram(x, estimator) + assert_equal(len(a), numbins, err_msg="For the {0} estimator " + "with datasize of {1}".format(estimator, testlen)) + + def test_small(self): + """ + Smaller datasets have the potential to cause issues with the data + adaptive methods, especially the FD method. All bin numbers have been + precalculated. 
+ """ + small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1, + 'doane': 1, 'sqrt': 1}, + 2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2, + 'doane': 1, 'sqrt': 2}, + 3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3, + 'doane': 3, 'sqrt': 2}} + + for testlen, expectedResults in small_dat.items(): + testdat = np.arange(testlen) + for estimator, expbins in expectedResults.items(): + a, b = np.histogram(testdat, estimator) + assert_equal(len(a), expbins, err_msg="For the {0} estimator " + "with datasize of {1}".format(estimator, testlen)) + + def test_incorrect_methods(self): + """ + Check a Value Error is thrown when an unknown string is passed in + """ + check_list = ['mad', 'freeman', 'histograms', 'IQR'] + for estimator in check_list: + assert_raises(ValueError, histogram, [1, 2, 3], estimator) + + def test_novariance(self): + """ + Check that methods handle no variance in data + Primarily for Scott and FD as the SD and IQR are both 0 in this case + """ + novar_dataset = np.ones(100) + novar_resultdict = {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1, + 'doane': 1, 'sqrt': 1, 'auto': 1} + + for estimator, numbins in novar_resultdict.items(): + a, b = np.histogram(novar_dataset, estimator) + assert_equal(len(a), numbins, err_msg="{0} estimator, " + "No Variance test".format(estimator)) + + def test_outlier(self): + """ + Check the FD, Scott and Doane with outliers. + + The FD estimates a smaller binwidth since it's less affected by + outliers. Since the range is so (artificially) large, this means more + bins, most of which will be empty, but the data of interest usually is + unaffected. The Scott estimator is more affected and returns fewer bins, + despite most of the variance being in one area of the data. The Doane + estimator lies somewhere between the other two. + """ + xcenter = np.linspace(-10, 10, 50) + outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter)) + + outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11} + + for estimator, numbins in outlier_resultdict.items(): + a, b = np.histogram(outlier_dataset, estimator) + assert_equal(len(a), numbins) + + def test_simple_range(self): + """ + Straightforward testing with a mixture of linspace data (for + consistency). Adding in a 3rd mixture that will then be + completely ignored. All test values have been precomputed and + the shouldn't change. + """ + # some basic sanity checking, with some fixed data. 
Checking for the correct number of bins + basic_test = {50: {'fd': 8, 'scott': 8, 'rice': 15, 'sturges': 14, 'auto': 14}, + 500: {'fd': 15, 'scott': 16, 'rice': 32, 'sturges': 20, 'auto': 20}, + 5000: {'fd': 33, 'scott': 33, 'rice': 69, 'sturges': 27, 'auto': 33}} + + for testlen, expectedResults in basic_test.items(): + # create some sort of non uniform data to test with (3 peak uniform mixture) + x1 = np.linspace(-10, -1, testlen // 5 * 2) + x2 = np.linspace(1, 10, testlen // 5 * 3) + x3 = np.linspace(-100, -50, testlen) + x = np.hstack((x1, x2, x3)) + for estimator, numbins in expectedResults.items(): + a, b = np.histogram(x, estimator, range = (-20, 20)) + msg = "For the {0} estimator with datasize of {1}".format(estimator, testlen) + assert_equal(len(a), numbins, err_msg=msg) + + def test_simple_weighted(self): + """ + Check that weighted data raises a TypeError + """ + estimator_list = ['fd', 'scott', 'rice', 'sturges', 'auto'] + for estimator in estimator_list: + assert_raises(TypeError, histogram, [1, 2, 3], estimator, weights=[1, 2, 3]) + class TestHistogramdd(TestCase): + def test_simple(self): x = np.array([[-.5, .5, 1.5], [-.5, 1.5, 2.5], [-.5, 2.5, .5], [.5, .5, 1.5], [.5, 1.5, 2.5], [.5, 2.5, 2.5]]) @@ -1223,8 +1866,8 @@ class TestHistogramdd(TestCase): assert_array_max_ulp(a, np.zeros((2, 2, 2))) def test_bins_errors(self): - """There are two ways to specify bins. Check for the right errors when - mixing those.""" + # There are two ways to specify bins. Check for the right errors + # when mixing those. x = np.arange(8).reshape(2, 4) assert_raises(ValueError, np.histogramdd, x, bins=[-1, 2, 4, 5]) assert_raises(ValueError, np.histogramdd, x, bins=[1, 0.99, 1, 1]) @@ -1235,7 +1878,7 @@ class TestHistogramdd(TestCase): assert_(np.histogramdd(x, bins=[1, 1, 1, [1, 2, 3, 4]])) def test_inf_edges(self): - """Test using +/-inf bin edges works. See #1788.""" + # Test using +/-inf bin edges works. See #1788. with np.errstate(invalid='ignore'): x = np.arange(6).reshape(3, 2) expected = np.array([[1, 0], [0, 1], [0, 1]]) @@ -1247,31 +1890,39 @@ class TestHistogramdd(TestCase): assert_allclose(h, expected) def test_rightmost_binedge(self): - """Test event very close to rightmost binedge. - See Github issue #4266""" + # Test event very close to rightmost binedge. See Github issue #4266 x = [0.9999999995] - bins = [[0.,0.5,1.0]] + bins = [[0., 0.5, 1.0]] hist, _ = histogramdd(x, bins=bins) assert_(hist[0] == 0.0) assert_(hist[1] == 1.) x = [1.0] - bins = [[0.,0.5,1.0]] + bins = [[0., 0.5, 1.0]] hist, _ = histogramdd(x, bins=bins) assert_(hist[0] == 0.0) assert_(hist[1] == 1.) x = [1.0000000001] - bins = [[0.,0.5,1.0]] + bins = [[0., 0.5, 1.0]] hist, _ = histogramdd(x, bins=bins) assert_(hist[0] == 0.0) assert_(hist[1] == 1.) 
x = [1.0001] - bins = [[0.,0.5,1.0]] + bins = [[0., 0.5, 1.0]] hist, _ = histogramdd(x, bins=bins) assert_(hist[0] == 0.0) assert_(hist[1] == 0.0) + def test_finite_range(self): + vals = np.random.random((100, 3)) + histogramdd(vals, range=[[0.0, 1.0], [0.25, 0.75], [0.25, 0.5]]) + assert_raises(ValueError, histogramdd, vals, + range=[[0.0, 1.0], [0.25, 0.75], [0.25, np.inf]]) + assert_raises(ValueError, histogramdd, vals, + range=[[0.0, 1.0], [np.nan, 0.75], [0.25, 0.5]]) + class TestUnique(TestCase): + def test_simple(self): x = np.array([4, 3, 2, 1, 1, 2, 3, 4, 0]) assert_(np.all(unique(x) == [0, 1, 2, 3, 4])) @@ -1283,6 +1934,7 @@ class TestUnique(TestCase): class TestCheckFinite(TestCase): + def test_simple(self): a = [1, 2, 3] b = [1, 2, np.inf] @@ -1292,7 +1944,7 @@ class TestCheckFinite(TestCase): assert_raises(ValueError, np.lib.asarray_chkfinite, c) def test_dtype_order(self): - """Regression test for missing dtype and order arguments""" + # Regression test for missing dtype and order arguments a = [1, 2, 3] a = np.lib.asarray_chkfinite(a, order='F', dtype=np.float64) assert_(a.dtype == np.float64) @@ -1324,16 +1976,42 @@ class TestCorrCoef(TestCase): [[1., -1.], [-1., 1.]]) def test_simple(self): - assert_almost_equal(corrcoef(self.A), self.res1) - assert_almost_equal(corrcoef(self.A, self.B), self.res2) + tgt1 = corrcoef(self.A) + assert_almost_equal(tgt1, self.res1) + assert_(np.all(np.abs(tgt1) <= 1.0)) + + tgt2 = corrcoef(self.A, self.B) + assert_almost_equal(tgt2, self.res2) + assert_(np.all(np.abs(tgt2) <= 1.0)) def test_ddof(self): - assert_almost_equal(corrcoef(self.A, ddof=-1), self.res1) - assert_almost_equal(corrcoef(self.A, self.B, ddof=-1), self.res2) + # ddof raises DeprecationWarning + with suppress_warnings() as sup: + warnings.simplefilter("always") + assert_warns(DeprecationWarning, corrcoef, self.A, ddof=-1) + sup.filter(DeprecationWarning) + # ddof has no or negligible effect on the function + assert_almost_equal(corrcoef(self.A, ddof=-1), self.res1) + assert_almost_equal(corrcoef(self.A, self.B, ddof=-1), self.res2) + assert_almost_equal(corrcoef(self.A, ddof=3), self.res1) + assert_almost_equal(corrcoef(self.A, self.B, ddof=3), self.res2) + + def test_bias(self): + # bias raises DeprecationWarning + with suppress_warnings() as sup: + warnings.simplefilter("always") + assert_warns(DeprecationWarning, corrcoef, self.A, self.B, 1, 0) + assert_warns(DeprecationWarning, corrcoef, self.A, bias=0) + sup.filter(DeprecationWarning) + # bias has no or negligible effect on the function + assert_almost_equal(corrcoef(self.A, bias=1), self.res1) def test_complex(self): x = np.array([[1, 2, 3], [1j, 2j, 3j]]) - assert_allclose(corrcoef(x), np.array([[1., -1.j], [1.j, 1.]])) + res = corrcoef(x) + tgt = np.array([[1., -1.j], [1.j, 1.]]) + assert_allclose(res, tgt) + assert_(np.all(np.abs(res) <= 1.0)) def test_xy(self): x = np.array([[1, 2, 3]]) @@ -1349,18 +2027,29 @@ class TestCorrCoef(TestCase): assert_array_equal(corrcoef(np.array([]).reshape(2, 0)), np.array([[np.nan, np.nan], [np.nan, np.nan]])) - def test_wrong_ddof(self): - x = np.array([[0, 2], [1, 1], [2, 0]]).T - with warnings.catch_warnings(record=True): - warnings.simplefilter('always', RuntimeWarning) - assert_array_equal(corrcoef(x, ddof=5), - np.array([[np.nan, np.nan], [np.nan, np.nan]])) + def test_extreme(self): + x = [[1e-100, 1e100], [1e100, 1e-100]] + with np.errstate(all='raise'): + c = corrcoef(x) + assert_array_almost_equal(c, np.array([[1., -1.], [-1., 1.]])) + assert_(np.all(np.abs(c) <= 1.0)) 
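A minimal sketch of what test_extreme above pins down (illustrative, mirroring the test rather than the implementation): for rows spanning hundreds of orders of magnitude, corrcoef's normalization neither overflows nor underflows, and every coefficient stays inside the closed interval [-1, 1]:

    import numpy as np

    x = [[1e-100, 1e100], [1e100, 1e-100]]
    # Any floating-point overflow or underflow would raise here.
    with np.errstate(all='raise'):
        c = np.corrcoef(x)
    # The two rows are perfectly anti-correlated, and no entry
    # exceeds 1 in absolute value despite the extreme inputs.
    assert np.allclose(c, [[1.0, -1.0], [-1.0, 1.0]])
    assert (np.abs(c) <= 1.0).all()
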
class TestCov(TestCase): + x1 = np.array([[0, 2], [1, 1], [2, 0]]).T + res1 = np.array([[1., -1.], [-1., 1.]]) + x2 = np.array([0.0, 1.0, 2.0], ndmin=2) + frequencies = np.array([1, 4, 1]) + x2_repeats = np.array([[0.0], [1.0], [1.0], [1.0], [1.0], [2.0]]).T + res2 = np.array([[0.4, -0.4], [-0.4, 0.4]]) + unit_frequencies = np.ones(3, dtype=np.integer) + weights = np.array([1.0, 4.0, 1.0]) + res3 = np.array([[2. / 3., -2. / 3.], [-2. / 3., 2. / 3.]]) + unit_weights = np.ones(3) + x3 = np.array([0.3942, 0.5969, 0.7730, 0.9918, 0.7964]) + def test_basic(self): - x = np.array([[0, 2], [1, 1], [2, 0]]).T - assert_allclose(cov(x), np.array([[1., -1.], [-1., 1.]])) + assert_allclose(cov(self.x1), self.res1) def test_complex(self): x = np.array([[1, 2, 3], [1j, 2j, 3j]]) @@ -1381,14 +2070,71 @@ class TestCov(TestCase): np.array([[np.nan, np.nan], [np.nan, np.nan]])) def test_wrong_ddof(self): - x = np.array([[0, 2], [1, 1], [2, 0]]).T with warnings.catch_warnings(record=True): warnings.simplefilter('always', RuntimeWarning) - assert_array_equal(cov(x, ddof=5), - np.array([[np.inf, -np.inf], [-np.inf, np.inf]])) + assert_array_equal(cov(self.x1, ddof=5), + np.array([[np.inf, -np.inf], + [-np.inf, np.inf]])) + + def test_1D_rowvar(self): + assert_allclose(cov(self.x3), cov(self.x3, rowvar=0)) + y = np.array([0.0780, 0.3107, 0.2111, 0.0334, 0.8501]) + assert_allclose(cov(self.x3, y), cov(self.x3, y, rowvar=0)) + + def test_1D_variance(self): + assert_allclose(cov(self.x3, ddof=1), np.var(self.x3, ddof=1)) + + def test_fweights(self): + assert_allclose(cov(self.x2, fweights=self.frequencies), + cov(self.x2_repeats)) + assert_allclose(cov(self.x1, fweights=self.frequencies), + self.res2) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies), + self.res1) + nonint = self.frequencies + 0.5 + assert_raises(TypeError, cov, self.x1, fweights=nonint) + f = np.ones((2, 3), dtype=np.integer) + assert_raises(RuntimeError, cov, self.x1, fweights=f) + f = np.ones(2, dtype=np.integer) + assert_raises(RuntimeError, cov, self.x1, fweights=f) + f = -1 * np.ones(3, dtype=np.integer) + assert_raises(ValueError, cov, self.x1, fweights=f) + + def test_aweights(self): + assert_allclose(cov(self.x1, aweights=self.weights), self.res3) + assert_allclose(cov(self.x1, aweights=3.0 * self.weights), + cov(self.x1, aweights=self.weights)) + assert_allclose(cov(self.x1, aweights=self.unit_weights), self.res1) + w = np.ones((2, 3)) + assert_raises(RuntimeError, cov, self.x1, aweights=w) + w = np.ones(2) + assert_raises(RuntimeError, cov, self.x1, aweights=w) + w = -1.0 * np.ones(3) + assert_raises(ValueError, cov, self.x1, aweights=w) + + def test_unit_fweights_and_aweights(self): + assert_allclose(cov(self.x2, fweights=self.frequencies, + aweights=self.unit_weights), + cov(self.x2_repeats)) + assert_allclose(cov(self.x1, fweights=self.frequencies, + aweights=self.unit_weights), + self.res2) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=self.unit_weights), + self.res1) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=self.weights), + self.res3) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=3.0 * self.weights), + cov(self.x1, aweights=self.weights)) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=self.unit_weights), + self.res1) class Test_I0(TestCase): + def test_simple(self): assert_almost_equal( i0(0.5), @@ -1414,6 +2160,7 @@ class Test_I0(TestCase): class TestKaiser(TestCase): + def test_simple(self): 
assert_(np.isfinite(kaiser(1, 1.0))) assert_almost_equal(kaiser(0, 1.0), @@ -1432,6 +2179,7 @@ class TestKaiser(TestCase): class TestMsort(TestCase): + def test_simple(self): A = np.array([[0.44567325, 0.79115165, 0.54900530], [0.36844147, 0.37325583, 0.96098397], @@ -1444,6 +2192,7 @@ class TestMsort(TestCase): class TestMeshgrid(TestCase): + def test_simple(self): [X, Y] = meshgrid([1, 2, 3], [4, 5, 6, 7]) assert_array_equal(X, np.array([[1, 2, 3], @@ -1522,6 +2271,7 @@ class TestMeshgrid(TestCase): class TestPiecewise(TestCase): + def test_simple(self): # Condition is single bool list x = piecewise([0, 0], [True, False], [1]) @@ -1550,6 +2300,10 @@ class TestPiecewise(TestCase): x = piecewise([1, 2], [[True, False], [False, True]], [3, 4]) assert_array_equal(x, [3, 4]) + def test_scalar_domains_three_conditions(self): + x = piecewise(3, [True, False, False], [4, 2, 0]) + assert_equal(x, 4) + def test_default(self): # No value specified for x[1], should be 0 x = piecewise([1, 2], [True, False], [2]) @@ -1572,10 +2326,18 @@ class TestPiecewise(TestCase): def test_0d_comparison(self): x = 3 - y = piecewise(x, [x <= 3, x > 3], [4, 0]) + piecewise(x, [x <= 3, x > 3], [4, 0]) # Should succeed. + + def test_multidimensional_extrafunc(self): + x = np.array([[-2.5, -1.5, -0.5], + [0.5, 1.5, 2.5]]) + y = piecewise(x, [x < 0, x >= 2], [-1, 1, 3]) + assert_array_equal(y, np.array([[-1., -1., -1.], + [3., 3., 1.]])) class TestBincount(TestCase): + def test_simple(self): y = np.bincount(np.arange(4)) assert_array_equal(y, np.ones(4)) @@ -1624,23 +2386,46 @@ class TestBincount(TestCase): def test_with_incorrect_minlength(self): x = np.array([], dtype=int) - assert_raises_regex(TypeError, "an integer is required", + assert_raises_regex(TypeError, + "'str' object cannot be interpreted", lambda: np.bincount(x, minlength="foobar")) - assert_raises_regex(ValueError, "must be positive", + assert_raises_regex(ValueError, + "must be positive", lambda: np.bincount(x, minlength=-1)) - assert_raises_regex(ValueError, "must be positive", + assert_raises_regex(ValueError, + "must be positive", lambda: np.bincount(x, minlength=0)) x = np.arange(5) - assert_raises_regex(TypeError, "an integer is required", + assert_raises_regex(TypeError, + "'str' object cannot be interpreted", lambda: np.bincount(x, minlength="foobar")) - assert_raises_regex(ValueError, "minlength must be positive", + assert_raises_regex(ValueError, + "minlength must be positive", lambda: np.bincount(x, minlength=-1)) - assert_raises_regex(ValueError, "minlength must be positive", + assert_raises_regex(ValueError, + "minlength must be positive", lambda: np.bincount(x, minlength=0)) + @dec.skipif(not HAS_REFCOUNT, "python has no sys.getrefcount") + def test_dtype_reference_leaks(self): + # gh-6805 + intp_refcount = sys.getrefcount(np.dtype(np.intp)) + double_refcount = sys.getrefcount(np.dtype(np.double)) + + for j in range(10): + np.bincount([1, 2, 3]) + assert_equal(sys.getrefcount(np.dtype(np.intp)), intp_refcount) + assert_equal(sys.getrefcount(np.dtype(np.double)), double_refcount) + + for j in range(10): + np.bincount([1, 2, 3], [4, 5, 6]) + assert_equal(sys.getrefcount(np.dtype(np.intp)), intp_refcount) + assert_equal(sys.getrefcount(np.dtype(np.double)), double_refcount) + class TestInterp(TestCase): + def test_exceptions(self): assert_raises(ValueError, interp, 0, [], []) assert_raises(ValueError, interp, 0, [0], [1, 2]) @@ -1655,10 +2440,42 @@ class TestInterp(TestCase): assert_almost_equal(np.interp(x0, x, y), x0) def 
test_right_left_behavior(self): - assert_equal(interp([-1, 0, 1], [0], [1]), [1, 1, 1]) - assert_equal(interp([-1, 0, 1], [0], [1], left=0), [0, 1, 1]) - assert_equal(interp([-1, 0, 1], [0], [1], right=0), [1, 1, 0]) - assert_equal(interp([-1, 0, 1], [0], [1], left=0, right=0), [0, 1, 0]) + # Needs range of sizes to test different code paths. + # size ==1 is special cased, 1 < size < 5 is linear search, and + # size >= 5 goes through local search and possibly binary search. + for size in range(1, 10): + xp = np.arange(size, dtype=np.double) + yp = np.ones(size, dtype=np.double) + incpts = np.array([-1, 0, size - 1, size], dtype=np.double) + decpts = incpts[::-1] + + incres = interp(incpts, xp, yp) + decres = interp(decpts, xp, yp) + inctgt = np.array([1, 1, 1, 1], dtype=np.float) + dectgt = inctgt[::-1] + assert_equal(incres, inctgt) + assert_equal(decres, dectgt) + + incres = interp(incpts, xp, yp, left=0) + decres = interp(decpts, xp, yp, left=0) + inctgt = np.array([0, 1, 1, 1], dtype=np.float) + dectgt = inctgt[::-1] + assert_equal(incres, inctgt) + assert_equal(decres, dectgt) + + incres = interp(incpts, xp, yp, right=2) + decres = interp(decpts, xp, yp, right=2) + inctgt = np.array([1, 1, 1, 2], dtype=np.float) + dectgt = inctgt[::-1] + assert_equal(incres, inctgt) + assert_equal(decres, dectgt) + + incres = interp(incpts, xp, yp, left=0, right=2) + decres = interp(decpts, xp, yp, left=0, right=2) + inctgt = np.array([0, 1, 1, 2], dtype=np.float) + dectgt = inctgt[::-1] + assert_equal(incres, inctgt) + assert_equal(decres, dectgt) def test_scalar_interpolation_point(self): x = np.linspace(0, 1, 5) @@ -1674,6 +2491,28 @@ class TestInterp(TestCase): x0 = np.nan assert_almost_equal(np.interp(x0, x, y), x0) + def test_complex_interp(self): + # test complex interpolation + x = np.linspace(0, 1, 5) + y = np.linspace(0, 1, 5) + (1 + np.linspace(0, 1, 5))*1.0j + x0 = 0.3 + y0 = x0 + (1+x0)*1.0j + assert_almost_equal(np.interp(x0, x, y), y0) + # test complex left and right + x0 = -1 + left = 2 + 3.0j + assert_almost_equal(np.interp(x0, x, y, left=left), left) + x0 = 2.0 + right = 2 + 3.0j + assert_almost_equal(np.interp(x0, x, y, right=right), right) + # test complex periodic + x = [-180, -170, -185, 185, -10, -5, 0, 365] + xp = [190, -190, 350, -350] + fp = [5+1.0j, 10+2j, 3+3j, 4+4j] + y = [7.5+1.5j, 5.+1.0j, 8.75+1.75j, 6.25+1.25j, 3.+3j, 3.25+3.25j, + 3.5+3.5j, 3.75+3.75j] + assert_almost_equal(np.interp(x, xp, fp, period=360), y) + def test_zero_dimensional_interpolation_point(self): x = np.linspace(0, 1, 5) y = np.linspace(0, 1, 5) @@ -1703,13 +2542,19 @@ def compare_results(res, desired): assert_array_equal(res[i], desired[i]) -class TestScoreatpercentile(TestCase): +class TestPercentile(TestCase): def test_basic(self): x = np.arange(8) * 0.5 assert_equal(np.percentile(x, 0), 0.) 
assert_equal(np.percentile(x, 100), 3.5) assert_equal(np.percentile(x, 50), 1.75) + x[1] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(x, 0), np.nan) + assert_equal(np.percentile(x, 0, interpolation='nearest'), np.nan) + assert_(w[0].category is RuntimeWarning) def test_api(self): d = np.ones(5) @@ -1720,10 +2565,10 @@ class TestScoreatpercentile(TestCase): def test_2D(self): x = np.array([[1, 1, 1], - [1, 1, 1], - [4, 4, 3], - [1, 1, 1], - [1, 1, 1]]) + [1, 1, 1], + [4, 4, 3], + [1, 1, 1], + [1, 1, 1]]) assert_array_equal(np.percentile(x, 50, axis=0), [1, 1, 1]) def test_linear(self): @@ -1731,7 +2576,7 @@ class TestScoreatpercentile(TestCase): # Test defaults assert_equal(np.percentile(range(10), 50), 4.5) - # explicitly specify interpolation_method 'fraction' (the default) + # explicitly specify interpolation_method 'linear' (the default) assert_equal(np.percentile(range(10), 50, interpolation='linear'), 4.5) @@ -1746,6 +2591,10 @@ class TestScoreatpercentile(TestCase): def test_midpoint(self): assert_equal(np.percentile(range(10), 51, interpolation='midpoint'), 4.5) + assert_equal(np.percentile(range(11), 51, + interpolation='midpoint'), 5.5) + assert_equal(np.percentile(range(11), 50, + interpolation='midpoint'), 5) def test_nearest(self): assert_equal(np.percentile(range(10), 51, @@ -1776,7 +2625,8 @@ class TestScoreatpercentile(TestCase): assert_equal(np.percentile(x, (25, 50), axis=1).shape, (2, 3, 5, 6)) assert_equal(np.percentile(x, (25, 50), axis=2).shape, (2, 3, 4, 6)) assert_equal(np.percentile(x, (25, 50), axis=3).shape, (2, 3, 4, 5)) - assert_equal(np.percentile(x, (25, 50, 75), axis=1).shape, (3, 3, 5, 6)) + assert_equal( + np.percentile(x, (25, 50, 75), axis=1).shape, (3, 3, 5, 6)) assert_equal(np.percentile(x, (25, 50), interpolation="higher").shape, (2,)) assert_equal(np.percentile(x, (25, 50, 75), @@ -1797,10 +2647,10 @@ class TestScoreatpercentile(TestCase): x = np.arange(12).reshape(3, 4) assert_equal(np.percentile(x, 50), 5.5) self.assertTrue(np.isscalar(np.percentile(x, 50))) - r0 = np.array([ 4., 5., 6., 7.]) + r0 = np.array([4., 5., 6., 7.]) assert_equal(np.percentile(x, 50, axis=0), r0) assert_equal(np.percentile(x, 50, axis=0).shape, r0.shape) - r1 = np.array([ 1.5, 5.5, 9.5]) + r1 = np.array([1.5, 5.5, 9.5]) assert_almost_equal(np.percentile(x, 50, axis=1), r1) assert_equal(np.percentile(x, 50, axis=1).shape, r1.shape) @@ -1818,11 +2668,11 @@ class TestScoreatpercentile(TestCase): x = np.arange(12).reshape(3, 4) assert_equal(np.percentile(x, 50, interpolation='lower'), 5.) 
self.assertTrue(np.isscalar(np.percentile(x, 50))) - r0 = np.array([ 4., 5., 6., 7.]) + r0 = np.array([4., 5., 6., 7.]) c0 = np.percentile(x, 50, interpolation='lower', axis=0) assert_equal(c0, r0) assert_equal(c0.shape, r0.shape) - r1 = np.array([ 1., 5., 9.]) + r1 = np.array([1., 5., 9.]) c1 = np.percentile(x, 50, interpolation='lower', axis=1) assert_almost_equal(c1, r1) assert_equal(c1.shape, r1.shape) @@ -1894,7 +2744,7 @@ class TestScoreatpercentile(TestCase): def test_percentile_empty_dim(self): # empty dims are preserved - d = np.arange(11*2).reshape(11, 1, 2, 1) + d = np.arange(11 * 2).reshape(11, 1, 2, 1) assert_array_equal(np.percentile(d, 50, axis=0).shape, (1, 2, 1)) assert_array_equal(np.percentile(d, 50, axis=1).shape, (11, 2, 1)) assert_array_equal(np.percentile(d, 50, axis=2).shape, (11, 1, 1)) @@ -1920,7 +2770,6 @@ class TestScoreatpercentile(TestCase): assert_array_equal(np.array(np.percentile(d, [10, 50], axis=3)).shape, (2, 11, 1, 2)) - def test_percentile_no_overwrite(self): a = np.array([2, 3, 4, 1]) np.percentile(a, [50], overwrite_input=False) @@ -1961,22 +2810,22 @@ class TestScoreatpercentile(TestCase): assert_equal(np.percentile(x, [25, 60], axis=(0,)), np.percentile(x, [25, 60], axis=0)) - d = np.arange(3 * 5 * 7 * 11).reshape(3, 5, 7, 11) - np.random.shuffle(d) + d = np.arange(3 * 5 * 7 * 11).reshape((3, 5, 7, 11)) + np.random.shuffle(d.ravel()) assert_equal(np.percentile(d, 25, axis=(0, 1, 2))[0], - np.percentile(d[:, :, :, 0].flatten(), 25)) + np.percentile(d[:,:,:, 0].flatten(), 25)) assert_equal(np.percentile(d, [10, 90], axis=(0, 1, 3))[:, 1], - np.percentile(d[:, :, 1, :].flatten(), [10, 90])) + np.percentile(d[:,:, 1,:].flatten(), [10, 90])) assert_equal(np.percentile(d, 25, axis=(3, 1, -4))[2], - np.percentile(d[:, :, 2, :].flatten(), 25)) + np.percentile(d[:,:, 2,:].flatten(), 25)) assert_equal(np.percentile(d, 25, axis=(3, 1, 2))[2], - np.percentile(d[2, :, :, :].flatten(), 25)) + np.percentile(d[2,:,:,:].flatten(), 25)) assert_equal(np.percentile(d, 25, axis=(3, 2))[2, 1], - np.percentile(d[2, 1, :, :].flatten(), 25)) + np.percentile(d[2, 1,:,:].flatten(), 25)) assert_equal(np.percentile(d, 25, axis=(1, -2))[2, 1], - np.percentile(d[2, :, :, 1].flatten(), 25)) + np.percentile(d[2,:,:, 1].flatten(), 25)) assert_equal(np.percentile(d, 25, axis=(1, 3))[2, 2], - np.percentile(d[2, :, 2, :].flatten(), 25)) + np.percentile(d[2,:, 2,:].flatten(), 25)) def test_extended_axis_invalid(self): d = np.ones((3, 5, 7, 11)) @@ -2006,8 +2855,123 @@ class TestScoreatpercentile(TestCase): assert_equal(np.percentile(d, [1, 7], axis=(0, 3), keepdims=True).shape, (2, 1, 5, 7, 1)) + def test_out(self): + o = np.zeros((4,)) + d = np.ones((3, 4)) + assert_equal(np.percentile(d, 0, 0, out=o), o) + assert_equal(np.percentile(d, 0, 0, interpolation='nearest', out=o), o) + o = np.zeros((3,)) + assert_equal(np.percentile(d, 1, 1, out=o), o) + assert_equal(np.percentile(d, 1, 1, interpolation='nearest', out=o), o) + + o = np.zeros(()) + assert_equal(np.percentile(d, 2, out=o), o) + assert_equal(np.percentile(d, 2, interpolation='nearest', out=o), o) + + def test_out_nan(self): + with warnings.catch_warnings(record=True): + warnings.filterwarnings('always', '', RuntimeWarning) + o = np.zeros((4,)) + d = np.ones((3, 4)) + d[2, 1] = np.nan + assert_equal(np.percentile(d, 0, 0, out=o), o) + assert_equal( + np.percentile(d, 0, 0, interpolation='nearest', out=o), o) + o = np.zeros((3,)) + assert_equal(np.percentile(d, 1, 1, out=o), o) + assert_equal( + np.percentile(d, 1, 1, 
interpolation='nearest', out=o), o) + o = np.zeros(()) + assert_equal(np.percentile(d, 1, out=o), o) + assert_equal( + np.percentile(d, 1, interpolation='nearest', out=o), o) + + def test_nan_behavior(self): + a = np.arange(24, dtype=float) + a[2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, 0.3), np.nan) + assert_equal(np.percentile(a, 0.3, axis=0), np.nan) + assert_equal(np.percentile(a, [0.3, 0.6], axis=0), + np.array([np.nan] * 2)) + assert_(w[0].category is RuntimeWarning) + assert_(w[1].category is RuntimeWarning) + assert_(w[2].category is RuntimeWarning) + + a = np.arange(24, dtype=float).reshape(2, 3, 4) + a[1, 2, 3] = np.nan + a[1, 1, 2] = np.nan + + # no axis + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, 0.3), np.nan) + assert_equal(np.percentile(a, 0.3).ndim, 0) + assert_(w[0].category is RuntimeWarning) + + # axis0 zerod + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 0) + b[2, 3] = np.nan + b[1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, 0.3, 0), b) + + # axis0 not zerod + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), + [0.3, 0.6], 0) + b[:, 2, 3] = np.nan + b[:, 1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, [0.3, 0.6], 0), b) + + # axis1 zerod + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 1) + b[1, 3] = np.nan + b[1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, 0.3, 1), b) + # axis1 not zerod + b = np.percentile( + np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 1) + b[:, 1, 3] = np.nan + b[:, 1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, [0.3, 0.6], 1), b) + + # axis02 zerod + b = np.percentile( + np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, (0, 2)) + b[1] = np.nan + b[2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, 0.3, (0, 2)), b) + # axis02 not zerod + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), + [0.3, 0.6], (0, 2)) + b[:, 1] = np.nan + b[:, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile(a, [0.3, 0.6], (0, 2)), b) + # axis02 not zerod with nearest interpolation + b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), + [0.3, 0.6], (0, 2), interpolation='nearest') + b[:, 1] = np.nan + b[:, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.percentile( + a, [0.3, 0.6], (0, 2), interpolation='nearest'), b) + class TestMedian(TestCase): + def test_basic(self): a0 = np.array(1) a1 = np.arange(2) @@ -2028,7 +2992,10 @@ class TestMedian(TestCase): # check array scalar result assert_equal(np.median(a).ndim, 0) a[1] = np.nan - assert_equal(np.median(a).ndim, 0) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + 
assert_equal(np.median(a).ndim, 0) + assert_(w[0].category is RuntimeWarning) def test_axis_keyword(self): a3 = np.array([[2, 3], @@ -2071,7 +3038,7 @@ class TestMedian(TestCase): [3, 4]) a4 = np.arange(3 * 4 * 5, dtype=np.float32).reshape((3, 4, 5)) - map(np.random.shuffle, a4) + np.random.shuffle(a4.ravel()) assert_allclose(np.median(a4, axis=None), np.median(a4.copy(), axis=None, overwrite_input=True)) assert_allclose(np.median(a4, axis=0), @@ -2091,6 +3058,7 @@ class TestMedian(TestCase): def test_subclass(self): # gh-3846 class MySubClass(np.ndarray): + def __new__(cls, input_array, info=None): obj = np.asarray(input_array).view(cls) obj.info = info @@ -2099,11 +3067,108 @@ class TestMedian(TestCase): def mean(self, axis=None, dtype=None, out=None): return -7 - a = MySubClass([1,2,3]) + a = MySubClass([1, 2, 3]) assert_equal(np.median(a), -7) + def test_out(self): + o = np.zeros((4,)) + d = np.ones((3, 4)) + assert_equal(np.median(d, 0, out=o), o) + o = np.zeros((3,)) + assert_equal(np.median(d, 1, out=o), o) + o = np.zeros(()) + assert_equal(np.median(d, out=o), o) + + def test_out_nan(self): + with warnings.catch_warnings(record=True): + warnings.filterwarnings('always', '', RuntimeWarning) + o = np.zeros((4,)) + d = np.ones((3, 4)) + d[2, 1] = np.nan + assert_equal(np.median(d, 0, out=o), o) + o = np.zeros((3,)) + assert_equal(np.median(d, 1, out=o), o) + o = np.zeros(()) + assert_equal(np.median(d, out=o), o) + + def test_nan_behavior(self): + a = np.arange(24, dtype=float) + a[2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_equal(np.median(a, axis=0), np.nan) + assert_(w[0].category is RuntimeWarning) + assert_(w[1].category is RuntimeWarning) + + a = np.arange(24, dtype=float).reshape(2, 3, 4) + a[1, 2, 3] = np.nan + a[1, 1, 2] = np.nan + + # no axis + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_equal(np.median(a).ndim, 0) + assert_(w[0].category is RuntimeWarning) + + # axis0 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 0) + b[2, 3] = np.nan + b[1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a, 0), b) + assert_equal(len(w), 1) + + # axis1 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 1) + b[1, 3] = np.nan + b[1, 2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a, 1), b) + assert_equal(len(w), 1) + + # axis02 + b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), (0, 2)) + b[1] = np.nan + b[2] = np.nan + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a, (0, 2)), b) + assert_equal(len(w), 1) + + def test_empty(self): + # empty arrays + a = np.array([], dtype=float) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_(w[0].category is RuntimeWarning) + + # multiple dimensions + a = np.array([], dtype=float, ndmin=3) + # no axis + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_(w[0].category is RuntimeWarning) + + # axis 0 and 1 + b = np.array([], 
dtype=float, ndmin=2) + assert_equal(np.median(a, axis=0), b) + assert_equal(np.median(a, axis=1), b) + + # axis 2 + b = np.array(np.nan, dtype=float, ndmin=2) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_equal(np.median(a, axis=2), b) + assert_(w[0].category is RuntimeWarning) + def test_object(self): - o = np.arange(7.); + o = np.arange(7.) assert_(type(np.median(o.astype(object))), float) o[2] = np.nan assert_(type(np.median(o.astype(object))), float) @@ -2121,22 +3186,22 @@ class TestMedian(TestCase): assert_equal(np.median(x, axis=(0, )), np.median(x, axis=0)) assert_equal(np.median(x, axis=(-1, )), np.median(x, axis=-1)) - d = np.arange(3 * 5 * 7 * 11).reshape(3, 5, 7, 11) - np.random.shuffle(d) + d = np.arange(3 * 5 * 7 * 11).reshape((3, 5, 7, 11)) + np.random.shuffle(d.ravel()) assert_equal(np.median(d, axis=(0, 1, 2))[0], - np.median(d[:, :, :, 0].flatten())) + np.median(d[:,:,:, 0].flatten())) assert_equal(np.median(d, axis=(0, 1, 3))[1], - np.median(d[:, :, 1, :].flatten())) + np.median(d[:,:, 1,:].flatten())) assert_equal(np.median(d, axis=(3, 1, -4))[2], - np.median(d[:, :, 2, :].flatten())) + np.median(d[:,:, 2,:].flatten())) assert_equal(np.median(d, axis=(3, 1, 2))[2], - np.median(d[2, :, :, :].flatten())) + np.median(d[2,:,:,:].flatten())) assert_equal(np.median(d, axis=(3, 2))[2, 1], - np.median(d[2, 1, :, :].flatten())) + np.median(d[2, 1,:,:].flatten())) assert_equal(np.median(d, axis=(1, -2))[2, 1], - np.median(d[2, :, :, 1].flatten())) + np.median(d[2,:,:, 1].flatten())) assert_equal(np.median(d, axis=(1, 3))[2, 2], - np.median(d[2, :, 2, :].flatten())) + np.median(d[2,:, 2,:].flatten())) def test_extended_axis_invalid(self): d = np.ones((3, 5, 7, 11)) @@ -2162,7 +3227,6 @@ class TestMedian(TestCase): (1, 1, 7, 1)) - class TestAdd_newdoc_ufunc(TestCase): def test_ufunc_arg(self): diff --git a/numpy/lib/tests/test_index_tricks.py b/numpy/lib/tests/test_index_tricks.py index 97047c53a..d9fa1f43e 100644 --- a/numpy/lib/tests/test_index_tricks.py +++ b/numpy/lib/tests/test_index_tricks.py @@ -7,7 +7,7 @@ from numpy.testing import ( ) from numpy.lib.index_tricks import ( mgrid, ndenumerate, fill_diagonal, diag_indices, diag_indices_from, - index_exp, ndindex, r_, s_ + index_exp, ndindex, r_, s_, ix_ ) @@ -47,6 +47,27 @@ class TestRavelUnravelIndex(TestCase): [[3, 6, 6], [4, 5, 1]]) assert_equal(np.unravel_index(1621, (6, 7, 8, 9)), [3, 1, 4, 1]) + def test_big_indices(self): + # ravel_multi_index for big indices (issue #7546) + if np.intp == np.int64: + arr = ([1, 29], [3, 5], [3, 117], [19, 2], + [2379, 1284], [2, 2], [0, 1]) + assert_equal( + np.ravel_multi_index(arr, (41, 7, 120, 36, 2706, 8, 6)), + [5627771580, 117259570957]) + + # test overflow checking for too big array (issue #7546) + dummy_arr = ([0],[0]) + half_max = np.iinfo(np.intp).max // 2 + assert_equal( + np.ravel_multi_index(dummy_arr, (half_max, 2)), [0]) + assert_raises(ValueError, + np.ravel_multi_index, dummy_arr, (half_max+1, 2)) + assert_equal( + np.ravel_multi_index(dummy_arr, (half_max, 2), order='F'), [0]) + assert_raises(ValueError, + np.ravel_multi_index, dummy_arr, (half_max+1, 2), order='F') + def test_dtypes(self): # Test with different data types for dtype in [np.int16, np.uint16, np.int32, @@ -86,6 +107,12 @@ class TestRavelUnravelIndex(TestCase): assert_raises( ValueError, np.ravel_multi_index, [5, 1, -1, 2], (4, 3, 7, 12)) + def test_writeability(self): + # See gh-7269 + x, y = np.unravel_index([1, 2, 3], (4, 5)) + 
self.assertTrue(x.flags.writeable) + self.assertTrue(y.flags.writeable) + class TestGrid(TestCase): def test_basic(self): @@ -169,6 +196,43 @@ class TestIndexExpression(TestCase): assert_equal(a[:, :3, [1, 2]], a[s_[:, :3, [1, 2]]]) +class TestIx_(TestCase): + def test_regression_1(self): + # Test empty inputs create ouputs of indexing type, gh-5804 + # Test both lists and arrays + for func in (range, np.arange): + a, = np.ix_(func(0)) + assert_equal(a.dtype, np.intp) + + def test_shape_and_dtype(self): + sizes = (4, 5, 3, 2) + # Test both lists and arrays + for func in (range, np.arange): + arrays = np.ix_(*[func(sz) for sz in sizes]) + for k, (a, sz) in enumerate(zip(arrays, sizes)): + assert_equal(a.shape[k], sz) + assert_(all(sh == 1 for j, sh in enumerate(a.shape) if j != k)) + assert_(np.issubdtype(a.dtype, int)) + + def test_bool(self): + bool_a = [True, False, True, True] + int_a, = np.nonzero(bool_a) + assert_equal(np.ix_(bool_a)[0], int_a) + + def test_1d_only(self): + idx2d = [[1, 2, 3], [4, 5, 6]] + assert_raises(ValueError, np.ix_, idx2d) + + def test_repeated_input(self): + length_of_vector = 5 + x = np.arange(length_of_vector) + out = ix_(x, x) + assert_equal(out[0].shape, (length_of_vector, 1)) + assert_equal(out[1].shape, (1, length_of_vector)) + # check that input shape is not modified + assert_equal(x.shape, (length_of_vector,)) + + def test_c_(): a = np.c_[np.array([[1, 2, 3]]), 0, 0, np.array([[4, 5, 6]])] assert_equal(a, [[1, 2, 3, 0, 0, 4, 5, 6]]) diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 68b2018cd..333891d46 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -4,7 +4,7 @@ import sys import gzip import os import threading -from tempfile import mkstemp, NamedTemporaryFile +from tempfile import NamedTemporaryFile import time import warnings import gc @@ -13,16 +13,14 @@ from datetime import datetime import numpy as np import numpy.ma as ma -from numpy.lib._iotools import (ConverterError, ConverterLockError, - ConversionWarning) -from numpy.compat import asbytes, asbytes_nested, bytes, asstr -from nose import SkipTest -from numpy.ma.testutils import ( - TestCase, assert_equal, assert_array_equal, - assert_raises, assert_raises_regex, run_module_suite +from numpy.lib._iotools import ConverterError, ConversionWarning +from numpy.compat import asbytes, bytes, unicode, Path +from numpy.ma.testutils import assert_equal +from numpy.testing import ( + TestCase, run_module_suite, assert_warns, assert_, + assert_raises_regex, assert_raises, assert_allclose, + assert_array_equal, temppath, dec, IS_PYPY, suppress_warnings ) -from numpy.testing import assert_warns, assert_, build_err_msg -from numpy.testing.utils import tempdir class TextIO(BytesIO): @@ -49,8 +47,9 @@ IS_64BIT = sys.maxsize > 2**32 def strptime(s, fmt=None): - """This function is available in the datetime module only - from Python >= 2.5. + """ + This function is available in the datetime module only from Python >= + 2.5. 
""" if sys.version_info[0] >= 3: @@ -158,6 +157,7 @@ class RoundtripTest(object): a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) self.check_roundtrips(a) + @dec.slow def test_format_2_0(self): dt = [(("%d" % i) * 100, float) for i in range(500)] a = np.ones(1000, dtype=dt) @@ -194,13 +194,13 @@ class TestSavezLoad(RoundtripTest, TestCase): def test_big_arrays(self): L = (1 << 31) + 100000 a = np.empty(L, dtype=np.uint8) - with tempdir(prefix="numpy_test_big_arrays_") as tmpdir: - tmp = os.path.join(tmpdir, "file.npz") + with temppath(prefix="numpy_test_big_arrays_", suffix=".npz") as tmp: np.savez(tmp, a=a) del a npfile = np.load(tmp) - a = npfile['a'] + a = npfile['a'] # Should succeed npfile.close() + del a # Avoid pyflakes unused variable warning. def test_multiple_arrays(self): a = np.array([[1, 2], [3, 4]], float) @@ -216,7 +216,7 @@ class TestSavezLoad(RoundtripTest, TestCase): l = np.load(c) assert_equal(a, l['file_a']) assert_equal(b, l['file_b']) - + def test_BagObj(self): a = np.array([[1, 2], [3, 4]], float) b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex) @@ -233,16 +233,12 @@ class TestSavezLoad(RoundtripTest, TestCase): # and savez functions in multithreaded environment def writer(error_list): - fd, tmp = mkstemp(suffix='.npz') - os.close(fd) - try: + with temppath(suffix='.npz') as tmp: arr = np.random.randn(500, 500) try: np.savez(tmp, arr=arr) except OSError as err: error_list.append(err) - finally: - os.remove(tmp) errors = [] threads = [threading.Thread(target=writer, args=(errors,)) @@ -258,40 +254,27 @@ class TestSavezLoad(RoundtripTest, TestCase): def test_not_closing_opened_fid(self): # Test that issue #2178 is fixed: # verify could seek on 'loaded' file - - fd, tmp = mkstemp(suffix='.npz') - os.close(fd) - try: - fp = open(tmp, 'wb') - np.savez(fp, data='LOVELY LOAD') - fp.close() - - fp = open(tmp, 'rb', 10000) - fp.seek(0) - assert_(not fp.closed) - _ = np.load(fp)['data'] - assert_(not fp.closed) - # must not get closed by .load(opened fp) - fp.seek(0) - assert_(not fp.closed) - - finally: - fp.close() - os.remove(tmp) - + with temppath(suffix='.npz') as tmp: + with open(tmp, 'wb') as fp: + np.savez(fp, data='LOVELY LOAD') + with open(tmp, 'rb', 10000) as fp: + fp.seek(0) + assert_(not fp.closed) + np.load(fp)['data'] + # fp must not get closed by .load + assert_(not fp.closed) + fp.seek(0) + assert_(not fp.closed) + + @np.testing.dec.skipif(IS_PYPY, "context manager required on PyPy") def test_closing_fid(self): # Test that issue #1517 (too many opened files) remains closed # It might be a "weak" test since failed to get triggered on # e.g. Debian sid of 2012 Jul 05 but was reported to # trigger the failure on Ubuntu 10.04: # http://projects.scipy.org/numpy/ticket/1517#comment:2 - fd, tmp = mkstemp(suffix='.npz') - os.close(fd) - - try: - fp = open(tmp, 'wb') - np.savez(fp, data='LOVELY LOAD') - fp.close() + with temppath(suffix='.npz') as tmp: + np.savez(tmp, data='LOVELY LOAD') # We need to check if the garbage collector can properly close # numpy npz file returned by np.load when their reference count # goes to zero. Python 3 running in debug mode raises a @@ -299,24 +282,22 @@ class TestSavezLoad(RoundtripTest, TestCase): # collector, so we catch the warnings. Because ResourceWarning # is unknown in Python < 3.x, we take the easy way out and # catch all warnings. 
-            with warnings.catch_warnings():
-                warnings.simplefilter("ignore")
+            with suppress_warnings() as sup:
+                sup.filter(Warning)  # TODO: specify exact message
                 for i in range(1, 1025):
                     try:
                         np.load(tmp)["data"]
                     except Exception as e:
                         msg = "Failed to load data from a file: %s" % e
                         raise AssertionError(msg)
-        finally:
-            os.remove(tmp)

     def test_closing_zipfile_after_load(self):
-        # Check that zipfile owns file and can close it.
-        # This needs to pass a file name to load for the
-        # test.
-        with tempdir(prefix="numpy_test_closing_zipfile_after_load_") as tmpdir:
-            fd, tmp = mkstemp(suffix='.npz', dir=tmpdir)
-            os.close(fd)
+        # Check that zipfile owns file and can close it.  This needs to
+        # pass a file name to load for the test.  On Windows, failure
+        # will cause a second error to be raised when the attempt to
+        # remove the open file is made.
+        prefix = 'numpy_test_closing_zipfile_after_load_'
+        with temppath(suffix='.npz', prefix=prefix) as tmp:
             np.savez(tmp, lab='place holder')
             data = np.load(tmp)
             fp = data.zip.fp
@@ -390,9 +371,8 @@ class TestSaveTxt(TestCase):
         assert_raises(ValueError, np.savetxt, c, a, fmt=99)

     def test_header_footer(self):
-        """
-        Test the functionality of the header and footer keyword argument.
-        """
+        # Test the functionality of the header and footer keyword argument.
+
         c = BytesIO()
         a = np.array([(1, 2), (3, 4)], dtype=np.int)
         test_header_footer = 'Test header / footer'
@@ -425,15 +405,11 @@ class TestSaveTxt(TestCase):
             asbytes('1 2\n3 4\n' + commentstr + test_header_footer + '\n'))

     def test_file_roundtrip(self):
-        f, name = mkstemp()
-        os.close(f)
-        try:
+        with temppath() as name:
             a = np.array([(1, 2), (3, 4)])
             np.savetxt(name, a)
             b = np.loadtxt(name)
             assert_array_equal(a, b)
-        finally:
-            os.unlink(name)

     def test_complex_arrays(self):
         ncols = 2
@@ -553,15 +529,49 @@ class TestLoadTxt(TestCase):
         a = np.array([[2, -999], [7, 9]], int)
         assert_array_equal(x, a)

-    def test_comments(self):
+    def test_comments_unicode(self):
         c = TextIO()
         c.write('# comment\n1,2,3,5\n')
         c.seek(0)
         x = np.loadtxt(c, dtype=int, delimiter=',',
-                       comments='#')
+                       comments=unicode('#'))
+        a = np.array([1, 2, 3, 5], int)
+        assert_array_equal(x, a)
+
+    def test_comments_byte(self):
+        c = TextIO()
+        c.write('# comment\n1,2,3,5\n')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       comments=b'#')
+        a = np.array([1, 2, 3, 5], int)
+        assert_array_equal(x, a)
+
+    def test_comments_multiple(self):
+        c = TextIO()
+        c.write('# comment\n1,2,3\n@ comment2\n4,5,6 // comment3')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       comments=['#', '@', '//'])
+        a = np.array([[1, 2, 3], [4, 5, 6]], int)
+        assert_array_equal(x, a)
+
+    def test_comments_multi_chars(self):
+        c = TextIO()
+        c.write('/* comment\n1,2,3,5\n')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       comments='/*')
         a = np.array([1, 2, 3, 5], int)
         assert_array_equal(x, a)

+        # Check that '/*' is not transformed to ['/', '*']
+        c = TextIO()
+        c.write('*/ comment\n1,2,3,5\n')
+        c.seek(0)
+        assert_raises(ValueError, np.loadtxt, c, dtype=int, delimiter=',',
+                      comments='/*')
+
     def test_skiprows(self):
         c = TextIO()
         c.write('comment\n1,2,3,5\n')
@@ -599,6 +609,29 @@ class TestLoadTxt(TestCase):
         x = np.loadtxt(c, dtype=float, usecols=np.array([1, 2]))
         assert_array_equal(x, a[:, 1:])

+        # Testing with an integer instead of a sequence
+        for int_type in [int, np.int8, np.int16,
+                         np.int32, np.int64, np.uint8, np.uint16,
+                         np.uint32, np.uint64]:
+            to_read = int_type(1)
+            c.seek(0)
+            x = np.loadtxt(c, dtype=float, usecols=to_read)
+
assert_array_equal(x, a[:, 1]) + + # Testing with some crazy custom integer type + class CrazyInt(object): + def __index__(self): + return 1 + + crazy_int = CrazyInt() + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=crazy_int) + assert_array_equal(x, a[:, 1]) + + c.seek(0) + x = np.loadtxt(c, dtype=float, usecols=(crazy_int,)) + assert_array_equal(x, a[:, 1]) + # Checking with dtypes defined converters. data = '''JOE 70.1 25.3 BOB 60.5 27.9 @@ -610,6 +643,21 @@ class TestLoadTxt(TestCase): assert_equal(arr['stid'], [b"JOE", b"BOB"]) assert_equal(arr['temp'], [25.3, 27.9]) + # Testing non-ints in usecols + c.seek(0) + bogus_idx = 1.5 + assert_raises_regex( + TypeError, + '^usecols must be.*%s' % type(bogus_idx), + np.loadtxt, c, usecols=bogus_idx + ) + + assert_raises_regex( + TypeError, + '^usecols must be.*%s' % type(bogus_idx), + np.loadtxt, c, usecols=[0, bogus_idx, 0] + ) + def test_fancy_dtype(self): c = TextIO() c.write('1,2,3.0\n4,5,6.0\n') @@ -639,9 +687,8 @@ class TestLoadTxt(TestCase): assert_array_equal(x, a) def test_empty_file(self): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", - message="loadtxt: Empty input file:") + with suppress_warnings() as sup: + sup.filter(message="loadtxt: Empty input file:") c = TextIO() x = np.loadtxt(c) assert_equal(x.shape, (0,)) @@ -663,9 +710,7 @@ class TestLoadTxt(TestCase): assert_array_equal(data, [33, 66]) def test_dtype_with_object(self): - "Test using an explicit dtype with an object" - from datetime import date - import time + # Test using an explicit dtype with an object data = """ 1; 2001-01-01 2; 2002-01-31 """ ndtype = [('idx', int), ('code', np.object)] @@ -694,16 +739,33 @@ class TestLoadTxt(TestCase): res = np.loadtxt(c, dtype=np.int64) assert_equal(res, tgt) - def test_universal_newline(self): - f, name = mkstemp() - os.write(f, b'1 21\r3 42\r') - os.close(f) + def test_from_float_hex(self): + # IEEE doubles and floats only, otherwise the float32 + # conversion may fail. + tgt = np.logspace(-10, 10, 5).astype(np.float32) + tgt = np.hstack((tgt, -tgt)).astype(np.float) + inp = '\n'.join(map(float.hex, tgt)) + c = TextIO() + c.write(inp) + for dt in [np.float, np.float32]: + c.seek(0) + res = np.loadtxt(c, dtype=dt) + assert_equal(res, tgt, err_msg="%s" % dt) + + def test_from_complex(self): + tgt = (complex(1, 1), complex(1, -1)) + c = TextIO() + c.write("%s %s" % tgt) + c.seek(0) + res = np.loadtxt(c, dtype=np.complex) + assert_equal(res, tgt) - try: + def test_universal_newline(self): + with temppath() as name: + with open(name, 'w') as f: + f.write('1 21\r3 42\r') data = np.loadtxt(name) - assert_array_equal(data, [[1, 21], [3, 42]]) - finally: - os.unlink(name) + assert_array_equal(data, [[1, 21], [3, 42]]) def test_empty_field_after_tab(self): c = TextIO() @@ -763,9 +825,8 @@ class TestLoadTxt(TestCase): assert_(x.shape == (3,)) # Test ndmin kw with empty file. 
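As a reference point for the empty-file checks that follow, ndmin only pads dimensions and never discards data, so small inputs keep a predictable shape. A quick sketch of that contract (buffer contents illustrative):

    import numpy as np
    from io import BytesIO

    x = np.loadtxt(BytesIO(b'1 2 3\n'), ndmin=2)    # single row stays 2-D: (1, 3)
    y = np.loadtxt(BytesIO(b'1\n2\n3\n'), ndmin=1)  # column input: shape (3,)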
- with warnings.catch_warnings(): - warnings.filterwarnings("ignore", - message="loadtxt: Empty input file:") + with suppress_warnings() as sup: + sup.filter(message="loadtxt: Empty input file:") f = TextIO() assert_(np.loadtxt(f, ndmin=2).shape == (0, 1,)) assert_(np.loadtxt(f, ndmin=1).shape == (0,)) @@ -786,6 +847,14 @@ class TestLoadTxt(TestCase): # Check for exception and that exception contains line number assert_raises_regex(ValueError, "3", np.loadtxt, c) + def test_none_as_string(self): + # gh-5155, None should work as string when format demands it + c = TextIO() + c.write('100,foo,200\n300,None,400') + c.seek(0) + dt = np.dtype([('x', int), ('a', 'S10'), ('y', int)]) + np.loadtxt(c, delimiter=',', dtype=dt, comments=None) # Should succeed + class Testfromregex(TestCase): # np.fromregex expects files opened in binary mode. @@ -828,15 +897,13 @@ class Testfromregex(TestCase): class TestFromTxt(TestCase): # def test_record(self): - "Test w/ explicit dtype" + # Test w/ explicit dtype data = TextIO('1 2\n3 4') -# data.seek(0) test = np.ndfromtxt(data, dtype=[('x', np.int32), ('y', np.int32)]) control = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) assert_equal(test, control) # data = TextIO('M 64.0 75.0\nF 25.0 60.0') -# data.seek(0) descriptor = {'names': ('gender', 'age', 'weight'), 'formats': ('S1', 'i4', 'f4')} control = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)], @@ -845,7 +912,7 @@ class TestFromTxt(TestCase): assert_equal(test, control) def test_array(self): - "Test outputing a standard ndarray" + # Test outputing a standard ndarray data = TextIO('1 2\n3 4') control = np.array([[1, 2], [3, 4]], dtype=int) test = np.ndfromtxt(data, dtype=int) @@ -857,7 +924,7 @@ class TestFromTxt(TestCase): assert_array_equal(test, control) def test_1D(self): - "Test squeezing to 1D" + # Test squeezing to 1D control = np.array([1, 2, 3, 4], int) # data = TextIO('1\n2\n3\n4\n') @@ -869,7 +936,7 @@ class TestFromTxt(TestCase): assert_array_equal(test, control) def test_comments(self): - "Test the stripping of comments" + # Test the stripping of comments control = np.array([1, 2, 3, 5], int) # Comment on its own line data = TextIO('# comment\n1,2,3,5\n') @@ -881,7 +948,7 @@ class TestFromTxt(TestCase): assert_equal(test, control) def test_skiprows(self): - "Test row skipping" + # Test row skipping control = np.array([1, 2, 3, 5], int) kwargs = dict(dtype=int, delimiter=',') # @@ -905,8 +972,8 @@ class TestFromTxt(TestCase): assert_equal(test, ctrl) def test_skip_footer_with_invalid(self): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore") + with suppress_warnings() as sup: + sup.filter(ConversionWarning) basestr = '1 1\n2 2\n3 3\n4 4\n5 \n6 \n7 \n' # Footer too small to get rid of all invalid values assert_raises(ValueError, np.genfromtxt, @@ -929,7 +996,7 @@ class TestFromTxt(TestCase): assert_equal(a, np.array([[1., 1.], [3., 3.], [4., 4.]])) def test_header(self): - "Test retrieving a header" + # Test retrieving a header data = TextIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0') test = np.ndfromtxt(data, dtype=None, names=True) control = {'gender': np.array([b'M', b'F']), @@ -940,7 +1007,7 @@ class TestFromTxt(TestCase): assert_equal(test['weight'], control['weight']) def test_auto_dtype(self): - "Test the automatic definition of the output dtype" + # Test the automatic definition of the output dtype data = TextIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False') test = np.ndfromtxt(data, dtype=None) control = [np.array([b'A', b'BCD']), @@ -953,14 +1020,14 
@@ class TestFromTxt(TestCase): assert_equal(test['f%i' % i], ctrl) def test_auto_dtype_uniform(self): - "Tests whether the output dtype can be uniformized" + # Tests whether the output dtype can be uniformized data = TextIO('1 2 3 4\n5 6 7 8\n') test = np.ndfromtxt(data, dtype=None) control = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) assert_equal(test, control) def test_fancy_dtype(self): - "Check that a nested dtype isn't MIA" + # Check that a nested dtype isn't MIA data = TextIO('1,2,3.0\n4,5,6.0\n') fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) test = np.ndfromtxt(data, dtype=fancydtype, delimiter=',') @@ -968,7 +1035,7 @@ class TestFromTxt(TestCase): assert_equal(test, control) def test_names_overwrite(self): - "Test overwriting the names of the dtype" + # Test overwriting the names of the dtype descriptor = {'names': ('g', 'a', 'w'), 'formats': ('S1', 'i4', 'f4')} data = TextIO(b'M 64.0 75.0\nF 25.0 60.0') @@ -980,7 +1047,7 @@ class TestFromTxt(TestCase): assert_equal(test, control) def test_commented_header(self): - "Check that names can be retrieved even if the line is commented out." + # Check that names can be retrieved even if the line is commented out. data = TextIO(""" #gender age weight M 21 72.100000 @@ -1003,7 +1070,7 @@ M 33 21.99 assert_equal(test, ctrl) def test_autonames_and_usecols(self): - "Tests names and usecols" + # Tests names and usecols data = TextIO('A B C D\n aaaa 121 45 9.1') test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True, dtype=None) @@ -1012,7 +1079,7 @@ M 33 21.99 assert_equal(test, control) def test_converters_with_usecols(self): - "Test the combination user-defined converters and usecol" + # Test the combination user-defined converters and usecol data = TextIO('1,2,3,,5\n6,7,8,9,10\n') test = np.ndfromtxt(data, dtype=int, delimiter=',', converters={3: lambda s: int(s or - 999)}, @@ -1021,7 +1088,7 @@ M 33 21.99 assert_equal(test, control) def test_converters_with_usecols_and_names(self): - "Tests names and usecols" + # Tests names and usecols data = TextIO('A B C D\n aaaa 121 45 9.1') test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True, dtype=None, converters={'C': lambda s: 2 * int(s)}) @@ -1030,7 +1097,7 @@ M 33 21.99 assert_equal(test, control) def test_converters_cornercases(self): - "Test the conversion to datetime." + # Test the conversion to datetime. converter = { 'date': lambda s: strptime(s, '%Y-%m-%d %H:%M:%SZ')} data = TextIO('2009-02-03 12:00:00Z, 72214.0') @@ -1041,7 +1108,7 @@ M 33 21.99 assert_equal(test, control) def test_converters_cornercases2(self): - "Test the conversion to datetime64." + # Test the conversion to datetime64. 
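A converter receives each raw field (bytes under Python 3), so a datetime64 conversion typically decodes and strips before parsing. A hedged sketch of the same idea outside the test harness, assuming an illustrative two-column layout:

    import numpy as np
    from io import BytesIO

    conv = {0: lambda s: np.datetime64(s.decode().strip())}  # bytes in, datetime64 out
    out = np.genfromtxt(BytesIO(b'2009-02-03, 72214.0\n'),
                        delimiter=',', dtype=None, converters=conv)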
converter = { 'date': lambda s: np.datetime64(strptime(s, '%Y-%m-%d %H:%M:%SZ'))} data = TextIO('2009-02-03 12:00:00Z, 72214.0') @@ -1052,7 +1119,7 @@ M 33 21.99 assert_equal(test, control) def test_unused_converter(self): - "Test whether unused converters are forgotten" + # Test whether unused converters are forgotten data = TextIO("1 21\n 3 42\n") test = np.ndfromtxt(data, usecols=(1,), converters={0: lambda s: int(s, 16)}) @@ -1077,7 +1144,7 @@ M 33 21.99 assert_raises(ConverterError, np.genfromtxt, s, **kwargs) def test_tricky_converter_bug1666(self): - "Test some corner case" + # Test some corner cases s = TextIO('q1,2\nq3,4') cnv = lambda s: float(s[1:]) test = np.genfromtxt(s, delimiter=',', converters={0: cnv}) @@ -1099,22 +1166,20 @@ M 33 21.99 def test_dtype_with_converters_and_usecols(self): dstr = "1,5,-1,1:1\n2,8,-1,1:n\n3,3,-2,m:n\n" dmap = {'1:1':0, '1:n':1, 'm:1':2, 'm:n':3} - dtyp = [('E1','i4'),('E2','i4'),('E3','i2'),('N', 'i1')] + dtyp = [('e1','i4'),('e2','i4'),('e3','i2'),('n', 'i1')] conv = {0: int, 1: int, 2: int, 3: lambda r: dmap[r.decode()]} test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',', names=None, converters=conv) control = np.rec.array([[1,5,-1,0], [2,8,-1,1], [3,3,-2,3]], dtype=dtyp) assert_equal(test, control) - dtyp = [('E1','i4'),('E2','i4'),('N', 'i1')] + dtyp = [('e1','i4'),('e2','i4'),('n', 'i1')] test = np.recfromcsv(TextIO(dstr,), dtype=dtyp, delimiter=',', usecols=(0,1,3), names=None, converters=conv) control = np.rec.array([[1,5,0], [2,8,1], [3,3,3]], dtype=dtyp) assert_equal(test, control) def test_dtype_with_object(self): - "Test using an explicit dtype with an object" - from datetime import date - import time + # Test using an explicit dtype with an object data = """ 1; 2001-01-01 2; 2002-01-31 """ ndtype = [('idx', int), ('code', np.object)] @@ -1126,7 +1191,7 @@ M 33 21.99 [(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))], dtype=ndtype) assert_equal(test, control) - # + ndtype = [('nest', [('idx', int), ('code', np.object)])] try: test = np.genfromtxt(TextIO(data), delimiter=";", @@ -1138,7 +1203,7 @@ M 33 21.99 raise AssertionError(errmsg) def test_userconverters_with_explicit_dtype(self): - "Test user_converters w/ explicit (standard) dtype" + # Test user_converters w/ explicit (standard) dtype data = TextIO('skip,skip,2001-01-01,1.0,skip') test = np.genfromtxt(data, delimiter=",", names=None, dtype=float, usecols=(2, 3), converters={2: bytes}) @@ -1147,7 +1212,7 @@ M 33 21.99 assert_equal(test, control) def test_spacedelimiter(self): - "Test space delimiter" + # Test space delimiter data = TextIO("1 2 3 4 5\n6 7 8 9 10") test = np.ndfromtxt(data) control = np.array([[1., 2., 3., 4., 5.], @@ -1155,7 +1220,7 @@ M 33 21.99 assert_equal(test, control) def test_integer_delimiter(self): - "Test using an integer for delimiter" + # Test using an integer for delimiter data = " 1 2 3\n 4 5 67\n890123 4" test = np.genfromtxt(TextIO(data), delimiter=3) control = np.array([[1, 2, 3], [4, 5, 67], [890, 123, 4]]) @@ -1169,7 +1234,7 @@ M 33 21.99 assert_equal(test, control) def test_missing_with_tabs(self): - "Test w/ a delimiter tab" + # Test w/ a delimiter tab txt = "1\t2\t3\n\t2\t\n1\t\t3" test = np.genfromtxt(TextIO(txt), delimiter="\t", usemask=True,) @@ -1179,7 +1244,7 @@ M 33 21.99 assert_equal(test.mask, ctrl_m) def test_usecols(self): - "Test the selection of columns" + # Test the selection of columns # Select 1 column control = np.array([[1, 2], [3, 4]], float) data = TextIO() @@ -1200,7 +1265,7 @@ M 33 21.99 
assert_equal(test, control[:, 1:]) def test_usecols_as_css(self): - "Test giving usecols with a comma-separated string" + # Test giving usecols with a comma-separated string data = "1 2 3\n4 5 6" test = np.genfromtxt(TextIO(data), names="a, b, c", usecols="a, c") @@ -1208,7 +1273,7 @@ M 33 21.99 assert_equal(test, ctrl) def test_usecols_with_structured_dtype(self): - "Test usecols with an explicit structured dtype" + # Test usecols with an explicit structured dtype data = TextIO("JOE 70.1 25.3\nBOB 60.5 27.9") names = ['stid', 'temp'] dtypes = ['S4', 'f8'] @@ -1218,12 +1283,12 @@ M 33 21.99 assert_equal(test['temp'], [25.3, 27.9]) def test_usecols_with_integer(self): - "Test usecols with an integer" + # Test usecols with an integer test = np.genfromtxt(TextIO(b"1 2 3\n4 5 6"), usecols=0) assert_equal(test, np.array([1., 4.])) def test_usecols_with_named_columns(self): - "Test usecols with named columns" + # Test usecols with named columns ctrl = np.array([(1, 3), (4, 6)], dtype=[('a', float), ('c', float)]) data = "1 2 3\n4 5 6" kwargs = dict(names="a, b, c") @@ -1234,16 +1299,15 @@ M 33 21.99 assert_equal(test, ctrl) def test_empty_file(self): - "Test that an empty file raises the proper warning." - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", - message="genfromtxt: Empty input file:") + # Test that an empty file raises the proper warning. + with suppress_warnings() as sup: + sup.filter(message="genfromtxt: Empty input file:") data = TextIO() test = np.genfromtxt(data) assert_equal(test, np.array([])) def test_fancy_dtype_alt(self): - "Check that a nested dtype isn't MIA" + # Check that a nested dtype isn't MIA data = TextIO('1,2,3.0\n4,5,6.0\n') fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) test = np.mafromtxt(data, dtype=fancydtype, delimiter=',') @@ -1309,7 +1373,7 @@ M 33 21.99 assert_equal(test, control) def test_user_filling_values(self): - "Test with missing and filling values" + # Test with missing and filling values ctrl = np.array([(0, 3), (4, -999)], dtype=[('a', int), ('b', int)]) data = "N/A, 2, 3\n4, ,???" kwargs = dict(delimiter=",", @@ -1347,7 +1411,7 @@ M 33 21.99 assert_equal(test.mask, control.mask) def test_with_masked_column_uniform(self): - "Test masked column" + # Test masked column data = TextIO('1 2 3\n4 5 6\n') test = np.genfromtxt(data, dtype=None, missing_values='2,5', usemask=True) @@ -1355,7 +1419,7 @@ M 33 21.99 assert_equal(test, control) def test_with_masked_column_various(self): - "Test masked column" + # Test masked column data = TextIO('True 2 3\nFalse 5 6\n') test = np.genfromtxt(data, dtype=None, missing_values='2,5', usemask=True) @@ -1365,7 +1429,7 @@ M 33 21.99 assert_equal(test, control) def test_invalid_raise(self): - "Test invalid raise" + # Test invalid raise data = ["1, 1, 1, 1, 1"] * 50 for i in range(5): data[10 * i] = "2, 2, 2, 2 2" @@ -1373,8 +1437,8 @@ M 33 21.99 mdata = TextIO("\n".join(data)) # kwargs = dict(delimiter=",", dtype=None, names=True) - # XXX: is there a better way to get the return value of the callable in - # assert_warns ? + # XXX: is there a better way to get the return value of the + # callable in assert_warns ? 
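The workaround used here, and again further down, is to let the wrapped callable stash its result in a mutable argument, since assert_warns itself only checks that the warning fired. Reduced to its essentials (all names illustrative):

    import warnings
    from numpy.testing import assert_warns

    def noisy():
        warnings.warn('heads up', UserWarning)
        return 42

    result = {}

    def wrapper(_ret=result):
        _ret['value'] = noisy()  # smuggle the return value out

    assert_warns(UserWarning, wrapper)
    assert result['value'] == 42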
ret = {} def f(_ret={}): @@ -1389,7 +1453,7 @@ M 33 21.99 delimiter=",", names=True) def test_invalid_raise_with_usecols(self): - "Test invalid_raise with usecols" + # Test invalid_raise with usecols data = ["1, 1, 1, 1, 1"] * 50 for i in range(5): data[10 * i] = "2, 2, 2, 2 2" @@ -1397,8 +1461,8 @@ M 33 21.99 mdata = TextIO("\n".join(data)) kwargs = dict(delimiter=",", dtype=None, names=True, invalid_raise=False) - # XXX: is there a better way to get the return value of the callable in - # assert_warns ? + # XXX: is there a better way to get the return value of the + # callable in assert_warns ? ret = {} def f(_ret={}): @@ -1416,7 +1480,7 @@ M 33 21.99 assert_equal(mtest, control) def test_inconsistent_dtype(self): - "Test inconsistent dtype" + # Test inconsistent dtype data = ["1, 1, 1, 1, -1.1"] * 50 mdata = TextIO("\n".join(data)) @@ -1426,7 +1490,7 @@ M 33 21.99 assert_raises(ValueError, np.genfromtxt, mdata, **kwargs) def test_default_field_format(self): - "Test default format" + # Test default format data = "0, 1, 2.3\n4, 5, 6.7" mtest = np.ndfromtxt(TextIO(data), delimiter=",", dtype=None, defaultfmt="f%02i") @@ -1435,7 +1499,7 @@ M 33 21.99 assert_equal(mtest, ctrl) def test_single_dtype_wo_names(self): - "Test single dtype w/o names" + # Test single dtype w/o names data = "0, 1, 2.3\n4, 5, 6.7" mtest = np.ndfromtxt(TextIO(data), delimiter=",", dtype=float, defaultfmt="f%02i") @@ -1443,7 +1507,7 @@ M 33 21.99 assert_equal(mtest, ctrl) def test_single_dtype_w_explicit_names(self): - "Test single dtype w explicit names" + # Test single dtype w explicit names data = "0, 1, 2.3\n4, 5, 6.7" mtest = np.ndfromtxt(TextIO(data), delimiter=",", dtype=float, names="a, b, c") @@ -1452,7 +1516,7 @@ M 33 21.99 assert_equal(mtest, ctrl) def test_single_dtype_w_implicit_names(self): - "Test single dtype w implicit names" + # Test single dtype w implicit names data = "a, b, c\n0, 1, 2.3\n4, 5, 6.7" mtest = np.ndfromtxt(TextIO(data), delimiter=",", dtype=float, names=True) @@ -1461,7 +1525,7 @@ M 33 21.99 assert_equal(mtest, ctrl) def test_easy_structured_dtype(self): - "Test easy structured dtype" + # Test easy structured dtype data = "0, 1, 2.3\n4, 5, 6.7" mtest = np.ndfromtxt(TextIO(data), delimiter=",", dtype=(int, float, float), defaultfmt="f_%02i") @@ -1470,7 +1534,7 @@ M 33 21.99 assert_equal(mtest, ctrl) def test_autostrip(self): - "Test autostrip" + # Test autostrip data = "01/01/2003 , 1.3, abcde" kwargs = dict(delimiter=",", dtype=None) mtest = np.ndfromtxt(TextIO(data), **kwargs) @@ -1483,7 +1547,7 @@ M 33 21.99 assert_equal(mtest, ctrl) def test_replace_space(self): - "Test the 'replace_space' option" + # Test the 'replace_space' option txt = "A.A, B (B), C:C\n1, 2, 3.14" # Test default: replace ' ' by '_' and delete non-alphanum chars test = np.genfromtxt(TextIO(txt), @@ -1506,8 +1570,32 @@ M 33 21.99 ctrl = np.array((1, 2, 3.14), dtype=ctrl_dtype) assert_equal(test, ctrl) + def test_replace_space_known_dtype(self): + # Test the 'replace_space' (and related) options when dtype != None + txt = "A.A, B (B), C:C\n1, 2, 3" + # Test default: replace ' ' by '_' and delete non-alphanum chars + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=int) + ctrl_dtype = [("AA", int), ("B_B", int), ("CC", int)] + ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no replace, no delete + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=int, + replace_space='', deletechars='') + ctrl_dtype = [("A.A", int), ("B (B)", int), ("C:C", 
int)] + ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) + assert_equal(test, ctrl) + # Test: no delete (spaces are replaced by _) + test = np.genfromtxt(TextIO(txt), + delimiter=",", names=True, dtype=int, + deletechars='') + ctrl_dtype = [("A.A", int), ("B_(B)", int), ("C:C", int)] + ctrl = np.array((1, 2, 3), dtype=ctrl_dtype) + assert_equal(test, ctrl) + def test_incomplete_names(self): - "Test w/ incomplete names" + # Test w/ incomplete names data = "A,,C\n0,1,2\n3,4,5" kwargs = dict(delimiter=",", names=True) # w/ dtype=None @@ -1521,7 +1609,7 @@ M 33 21.99 test = np.ndfromtxt(TextIO(data), **kwargs) def test_names_auto_completion(self): - "Make sure that names are properly completed" + # Make sure that names are properly completed data = "1 2 3\n 4 5 6" test = np.genfromtxt(TextIO(data), dtype=(int, float, int), names="a") @@ -1530,7 +1618,7 @@ M 33 21.99 assert_equal(test, ctrl) def test_names_with_usecols_bug1636(self): - "Make sure we pick up the right names w/ usecols" + # Make sure we pick up the right names w/ usecols data = "A,B,C,D,E\n0,1,2,3,4\n0,1,2,3,4\n0,1,2,3,4" ctrl_names = ("A", "C", "E") test = np.genfromtxt(TextIO(data), @@ -1549,7 +1637,7 @@ M 33 21.99 assert_equal(test.dtype.names, ctrl_names) def test_fixed_width_names(self): - "Test fix-width w/ names" + # Test fix-width w/ names data = " A B C\n 0 1 2.3\n 45 67 9." kwargs = dict(delimiter=(5, 5, 4), names=True, dtype=None) ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)], @@ -1564,7 +1652,7 @@ M 33 21.99 assert_equal(test, ctrl) def test_filling_values(self): - "Test missing values" + # Test missing values data = b"1, 2, 3\n1, , 5\n0, 6, \n" kwargs = dict(delimiter=",", dtype=None, filling_values=-999) ctrl = np.array([[1, 2, 3], [1, -999, 5], [0, 6, -999]], dtype=int) @@ -1633,9 +1721,64 @@ M 33 21.99 self.assertTrue(isinstance(test, np.recarray)) assert_equal(test, control) + def test_max_rows(self): + # Test the `max_rows` keyword argument. + data = '1 2\n3 4\n5 6\n7 8\n9 10\n' + txt = TextIO(data) + a1 = np.genfromtxt(txt, max_rows=3) + a2 = np.genfromtxt(txt) + assert_equal(a1, [[1, 2], [3, 4], [5, 6]]) + assert_equal(a2, [[7, 8], [9, 10]]) + + # max_rows must be at least 1. + assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=0) + + # An input with several invalid rows. + data = '1 1\n2 2\n0 \n3 3\n4 4\n5 \n6 \n7 \n' + + test = np.genfromtxt(TextIO(data), max_rows=2) + control = np.array([[1., 1.], [2., 2.]]) + assert_equal(test, control) + + # Test keywords conflict + assert_raises(ValueError, np.genfromtxt, TextIO(data), skip_footer=1, + max_rows=4) + + # Test with invalid value + assert_raises(ValueError, np.genfromtxt, TextIO(data), max_rows=4) + + # Test with invalid not raise + with suppress_warnings() as sup: + sup.filter(ConversionWarning) + + test = np.genfromtxt(TextIO(data), max_rows=4, invalid_raise=False) + control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]) + assert_equal(test, control) + + test = np.genfromtxt(TextIO(data), max_rows=5, invalid_raise=False) + control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]) + assert_equal(test, control) + + # Structured array with field names. 
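Before the structured-array case below, note the practical payoff of max_rows: a later call on the same open file object continues where the previous one stopped and can reuse the dtype discovered on the first pass. A compact sketch, assuming clean inline data with no comments or skipped rows:

    import numpy as np
    from io import BytesIO

    txt = BytesIO(b'a b\n1 1\n2 2\n3 3\n')
    head = np.genfromtxt(txt, names=True, max_rows=2)           # names plus 2 rows
    tail = np.genfromtxt(txt, dtype=head.dtype, max_rows=None)  # whatever remains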
+ data = 'a b\n#c d\n1 1\n2 2\n#0 \n3 3\n4 4\n5 5\n' + + # Test with header, names and comments + txt = TextIO(data) + test = np.genfromtxt(txt, skip_header=1, max_rows=3, names=True) + control = np.array([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)], + dtype=[('c', '<f8'), ('d', '<f8')]) + assert_equal(test, control) + # To continue reading the same "file", don't use skip_header or + # names, and use the previously determined dtype. + test = np.genfromtxt(txt, max_rows=None, dtype=test.dtype) + control = np.array([(4.0, 4.0), (5.0, 5.0)], + dtype=[('c', '<f8'), ('d', '<f8')]) + assert_equal(test, control) + def test_gft_using_filename(self): - # Test that we can load data from a filename as well as a file object - wanted = np.arange(6).reshape((2, 3)) + # Test that we can load data from a filename as well as a file + # object + tgt = np.arange(6).reshape((2, 3)) if sys.version_info[0] >= 3: # python 3k is known to fail for '\r' linesep = ('\n', '\r\n') @@ -1644,15 +1787,11 @@ M 33 21.99 for sep in linesep: data = '0 1 2' + sep + '3 4 5' - f, name = mkstemp() - # We can't use NamedTemporaryFile on windows, because we cannot - # reopen the file. - try: - os.write(f, asbytes(data)) - assert_array_equal(np.genfromtxt(name), wanted) - finally: - os.close(f) - os.unlink(name) + with temppath() as name: + with open(name, 'w') as f: + f.write(data) + res = np.genfromtxt(name) + assert_array_equal(res, tgt) def test_gft_using_generator(self): # gft doesn't work with unicode. @@ -1663,6 +1802,133 @@ M 33 21.99 res = np.genfromtxt(count()) assert_array_equal(res, np.arange(10)) + def test_auto_dtype_largeint(self): + # Regression test for numpy/numpy#5635 whereby large integers could + # cause OverflowErrors. + + # Test the automatic definition of the output dtype + # + # 2**66 = 73786976294838206464 => should convert to float + # 2**34 = 17179869184 => should convert to int64 + # 2**10 = 1024 => should convert to int (int32 on 32-bit systems, + # int64 on 64-bit systems) + + data = TextIO('73786976294838206464 17179869184 1024') + + test = np.ndfromtxt(data, dtype=None) + + assert_equal(test.dtype.names, ['f0', 'f1', 'f2']) + + assert_(test.dtype['f0'] == np.float) + assert_(test.dtype['f1'] == np.int64) + assert_(test.dtype['f2'] == np.integer) + + assert_allclose(test['f0'], 73786976294838206464.) + assert_equal(test['f1'], 17179869184) + assert_equal(test['f2'], 1024) + + +class TestPathUsage(TestCase): + # Test that pathlib.Path can be used + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_loadtxt(self): + with temppath(suffix='.txt') as path: + path = Path(path) + a = np.array([[1.1, 2], [3, 4]]) + np.savetxt(path, a) + x = np.loadtxt(path) + assert_array_equal(x, a) + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_save_load(self): + # Test that pathlib.Path instances can be used with savez. + with temppath(suffix='.npy') as path: + path = Path(path) + a = np.array([[1, 2], [3, 4]], int) + np.save(path, a) + data = np.load(path) + assert_array_equal(data, a) + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_savez_load(self): + # Test that pathlib.Path instances can be used with savez. + with temppath(suffix='.npz') as path: + path = Path(path) + np.savez(path, lab='place holder') + with np.load(path) as data: + assert_array_equal(data['lab'], 'place holder') + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_savez_compressed_load(self): + # Test that pathlib.Path instances can be used with savez. 
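Every case in this class reduces to the same contract: wherever these functions accept a filename string, they accept a pathlib.Path as well. The shortest round trip looks like this (file name illustrative):

    import numpy as np
    from pathlib import Path

    p = Path('roundtrip.npy')
    np.save(p, np.arange(4))  # a Path is accepted wherever a filename is
    back = np.load(p)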
+ with temppath(suffix='.npz') as path: + path = Path(path) + np.savez_compressed(path, lab='place holder') + data = np.load(path) + assert_array_equal(data['lab'], 'place holder') + data.close() + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_genfromtxt(self): + with temppath(suffix='.txt') as path: + path = Path(path) + a = np.array([(1, 2), (3, 4)]) + np.savetxt(path, a) + data = np.genfromtxt(path) + assert_array_equal(a, data) + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_ndfromtxt(self): + # Test outputing a standard ndarray + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'1 2\n3 4') + + control = np.array([[1, 2], [3, 4]], dtype=int) + test = np.ndfromtxt(path, dtype=int) + assert_array_equal(test, control) + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_mafromtxt(self): + # From `test_fancy_dtype_alt` above + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'1,2,3.0\n4,5,6.0\n') + + test = np.mafromtxt(path, delimiter=',') + control = ma.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)]) + assert_equal(test, control) + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_recfromtxt(self): + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'A,B\n0,1\n2,3') + + kwargs = dict(delimiter=",", missing_values="N/A", names=True) + test = np.recfromtxt(path, **kwargs) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', np.int), ('B', np.int)]) + self.assertTrue(isinstance(test, np.recarray)) + assert_equal(test, control) + + @np.testing.dec.skipif(Path is None, "No pathlib.Path") + def test_recfromcsv(self): + with temppath(suffix='.txt') as path: + path = Path(path) + with path.open('w') as f: + f.write(u'A,B\n0,1\n2,3') + + kwargs = dict(missing_values="N/A", names=True, case_sensitive=True) + test = np.recfromcsv(path, dtype=None, **kwargs) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', np.int), ('B', np.int)]) + self.assertTrue(isinstance(test, np.recarray)) + assert_equal(test, control) + def test_gzip_load(): a = np.random.random((5, 5)) @@ -1688,16 +1954,15 @@ def test_gzip_loadtxt(): g = gzip.GzipFile(fileobj=s, mode='w') g.write(b'1 2 3\n') g.close() + s.seek(0) + with temppath(suffix='.gz') as name: + with open(name, 'wb') as f: + f.write(s.read()) + res = np.loadtxt(name) + s.close() - f, name = mkstemp(suffix='.gz') - try: - os.write(f, s.read()) - s.close() - assert_array_equal(np.loadtxt(name), [1, 2, 3]) - finally: - os.close(f) - os.unlink(name) + assert_array_equal(res, [1, 2, 3]) def test_gzip_loadtxt_from_string(): @@ -1746,12 +2011,17 @@ def test_load_refcount(): np.savez(f, [1, 2, 3]) f.seek(0) - gc.collect() - n_before = len(gc.get_objects()) - np.load(f) - n_after = len(gc.get_objects()) - - assert_equal(n_before, n_after) + assert_(gc.isenabled()) + gc.disable() + try: + gc.collect() + np.load(f) + # gc.collect returns the number of unreachable objects in cycles that + # were found -- we are checking that no cycles were created by np.load + n_objects_in_cycles = gc.collect() + finally: + gc.enable() + assert_equal(n_objects_in_cycles, 0) if __name__ == "__main__": run_module_suite() diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py index 35ae86c20..06c0953b5 100644 --- a/numpy/lib/tests/test_nanfunctions.py +++ b/numpy/lib/tests/test_nanfunctions.py @@ -5,7 +5,7 @@ import warnings import numpy as np 
from numpy.testing import ( run_module_suite, TestCase, assert_, assert_equal, assert_almost_equal, - assert_raises, assert_array_equal + assert_no_warnings, assert_raises, assert_array_equal, suppress_warnings ) @@ -22,6 +22,18 @@ _rdat = [np.array([0.6244, 0.2692, 0.0116, 0.1170]), np.array([0.1042, -0.5954]), np.array([0.1610, 0.1859, 0.3146])] +# Rows of _ndat with nans converted to ones +_ndat_ones = np.array([[0.6244, 1.0, 0.2692, 0.0116, 1.0, 0.1170], + [0.5351, -0.9403, 1.0, 0.2100, 0.4759, 0.2833], + [1.0, 1.0, 1.0, 0.1042, 1.0, -0.5954], + [0.1610, 1.0, 1.0, 0.1859, 0.3146, 1.0]]) + +# Rows of _ndat with nans converted to zeros +_ndat_zeros = np.array([[0.6244, 0.0, 0.2692, 0.0116, 0.0, 0.1170], + [0.5351, -0.9403, 0.0, 0.2100, 0.4759, 0.2833], + [0.0, 0.0, 0.0, 0.1042, 0.0, -0.5954], + [0.1610, 0.0, 0.0, 0.1859, 0.3146, 0.0]]) + class TestNanFunctions_MinMax(TestCase): @@ -155,8 +167,8 @@ class TestNanFunctions_ArgminArgmax(TestCase): def test_result_values(self): for f, fcmp in zip(self.nanfuncs, [np.greater, np.less]): for row in _ndat: - with warnings.catch_warnings(record=True): - warnings.simplefilter('always') + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in") ind = f(row) val = row[ind] # comparing with NaN is tricky as the result @@ -236,6 +248,21 @@ class TestNanFunctions_IntTypes(TestCase): for mat in self.integer_arrays(): assert_equal(np.nansum(mat), tgt) + def test_nanprod(self): + tgt = np.prod(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nanprod(mat), tgt) + + def test_nancumsum(self): + tgt = np.cumsum(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nancumsum(mat), tgt) + + def test_nancumprod(self): + tgt = np.cumprod(self.mat) + for mat in self.integer_arrays(): + assert_equal(np.nancumprod(mat), tgt) + def test_nanmean(self): tgt = np.mean(self.mat) for mat in self.integer_arrays(): @@ -260,70 +287,108 @@ class TestNanFunctions_IntTypes(TestCase): assert_equal(np.nanstd(mat, ddof=1), tgt) -class TestNanFunctions_Sum(TestCase): - +class SharedNanFunctionsTestsMixin(object): def test_mutation(self): # Check that passed array is not modified. 
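The mixin gathers every check that depends only on pairing a nan-aware function with its plain counterpart, so each concrete TestCase just declares its nanfuncs and stdfuncs lists. The comparison all of them share is essentially this sketch:

    import numpy as np

    pairs = [(np.nansum, np.sum), (np.nanmean, np.mean)]
    clean = np.arange(6.).reshape(2, 3)  # NaN-free input: both must agree
    for nanf, ref in pairs:
        assert np.allclose(nanf(clean, axis=1), ref(clean, axis=1))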
ndat = _ndat.copy() - np.nansum(ndat) - assert_equal(ndat, _ndat) + for f in self.nanfuncs: + f(ndat) + assert_equal(ndat, _ndat) def test_keepdims(self): mat = np.eye(3) - for axis in [None, 0, 1]: - tgt = np.sum(mat, axis=axis, keepdims=True) - res = np.nansum(mat, axis=axis, keepdims=True) - assert_(res.ndim == tgt.ndim) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for axis in [None, 0, 1]: + tgt = rf(mat, axis=axis, keepdims=True) + res = nf(mat, axis=axis, keepdims=True) + assert_(res.ndim == tgt.ndim) def test_out(self): mat = np.eye(3) - resout = np.zeros(3) - tgt = np.sum(mat, axis=1) - res = np.nansum(mat, axis=1, out=resout) - assert_almost_equal(res, resout) - assert_almost_equal(res, tgt) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + resout = np.zeros(3) + tgt = rf(mat, axis=1) + res = nf(mat, axis=1, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) def test_dtype_from_dtype(self): mat = np.eye(3) codes = 'efdgFDG' - for c in codes: - tgt = np.sum(mat, dtype=np.dtype(c), axis=1).dtype.type - res = np.nansum(mat, dtype=np.dtype(c), axis=1).dtype.type - assert_(res is tgt) - # scalar case - tgt = np.sum(mat, dtype=np.dtype(c), axis=None).dtype.type - res = np.nansum(mat, dtype=np.dtype(c), axis=None).dtype.type - assert_(res is tgt) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for c in codes: + with suppress_warnings() as sup: + if nf in {np.nanstd, np.nanvar} and c in 'FDG': + # Giving the warning is a small bug, see gh-8000 + sup.filter(np.ComplexWarning) + tgt = rf(mat, dtype=np.dtype(c), axis=1).dtype.type + res = nf(mat, dtype=np.dtype(c), axis=1).dtype.type + assert_(res is tgt) + # scalar case + tgt = rf(mat, dtype=np.dtype(c), axis=None).dtype.type + res = nf(mat, dtype=np.dtype(c), axis=None).dtype.type + assert_(res is tgt) def test_dtype_from_char(self): mat = np.eye(3) codes = 'efdgFDG' - for c in codes: - tgt = np.sum(mat, dtype=c, axis=1).dtype.type - res = np.nansum(mat, dtype=c, axis=1).dtype.type - assert_(res is tgt) - # scalar case - tgt = np.sum(mat, dtype=c, axis=None).dtype.type - res = np.nansum(mat, dtype=c, axis=None).dtype.type - assert_(res is tgt) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for c in codes: + with suppress_warnings() as sup: + if nf in {np.nanstd, np.nanvar} and c in 'FDG': + # Giving the warning is a small bug, see gh-8000 + sup.filter(np.ComplexWarning) + tgt = rf(mat, dtype=c, axis=1).dtype.type + res = nf(mat, dtype=c, axis=1).dtype.type + assert_(res is tgt) + # scalar case + tgt = rf(mat, dtype=c, axis=None).dtype.type + res = nf(mat, dtype=c, axis=None).dtype.type + assert_(res is tgt) def test_dtype_from_input(self): codes = 'efdgFDG' - for c in codes: - mat = np.eye(3, dtype=c) - tgt = np.sum(mat, axis=1).dtype.type - res = np.nansum(mat, axis=1).dtype.type - assert_(res is tgt) - # scalar case - tgt = np.sum(mat, axis=None).dtype.type - res = np.nansum(mat, axis=None).dtype.type - assert_(res is tgt) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + for c in codes: + mat = np.eye(3, dtype=c) + tgt = rf(mat, axis=1).dtype.type + res = nf(mat, axis=1).dtype.type + assert_(res is tgt, "res %s, tgt %s" % (res, tgt)) + # scalar case + tgt = rf(mat, axis=None).dtype.type + res = nf(mat, axis=None).dtype.type + assert_(res is tgt) def test_result_values(self): - tgt = [np.sum(d) for d in _rdat] - res = np.nansum(_ndat, axis=1) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + tgt = [rf(d) for d in _rdat] + res = nf(_ndat, axis=1) assert_almost_equal(res, tgt) + def 
test_scalar(self): + for f in self.nanfuncs: + assert_(f(0.) == 0.) + + def test_matrices(self): + # Check that it works and that type and + # shape are preserved + mat = np.matrix(np.eye(3)) + for f in self.nanfuncs: + res = f(mat, axis=0) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (1, 3)) + res = f(mat, axis=1) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (3, 1)) + res = f(mat) + assert_(np.isscalar(res)) + + +class TestNanFunctions_SumProd(TestCase, SharedNanFunctionsTestsMixin): + + nanfuncs = [np.nansum, np.nanprod] + stdfuncs = [np.sum, np.prod] + def test_allnans(self): # Check for FutureWarning with warnings.catch_warnings(record=True) as w: @@ -340,113 +405,118 @@ class TestNanFunctions_Sum(TestCase): assert_(len(w) == 0, 'unwanted warning raised') def test_empty(self): - mat = np.zeros((0, 3)) - tgt = [0]*3 - res = np.nansum(mat, axis=0) - assert_equal(res, tgt) - tgt = [] - res = np.nansum(mat, axis=1) - assert_equal(res, tgt) - tgt = 0 - res = np.nansum(mat, axis=None) - assert_equal(res, tgt) + for f, tgt_value in zip([np.nansum, np.nanprod], [0, 1]): + mat = np.zeros((0, 3)) + tgt = [tgt_value]*3 + res = f(mat, axis=0) + assert_equal(res, tgt) + tgt = [] + res = f(mat, axis=1) + assert_equal(res, tgt) + tgt = tgt_value + res = f(mat, axis=None) + assert_equal(res, tgt) - def test_scalar(self): - assert_(np.nansum(0.) == 0.) + +class TestNanFunctions_CumSumProd(TestCase, SharedNanFunctionsTestsMixin): + + nanfuncs = [np.nancumsum, np.nancumprod] + stdfuncs = [np.cumsum, np.cumprod] + + def test_allnans(self): + for f, tgt_value in zip(self.nanfuncs, [0, 1]): + # Unlike other nan-functions, sum/prod/cumsum/cumprod don't warn on all nan input + with assert_no_warnings(): + res = f([np.nan]*3, axis=None) + tgt = tgt_value*np.ones((3)) + assert_(np.array_equal(res, tgt), 'result is not %s * np.ones((3))' % (tgt_value)) + # Check scalar + res = f(np.nan) + tgt = tgt_value*np.ones((1)) + assert_(np.array_equal(res, tgt), 'result is not %s * np.ones((1))' % (tgt_value)) + # Check there is no warning for not all-nan + f([0]*3, axis=None) + + def test_empty(self): + for f, tgt_value in zip(self.nanfuncs, [0, 1]): + mat = np.zeros((0, 3)) + tgt = tgt_value*np.ones((0, 3)) + res = f(mat, axis=0) + assert_equal(res, tgt) + tgt = mat + res = f(mat, axis=1) + assert_equal(res, tgt) + tgt = np.zeros((0)) + res = f(mat, axis=None) + assert_equal(res, tgt) + + def test_keepdims(self): + for f, g in zip(self.nanfuncs, self.stdfuncs): + mat = np.eye(3) + for axis in [None, 0, 1]: + tgt = f(mat, axis=axis, out=None) + res = g(mat, axis=axis, out=None) + assert_(res.ndim == tgt.ndim) + + for f in self.nanfuncs: + d = np.ones((3, 5, 7, 11)) + # Randomly set some elements to NaN: + rs = np.random.RandomState(0) + d[rs.rand(*d.shape) < 0.5] = np.nan + res = f(d, axis=None) + assert_equal(res.shape, (1155,)) + for axis in np.arange(4): + res = f(d, axis=axis) + assert_equal(res.shape, (3, 5, 7, 11)) def test_matrices(self): # Check that it works and that type and # shape are preserved mat = np.matrix(np.eye(3)) - res = np.nansum(mat, axis=0) - assert_(isinstance(res, np.matrix)) - assert_(res.shape == (1, 3)) - res = np.nansum(mat, axis=1) - assert_(isinstance(res, np.matrix)) - assert_(res.shape == (3, 1)) - res = np.nansum(mat) - assert_(np.isscalar(res)) + for f in self.nanfuncs: + for axis in np.arange(2): + res = f(mat, axis=axis) + assert_(isinstance(res, np.matrix)) + assert_(res.shape == (3, 3)) + res = f(mat) + assert_(res.shape == (1, 3*3)) + + def 
test_result_values(self): + for axis in (-2, -1, 0, 1, None): + tgt = np.cumprod(_ndat_ones, axis=axis) + res = np.nancumprod(_ndat, axis=axis) + assert_almost_equal(res, tgt) + tgt = np.cumsum(_ndat_zeros,axis=axis) + res = np.nancumsum(_ndat, axis=axis) + assert_almost_equal(res, tgt) + + def test_out(self): + mat = np.eye(3) + for nf, rf in zip(self.nanfuncs, self.stdfuncs): + resout = np.eye(3) + for axis in (-2, -1, 0, 1): + tgt = rf(mat, axis=axis) + res = nf(mat, axis=axis, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) -class TestNanFunctions_MeanVarStd(TestCase): +class TestNanFunctions_MeanVarStd(TestCase, SharedNanFunctionsTestsMixin): nanfuncs = [np.nanmean, np.nanvar, np.nanstd] stdfuncs = [np.mean, np.var, np.std] - def test_mutation(self): - # Check that passed array is not modified. - ndat = _ndat.copy() - for f in self.nanfuncs: - f(ndat) - assert_equal(ndat, _ndat) - def test_dtype_error(self): for f in self.nanfuncs: - for dtype in [np.bool_, np.int_, np.object]: - assert_raises(TypeError, f, _ndat, axis=1, dtype=np.int) + for dtype in [np.bool_, np.int_, np.object_]: + assert_raises(TypeError, f, _ndat, axis=1, dtype=dtype) def test_out_dtype_error(self): for f in self.nanfuncs: - for dtype in [np.bool_, np.int_, np.object]: + for dtype in [np.bool_, np.int_, np.object_]: out = np.empty(_ndat.shape[0], dtype=dtype) assert_raises(TypeError, f, _ndat, axis=1, out=out) - def test_keepdims(self): - mat = np.eye(3) - for nf, rf in zip(self.nanfuncs, self.stdfuncs): - for axis in [None, 0, 1]: - tgt = rf(mat, axis=axis, keepdims=True) - res = nf(mat, axis=axis, keepdims=True) - assert_(res.ndim == tgt.ndim) - - def test_out(self): - mat = np.eye(3) - for nf, rf in zip(self.nanfuncs, self.stdfuncs): - resout = np.zeros(3) - tgt = rf(mat, axis=1) - res = nf(mat, axis=1, out=resout) - assert_almost_equal(res, resout) - assert_almost_equal(res, tgt) - - def test_dtype_from_dtype(self): - mat = np.eye(3) - codes = 'efdgFDG' - for nf, rf in zip(self.nanfuncs, self.stdfuncs): - for c in codes: - tgt = rf(mat, dtype=np.dtype(c), axis=1).dtype.type - res = nf(mat, dtype=np.dtype(c), axis=1).dtype.type - assert_(res is tgt) - # scalar case - tgt = rf(mat, dtype=np.dtype(c), axis=None).dtype.type - res = nf(mat, dtype=np.dtype(c), axis=None).dtype.type - assert_(res is tgt) - - def test_dtype_from_char(self): - mat = np.eye(3) - codes = 'efdgFDG' - for nf, rf in zip(self.nanfuncs, self.stdfuncs): - for c in codes: - tgt = rf(mat, dtype=c, axis=1).dtype.type - res = nf(mat, dtype=c, axis=1).dtype.type - assert_(res is tgt) - # scalar case - tgt = rf(mat, dtype=c, axis=None).dtype.type - res = nf(mat, dtype=c, axis=None).dtype.type - assert_(res is tgt) - - def test_dtype_from_input(self): - codes = 'efdgFDG' - for nf, rf in zip(self.nanfuncs, self.stdfuncs): - for c in codes: - mat = np.eye(3, dtype=c) - tgt = rf(mat, axis=1).dtype.type - res = nf(mat, axis=1).dtype.type - assert_(res is tgt, "res %s, tgt %s" % (res, tgt)) - # scalar case - tgt = rf(mat, axis=None).dtype.type - res = nf(mat, axis=None).dtype.type - assert_(res is tgt) - def test_ddof(self): nanfuncs = [np.nanvar, np.nanstd] stdfuncs = [np.var, np.std] @@ -462,22 +532,16 @@ class TestNanFunctions_MeanVarStd(TestCase): dsize = [len(d) for d in _rdat] for nf, rf in zip(nanfuncs, stdfuncs): for ddof in range(5): - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + sup.filter(np.ComplexWarning) tgt 
= [ddof >= d for d in dsize] res = nf(_ndat, axis=1, ddof=ddof) assert_equal(np.isnan(res), tgt) if any(tgt): - assert_(len(w) == 1) - assert_(issubclass(w[0].category, RuntimeWarning)) + assert_(len(sup.log) == 1) else: - assert_(len(w) == 0) - - def test_result_values(self): - for nf, rf in zip(self.nanfuncs, self.stdfuncs): - tgt = [rf(d) for d in _rdat] - res = nf(_ndat, axis=1) - assert_almost_equal(res, tgt) + assert_(len(sup.log) == 0) def test_allnans(self): mat = np.array([np.nan]*9).reshape(3, 3) @@ -508,24 +572,6 @@ class TestNanFunctions_MeanVarStd(TestCase): assert_equal(f(mat, axis=axis), np.zeros([])) assert_(len(w) == 0) - def test_scalar(self): - for f in self.nanfuncs: - assert_(f(0.) == 0.) - - def test_matrices(self): - # Check that it works and that type and - # shape are preserved - mat = np.matrix(np.eye(3)) - for f in self.nanfuncs: - res = f(mat, axis=0) - assert_(isinstance(res, np.matrix)) - assert_(res.shape == (1, 3)) - res = f(mat, axis=1) - assert_(isinstance(res, np.matrix)) - assert_(res.shape == (3, 1)) - res = f(mat) - assert_(np.isscalar(res)) - class TestNanFunctions_Median(TestCase): @@ -547,8 +593,8 @@ class TestNanFunctions_Median(TestCase): w = np.random.random((4, 200)) * np.array(d.shape)[:, None] w = w.astype(np.intp) d[tuple(w)] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', RuntimeWarning) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) res = np.nanmedian(d, axis=None, keepdims=True) assert_equal(res.shape, (1, 1, 1, 1)) res = np.nanmedian(d, axis=(0, 1), keepdims=True) @@ -604,21 +650,20 @@ class TestNanFunctions_Median(TestCase): def test_allnans(self): mat = np.array([np.nan]*9).reshape(3, 3) for axis in [None, 0, 1]: - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + assert_(np.isnan(np.nanmedian(mat, axis=axis)).all()) if axis is None: - assert_(len(w) == 1) + assert_(len(sup.log) == 1) else: - assert_(len(w) == 3) - assert_(issubclass(w[0].category, RuntimeWarning)) + assert_(len(sup.log) == 3) # Check scalar assert_(np.isnan(np.nanmedian(np.nan))) if axis is None: - assert_(len(w) == 2) + assert_(len(sup.log) == 2) else: - assert_(len(w) == 4) - assert_(issubclass(w[0].category, RuntimeWarning)) + assert_(len(sup.log) == 4) def test_empty(self): mat = np.zeros((0, 3)) @@ -646,8 +691,8 @@ class TestNanFunctions_Median(TestCase): assert_raises(ValueError, np.nanmedian, d, axis=(1, 1)) def test_float_special(self): - with warnings.catch_warnings(record=True): - warnings.simplefilter('ignore', RuntimeWarning) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) a = np.array([[np.inf, np.nan], [np.nan, np.nan]]) assert_equal(np.nanmedian(a, axis=0), [np.inf, np.nan]) assert_equal(np.nanmedian(a, axis=1), [np.inf, np.nan]) @@ -684,8 +729,8 @@ class TestNanFunctions_Percentile(TestCase): w = np.random.random((4, 200)) * np.array(d.shape)[:, None] w = w.astype(np.intp) d[tuple(w)] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always', RuntimeWarning) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) res = np.nanpercentile(d, 90, axis=None, keepdims=True) assert_equal(res.shape, (1, 1, 1, 1)) res = np.nanpercentile(d, 90, axis=(0, 1), keepdims=True) @@ -721,7 +766,8 @@ class TestNanFunctions_Percentile(TestCase): tgt = [np.percentile(d, 28) for d in _rdat] res = np.nanpercentile(_ndat, 28, axis=1) assert_almost_equal(res, tgt) 
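The warning assertions above migrate from warnings.catch_warnings(record=True) to numpy.testing's suppress_warnings, which can record one category while silently filtering another inside the same block. A minimal sketch of that usage:

    import numpy as np
    from numpy.testing import suppress_warnings

    with suppress_warnings() as sup:
        sup.record(RuntimeWarning)      # recorded warnings land in sup.log
        sup.filter(np.ComplexWarning)   # these are dropped entirely
        np.nanmean(np.array([np.nan, np.nan]))  # "Mean of empty slice"
    assert len(sup.log) == 1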
- tgt = [np.percentile(d, (28, 98)) for d in _rdat] + # Transpose the array to fit the output convention of numpy.percentile + tgt = np.transpose([np.percentile(d, (28, 98)) for d in _rdat]) res = np.nanpercentile(_ndat, (28, 98), axis=1) assert_almost_equal(res, tgt) @@ -769,6 +815,32 @@ class TestNanFunctions_Percentile(TestCase): assert_raises(IndexError, np.nanpercentile, d, q=5, axis=(0, 4)) assert_raises(ValueError, np.nanpercentile, d, q=5, axis=(1, 1)) + def test_multiple_percentiles(self): + perc = [50, 100] + mat = np.ones((4, 3)) + nan_mat = np.nan * mat + # For checking consistency in higher dimensional case + large_mat = np.ones((3, 4, 5)) + large_mat[:, 0:2:4, :] = 0 + large_mat[:, :, 3:] *= 2 + for axis in [None, 0, 1]: + for keepdim in [False, True]: + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "All-NaN slice encountered") + val = np.percentile(mat, perc, axis=axis, keepdims=keepdim) + nan_val = np.nanpercentile(nan_mat, perc, axis=axis, + keepdims=keepdim) + assert_equal(nan_val.shape, val.shape) + + val = np.percentile(large_mat, perc, axis=axis, + keepdims=keepdim) + nan_val = np.nanpercentile(large_mat, perc, axis=axis, + keepdims=keepdim) + assert_equal(nan_val, val) + + megamat = np.ones((3, 4, 5, 6)) + assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6)) + if __name__ == "__main__": run_module_suite() diff --git a/numpy/lib/tests/test_packbits.py b/numpy/lib/tests/test_packbits.py new file mode 100644 index 000000000..0de084ef9 --- /dev/null +++ b/numpy/lib/tests/test_packbits.py @@ -0,0 +1,27 @@ +from __future__ import division, absolute_import, print_function + +import numpy as np +from numpy.testing import assert_array_equal, assert_equal, assert_raises + + +def test_packbits(): + # Copied from the docstring. + a = [[[1, 0, 1], [0, 1, 0]], + [[1, 1, 0], [0, 0, 1]]] + for dtype in [np.bool, np.uint8, np.int]: + arr = np.array(a, dtype=dtype) + b = np.packbits(arr, axis=-1) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, np.array([[[160], [64]], [[192], [32]]])) + + assert_raises(TypeError, np.packbits, np.array(a, dtype=float)) + + +def test_unpackbits(): + # Copied from the docstring. 
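The transposed target reflects the output convention of np.percentile that np.nanpercentile now matches: with a sequence of percentiles, the percentile axis comes first. A small illustration:

    import numpy as np

    a = np.array([[1.0, 2.0, np.nan],
                  [4.0, np.nan, 6.0]])
    res = np.nanpercentile(a, (28, 98), axis=1)
    # Shape is (n_percentiles, n_rows), not (n_rows, n_percentiles).
    print(res.shape)   # (2, 2)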
+ a = np.array([[2], [7], [23]], dtype=np.uint8) + b = np.unpackbits(a, axis=1) + assert_equal(b.dtype, np.uint8) + assert_array_equal(b, np.array([[0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 0, 1, 1, 1]])) diff --git a/numpy/lib/tests/test_polynomial.py b/numpy/lib/tests/test_polynomial.py index 5c15941e6..00dffd3d3 100644 --- a/numpy/lib/tests/test_polynomial.py +++ b/numpy/lib/tests/test_polynomial.py @@ -81,7 +81,7 @@ poly1d([ 2.]) import numpy as np from numpy.testing import ( run_module_suite, TestCase, assert_, assert_equal, assert_array_equal, - assert_almost_equal, rundocs + assert_almost_equal, assert_array_almost_equal, assert_raises, rundocs ) @@ -89,6 +89,30 @@ class TestDocs(TestCase): def test_doctests(self): return rundocs() + def test_poly(self): + assert_array_almost_equal(np.poly([3, -np.sqrt(2), np.sqrt(2)]), + [1, -3, -2, 6]) + + # From matlab docs + A = [[1, 2, 3], [4, 5, 6], [7, 8, 0]] + assert_array_almost_equal(np.poly(A), [1, -6, -72, -27]) + + # Should produce real output for perfect conjugates + assert_(np.isrealobj(np.poly([+1.082j, +2.613j, -2.613j, -1.082j]))) + assert_(np.isrealobj(np.poly([0+1j, -0+-1j, 1+2j, + 1-2j, 1.+3.5j, 1-3.5j]))) + assert_(np.isrealobj(np.poly([1j, -1j, 1+2j, 1-2j, 1+3j, 1-3.j]))) + assert_(np.isrealobj(np.poly([1j, -1j, 1+2j, 1-2j]))) + assert_(np.isrealobj(np.poly([1j, -1j, 2j, -2j]))) + assert_(np.isrealobj(np.poly([1j, -1j]))) + assert_(np.isrealobj(np.poly([1, -1]))) + + assert_(np.iscomplexobj(np.poly([1j, -1.0000001j]))) + + np.random.seed(42) + a = np.random.randn(100) + 1j*np.random.randn(100) + assert_(np.isrealobj(np.poly(np.concatenate((a, np.conjugate(a)))))) + def test_roots(self): assert_array_equal(np.roots([1, 0, 0]), [0, 0]) @@ -111,6 +135,12 @@ class TestDocs(TestCase): err = [1, -1, 1, -1, 1, -1, 1] weights = np.arange(8, 1, -1)**2/7.0 + # Check exception when too few points for variance estimate. Note that + # the Bayesian estimate requires the number of data points to exceed + # degree + 3. 
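The new test_poly pins down the property asserted above: roots supplied in exact complex-conjugate pairs yield real coefficients, while an inexact pair stays complex. A short sketch of both cases:

    import numpy as np

    # (x**2 + 1) * (x**2 - 2x + 5) has real coefficients.
    coeffs = np.poly([1j, -1j, 1 + 2j, 1 - 2j])
    print(coeffs)                 # [ 1. -2.  6. -2.  5.]
    assert np.isrealobj(coeffs)

    # An "almost conjugate" pair does not collapse to real output.
    assert np.iscomplexobj(np.poly([1j, -1.0000001j]))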
+ assert_raises(ValueError, np.polyfit, + [0, 1, 3], [0, 1, 3], deg=0, cov=True) + # check 1D case m, cov = np.polyfit(x, y+err, 2, cov=True) est = [3.8571, 0.2857, 1.619] diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py index 51a2077eb..699a04716 100644 --- a/numpy/lib/tests/test_recfunctions.py +++ b/numpy/lib/tests/test_recfunctions.py @@ -3,9 +3,8 @@ from __future__ import division, absolute_import, print_function import numpy as np import numpy.ma as ma from numpy.ma.mrecords import MaskedRecords -from numpy.ma.testutils import ( - run_module_suite, TestCase, assert_, assert_equal - ) +from numpy.ma.testutils import assert_equal +from numpy.testing import TestCase, run_module_suite, assert_ from numpy.lib.recfunctions import ( drop_fields, rename_fields, get_fieldstructure, recursive_fill_fields, find_duplicates, merge_arrays, append_fields, stack_arrays, join_by @@ -700,6 +699,26 @@ class TestJoinBy2(TestCase): assert_equal(test.dtype, control.dtype) assert_equal(test, control) +class TestAppendFieldsObj(TestCase): + """ + Test append_fields with arrays containing objects + """ + # https://github.com/numpy/numpy/issues/2346 + + def setUp(self): + from datetime import date + self.data = dict(obj=date(2000, 1, 1)) + + def test_append_to_objects(self): + "Test append_fields when the base array contains objects" + obj = self.data['obj'] + x = np.array([(obj, 1.), (obj, 2.)], + dtype=[('A', object), ('B', float)]) + y = np.array([10, 20], dtype=int) + test = append_fields(x, 'C', data=y, usemask=False) + control = np.array([(obj, 1.0, 10), (obj, 2.0, 20)], + dtype=[('A', object), ('B', float), ('C', int)]) + assert_equal(test, control) if __name__ == '__main__': run_module_suite() diff --git a/numpy/lib/tests/test_regression.py b/numpy/lib/tests/test_regression.py index 00fa3f195..ee50dcfa4 100644 --- a/numpy/lib/tests/test_regression.py +++ b/numpy/lib/tests/test_regression.py @@ -85,10 +85,6 @@ class TestRegression(TestCase): assert_(x != y) assert_(x == x) - def test_mem_insert(self, level=rlevel): - # Ticket #572 - np.lib.place(1, 1, 1) - def test_polyfit_build(self): # Ticket #628 ref = [-1.06123820e-06, 5.70886914e-04, -1.13822012e-01, diff --git a/numpy/lib/tests/test_shape_base.py b/numpy/lib/tests/test_shape_base.py index 23f3edfbe..2eb4a809d 100644 --- a/numpy/lib/tests/test_shape_base.py +++ b/numpy/lib/tests/test_shape_base.py @@ -3,7 +3,7 @@ from __future__ import division, absolute_import, print_function import numpy as np from numpy.lib.shape_base import ( apply_along_axis, apply_over_axes, array_split, split, hsplit, dsplit, - vsplit, dstack, kron, tile + vsplit, dstack, column_stack, kron, tile ) from numpy.testing import ( run_module_suite, TestCase, assert_, assert_equal, assert_array_equal, @@ -27,6 +27,37 @@ class TestApplyAlongAxis(TestCase): assert_array_equal(apply_along_axis(np.sum, 0, a), [[27, 30, 33], [36, 39, 42], [45, 48, 51]]) + def test_preserve_subclass(self): + def double(row): + return row * 2 + m = np.matrix([[0, 1], [2, 3]]) + result = apply_along_axis(double, 0, m) + assert isinstance(result, np.matrix) + assert_array_equal( + result, np.matrix([[0, 2], [4, 6]]) + ) + + def test_subclass(self): + class MinimalSubclass(np.ndarray): + data = 1 + + def minimal_function(array): + return array.data + + a = np.zeros((6, 3)).view(MinimalSubclass) + + assert_array_equal( + apply_along_axis(minimal_function, 0, a), np.array([1, 1, 1]) + ) + + def test_scalar_array(self): + class MinimalSubclass(np.ndarray): + pass + a = 
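TestAppendFieldsObj covers gh-2346: appending a plain column to a structured array whose existing field holds Python objects. A minimal usage sketch mirroring the test:

    from datetime import date

    import numpy as np
    from numpy.lib.recfunctions import append_fields

    obj = date(2000, 1, 1)
    x = np.array([(obj, 1.0), (obj, 2.0)],
                 dtype=[('A', object), ('B', float)])
    y = np.array([10, 20], dtype=int)
    out = append_fields(x, 'C', data=y, usemask=False)
    print(out.dtype.names)   # ('A', 'B', 'C')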
np.ones((6, 3)).view(MinimalSubclass) + res = apply_along_axis(np.sum, 0, a) + assert isinstance(res, MinimalSubclass) + assert_array_equal(res, np.array([6, 6, 6]).view(MinimalSubclass)) + class TestApplyOverAxes(TestCase): def test_simple(self): @@ -103,12 +134,17 @@ class TestArraySplit(TestCase): def test_integer_split_2D_rows(self): a = np.array([np.arange(10), np.arange(10)]) - res = assert_warns(FutureWarning, array_split, a, 3, axis=0) + res = array_split(a, 3, axis=0) + tgt = [np.array([np.arange(10)]), np.array([np.arange(10)]), + np.zeros((0, 10))] + compare_results(res, tgt) + assert_(a.dtype.type is res[-1].dtype.type) - # After removing the FutureWarning, the last should be zeros((0, 10)) - desired = [np.array([np.arange(10)]), np.array([np.arange(10)]), - np.array([])] - compare_results(res, desired) + # Same thing for manual splits: + res = array_split(a, [0, 1, 2], axis=0) + tgt = [np.zeros((0, 10)), np.array([np.arange(10)]), + np.array([np.arange(10)])] + compare_results(res, tgt) assert_(a.dtype.type is res[-1].dtype.type) def test_integer_split_2D_cols(self): @@ -123,12 +159,10 @@ class TestArraySplit(TestCase): """ This will fail if we change default axis """ a = np.array([np.arange(10), np.arange(10)]) - res = assert_warns(FutureWarning, array_split, a, 3) - - # After removing the FutureWarning, the last should be zeros((0, 10)) - desired = [np.array([np.arange(10)]), np.array([np.arange(10)]), - np.array([])] - compare_results(res, desired) + res = array_split(a, 3) + tgt = [np.array([np.arange(10)]), np.array([np.arange(10)]), + np.zeros((0, 10))] + compare_results(res, tgt) assert_(a.dtype.type is res[-1].dtype.type) # perhaps should check higher dimensions @@ -172,8 +206,15 @@ class TestSplit(TestCase): a = np.arange(10) assert_raises(ValueError, split, a, 3) +class TestColumnStack(TestCase): + def test_non_iterable(self): + assert_raises(TypeError, column_stack, 1) + class TestDstack(TestCase): + def test_non_iterable(self): + assert_raises(TypeError, dstack, 1) + def test_0D_array(self): a = np.array(1) b = np.array(2) @@ -209,6 +250,9 @@ class TestHsplit(TestCase): """Only testing for integer splits. """ + def test_non_iterable(self): + assert_raises(ValueError, hsplit, 1, 1) + def test_0D_array(self): a = np.array(1) try: @@ -235,6 +279,13 @@ class TestVsplit(TestCase): """Only testing for integer splits. """ + def test_non_iterable(self): + assert_raises(ValueError, vsplit, 1, 1) + + def test_0D_array(self): + a = np.array(1) + assert_raises(ValueError, vsplit, a, 2) + def test_1D_array(self): a = np.array([1, 2, 3, 4]) try: @@ -253,6 +304,16 @@ class TestVsplit(TestCase): class TestDsplit(TestCase): # Only testing for integer splits. 
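The FutureWarning era for array_split is over: a short trailing piece now keeps the full trailing shape instead of degrading to a shapeless empty array. A quick illustration of the finalized behavior:

    import numpy as np

    a = np.array([np.arange(10), np.arange(10)])
    parts = np.array_split(a, 3, axis=0)
    # The empty piece keeps its column count: (0, 10), not (0,).
    print([p.shape for p in parts])   # [(1, 10), (1, 10), (0, 10)]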
+ def test_non_iterable(self): + assert_raises(ValueError, dsplit, 1, 1) + + def test_0D_array(self): + a = np.array(1) + assert_raises(ValueError, dsplit, a, 2) + + def test_1D_array(self): + a = np.array([1, 2, 3, 4]) + assert_raises(ValueError, dsplit, a, 2) def test_2D_array(self): a = np.array([[1, 2, 3, 4], @@ -324,9 +385,18 @@ class TestTile(TestCase): assert_equal(tile(b, (2, 2)), [[1, 2, 1, 2], [3, 4, 3, 4], [1, 2, 1, 2], [3, 4, 3, 4]]) + def test_tile_one_repetition_on_array_gh4679(self): + a = np.arange(5) + b = tile(a, 1) + b += 2 + assert_equal(a, np.arange(5)) + def test_empty(self): a = np.array([[[]]]) + b = np.array([[], []]) + c = tile(b, 2).shape d = tile(a, (3, 2, 5)).shape + assert_equal(c, (2, 0)) assert_equal(d, (3, 2, 0)) def test_kroncompare(self): diff --git a/numpy/lib/tests/test_stride_tricks.py b/numpy/lib/tests/test_stride_tricks.py index bc7e30ca4..95df135cf 100644 --- a/numpy/lib/tests/test_stride_tricks.py +++ b/numpy/lib/tests/test_stride_tricks.py @@ -5,8 +5,9 @@ from numpy.testing import ( run_module_suite, assert_equal, assert_array_equal, assert_raises, assert_ ) -from numpy.lib.stride_tricks import as_strided, broadcast_arrays - +from numpy.lib.stride_tricks import ( + as_strided, broadcast_arrays, _broadcast_shape, broadcast_to +) def assert_shapes_correct(input_shapes, expected_shape): # Broadcast a list of arrays with the given input shapes and check the @@ -217,6 +218,67 @@ def test_same_as_ufunc(): assert_same_as_ufunc(input_shapes[0], input_shapes[1], False, True) assert_same_as_ufunc(input_shapes[0], input_shapes[1], True, True) + +def test_broadcast_to_succeeds(): + data = [ + [np.array(0), (0,), np.array(0)], + [np.array(0), (1,), np.zeros(1)], + [np.array(0), (3,), np.zeros(3)], + [np.ones(1), (1,), np.ones(1)], + [np.ones(1), (2,), np.ones(2)], + [np.ones(1), (1, 2, 3), np.ones((1, 2, 3))], + [np.arange(3), (3,), np.arange(3)], + [np.arange(3), (1, 3), np.arange(3).reshape(1, -1)], + [np.arange(3), (2, 3), np.array([[0, 1, 2], [0, 1, 2]])], + # test if shape is not a tuple + [np.ones(0), 0, np.ones(0)], + [np.ones(1), 1, np.ones(1)], + [np.ones(1), 2, np.ones(2)], + # these cases with size 0 are strange, but they reproduce the behavior + # of broadcasting with ufuncs (see test_same_as_ufunc above) + [np.ones(1), (0,), np.ones(0)], + [np.ones((1, 2)), (0, 2), np.ones((0, 2))], + [np.ones((2, 1)), (2, 0), np.ones((2, 0))], + ] + for input_array, shape, expected in data: + actual = broadcast_to(input_array, shape) + assert_array_equal(expected, actual) + + +def test_broadcast_to_raises(): + data = [ + [(0,), ()], + [(1,), ()], + [(3,), ()], + [(3,), (1,)], + [(3,), (2,)], + [(3,), (4,)], + [(1, 2), (2, 1)], + [(1, 1), (1,)], + [(1,), -1], + [(1,), (-1,)], + [(1, 2), (-1, 2)], + ] + for orig_shape, target_shape in data: + arr = np.zeros(orig_shape) + assert_raises(ValueError, lambda: broadcast_to(arr, target_shape)) + + +def test_broadcast_shape(): + # broadcast_shape is already exercized indirectly by broadcast_arrays + assert_raises(ValueError, _broadcast_shape) + assert_equal(_broadcast_shape([1, 2]), (2,)) + assert_equal(_broadcast_shape(np.ones((1, 1))), (1, 1)) + assert_equal(_broadcast_shape(np.ones((1, 1)), np.ones((3, 4))), (3, 4)) + assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 32)), (1, 2)) + assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 100)), (1, 2)) + + # regression tests for gh-5862 + assert_equal(_broadcast_shape(*([np.ones(2)] * 32 + [1])), (2,)) + bad_args = [np.ones(2)] * 32 + [np.ones(3)] * 32 + 
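broadcast_to, exercised by the new tests above, returns a zero-copy, read-only view with the requested shape and raises ValueError for incompatible shapes. A minimal sketch:

    import numpy as np
    from numpy.lib.stride_tricks import broadcast_to

    a = np.arange(3)
    b = broadcast_to(a, (2, 3))
    print(b)                    # [[0 1 2] [0 1 2]]
    print(b.flags.writeable)    # False: broadcast views are read-only
    try:
        broadcast_to(a, (2, 4))
    except ValueError:
        print("incompatible shape raises ValueError")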
assert_raises(ValueError, lambda: _broadcast_shape(*bad_args)) + + def test_as_strided(): a = np.array([None]) a_view = as_strided(a) @@ -233,6 +295,45 @@ def test_as_strided(): expected = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]]) assert_array_equal(a_view, expected) + # Regression test for gh-5081 + dt = np.dtype([('num', 'i4'), ('obj', 'O')]) + a = np.empty((4,), dtype=dt) + a['num'] = np.arange(1, 5) + a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize)) + expected_num = [[1, 2, 3, 4]] * 3 + expected_obj = [[None]*4]*3 + assert_equal(a_view.dtype, dt) + assert_array_equal(expected_num, a_view['num']) + assert_array_equal(expected_obj, a_view['obj']) + + # Make sure that void types without fields are kept unchanged + a = np.empty((4,), dtype='V4') + a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize)) + assert_equal(a.dtype, a_view.dtype) + + # Make sure that the only type that could fail is properly handled + dt = np.dtype({'names': [''], 'formats': ['V4']}) + a = np.empty((4,), dtype=dt) + a_view = as_strided(a, shape=(3, 4), strides=(0, a.itemsize)) + assert_equal(a.dtype, a_view.dtype) + +def as_strided_writeable(): + arr = np.ones(10) + view = as_strided(arr, writeable=False) + assert_(not view.flags.writeable) + + # Check that writeable also is fine: + view = as_strided(arr, writeable=True) + assert_(view.flags.writeable) + view[...] = 3 + assert_array_equal(arr, np.full_like(arr, 3)) + + # Test that things do not break down for readonly: + arr.flags.writeable = False + view = as_strided(arr, writeable=False) + view = as_strided(arr, writeable=True) + assert_(not view.flags.writeable) + class VerySimpleSubClass(np.ndarray): def __new__(cls, *args, **kwargs): @@ -277,6 +378,53 @@ def test_subclasses(): assert_(type(b_view) is np.ndarray) assert_(a_view.shape == b_view.shape) + # and for broadcast_to + shape = (2, 4) + a_view = broadcast_to(a, shape) + assert_(type(a_view) is np.ndarray) + assert_(a_view.shape == shape) + a_view = broadcast_to(a, shape, subok=True) + assert_(type(a_view) is SimpleSubClass) + assert_(a_view.info == 'simple finalized') + assert_(a_view.shape == shape) + + +def test_writeable(): + # broadcast_to should return a readonly array + original = np.array([1, 2, 3]) + result = broadcast_to(original, (2, 3)) + assert_equal(result.flags.writeable, False) + assert_raises(ValueError, result.__setitem__, slice(None), 0) + + # but the result of broadcast_arrays needs to be writeable (for now), to + # preserve backwards compatibility + for results in [broadcast_arrays(original), + broadcast_arrays(0, original)]: + for result in results: + assert_equal(result.flags.writeable, True) + # keep readonly input readonly + original.flags.writeable = False + _, result = broadcast_arrays(0, original) + assert_equal(result.flags.writeable, False) + + # regresssion test for GH6491 + shape = (2,) + strides = [0] + tricky_array = as_strided(np.array(0), shape, strides) + other = np.zeros((1,)) + first, second = broadcast_arrays(tricky_array, other) + assert_(first.shape == second.shape) + + +def test_reference_types(): + input_array = np.array('a', dtype=object) + expected = np.array(['a'] * 3, dtype=object) + actual = broadcast_to(input_array, (3,)) + assert_array_equal(expected, actual) + + actual, _ = broadcast_arrays(input_array, np.ones(3)) + assert_array_equal(expected, actual) + if __name__ == "__main__": run_module_suite() diff --git a/numpy/lib/tests/test_twodim_base.py b/numpy/lib/tests/test_twodim_base.py index 739061a5d..98b8aa39c 100644 --- 
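The as_strided_writeable test above exercises the new writeable keyword. Since a stride-0 view aliases one element many times, requesting a read-only view is the safer pattern; a short sketch:

    import numpy as np
    from numpy.lib.stride_tricks import as_strided

    arr = np.ones(10)
    # Every row of the view aliases the same ten floats.
    view = as_strided(arr, shape=(3, 10), strides=(0, arr.itemsize),
                      writeable=False)
    print(view.shape, view.flags.writeable)   # (3, 10) False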
a/numpy/lib/tests/test_twodim_base.py +++ b/numpy/lib/tests/test_twodim_base.py @@ -5,11 +5,11 @@ from __future__ import division, absolute_import, print_function from numpy.testing import ( TestCase, run_module_suite, assert_equal, assert_array_equal, - assert_array_max_ulp, assert_array_almost_equal, assert_raises, rand, + assert_array_max_ulp, assert_array_almost_equal, assert_raises, ) from numpy import ( - arange, rot90, add, fliplr, flipud, zeros, ones, eye, array, diag, + arange, add, fliplr, flipud, zeros, ones, eye, array, diag, histogram2d, tri, mask_indices, triu_indices, triu_indices_from, tril_indices, tril_indices_from, vander, ) @@ -169,37 +169,6 @@ class TestFlipud(TestCase): assert_equal(flipud(a), b) -class TestRot90(TestCase): - def test_basic(self): - self.assertRaises(ValueError, rot90, ones(4)) - - a = [[0, 1, 2], - [3, 4, 5]] - b1 = [[2, 5], - [1, 4], - [0, 3]] - b2 = [[5, 4, 3], - [2, 1, 0]] - b3 = [[3, 0], - [4, 1], - [5, 2]] - b4 = [[0, 1, 2], - [3, 4, 5]] - - for k in range(-3, 13, 4): - assert_equal(rot90(a, k=k), b1) - for k in range(-2, 13, 4): - assert_equal(rot90(a, k=k), b2) - for k in range(-1, 13, 4): - assert_equal(rot90(a, k=k), b3) - for k in range(0, 13, 4): - assert_equal(rot90(a, k=k), b4) - - def test_axes(self): - a = ones((50, 40, 3)) - assert_equal(rot90(a).shape, (40, 50, 3)) - - class TestHistogram2d(TestCase): def test_simple(self): x = array( @@ -254,7 +223,7 @@ class TestHistogram2d(TestCase): assert_array_almost_equal(H, answer, 3) def test_all_outliers(self): - r = rand(100) + 1. + 1e6 # histogramdd rounds by decimal=6 + r = np.random.rand(100) + 1. + 1e6 # histogramdd rounds by decimal=6 H, xed, yed = histogram2d(r, r, (4, 5), range=([0, 1], [0, 1])) assert_array_equal(H, 0) @@ -265,6 +234,37 @@ class TestHistogram2d(TestCase): a, edge1, edge2 = histogram2d([], [], bins=4) assert_array_max_ulp(a, np.zeros((4, 4))) + def test_binparameter_combination(self): + x = array( + [0, 0.09207008, 0.64575234, 0.12875982, 0.47390599, + 0.59944483, 1]) + y = array( + [0, 0.14344267, 0.48988575, 0.30558665, 0.44700682, + 0.15886423, 1]) + edges = (0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1) + H, xe, ye = histogram2d(x, y, (edges, 4)) + answer = array( + [[ 2., 0., 0., 0.], + [ 0., 1., 0., 0.], + [ 0., 0., 0., 0.], + [ 0., 0., 0., 0.], + [ 0., 1., 0., 0.], + [ 1., 0., 0., 0.], + [ 0., 1., 0., 0.], + [ 0., 0., 0., 0.], + [ 0., 0., 0., 0.], + [ 0., 0., 0., 1.]]) + assert_array_equal(H, answer) + assert_array_equal(ye, array([0., 0.25, 0.5, 0.75, 1])) + H, xe, ye = histogram2d(x, y, (4, edges)) + answer = array( + [[ 1., 1., 0., 1., 0., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.], + [ 0., 1., 0., 0., 1., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]]) + assert_array_equal(H, answer) + assert_array_equal(xe, array([0., 0.25, 0.5, 0.75, 1])) + class TestTri(TestCase): def test_dtype(self): diff --git a/numpy/lib/tests/test_type_check.py b/numpy/lib/tests/test_type_check.py index 3931f95e5..93a4da97a 100644 --- a/numpy/lib/tests/test_type_check.py +++ b/numpy/lib/tests/test_type_check.py @@ -18,11 +18,13 @@ def assert_all(x): class TestCommonType(TestCase): def test_basic(self): ai32 = np.array([[1, 2], [3, 4]], dtype=np.int32) + af16 = np.array([[1, 2], [3, 4]], dtype=np.float16) af32 = np.array([[1, 2], [3, 4]], dtype=np.float32) af64 = np.array([[1, 2], [3, 4]], dtype=np.float64) acs = np.array([[1+5j, 2+6j], [3+7j, 4+8j]], dtype=np.csingle) acd = np.array([[1+5j, 2+6j], [3+7j, 4+8j]], dtype=np.cdouble) 
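test_binparameter_combination below covers the newly documented mixed bin specification for histogram2d: explicit edges along one axis combined with an integer count along the other. For example:

    import numpy as np

    x = np.array([0.05, 0.15, 0.25, 0.95])
    y = np.array([0.1, 0.4, 0.6, 0.9])
    edges = (0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)
    H, xe, ye = np.histogram2d(x, y, bins=(edges, 4))
    print(H.shape)   # (10, 4): ten x-bins from the edges, four equal y-bins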
assert_(common_type(ai32) == np.float64) + assert_(common_type(af16) == np.float16) assert_(common_type(af32) == np.float32) assert_(common_type(af64) == np.float64) assert_(common_type(acs) == np.csingle) @@ -146,6 +148,41 @@ class TestIscomplexobj(TestCase): z = np.array([-1j, 0, -1]) assert_(iscomplexobj(z)) + def test_scalar(self): + assert_(not iscomplexobj(1.0)) + assert_(iscomplexobj(1+0j)) + + def test_list(self): + assert_(iscomplexobj([3, 1+0j, True])) + assert_(not iscomplexobj([3, 1, True])) + + def test_duck(self): + class DummyComplexArray: + @property + def dtype(self): + return np.dtype(complex) + dummy = DummyComplexArray() + assert_(iscomplexobj(dummy)) + + def test_pandas_duck(self): + # This tests a custom np.dtype duck-typed class, such as used by pandas + # (pandas.core.dtypes) + class PdComplex(np.complex128): + pass + class PdDtype(object): + name = 'category' + names = None + type = PdComplex + kind = 'c' + str = '<c16' + base = np.dtype('complex128') + class DummyPd: + @property + def dtype(self): + return PdDtype + dummy = DummyPd() + assert_(iscomplexobj(dummy)) + class TestIsrealobj(TestCase): def test_basic(self): @@ -186,7 +223,7 @@ class TestIsnan(TestCase): class TestIsfinite(TestCase): - # Fixme, wrong place, isfinite now ufunc + # Fixme, wrong place, isfinite now ufunc def test_goodvalues(self): z = np.array((-1., 0., 1.)) @@ -217,7 +254,7 @@ class TestIsfinite(TestCase): class TestIsinf(TestCase): - # Fixme, wrong place, isinf now ufunc + # Fixme, wrong place, isinf now ufunc def test_goodvalues(self): z = np.array((-1., 0., 1.)) @@ -277,6 +314,8 @@ class TestNanToNum(TestCase): def test_integer(self): vals = nan_to_num(1) assert_all(vals == 1) + vals = nan_to_num([1]) + assert_array_equal(vals, np.array([1], np.int)) def test_complex_good(self): vals = nan_to_num(1+1j) diff --git a/numpy/lib/tests/test_utils.py b/numpy/lib/tests/test_utils.py index 8fbd1c445..92bcdc238 100644 --- a/numpy/lib/tests/test_utils.py +++ b/numpy/lib/tests/test_utils.py @@ -3,7 +3,7 @@ from __future__ import division, absolute_import, print_function import sys from numpy.core import arange from numpy.testing import ( - run_module_suite, assert_, assert_equal, dec + run_module_suite, assert_, assert_equal, assert_raises_regex, dec ) from numpy.lib import deprecate import numpy.lib.utils as utils @@ -62,5 +62,10 @@ def test_byte_bounds(): assert_equal(high - low, a.size * a.itemsize) +def test_assert_raises_regex_context_manager(): + with assert_raises_regex(ValueError, 'no deprecation warning'): + raise ValueError('no deprecation warning') + + if __name__ == "__main__": run_module_suite() diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py index 40a140b6b..8cf2ec091 100644 --- a/numpy/lib/twodim_base.py +++ b/numpy/lib/twodim_base.py @@ -4,14 +4,14 @@ from __future__ import division, absolute_import, print_function from numpy.core.numeric import ( - asanyarray, arange, zeros, greater_equal, multiply, ones, asarray, - where, int8, int16, int32, int64, empty, promote_types + absolute, asanyarray, arange, zeros, greater_equal, multiply, ones, + asarray, where, int8, int16, int32, int64, empty, promote_types, diagonal, ) -from numpy.core import iinfo +from numpy.core import iinfo, transpose __all__ = [ - 'diag', 'diagflat', 'eye', 'fliplr', 'flipud', 'rot90', 'tri', 'triu', + 'diag', 'diagflat', 'eye', 'fliplr', 'flipud', 'tri', 'triu', 'tril', 'vander', 'histogram2d', 'mask_indices', 'tril_indices', 'tril_indices_from', 'triu_indices', 'triu_indices_from', ] @@ -19,6 
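The duck-typing tests above match the rework of iscomplexobj in type_check.py further down: an object exposing a .dtype attribute is inspected directly instead of being coerced through asarray, so array-likes such as pandas containers work. A minimal sketch (DummyArray is illustrative):

    import numpy as np

    class DuckArray(object):
        # Never converted to an ndarray; only .dtype is consulted.
        @property
        def dtype(self):
            return np.dtype(complex)

    print(np.iscomplexobj(DuckArray()))    # True
    print(np.iscomplexobj([3, 1, True]))   # False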
+19,8 @@ __all__ = [ i1 = iinfo(int8) i2 = iinfo(int16) i4 = iinfo(int32) + + def _min_int(low, high): """ get small int that fits the range """ if high <= i1.max and low >= i1.min: @@ -55,7 +57,7 @@ def fliplr(m): Notes ----- - Equivalent to A[:,::-1]. Requires the array to be at least 2-D. + Equivalent to m[:,::-1]. Requires the array to be at least 2-D. Examples -------- @@ -70,7 +72,7 @@ def fliplr(m): [ 3., 0., 0.]]) >>> A = np.random.randn(2,3,5) - >>> np.all(np.fliplr(A)==A[:,::-1,...]) + >>> np.all(np.fliplr(A) == A[:,::-1,...]) True """ @@ -105,7 +107,7 @@ def flipud(m): Notes ----- - Equivalent to ``A[::-1,...]``. + Equivalent to ``m[::-1,...]``. Does not require the array to be two-dimensional. Examples @@ -121,7 +123,7 @@ def flipud(m): [ 1., 0., 0.]]) >>> A = np.random.randn(2,3,5) - >>> np.all(np.flipud(A)==A[::-1,...]) + >>> np.all(np.flipud(A) == A[::-1,...]) True >>> np.flipud([1,2]) @@ -134,59 +136,6 @@ def flipud(m): return m[::-1, ...] -def rot90(m, k=1): - """ - Rotate an array by 90 degrees in the counter-clockwise direction. - - The first two dimensions are rotated; therefore, the array must be at - least 2-D. - - Parameters - ---------- - m : array_like - Array of two or more dimensions. - k : integer - Number of times the array is rotated by 90 degrees. - - Returns - ------- - y : ndarray - Rotated array. - - See Also - -------- - fliplr : Flip an array horizontally. - flipud : Flip an array vertically. - - Examples - -------- - >>> m = np.array([[1,2],[3,4]], int) - >>> m - array([[1, 2], - [3, 4]]) - >>> np.rot90(m) - array([[2, 4], - [1, 3]]) - >>> np.rot90(m, 2) - array([[4, 3], - [2, 1]]) - - """ - m = asanyarray(m) - if m.ndim < 2: - raise ValueError("Input must >= 2-d.") - k = k % 4 - if k == 0: - return m - elif k == 1: - return fliplr(m).swapaxes(0, 1) - elif k == 2: - return fliplr(flipud(m)) - else: - # k == 3 - return fliplr(m.swapaxes(0, 1)) - - def eye(N, M=None, k=0, dtype=float): """ Return a 2-D array with ones on the diagonal and zeros elsewhere. @@ -293,7 +242,7 @@ def diag(v, k=0): [0, 0, 8]]) """ - v = asarray(v) + v = asanyarray(v) s = v.shape if len(s) == 1: n = s[0]+abs(k) @@ -305,7 +254,7 @@ def diag(v, k=0): res[:n-k].flat[i::n+1] = v return res elif len(s) == 2: - return v.diagonal(k) + return diagonal(v, k) else: raise ValueError("Input must be 1- or 2-d.") @@ -587,16 +536,18 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None): y : array_like, shape (N,) An array containing the y coordinates of the points to be histogrammed. - bins : int or [int, int] or array_like or [array, array], optional + bins : int or array_like or [int, int] or [array, array], optional The bin specification: * If int, the number of bins for the two dimensions (nx=ny=bins). - * If [int, int], the number of bins in each dimension - (nx, ny = bins). * If array_like, the bin edges for the two dimensions (x_edges=y_edges=bins). + * If [int, int], the number of bins in each dimension + (nx, ny = bins). * If [array, array], the bin edges in each dimension (x_edges, y_edges = bins). + * A combination [int, array] or [array, int], where int + is the number of bins and array is the bin edges. range : array_like, shape(2,2), optional The leftmost and rightmost edges of the bins along each dimension @@ -645,55 +596,43 @@ def histogram2d(x, y, bins=10, range=None, normed=False, weights=None): >>> import matplotlib as mpl >>> import matplotlib.pyplot as plt - Construct a 2D-histogram with variable bin width. 
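The docstring fix makes the stated equivalences self-consistent by referring to the parameter m rather than a stray A. They are easy to verify:

    import numpy as np

    m = np.arange(6).reshape(2, 3)
    assert np.array_equal(np.fliplr(m), m[:, ::-1])
    assert np.array_equal(np.flipud(m), m[::-1, ...])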
First define the bin + Construct a 2-D histogram with variable bin width. First define the bin edges: - >>> xedges = [0, 1, 1.5, 3, 5] + >>> xedges = [0, 1, 3, 5] >>> yedges = [0, 2, 3, 4, 6] Next we create a histogram H with random bin content: - >>> x = np.random.normal(3, 1, 100) + >>> x = np.random.normal(2, 1, 100) >>> y = np.random.normal(1, 1, 100) - >>> H, xedges, yedges = np.histogram2d(y, x, bins=(xedges, yedges)) - - Or we fill the histogram H with a determined bin content: - - >>> H = np.ones((4, 4)).cumsum().reshape(4, 4) - >>> print H[::-1] # This shows the bin content in the order as plotted - [[ 13. 14. 15. 16.] - [ 9. 10. 11. 12.] - [ 5. 6. 7. 8.] - [ 1. 2. 3. 4.]] + >>> H, xedges, yedges = np.histogram2d(x, y, bins=(xedges, yedges)) + >>> H = H.T # Let each row list bins with common y range. - Imshow can only do an equidistant representation of bins: + :func:`imshow <matplotlib.pyplot.imshow>` can only display square bins: >>> fig = plt.figure(figsize=(7, 3)) - >>> ax = fig.add_subplot(131) - >>> ax.set_title('imshow: equidistant') - >>> im = plt.imshow(H, interpolation='nearest', origin='low', - extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]]) + >>> ax = fig.add_subplot(131, title='imshow: square bins') + >>> plt.imshow(H, interpolation='nearest', origin='low', + ... extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]]) - pcolormesh can display exact bin edges: + :func:`pcolormesh <matplotlib.pyplot.pcolormesh>` can display actual edges: - >>> ax = fig.add_subplot(132) - >>> ax.set_title('pcolormesh: exact bin edges') + >>> ax = fig.add_subplot(132, title='pcolormesh: actual edges', + ... aspect='equal') >>> X, Y = np.meshgrid(xedges, yedges) >>> ax.pcolormesh(X, Y, H) - >>> ax.set_aspect('equal') - NonUniformImage displays exact bin edges with interpolation: + :class:`NonUniformImage <matplotlib.image.NonUniformImage>` can be used to + display actual bin edges with interpolation: - >>> ax = fig.add_subplot(133) - >>> ax.set_title('NonUniformImage: interpolated') + >>> ax = fig.add_subplot(133, title='NonUniformImage: interpolated', + ... aspect='equal', xlim=xedges[[0, -1]], ylim=yedges[[0, -1]]) >>> im = mpl.image.NonUniformImage(ax, interpolation='bilinear') - >>> xcenters = xedges[:-1] + 0.5 * (xedges[1:] - xedges[:-1]) - >>> ycenters = yedges[:-1] + 0.5 * (yedges[1:] - yedges[:-1]) + >>> xcenters = (xedges[:-1] + xedges[1:]) / 2 + >>> ycenters = (yedges[:-1] + yedges[1:]) / 2 >>> im.set_data(xcenters, ycenters, H) >>> ax.images.append(im) - >>> ax.set_xlim(xedges[0], xedges[-1]) - >>> ax.set_ylim(yedges[0], yedges[-1]) - >>> ax.set_aspect('equal') >>> plt.show() """ diff --git a/numpy/lib/type_check.py b/numpy/lib/type_check.py index a45d0bd86..1658f160c 100644 --- a/numpy/lib/type_check.py +++ b/numpy/lib/type_check.py @@ -266,7 +266,15 @@ def iscomplexobj(x): True """ - return issubclass(asarray(x).dtype.type, _nx.complexfloating) + try: + dtype = x.dtype + except AttributeError: + dtype = asarray(x).dtype + try: + return issubclass(dtype.type, _nx.complexfloating) + except AttributeError: + return False + def isrealobj(x): """ @@ -300,7 +308,7 @@ def isrealobj(x): False """ - return not issubclass(asarray(x).dtype.type, _nx.complexfloating) + return not iscomplexobj(x) #----------------------------------------------------------------------------- @@ -324,16 +332,17 @@ def nan_to_num(x): Returns ------- - out : ndarray, float - Array with the same shape as `x` and dtype of the element in `x` with - the greatest precision. 
NaN is replaced by zero, and infinity - (-infinity) is replaced by the largest (smallest or most negative) - floating point value that fits in the output dtype. All finite numbers - are upcast to the output dtype (default float64). + out : ndarray + New Array with the same shape as `x` and dtype of the element in + `x` with the greatest precision. If `x` is inexact, then NaN is + replaced by zero, and infinity (-infinity) is replaced by the + largest (smallest or most negative) floating point value that fits + in the output dtype. If `x` is not inexact, then a copy of `x` is + returned. See Also -------- - isinf : Shows which elements are negative or negative infinity. + isinf : Shows which elements are positive or negative infinity. isneginf : Shows which elements are negative infinity. isposinf : Shows which elements are positive infinity. isnan : Shows which elements are Not a Number (NaN). @@ -341,7 +350,7 @@ def nan_to_num(x): Notes ----- - Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic (IEEE 754). This means that Not a Number is not equivalent to infinity. @@ -354,33 +363,22 @@ def nan_to_num(x): -1.28000000e+002, 1.28000000e+002]) """ - try: - t = x.dtype.type - except AttributeError: - t = obj2sctype(type(x)) - if issubclass(t, _nx.complexfloating): - return nan_to_num(x.real) + 1j * nan_to_num(x.imag) - else: - try: - y = x.copy() - except AttributeError: - y = array(x) - if not issubclass(t, _nx.integer): - if not y.shape: - y = array([x]) - scalar = True - else: - scalar = False - are_inf = isposinf(y) - are_neg_inf = isneginf(y) - are_nan = isnan(y) - maxf, minf = _getmaxmin(y.dtype.type) - y[are_nan] = 0 - y[are_inf] = maxf - y[are_neg_inf] = minf - if scalar: - y = y[0] - return y + x = _nx.array(x, subok=True) + xtype = x.dtype.type + if not issubclass(xtype, _nx.inexact): + return x + + iscomplex = issubclass(xtype, _nx.complexfloating) + isscalar = (x.ndim == 0) + + x = x[None] if isscalar else x + dest = (x.real, x.imag) if iscomplex else (x,) + maxf, minf = _getmaxmin(x.real.dtype) + for d in dest: + _nx.copyto(d, 0.0, where=isnan(d)) + _nx.copyto(d, maxf, where=isposinf(d)) + _nx.copyto(d, minf, where=isneginf(d)) + return x[0] if isscalar else x #----------------------------------------------------------------------------- @@ -434,7 +432,7 @@ def real_if_close(a,tol=100): from numpy.core import getlimits f = getlimits.finfo(a.dtype.type) tol = f.eps * tol - if _nx.allclose(a.imag, 0, atol=tol): + if _nx.all(_nx.absolute(a.imag) < tol): a = a.real return a @@ -511,7 +509,7 @@ def typename(char): >>> typechars = ['S1', '?', 'B', 'D', 'G', 'F', 'I', 'H', 'L', 'O', 'Q', ... 'S', 'U', 'V', 'b', 'd', 'g', 'f', 'i', 'h', 'l', 'q'] >>> for typechar in typechars: - ... print typechar, ' : ', np.typename(typechar) + ... print(typechar, ' : ', np.typename(typechar)) ... S1 : character ? : bool @@ -542,14 +540,15 @@ def typename(char): #----------------------------------------------------------------------------- #determine the "minimum common type" for a group of arrays. 
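The rewritten nan_to_num replaces the old recursive real/imag reassembly with in-place masked writes. The core copyto/where pattern, sketched on a plain float array:

    import numpy as np

    x = np.array([np.nan, np.inf, -np.inf, 1.5])
    y = x.copy()
    info = np.finfo(y.dtype)
    np.copyto(y, 0.0, where=np.isnan(y))
    np.copyto(y, info.max, where=np.isposinf(y))
    np.copyto(y, info.min, where=np.isneginf(y))
    print(y)   # [0.0, 1.7976931348623157e+308, -1.7976931348623157e+308, 1.5]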
-array_type = [[_nx.single, _nx.double, _nx.longdouble], - [_nx.csingle, _nx.cdouble, _nx.clongdouble]] -array_precision = {_nx.single: 0, - _nx.double: 1, - _nx.longdouble: 2, - _nx.csingle: 0, - _nx.cdouble: 1, - _nx.clongdouble: 2} +array_type = [[_nx.half, _nx.single, _nx.double, _nx.longdouble], + [None, _nx.csingle, _nx.cdouble, _nx.clongdouble]] +array_precision = {_nx.half: 0, + _nx.single: 1, + _nx.double: 2, + _nx.longdouble: 3, + _nx.csingle: 1, + _nx.cdouble: 2, + _nx.clongdouble: 3} def common_type(*arrays): """ Return a scalar type which is common to the input arrays. @@ -593,7 +592,7 @@ def common_type(*arrays): if iscomplexobj(a): is_complex = True if issubclass(t, _nx.integer): - p = 1 + p = 2 # array_precision[_nx.double] else: p = array_precision.get(t, None) if p is None: diff --git a/numpy/lib/ufunclike.py b/numpy/lib/ufunclike.py index e91f64d0e..b6c017b96 100644 --- a/numpy/lib/ufunclike.py +++ b/numpy/lib/ufunclike.py @@ -82,7 +82,7 @@ def isposinf(x, y=None): Notes ----- - Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic (IEEE 754). Errors result if the second argument is also supplied when `x` is a @@ -145,7 +145,7 @@ def isneginf(x, y=None): Notes ----- - Numpy uses the IEEE Standard for Binary Floating-Point for Arithmetic + NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic (IEEE 754). Errors result if the second argument is also supplied when x is a scalar diff --git a/numpy/lib/user_array.py b/numpy/lib/user_array.py index bb5bec628..3103da57b 100644 --- a/numpy/lib/user_array.py +++ b/numpy/lib/user_array.py @@ -1,5 +1,6 @@ """ Standard container-class for easy multiple-inheritance. + Try to inherit from the ndarray instead of using this class as this is not complete. @@ -16,7 +17,19 @@ from numpy.compat import long class container(object): + """ + container(data, dtype=None, copy=True) + + Standard container-class for easy multiple-inheritance. 
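With half added to the precision table, common_type now handles float16 inputs, while integer arrays still promote to double (hence the p = 2 entry below). For instance:

    import numpy as np

    print(np.common_type(np.ones(2, np.float16)))    # float16
    print(np.common_type(np.ones(2, np.float16),
                         np.ones(2, np.float32)))    # float32
    print(np.common_type(np.ones(2, np.int32)))      # float64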
+ + Methods + ------- + copy + tostring + byteswap + astype + """ def __init__(self, data, dtype=None, copy=True): self.array = array(data, dtype, copy=copy) @@ -219,15 +232,19 @@ class container(object): return self._rc(greater_equal(self.array, other)) def copy(self): + "" return self._rc(self.array.copy()) def tostring(self): + "" return self.array.tostring() def byteswap(self): + "" return self._rc(self.array.byteswap()) def astype(self, typecode): + "" return self._rc(self.array.astype(typecode)) def _rc(self, a): diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py index 519d0e9b9..97b93cace 100644 --- a/numpy/lib/utils.py +++ b/numpy/lib/utils.py @@ -9,6 +9,9 @@ import warnings from numpy.core.numerictypes import issubclass_, issubsctype, issubdtype from numpy.core import ndarray, ufunc, asarray +# getargspec and formatargspec were removed in Python 3.6 +from numpy.compat import getargspec, formatargspec + __all__ = [ 'issubclass_', 'issubsctype', 'issubdtype', 'deprecate', 'deprecate_with_doc', 'get_include', 'info', 'source', 'who', @@ -93,7 +96,7 @@ class _Deprecate(object): def newfunc(*args,**kwds): """`arrayrange` is deprecated, use `arange` instead!""" - warnings.warn(depdoc, DeprecationWarning) + warnings.warn(depdoc, DeprecationWarning, stacklevel=2) return func(*args, **kwds) newfunc = _set_function_name(newfunc, old_name) @@ -149,7 +152,7 @@ def deprecate(*args, **kwargs): >>> olduint(6) /usr/lib/python2.5/site-packages/numpy/lib/utils.py:114: DeprecationWarning: uint32 is deprecated - warnings.warn(str1, DeprecationWarning) + warnings.warn(str1, DeprecationWarning, stacklevel=2) 6 """ @@ -238,10 +241,10 @@ def byte_bounds(a): def who(vardict=None): """ - Print the Numpy arrays in the given dictionary. + Print the NumPy arrays in the given dictionary. If there is no dictionary passed in or `vardict` is None then returns - Numpy arrays in the globals() dictionary (all Numpy arrays in the + NumPy arrays in the globals() dictionary (all NumPy arrays in the namespace). Parameters @@ -390,9 +393,9 @@ def _info(obj, output=sys.stdout): Parameters ---------- - obj: ndarray + obj : ndarray Must be ndarray, not checked. - output: + output Where printed output goes. 
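The stacklevel=2 added below makes the DeprecationWarning point at the caller's line rather than numpy/lib/utils.py, which also lets Python's default filters (which only show DeprecationWarning originating in __main__) surface it. A quick check of np.deprecate (old_sum is an illustrative name):

    import warnings

    import numpy as np

    old_sum = np.deprecate(np.sum, old_name='old_sum', new_name='sum')
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        old_sum([1, 2, 3])
    # Attributed to this caller, not numpy/lib/utils.py.
    print(w[0].category.__name__, w[0].filename)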
Notes @@ -531,7 +534,7 @@ def info(object=None, maxwidth=76, output=sys.stdout, toplevel='numpy'): elif inspect.isfunction(object): name = object.__name__ - arguments = inspect.formatargspec(*inspect.getargspec(object)) + arguments = formatargspec(*getargspec(object)) if len(name+arguments) > maxwidth: argstr = _split_line(name, arguments, maxwidth) @@ -546,8 +549,8 @@ arguments = "()" try: if hasattr(object, '__init__'): - arguments = inspect.formatargspec( - *inspect.getargspec(object.__init__.__func__) + arguments = formatargspec( + *getargspec(object.__init__.__func__) ) arglist = arguments.split(', ') if len(arglist) > 1: @@ -589,8 +592,8 @@ print("Instance of class: ", object.__class__.__name__, file=output) print(file=output) if hasattr(object, '__call__'): - arguments = inspect.formatargspec( - *inspect.getargspec(object.__call__.__func__) + arguments = formatargspec( + *getargspec(object.__call__.__func__) ) arglist = arguments.split(', ') if len(arglist) > 1: @@ -619,8 +622,8 @@ elif inspect.ismethod(object): name = object.__name__ - arguments = inspect.formatargspec( - *inspect.getargspec(object.__func__) + arguments = formatargspec( + *getargspec(object.__func__) ) arglist = arguments.split(', ') if len(arglist) > 1: @@ -643,7 +646,7 @@ def source(object, output=sys.stdout): """ - Print or write to a file the source code for a Numpy object. + Print or write to a file the source code for a NumPy object. The source code is only returned for objects written in Python. Many functions and classes are defined in C and will therefore not return @@ -976,7 +979,7 @@ def _getmembers(item): import inspect try: members = inspect.getmembers(item) - except AttributeError: + except Exception: members = [(x, getattr(item, x)) for x in dir(item) if hasattr(item, x)] return members @@ -1011,8 +1014,9 @@ class SafeEval(object): """ def __init__(self): + # 2014-10-15, 1.10 warnings.warn("SafeEval is deprecated in 1.10 and will be removed.", - DeprecationWarning) + DeprecationWarning, stacklevel=2) def visit(self, node): cls = node.__class__
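The switch to numpy.compat matters because, per the comment added above, inspect.getargspec and inspect.formatargspec disappear in Python 3.6; the compat module keeps working equivalents. A sketch of the call pattern info() now uses (f is an illustrative function):

    from numpy.compat import getargspec, formatargspec

    def f(a, b=1, *args, **kwargs):
        pass

    print(formatargspec(*getargspec(f)))   # "(a, b=1, *args, **kwargs)"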