Diffstat (limited to 'numpy/lib')
39 files changed, 2917 insertions, 2473 deletions
diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py
index 30237b76f..0d71375c2 100644
--- a/numpy/lib/_datasource.py
+++ b/numpy/lib/_datasource.py
@@ -20,17 +20,18 @@ gzip, bz2 and xz are supported.
 Example::
 
     >>> # Create a DataSource, use os.curdir (default) for local storage.
-    >>> ds = datasource.DataSource()
+    >>> from numpy import DataSource
+    >>> ds = DataSource()
     >>>
     >>> # Open a remote file.
     >>> # DataSource downloads the file, stores it locally in:
     >>> #     './www.google.com/index.html'
     >>> # opens the file and returns a file object.
-    >>> fp = ds.open('http://www.google.com/index.html')
+    >>> fp = ds.open('http://www.google.com/') # doctest: +SKIP
     >>>
     >>> # Use the file as you normally would
-    >>> fp.read()
-    >>> fp.close()
+    >>> fp.read() # doctest: +SKIP
+    >>> fp.close() # doctest: +SKIP
 
 """
 from __future__ import division, absolute_import, print_function
@@ -40,6 +41,7 @@ import sys
 import warnings
 import shutil
 import io
+from contextlib import closing
 
 from numpy.core.overrides import set_module
 
@@ -156,6 +158,7 @@ class _FileOpeners(object):
 
     Examples
     --------
+    >>> import gzip
     >>> np.lib._datasource._file_openers.keys()
     [None, '.bz2', '.gz', '.xz', '.lzma']
     >>> np.lib._datasource._file_openers['.gz'] is gzip.open
@@ -290,7 +293,7 @@ class DataSource(object):
     URLs require a scheme string (``http://``) to be used, without it they
     will fail::
 
-        >>> repos = DataSource()
+        >>> repos = np.DataSource()
        >>> repos.exists('www.google.com/index.html')
        False
        >>> repos.exists('http://www.google.com/index.html')
        True
@@ -302,17 +305,17 @@ class DataSource(object):
    Examples
    --------
    ::
 
-        >>> ds = DataSource('/home/guido')
-        >>> urlname = 'http://www.google.com/index.html'
-        >>> gfile = ds.open('http://www.google.com/index.html') # remote file
+        >>> ds = np.DataSource('/home/guido')
+        >>> urlname = 'http://www.google.com/'
+        >>> gfile = ds.open('http://www.google.com/')
        >>> ds.abspath(urlname)
-        '/home/guido/www.google.com/site/index.html'
+        '/home/guido/www.google.com/index.html'
 
-        >>> ds = DataSource(None)  # use with temporary file
+        >>> ds = np.DataSource(None)  # use with temporary file
        >>> ds.open('/home/guido/foobar.txt')
        <open file '/home/guido.foobar.txt', mode 'r' at 0x91d4430>
        >>> ds.abspath('/home/guido/foobar.txt')
-        '/tmp/tmpy4pgsP/home/guido/foobar.txt'
+        '/tmp/.../home/guido/foobar.txt'
 
    """
@@ -412,13 +415,9 @@ class DataSource(object):
        # TODO: Doesn't handle compressed files!
        if self._isurl(path):
            try:
-                openedurl = urlopen(path)
-                f = _open(upath, 'wb')
-                try:
-                    shutil.copyfileobj(openedurl, f)
-                finally:
-                    f.close()
-                    openedurl.close()
+                with closing(urlopen(path)) as openedurl:
+                    with _open(upath, 'wb') as f:
+                        shutil.copyfileobj(openedurl, f)
            except URLError:
                raise URLError("URL not found: %s" % path)
        else:
@@ -545,6 +544,11 @@ class DataSource(object):
        is accessible if it exists in either location.
 
        """
+
+        # First test for local path
+        if os.path.exists(path):
+            return True
+
        # We import this here because importing urllib2 is slow and
        # a significant fraction of numpy's total import time.
        if sys.version_info[0] >= 3:
@@ -554,10 +558,6 @@ class DataSource(object):
            from urllib2 import urlopen
            from urllib2 import URLError
 
-        # Test local path
-        if os.path.exists(path):
-            return True
-
        # Test cached url
        upath = self.abspath(path)
        if os.path.exists(upath):
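Reviewer note on the `_cache` hunk above: the rewrite swaps hand-rolled `try/finally` cleanup for `contextlib.closing`, so both the URL handle and the local file are released even when `copyfileobj` raises. A minimal sketch of the same pattern, Python 3 shown; the URL and filename are illustrative, not part of the change::

    >>> from contextlib import closing
    >>> from urllib.request import urlopen
    >>> import shutil
    >>> # Both context managers close their resource on success *and* on error.
    >>> with closing(urlopen('http://www.example.com/')) as remote:  # doctest: +SKIP
    ...     with open('cached.html', 'wb') as local:
    ...         shutil.copyfileobj(remote, local)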
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 8a042f190..0ebd39b8c 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -146,11 +146,17 @@ def flatten_dtype(ndtype, flatten_base=False):
     >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
     ...                ('block', int, (2, 3))])
     >>> np.lib._iotools.flatten_dtype(dt)
-    [dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32')]
+    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int64')]
     >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
-    [dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32'),
-     dtype('int32'), dtype('int32'), dtype('int32'), dtype('int32'),
-     dtype('int32')]
+    [dtype('S4'),
+     dtype('float64'),
+     dtype('float64'),
+     dtype('int64'),
+     dtype('int64'),
+     dtype('int64'),
+     dtype('int64'),
+     dtype('int64'),
+     dtype('int64')]
 
     """
     names = ndtype.names
@@ -309,13 +315,13 @@ class NameValidator(object):
     --------
     >>> validator = np.lib._iotools.NameValidator()
     >>> validator(['file', 'field2', 'with space', 'CaSe'])
-    ['file_', 'field2', 'with_space', 'CaSe']
+    ('file_', 'field2', 'with_space', 'CaSe')
     >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
-                                                  deletechars='q',
-                                                  case_sensitive='False')
+    ...                                           deletechars='q',
+    ...                                           case_sensitive=False)
     >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
-    ['excl_', 'field2', 'no_', 'with_space', 'case']
+    ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE')
 
     """
     #
@@ -599,7 +605,7 @@ class StringConverter(object):
         --------
         >>> import dateutil.parser
         >>> import datetime
-        >>> dateparser = datetustil.parser.parse
+        >>> dateparser = dateutil.parser.parse
         >>> defaultdate = datetime.date(2000, 1, 1)
         >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
         """
diff --git a/numpy/lib/_version.py b/numpy/lib/_version.py
index c3563a7fa..8aa999fc9 100644
--- a/numpy/lib/_version.py
+++ b/numpy/lib/_version.py
@@ -47,9 +47,12 @@ class NumpyVersion():
     >>> from numpy.lib import NumpyVersion
     >>> if NumpyVersion(np.__version__) < '1.7.0':
     ...     print('skip')
-    skip
+    >>> # skip
 
     >>> NumpyVersion('1.7')  # raises ValueError, add ".0"
+    Traceback (most recent call last):
+        ...
+    ValueError: Not a valid numpy version string
 
     """
diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py
index 4f6371058..f08d425d6 100644
--- a/numpy/lib/arraypad.py
+++ b/numpy/lib/arraypad.py
@@ -7,6 +7,7 @@ from __future__ import division, absolute_import, print_function
 
 import numpy as np
 from numpy.core.overrides import array_function_dispatch
+from numpy.lib.index_tricks import ndindex
 
 __all__ = ['pad']
 
@@ -16,50 +17,67 @@ __all__ = ['pad']
 # Private utility functions.
 
 
-def _arange_ndarray(arr, shape, axis, reverse=False):
+def _linear_ramp(ndim, axis, start, stop, size, reverse=False):
     """
-    Create an ndarray of `shape` with increments along specified `axis`
+    Create a linear ramp of `size` in `axis` with `ndim`.
+
+    This algorithm behaves like a vectorized version of `numpy.linspace`.
+    The resulting linear ramp is broadcastable to any array that matches the
+    ramp in `shape[axis]` and `ndim`.
 
     Parameters
     ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    shape : tuple of ints
-        Shape of desired array. Should be equivalent to `arr.shape` except
-        `shape[axis]` which may have any positive value.
+    ndim : int
+        Number of dimensions of the resulting array. All dimensions except
+        the one specified by `axis` will have the size 1.
     axis : int
-        Axis to increment along.
+        The dimension that contains the linear ramp of `size`.
+    start : int or ndarray
+        The starting value(s) of the linear ramp. If given as an array, its
+        size must match `size`.
+    stop : int or ndarray
+        The stop value(s) (not included!) of the linear ramp. If given as an
+        array, its size must match `size`.
+    size : int
+        The number of elements in the linear ramp. If this argument is 0 the
+        dimensions of `ramp` will all be of length 1 except for the one given
+        by `axis` which will be 0.
     reverse : bool
-        If False, increment in a positive fashion from 1 to `shape[axis]`,
-        inclusive. If True, the bounds are the same but the order reversed.
+        If False, increment in a positive fashion, otherwise decrement.
 
     Returns
     -------
-    padarr : ndarray
-        Output array sized to pad `arr` along `axis`, with linear range from
-        1 to `shape[axis]` along specified `axis`.
-
-    Notes
-    -----
-    The range is deliberately 1-indexed for this specific use case. Think of
-    this algorithm as broadcasting `np.arange` to a single `axis` of an
-    arbitrarily shaped ndarray.
+    ramp : ndarray
+        Output array of dtype np.float64 that increments or decrements along
+        the given `axis`.
 
+    Examples
+    --------
+    >>> _linear_ramp(ndim=2, axis=0, start=np.arange(3), stop=10, size=2)
+    array([[0. , 1. , 2. ],
+           [5. , 5.5, 6. ]])
+    >>> _linear_ramp(ndim=3, axis=0, start=2, stop=0, size=0)
+    array([], shape=(0, 1, 1), dtype=float64)
     """
-    initshape = tuple(1 if i != axis else shape[axis]
-                      for (i, x) in enumerate(arr.shape))
-    if not reverse:
-        padarr = np.arange(1, shape[axis] + 1)
-    else:
-        padarr = np.arange(shape[axis], 0, -1)
-    padarr = padarr.reshape(initshape)
-    for i, dim in enumerate(shape):
-        if padarr.shape[i] != dim:
-            padarr = padarr.repeat(dim, axis=i)
-    return padarr
+    # Create initial ramp
+    ramp = np.arange(size, dtype=np.float64)
+    if reverse:
+        ramp = ramp[::-1]
+
+    # Make sure that ramp is broadcastable
+    init_shape = (1,) * axis + (size,) + (1,) * (ndim - axis - 1)
+    ramp = ramp.reshape(init_shape)
+
+    if size != 0:
+        # And scale to given start and stop values
+        gain = (stop - start) / float(size)
+        ramp = ramp * gain
+        ramp += start
+
+    return ramp
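In one dimension the vectorized ramp reduces to an `endpoint=False` linspace, which is a quick way to sanity-check the `gain`/`start` arithmetic above (plain NumPy, nothing from the patch required)::

    >>> import numpy as np
    >>> start, stop, size = 0, 10, 5
    >>> start + np.arange(size, dtype=np.float64) * ((stop - start) / float(size))
    array([0., 2., 4., 6., 8.])
    >>> np.linspace(start, stop, size, endpoint=False)
    array([0., 2., 4., 6., 8.])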
- """ if np.issubdtype(dtype, np.integer): arr.round(out=arr) -def _slice_at_axis(shape, sl, axis): - """ - Construct a slice tuple the length of shape, with sl at the specified axis - """ - slice_tup = (slice(None),) - return slice_tup * axis + (sl,) + slice_tup * (len(shape) - axis - 1) - - -def _slice_first(shape, n, axis): - """ Construct a slice tuple to take the first n elements along axis """ - return _slice_at_axis(shape, slice(0, n), axis=axis) - - -def _slice_last(shape, n, axis): - """ Construct a slice tuple to take the last n elements along axis """ - dim = shape[axis] # doing this explicitly makes n=0 work - return _slice_at_axis(shape, slice(dim - n, dim), axis=axis) - - -def _do_prepend(arr, pad_chunk, axis): - return np.concatenate( - (pad_chunk.astype(arr.dtype, copy=False), arr), axis=axis) - - -def _do_append(arr, pad_chunk, axis): - return np.concatenate( - (arr, pad_chunk.astype(arr.dtype, copy=False)), axis=axis) - - -def _prepend_const(arr, pad_amt, val, axis=-1): - """ - Prepend constant `val` along `axis` of `arr`. - - Parameters - ---------- - arr : ndarray - Input array of arbitrary shape. - pad_amt : int - Amount of padding to prepend. - val : scalar - Constant value to use. For best results should be of type `arr.dtype`; - if not `arr.dtype` will be cast to `arr.dtype`. - axis : int - Axis along which to pad `arr`. - - Returns - ------- - padarr : ndarray - Output array, with `pad_amt` constant `val` prepended along `axis`. - - """ - if pad_amt == 0: - return arr - padshape = tuple(x if i != axis else pad_amt - for (i, x) in enumerate(arr.shape)) - return _do_prepend(arr, np.full(padshape, val, dtype=arr.dtype), axis) - - -def _append_const(arr, pad_amt, val, axis=-1): - """ - Append constant `val` along `axis` of `arr`. - - Parameters - ---------- - arr : ndarray - Input array of arbitrary shape. - pad_amt : int - Amount of padding to append. - val : scalar - Constant value to use. For best results should be of type `arr.dtype`; - if not `arr.dtype` will be cast to `arr.dtype`. - axis : int - Axis along which to pad `arr`. - - Returns - ------- - padarr : ndarray - Output array, with `pad_amt` constant `val` appended along `axis`. - - """ - if pad_amt == 0: - return arr - padshape = tuple(x if i != axis else pad_amt - for (i, x) in enumerate(arr.shape)) - return _do_append(arr, np.full(padshape, val, dtype=arr.dtype), axis) - - - -def _prepend_edge(arr, pad_amt, axis=-1): - """ - Prepend `pad_amt` to `arr` along `axis` by extending edge values. - - Parameters - ---------- - arr : ndarray - Input array of arbitrary shape. - pad_amt : int - Amount of padding to prepend. - axis : int - Axis along which to pad `arr`. - - Returns - ------- - padarr : ndarray - Output array, extended by `pad_amt` edge values appended along `axis`. - - """ - if pad_amt == 0: - return arr - - edge_slice = _slice_first(arr.shape, 1, axis=axis) - edge_arr = arr[edge_slice] - return _do_prepend(arr, edge_arr.repeat(pad_amt, axis=axis), axis) - - -def _append_edge(arr, pad_amt, axis=-1): - """ - Append `pad_amt` to `arr` along `axis` by extending edge values. - - Parameters - ---------- - arr : ndarray - Input array of arbitrary shape. - pad_amt : int - Amount of padding to append. - axis : int - Axis along which to pad `arr`. - - Returns - ------- - padarr : ndarray - Output array, extended by `pad_amt` edge values prepended along - `axis`. 
-
+def _slice_at_axis(sl, axis):
     """
-    if pad_amt == 0:
-        return arr
-
-    edge_slice = _slice_last(arr.shape, 1, axis=axis)
-    edge_arr = arr[edge_slice]
-    return _do_append(arr, edge_arr.repeat(pad_amt, axis=axis), axis)
-
-
-def _prepend_ramp(arr, pad_amt, end, axis=-1):
-    """
-    Prepend linear ramp along `axis`.
+    Construct tuple of slices to slice an array in the given dimension.
 
     Parameters
     ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to prepend.
-    end : scalar
-        Constal value to use. For best results should be of type `arr.dtype`;
-        if not `arr.dtype` will be cast to `arr.dtype`.
+    sl : slice
+        The slice for the given dimension.
     axis : int
-        Axis along which to pad `arr`.
+        The axis to which `sl` is applied. All other dimensions are left
+        "unsliced".
 
     Returns
     -------
-    padarr : ndarray
-        Output array, with `pad_amt` values prepended along `axis`. The
-        prepended region ramps linearly from the edge value to `end`.
-
-    """
-    if pad_amt == 0:
-        return arr
-
-    # Generate shape for final concatenated array
-    padshape = tuple(x if i != axis else pad_amt
-                     for (i, x) in enumerate(arr.shape))
-
-    # Generate an n-dimensional array incrementing along `axis`
-    ramp_arr = _arange_ndarray(arr, padshape, axis,
-                               reverse=True).astype(np.float64)
-
-    # Appropriate slicing to extract n-dimensional edge along `axis`
-    edge_slice = _slice_first(arr.shape, 1, axis=axis)
-
-    # Extract edge, and extend along `axis`
-    edge_pad = arr[edge_slice].repeat(pad_amt, axis)
-
-    # Linear ramp
-    slope = (end - edge_pad) / float(pad_amt)
-    ramp_arr = ramp_arr * slope
-    ramp_arr += edge_pad
-    _round_ifneeded(ramp_arr, arr.dtype)
-
-    # Ramp values will most likely be float, cast them to the same type as arr
-    return _do_prepend(arr, ramp_arr, axis)
-
-
-def _append_ramp(arr, pad_amt, end, axis=-1):
-    """
-    Append linear ramp along `axis`.
-
-    Parameters
-    ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to append.
-    end : scalar
-        Constal value to use. For best results should be of type `arr.dtype`;
-        if not `arr.dtype` will be cast to `arr.dtype`.
-    axis : int
-        Axis along which to pad `arr`.
-
-    Returns
-    -------
-    padarr : ndarray
-        Output array, with `pad_amt` values appended along `axis`. The
-        appended region ramps linearly from the edge value to `end`.
+    sl : tuple of slices
+        A tuple with slices matching `shape` in length.
+
+    Examples
+    --------
+    >>> _slice_at_axis(slice(None, 3, -1), 1)
+    (slice(None, None, None), slice(None, 3, -1), Ellipsis)
     """
-    if pad_amt == 0:
-        return arr
-
-    # Generate shape for final concatenated array
-    padshape = tuple(x if i != axis else pad_amt
-                     for (i, x) in enumerate(arr.shape))
-
-    # Generate an n-dimensional array incrementing along `axis`
-    ramp_arr = _arange_ndarray(arr, padshape, axis,
-                               reverse=False).astype(np.float64)
-
-    # Slice a chunk from the edge to calculate stats on
-    edge_slice = _slice_last(arr.shape, 1, axis=axis)
-
-    # Extract edge, and extend along `axis`
-    edge_pad = arr[edge_slice].repeat(pad_amt, axis)
-
-    # Linear ramp
-    slope = (end - edge_pad) / float(pad_amt)
-    ramp_arr = ramp_arr * slope
-    ramp_arr += edge_pad
-    _round_ifneeded(ramp_arr, arr.dtype)
-
-    # Ramp values will most likely be float, cast them to the same type as arr
-    return _do_append(arr, ramp_arr, axis)
+    return (slice(None),) * axis + (sl,) + (...,)
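These slice tuples are ordinary NumPy indices; a small interactive check of what the helper builds (plain NumPy, independent of the patch)::

    >>> import numpy as np
    >>> ix = (slice(None),) * 1 + (slice(0, 2),) + (...,)
    >>> ix
    (slice(None, None, None), slice(0, 2, None), Ellipsis)
    >>> a = np.arange(12).reshape(3, 4)
    >>> a[ix]   # every row, first two columns
    array([[0, 1],
           [4, 5],
           [8, 9]])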
""" - if pad_amt == 0: - return arr - - # Equivalent to edge padding for single value, so do that instead - if num == 1: - return _append_edge(arr, pad_amt, axis) - - # Use entire array if `num` is too large - if num is not None: - if num >= arr.shape[axis]: - num = None - - # Slice a chunk from the edge to calculate stats on - if num is not None: - max_slice = _slice_last(arr.shape, num, axis=axis) - else: - max_slice = tuple(slice(None) for x in arr.shape) - - # Extract slice, calculate max - max_chunk = arr[max_slice].max(axis=axis, keepdims=True) - - # Concatenate `arr` with `max_chunk`, extended along `axis` by `pad_amt` - return _do_append(arr, max_chunk.repeat(pad_amt, axis=axis), axis) + axis += 1 + sl = (slice(None),) * axis + original_area_slice[axis:] + return array[sl] -def _prepend_mean(arr, pad_amt, num, axis=-1): +def _pad_simple(array, pad_width, fill_value=None): """ - Prepend `pad_amt` mean values along `axis`. + Pad array on all sides with either a single value or undefined values. Parameters ---------- - arr : ndarray - Input array of arbitrary shape. - pad_amt : int - Amount of padding to prepend. - num : int - Depth into `arr` along `axis` to calculate mean. - Range: [1, `arr.shape[axis]`] or None (entire axis) - axis : int - Axis along which to pad `arr`. + array : ndarray + Array to grow. + pad_width : sequence of tuple[int, int] + Pad width on both sides for each dimension in `arr`. + fill_value : scalar, optional + If provided the padded area is filled with this value, otherwise + the pad area left undefined. Returns ------- - padarr : ndarray - Output array, with `pad_amt` values prepended along `axis`. The - prepended region is the mean of the first `num` values along `axis`. - + padded : ndarray + The padded array with the same dtype as`array`. Its order will default + to C-style if `array` is not F-contiguous. + original_area_slice : tuple + A tuple of slices pointing to the area of the original array. """ - if pad_amt == 0: - return arr - - # Equivalent to edge padding for single value, so do that instead - if num == 1: - return _prepend_edge(arr, pad_amt, axis) - - # Use entire array if `num` is too large - if num is not None: - if num >= arr.shape[axis]: - num = None + # Allocate grown array + new_shape = tuple( + left + size + right + for size, (left, right) in zip(array.shape, pad_width) + ) + order = 'F' if array.flags.fnc else 'C' # Fortran and not also C-order + padded = np.empty(new_shape, dtype=array.dtype, order=order) - # Slice a chunk from the edge to calculate stats on - mean_slice = _slice_first(arr.shape, num, axis=axis) + if fill_value is not None: + padded.fill(fill_value) - # Extract slice, calculate mean - mean_chunk = arr[mean_slice].mean(axis, keepdims=True) - _round_ifneeded(mean_chunk, arr.dtype) + # Copy old array into correct space + original_area_slice = tuple( + slice(left, left + size) + for size, (left, right) in zip(array.shape, pad_width) + ) + padded[original_area_slice] = array - # Concatenate `arr` with `mean_chunk`, extended along `axis` by `pad_amt` - return _do_prepend(arr, mean_chunk.repeat(pad_amt, axis), axis=axis) + return padded, original_area_slice -def _append_mean(arr, pad_amt, num, axis=-1): +def _set_pad_area(padded, axis, width_pair, value_pair): """ - Append `pad_amt` mean values along `axis`. + Set empty-padded area in given dimension. Parameters ---------- - arr : ndarray - Input array of arbitrary shape. - pad_amt : int - Amount of padding to append. 
-    num : int
-        Depth into `arr` along `axis` to calculate mean.
-        Range: [1, `arr.shape[axis]`] or None (entire axis)
+    padded : ndarray
+        Array with the pad area which is modified inplace.
     axis : int
-        Axis along which to pad `arr`.
-
-    Returns
-    -------
-    padarr : ndarray
-        Output array, with `pad_amt` values appended along `axis`. The
-        appended region is the maximum of the final `num` values along `axis`.
-
+        Dimension with the pad area to set.
+    width_pair : (int, int)
+        Pair of widths that mark the pad area on both sides in the given
+        dimension.
+    value_pair : tuple of scalars or ndarrays
+        Values inserted into the pad area on each side. It must match or be
+        broadcastable to the shape of `arr`.
     """
-    if pad_amt == 0:
-        return arr
-
-    # Equivalent to edge padding for single value, so do that instead
-    if num == 1:
-        return _append_edge(arr, pad_amt, axis)
-
-    # Use entire array if `num` is too large
-    if num is not None:
-        if num >= arr.shape[axis]:
-            num = None
+    left_slice = _slice_at_axis(slice(None, width_pair[0]), axis)
+    padded[left_slice] = value_pair[0]
 
-    # Slice a chunk from the edge to calculate stats on
-    if num is not None:
-        mean_slice = _slice_last(arr.shape, num, axis=axis)
-    else:
-        mean_slice = tuple(slice(None) for x in arr.shape)
-
-    # Extract slice, calculate mean
-    mean_chunk = arr[mean_slice].mean(axis=axis, keepdims=True)
-    _round_ifneeded(mean_chunk, arr.dtype)
-
-    # Concatenate `arr` with `mean_chunk`, extended along `axis` by `pad_amt`
-    return _do_append(arr, mean_chunk.repeat(pad_amt, axis), axis=axis)
+    right_slice = _slice_at_axis(
+        slice(padded.shape[axis] - width_pair[1], None), axis)
+    padded[right_slice] = value_pair[1]
 
 
-def _prepend_med(arr, pad_amt, num, axis=-1):
+def _get_edges(padded, axis, width_pair):
     """
-    Prepend `pad_amt` median values along `axis`.
+    Retrieve edge values from empty-padded array in given dimension.
 
     Parameters
     ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to prepend.
-    num : int
-        Depth into `arr` along `axis` to calculate median.
-        Range: [1, `arr.shape[axis]`] or None (entire axis)
+    padded : ndarray
+        Empty-padded array.
     axis : int
-        Axis along which to pad `arr`.
+        Dimension in which the edges are considered.
+    width_pair : (int, int)
+        Pair of widths that mark the pad area on both sides in the given
+        dimension.
 
     Returns
     -------
-    padarr : ndarray
-        Output array, with `pad_amt` values prepended along `axis`. The
-        prepended region is the median of the first `num` values along `axis`.
-
+    left_edge, right_edge : ndarray
+        Edge values of the valid area in `padded` in the given dimension. Its
+        shape will always match `padded` except for the dimension given by
+        `axis` which will have a length of 1.
""" - if pad_amt == 0: - return arr - - # Equivalent to edge padding for single value, so do that instead - if num == 1: - return _prepend_edge(arr, pad_amt, axis) - - # Use entire array if `num` is too large - if num is not None: - if num >= arr.shape[axis]: - num = None - - # Slice a chunk from the edge to calculate stats on - med_slice = _slice_first(arr.shape, num, axis=axis) + left_index = width_pair[0] + left_slice = _slice_at_axis(slice(left_index, left_index + 1), axis) + left_edge = padded[left_slice] - # Extract slice, calculate median - med_chunk = np.median(arr[med_slice], axis=axis, keepdims=True) - _round_ifneeded(med_chunk, arr.dtype) + right_index = padded.shape[axis] - width_pair[1] + right_slice = _slice_at_axis(slice(right_index - 1, right_index), axis) + right_edge = padded[right_slice] - # Concatenate `arr` with `med_chunk`, extended along `axis` by `pad_amt` - return _do_prepend(arr, med_chunk.repeat(pad_amt, axis), axis=axis) + return left_edge, right_edge -def _append_med(arr, pad_amt, num, axis=-1): +def _get_linear_ramps(padded, axis, width_pair, end_value_pair): """ - Append `pad_amt` median values along `axis`. + Construct linear ramps for empty-padded array in given dimension. Parameters ---------- - arr : ndarray - Input array of arbitrary shape. - pad_amt : int - Amount of padding to append. - num : int - Depth into `arr` along `axis` to calculate median. - Range: [1, `arr.shape[axis]`] or None (entire axis) + padded : ndarray + Empty-padded array. axis : int - Axis along which to pad `arr`. + Dimension in which the ramps are constructed. + width_pair : (int, int) + Pair of widths that mark the pad area on both sides in the given + dimension. + end_value_pair : (scalar, scalar) + End values for the linear ramps which form the edge of the fully padded + array. These values are included in the linear ramps. Returns ------- - padarr : ndarray - Output array, with `pad_amt` values appended along `axis`. The - appended region is the median of the final `num` values along `axis`. - + left_ramp, right_ramp : ndarray + Linear ramps to set on both sides of `padded`. """ - if pad_amt == 0: - return arr + edge_pair = _get_edges(padded, axis, width_pair) - # Equivalent to edge padding for single value, so do that instead - if num == 1: - return _append_edge(arr, pad_amt, axis) + left_ramp = _linear_ramp( + padded.ndim, axis, start=end_value_pair[0], stop=edge_pair[0], + size=width_pair[0], reverse=False + ) + _round_if_needed(left_ramp, padded.dtype) - # Use entire array if `num` is too large - if num is not None: - if num >= arr.shape[axis]: - num = None + right_ramp = _linear_ramp( + padded.ndim, axis, start=end_value_pair[1], stop=edge_pair[1], + size=width_pair[1], reverse=True + ) + _round_if_needed(right_ramp, padded.dtype) - # Slice a chunk from the edge to calculate stats on - if num is not None: - med_slice = _slice_last(arr.shape, num, axis=axis) - else: - med_slice = tuple(slice(None) for x in arr.shape) + return left_ramp, right_ramp - # Extract slice, calculate median - med_chunk = np.median(arr[med_slice], axis=axis, keepdims=True) - _round_ifneeded(med_chunk, arr.dtype) - # Concatenate `arr` with `med_chunk`, extended along `axis` by `pad_amt` - return _do_append(arr, med_chunk.repeat(pad_amt, axis), axis=axis) - - -def _prepend_min(arr, pad_amt, num, axis=-1): +def _get_stats(padded, axis, width_pair, length_pair, stat_func): """ - Prepend `pad_amt` minimum values along `axis`. + Calculate statistic for the empty-padded array in given dimnsion. 
-def _prepend_min(arr, pad_amt, num, axis=-1):
+def _get_stats(padded, axis, width_pair, length_pair, stat_func):
     """
-    Prepend `pad_amt` minimum values along `axis`.
+    Calculate statistic for the empty-padded array in given dimension.
 
     Parameters
     ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to prepend.
-    num : int
-        Depth into `arr` along `axis` to calculate minimum.
-        Range: [1, `arr.shape[axis]`] or None (entire axis)
+    padded : ndarray
+        Empty-padded array.
     axis : int
-        Axis along which to pad `arr`.
+        Dimension in which the statistic is calculated.
+    width_pair : (int, int)
+        Pair of widths that mark the pad area on both sides in the given
+        dimension.
+    length_pair : 2-element sequence of None or int
+        Gives the number of values in valid area from each side that is
+        taken into account when calculating the statistic. If None the entire
+        valid area in `padded` is considered.
+    stat_func : function
+        Function to compute statistic. The expected signature is
+        ``stat_func(x: ndarray, axis: int, keepdims: bool) -> ndarray``.
 
     Returns
     -------
-    padarr : ndarray
-        Output array, with `pad_amt` values prepended along `axis`. The
-        prepended region is the minimum of the first `num` values along
-        `axis`.
-
-    """
-    if pad_amt == 0:
-        return arr
-
-    # Equivalent to edge padding for single value, so do that instead
-    if num == 1:
-        return _prepend_edge(arr, pad_amt, axis)
-
-    # Use entire array if `num` is too large
-    if num is not None:
-        if num >= arr.shape[axis]:
-            num = None
-
-    # Slice a chunk from the edge to calculate stats on
-    min_slice = _slice_first(arr.shape, num, axis=axis)
-
-    # Extract slice, calculate min
-    min_chunk = arr[min_slice].min(axis=axis, keepdims=True)
-
-    # Concatenate `arr` with `min_chunk`, extended along `axis` by `pad_amt`
-    return _do_prepend(arr, min_chunk.repeat(pad_amt, axis), axis=axis)
-
-
-def _append_min(arr, pad_amt, num, axis=-1):
-    """
-    Append `pad_amt` median values along `axis`.
+    left_stat, right_stat : ndarray
+        Calculated statistic for both sides of `padded`.
+    """
+    # Calculate indices of the edges of the area with original values
+    left_index = width_pair[0]
+    right_index = padded.shape[axis] - width_pair[1]
+    # as well as its length
+    max_length = right_index - left_index
+
+    # Limit stat_lengths to max_length
+    left_length, right_length = length_pair
+    if left_length is None or max_length < left_length:
+        left_length = max_length
+    if right_length is None or max_length < right_length:
+        right_length = max_length
+
+    # Calculate statistic for the left side
+    left_slice = _slice_at_axis(
+        slice(left_index, left_index + left_length), axis)
+    left_chunk = padded[left_slice]
+    left_stat = stat_func(left_chunk, axis=axis, keepdims=True)
+    _round_if_needed(left_stat, padded.dtype)
+
+    if left_length == right_length == max_length:
+        # return early as right_stat must be identical to left_stat
+        return left_stat, left_stat
+
+    # Calculate statistic for the right side
+    right_slice = _slice_at_axis(
+        slice(right_index - right_length, right_index), axis)
+    right_chunk = padded[right_slice]
+    right_stat = stat_func(right_chunk, axis=axis, keepdims=True)
+    _round_if_needed(right_stat, padded.dtype)
+
+    return left_stat, right_stat
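In `np.pad` terms, `stat_length` maps onto `length_pair` here. A quick check with values whose means are exact, so `_round_if_needed` has nothing to do::

    >>> a = np.array([2, 4, 6, 8])
    >>> np.pad(a, (1, 2), mode='mean', stat_length=2)
    array([3, 2, 4, 6, 8, 7, 7])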
 
 
+def _set_reflect_both(padded, axis, width_pair, method, include_edge=False):
+    """
+    Pad `axis` of `arr` with reflection.
 
     Parameters
     ----------
-    arr : ndarray
+    padded : ndarray
         Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to append.
-    num : int
-        Depth into `arr` along `axis` to calculate minimum.
-        Range: [1, `arr.shape[axis]`] or None (entire axis)
     axis : int
         Axis along which to pad `arr`.
-
-    Returns
-    -------
-    padarr : ndarray
-        Output array, with `pad_amt` values appended along `axis`. The
-        appended region is the minimum of the final `num` values along `axis`.
-
-    """
-    if pad_amt == 0:
-        return arr
-
-    # Equivalent to edge padding for single value, so do that instead
-    if num == 1:
-        return _append_edge(arr, pad_amt, axis)
-
-    # Use entire array if `num` is too large
-    if num is not None:
-        if num >= arr.shape[axis]:
-            num = None
-
-    # Slice a chunk from the edge to calculate stats on
-    if num is not None:
-        min_slice = _slice_last(arr.shape, num, axis=axis)
-    else:
-        min_slice = tuple(slice(None) for x in arr.shape)
-
-    # Extract slice, calculate min
-    min_chunk = arr[min_slice].min(axis=axis, keepdims=True)
-
-    # Concatenate `arr` with `min_chunk`, extended along `axis` by `pad_amt`
-    return _do_append(arr, min_chunk.repeat(pad_amt, axis), axis=axis)
-
-
-def _pad_ref(arr, pad_amt, method, axis=-1):
-    """
-    Pad `axis` of `arr` by reflection.
-
-    Parameters
-    ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : tuple of ints, length 2
-        Padding to (prepend, append) along `axis`.
+    width_pair : (int, int)
+        Pair of widths that mark the pad area on both sides in the given
+        dimension.
     method : str
         Controls method of reflection; options are 'even' or 'odd'.
-    axis : int
-        Axis along which to pad `arr`.
+    include_edge : bool
+        If true, edge value is included in reflection, otherwise the edge
+        value forms the symmetric axis to the reflection.
 
     Returns
     -------
-    padarr : ndarray
-        Output array, with `pad_amt[0]` values prepended and `pad_amt[1]`
-        values appended along `axis`. Both regions are padded with reflected
-        values from the original array.
-
-    Notes
-    -----
-    This algorithm does not pad with repetition, i.e. the edges are not
-    repeated in the reflection. For that behavior, use `mode='symmetric'`.
-
-    The modes 'reflect', 'symmetric', and 'wrap' must be padded with a
-    single function, lest the indexing tricks in non-integer multiples of the
-    original shape would violate repetition in the final iteration.
- - """ - # Implicit booleanness to test for zero (or None) in any scalar type - if pad_amt[0] == 0 and pad_amt[1] == 0: - return arr - - ########################################################################## - # Prepended region - - # Slice off a reverse indexed chunk from near edge to pad `arr` before - ref_slice = _slice_at_axis(arr.shape, slice(pad_amt[0], 0, -1), axis=axis) - - ref_chunk1 = arr[ref_slice] - - # Memory/computationally more expensive, only do this if `method='odd'` - if 'odd' in method and pad_amt[0] > 0: - edge_slice1 = _slice_first(arr.shape, 1, axis=axis) - edge_chunk = arr[edge_slice1] - ref_chunk1 = 2 * edge_chunk - ref_chunk1 - del edge_chunk - - ########################################################################## - # Appended region - - # Slice off a reverse indexed chunk from far edge to pad `arr` after - start = arr.shape[axis] - pad_amt[1] - 1 - end = arr.shape[axis] - 1 - ref_slice = _slice_at_axis(arr.shape, slice(start, end), axis=axis) - rev_idx = _slice_at_axis(arr.shape, slice(None, None, -1), axis=axis) - ref_chunk2 = arr[ref_slice][rev_idx] - - if 'odd' in method: - edge_slice2 = _slice_last(arr.shape, 1, axis=axis) - edge_chunk = arr[edge_slice2] - ref_chunk2 = 2 * edge_chunk - ref_chunk2 - del edge_chunk - - # Concatenate `arr` with both chunks, extending along `axis` - return np.concatenate((ref_chunk1, arr, ref_chunk2), axis=axis) - - -def _pad_sym(arr, pad_amt, method, axis=-1): - """ - Pad `axis` of `arr` by symmetry. - - Parameters - ---------- - arr : ndarray - Input array of arbitrary shape. pad_amt : tuple of ints, length 2 - Padding to (prepend, append) along `axis`. - method : str - Controls method of symmetry; options are 'even' or 'odd'. - axis : int - Axis along which to pad `arr`. - - Returns - ------- - padarr : ndarray - Output array, with `pad_amt[0]` values prepended and `pad_amt[1]` - values appended along `axis`. Both regions are padded with symmetric - values from the original array. - - Notes - ----- - This algorithm DOES pad with repetition, i.e. the edges are repeated. - For padding without repeated edges, use `mode='reflect'`. - - The modes 'reflect', 'symmetric', and 'wrap' must be padded with a - single function, lest the indexing tricks in non-integer multiples of the - original shape would violate repetition in the final iteration. - + New index positions of padding to do along the `axis`. If these are + both 0, padding is done in this dimension. 
""" - # Implicit booleanness to test for zero (or None) in any scalar type - if pad_amt[0] == 0 and pad_amt[1] == 0: - return arr - - ########################################################################## - # Prepended region - - # Slice off a reverse indexed chunk from near edge to pad `arr` before - sym_slice = _slice_first(arr.shape, pad_amt[0], axis=axis) - rev_idx = _slice_at_axis(arr.shape, slice(None, None, -1), axis=axis) - sym_chunk1 = arr[sym_slice][rev_idx] - - # Memory/computationally more expensive, only do this if `method='odd'` - if 'odd' in method and pad_amt[0] > 0: - edge_slice1 = _slice_first(arr.shape, 1, axis=axis) - edge_chunk = arr[edge_slice1] - sym_chunk1 = 2 * edge_chunk - sym_chunk1 - del edge_chunk - - ########################################################################## - # Appended region - - # Slice off a reverse indexed chunk from far edge to pad `arr` after - sym_slice = _slice_last(arr.shape, pad_amt[1], axis=axis) - sym_chunk2 = arr[sym_slice][rev_idx] - - if 'odd' in method: - edge_slice2 = _slice_last(arr.shape, 1, axis=axis) - edge_chunk = arr[edge_slice2] - sym_chunk2 = 2 * edge_chunk - sym_chunk2 - del edge_chunk - - # Concatenate `arr` with both chunks, extending along `axis` - return np.concatenate((sym_chunk1, arr, sym_chunk2), axis=axis) + left_pad, right_pad = width_pair + old_length = padded.shape[axis] - right_pad - left_pad - -def _pad_wrap(arr, pad_amt, axis=-1): - """ - Pad `axis` of `arr` via wrapping. + if include_edge: + # Edge is included, we need to offset the pad amount by 1 + edge_offset = 1 + else: + edge_offset = 0 # Edge is not included, no need to offset pad amount + old_length -= 1 # but must be omitted from the chunk + + if left_pad > 0: + # Pad with reflected values on left side: + # First limit chunk size which can't be larger than pad area + chunk_length = min(old_length, left_pad) + # Slice right to left, stop on or next to edge, start relative to stop + stop = left_pad - edge_offset + start = stop + chunk_length + left_slice = _slice_at_axis(slice(start, stop, -1), axis) + left_chunk = padded[left_slice] + + if method == "odd": + # Negate chunk and align with edge + edge_slice = _slice_at_axis(slice(left_pad, left_pad + 1), axis) + left_chunk = 2 * padded[edge_slice] - left_chunk + + # Insert chunk into padded area + start = left_pad - chunk_length + stop = left_pad + pad_area = _slice_at_axis(slice(start, stop), axis) + padded[pad_area] = left_chunk + # Adjust pointer to left edge for next iteration + left_pad -= chunk_length + + if right_pad > 0: + # Pad with reflected values on right side: + # First limit chunk size which can't be larger than pad area + chunk_length = min(old_length, right_pad) + # Slice right to left, start on or next to edge, stop relative to start + start = -right_pad + edge_offset - 2 + stop = start - chunk_length + right_slice = _slice_at_axis(slice(start, stop, -1), axis) + right_chunk = padded[right_slice] + + if method == "odd": + # Negate chunk and align with edge + edge_slice = _slice_at_axis( + slice(-right_pad - 1, -right_pad), axis) + right_chunk = 2 * padded[edge_slice] - right_chunk + + # Insert chunk into padded area + start = padded.shape[axis] - right_pad + stop = start + chunk_length + pad_area = _slice_at_axis(slice(start, stop), axis) + padded[pad_area] = right_chunk + # Adjust pointer to right edge for next iteration + right_pad -= chunk_length + + return left_pad, right_pad + + +def _set_wrap_both(padded, axis, width_pair): + """ + Pad `axis` of `arr` with wrapped values. 
 
 
+def _set_wrap_both(padded, axis, width_pair):
+    """
+    Pad `axis` of `arr` with wrapped values.
 
     Parameters
     ----------
-    arr : ndarray
+    padded : ndarray
         Input array of arbitrary shape.
-    pad_amt : tuple of ints, length 2
-        Padding to (prepend, append) along `axis`.
     axis : int
         Axis along which to pad `arr`.
+    width_pair : (int, int)
+        Pair of widths that mark the pad area on both sides in the given
+        dimension.
 
     Returns
     -------
-    padarr : ndarray
-        Output array, with `pad_amt[0]` values prepended and `pad_amt[1]`
-        values appended along `axis`. Both regions are padded wrapped values
-        from the opposite end of `axis`.
-
-    Notes
-    -----
-    This method of padding is also known as 'tile' or 'tiling'.
-
-    The modes 'reflect', 'symmetric', and 'wrap' must be padded with a
-    single function, lest the indexing tricks in non-integer multiples of the
-    original shape would violate repetition in the final iteration.
-
-    """
-    # Implicit booleanness to test for zero (or None) in any scalar type
-    if pad_amt[0] == 0 and pad_amt[1] == 0:
-        return arr
-
-    ##########################################################################
-    # Prepended region
-
-    # Slice off a reverse indexed chunk from near edge to pad `arr` before
-    wrap_slice = _slice_last(arr.shape, pad_amt[0], axis=axis)
-    wrap_chunk1 = arr[wrap_slice]
-
-    ##########################################################################
-    # Appended region
-
-    # Slice off a reverse indexed chunk from far edge to pad `arr` after
-    wrap_slice = _slice_first(arr.shape, pad_amt[1], axis=axis)
-    wrap_chunk2 = arr[wrap_slice]
-
-    # Concatenate `arr` with both chunks, extending along `axis`
-    return np.concatenate((wrap_chunk1, arr, wrap_chunk2), axis=axis)
+    pad_amt : tuple of ints, length 2
+        New index positions of padding to do along the `axis`. If these are
+        both 0, padding in this dimension is complete.
+    """
+    left_pad, right_pad = width_pair
+    period = padded.shape[axis] - right_pad - left_pad
+
+    # If the current dimension of `arr` doesn't contain enough valid values
+    # (not part of the undefined pad area) we need to pad multiple times.
+    # Each time the pad area shrinks on both sides which is communicated with
+    # these variables.
+    new_left_pad = 0
+    new_right_pad = 0
+
+    if left_pad > 0:
+        # Pad with wrapped values on left side
+        # First slice chunk from right side of the non-pad area.
+        # Use min(period, left_pad) to ensure that chunk is not larger than
+        # pad area
+        right_slice = _slice_at_axis(
+            slice(-right_pad - min(period, left_pad),
+                  -right_pad if right_pad != 0 else None),
+            axis
+        )
+        right_chunk = padded[right_slice]
+
+        if left_pad > period:
+            # Chunk is smaller than pad area
+            pad_area = _slice_at_axis(slice(left_pad - period, left_pad), axis)
+            new_left_pad = left_pad - period
+        else:
+            # Chunk matches pad area
+            pad_area = _slice_at_axis(slice(None, left_pad), axis)
+
+        padded[pad_area] = right_chunk
+
+    if right_pad > 0:
+        # Pad with wrapped values on right side
+        # First slice chunk from left side of the non-pad area.
+        # Use min(period, right_pad) to ensure that chunk is not larger than
+        # pad area
+        left_slice = _slice_at_axis(
+            slice(left_pad, left_pad + min(period, right_pad),), axis)
+        left_chunk = padded[left_slice]
+
+        if right_pad > period:
+            # Chunk is smaller than pad area
+            pad_area = _slice_at_axis(
+                slice(-right_pad, -right_pad + period), axis)
+            new_right_pad = right_pad - period
+        else:
+            # Chunk matches pad area
+            pad_area = _slice_at_axis(slice(-right_pad, None), axis)
+
+        padded[pad_area] = left_chunk
+
+    return new_left_pad, new_right_pad
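`_set_wrap_both` tiles the original values with period `len(a)` and, like the reflect helper, returns the still-unfilled widths so the caller can loop. Observable through the public API (right pad of 4 exceeds the period of 3, so two passes are needed)::

    >>> a = np.array([1, 2, 3])
    >>> np.pad(a, (2, 4), mode='wrap')
    array([2, 3, 1, 2, 3, 1, 2, 3, 1])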
 
 
 def _as_pairs(x, ndim, as_index=False):
@@ -953,23 +568,23 @@
     return np.broadcast_to(x, (ndim, 2)).tolist()
 
 
-###############################################################################
-# Public functions
+def _pad_dispatcher(array, pad_width, mode=None, **kwargs):
+    return (array,)
 
 
-def _pad_dispatcher(array, pad_width, mode, **kwargs):
-    return (array,)
+###############################################################################
+# Public functions
 
 
 @array_function_dispatch(_pad_dispatcher, module='numpy')
-def pad(array, pad_width, mode, **kwargs):
+def pad(array, pad_width, mode='constant', **kwargs):
     """
-    Pads an array.
+    Pad an array.
 
     Parameters
     ----------
     array : array_like of rank N
-        Input array
+        The array to pad.
     pad_width : {sequence, array_like, int}
         Number of values padded to the edges of each axis.
         ((before_1, after_1), ... (before_N, after_N)) unique pad widths
@@ -977,10 +592,10 @@ def pad(array, pad_width, mode='constant', **kwargs):
         ((before, after),) yields same before and after pad for each axis.
         (pad,) or int is a shortcut for before = after = pad width for all
         axes.
-    mode : str or function
+    mode : str or function, optional
         One of the following string values or a user supplied function.
 
-        'constant'
+        'constant' (default)
            Pads with a constant value.
        'edge'
            Pads with the edge values of array.
@@ -1010,6 +625,11 @@ def pad(array, pad_width, mode='constant', **kwargs):
            Pads with the wrap of the vector along the axis.
            The first values are used to pad the end and the
            end values are used to pad the beginning.
+        'empty'
+            Pads with undefined values.
+
+            .. versionadded:: 1.17
+
        <function>
            Padding function, see Notes.
    stat_length : sequence or int, optional
@@ -1026,31 +646,31 @@ def pad(array, pad_width, mode='constant', **kwargs):
        length for all axes.
        Default is ``None``, to use the entire axis.
-    constant_values : sequence or int, optional
+    constant_values : sequence or scalar, optional
        Used in 'constant'. The values to set the padded values for each
        axis.
-        ((before_1, after_1), ... (before_N, after_N)) unique pad constants
+        ``((before_1, after_1), ... (before_N, after_N))`` unique pad constants
        for each axis.
-        ((before, after),) yields same before and after constants for each
+        ``((before, after),)`` yields same before and after constants for each
        axis.
-        (constant,) or int is a shortcut for before = after = constant for
+        ``(constant,)`` or ``constant`` is a shortcut for
+        ``before = after = constant`` for
        all axes.
        Default is 0.
-    end_values : sequence or int, optional
+    end_values : sequence or scalar, optional
        Used in 'linear_ramp'. The values used for the ending value of the
        linear_ramp and that will form the edge of the padded array.
-        ((before_1, after_1), ... (before_N, after_N)) unique end values
+        ``((before_1, after_1), ... (before_N, after_N))`` unique end values
        for each axis.
-        ((before, after),) yields same before and after end values for each
+        ``((before, after),)`` yields same before and after end values for each
        axis.
-        (constant,) or int is a shortcut for before = after = end value for
+        ``(constant,)`` or ``constant`` is a shortcut for
+        ``before = after = constant`` for
        all axes.
        Default is 0.
@@ -1075,9 +695,8 @@ def pad(array, pad_width, mode='constant', **kwargs):
    think about with a rank 2 array where the corners of the padded array
    are calculated by using padded values from the first axis.
 
-    The padding function, if used, should return a rank 1 array equal in
-    length to the vector argument with padded values replaced. It has the
-    following signature::
+    The padding function, if used, should modify a rank 1 array in-place. It
+    has the following signature::
 
        padding_func(vector, iaxis_pad_width, iaxis, kwargs)
 
@@ -1085,7 +704,7 @@ def pad(array, pad_width, mode='constant', **kwargs):
 
        vector : ndarray
            A rank 1 array already padded with zeros. Padded values are
-            vector[:pad_tuple[0]] and vector[-pad_tuple[1]:].
+            vector[:iaxis_pad_width[0]] and vector[-iaxis_pad_width[1]:].
        iaxis_pad_width : tuple
            A 2-tuple of ints, iaxis_pad_width[0] represents the number of
            values padded at the beginning of vector where
@@ -1099,11 +718,11 @@ def pad(array, pad_width, mode='constant', **kwargs):
    Examples
    --------
    >>> a = [1, 2, 3, 4, 5]
-    >>> np.pad(a, (2,3), 'constant', constant_values=(4, 6))
-    array([4, 4, 1, 2, 3, 4, 5, 6, 6, 6])
+    >>> np.pad(a, (2, 3), 'constant', constant_values=(4, 6))
+    array([4, 4, 1, ..., 6, 6, 6])
 
    >>> np.pad(a, (2, 3), 'edge')
-    array([1, 1, 1, 2, 3, 4, 5, 5, 5, 5])
+    array([1, 1, 1, ..., 5, 5, 5])
 
    >>> np.pad(a, (2, 3), 'linear_ramp', end_values=(5, -4))
    array([ 5,  3,  1,  2,  3,  4,  5,  2, -1, -4])
@@ -1147,7 +766,6 @@ def pad(array, pad_width, mode='constant', **kwargs):
    ...     pad_value = kwargs.get('padder', 10)
    ...     vector[:pad_width[0]] = pad_value
    ...     vector[-pad_width[1]:] = pad_value
-    ...     return vector
    >>> a = np.arange(6)
    >>> a = a.reshape((2, 3))
    >>> np.pad(a, 2, pad_with)
@@ -1165,15 +783,42 @@ def pad(array, pad_width, mode='constant', **kwargs):
           [100, 100, 100, 100, 100, 100, 100],
           [100, 100, 100, 100, 100, 100, 100]])
    """
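Under the new contract a user-supplied `mode` function mutates the zero-initialized vector in place and returns nothing. A hedged sketch of the post-patch behavior; `pad_constant_ends` is an illustrative name, not part of the patch, and the guard matters because `vector[-0:]` would address the whole vector::

    >>> def pad_constant_ends(vector, iaxis_pad_width, iaxis, kwargs):
    ...     vector[:iaxis_pad_width[0]] = -1
    ...     if iaxis_pad_width[1] > 0:
    ...         vector[-iaxis_pad_width[1]:] = -2
    >>> np.pad(np.array([1, 2, 3]), (1, 2), pad_constant_ends)
    array([-1,  1,  2,  3, -2, -2])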
-    if not np.asarray(pad_width).dtype.kind == 'i':
+    array = np.asarray(array)
+    pad_width = np.asarray(pad_width)
+
+    if not pad_width.dtype.kind == 'i':
         raise TypeError('`pad_width` must be of integral type.')
 
-    narray = np.array(array)
-    pad_width = _as_pairs(pad_width, narray.ndim, as_index=True)
+    # Broadcast to shape (array.ndim, 2)
+    pad_width = _as_pairs(pad_width, array.ndim, as_index=True)
+
+    if callable(mode):
+        # Old behavior: Use user-supplied function with np.apply_along_axis
+        function = mode
+        # Create a new zero padded array
+        padded, _ = _pad_simple(array, pad_width, fill_value=0)
+        # And apply along each axis
+
+        for axis in range(padded.ndim):
+            # Iterate using ndindex as in apply_along_axis, but assuming that
+            # function operates inplace on the padded array.
+
+            # view with the iteration axis at the end
+            view = np.moveaxis(padded, axis, -1)
+
+            # compute indices for the iteration axes, and append a trailing
+            # ellipsis to prevent 0d arrays decaying to scalars (gh-8642)
+            inds = ndindex(view.shape[:-1])
+            inds = (ind + (Ellipsis,) for ind in inds)
+            for ind in inds:
+                function(view[ind], pad_width[axis], axis, kwargs)
 
-    allowedkwargs = {
+        return padded
+
+    # Make sure that no unsupported keywords were passed for the current mode
+    allowed_kwargs = {
+        'empty': [], 'edge': [], 'wrap': [],
         'constant': ['constant_values'],
-        'edge': [],
         'linear_ramp': ['end_values'],
         'maximum': ['stat_length'],
         'mean': ['stat_length'],
@@ -1181,175 +826,101 @@ def pad(array, pad_width, mode='constant', **kwargs):
         'median': ['stat_length'],
         'minimum': ['stat_length'],
         'reflect': ['reflect_type'],
         'symmetric': ['reflect_type'],
-        'wrap': [],
-        }
-
-    kwdefaults = {
-        'stat_length': None,
-        'constant_values': 0,
-        'end_values': 0,
-        'reflect_type': 'even',
-        }
-
-    if isinstance(mode, np.compat.basestring):
-        # Make sure have allowed kwargs appropriate for mode
-        for key in kwargs:
-            if key not in allowedkwargs[mode]:
-                raise ValueError('%s keyword not in allowed keywords %s' %
-                                 (key, allowedkwargs[mode]))
-
-        # Set kwarg defaults
-        for kw in allowedkwargs[mode]:
-            kwargs.setdefault(kw, kwdefaults[kw])
-
-        # Need to only normalize particular keywords.
-        for i in kwargs:
-            if i == 'stat_length':
-                kwargs[i] = _as_pairs(kwargs[i], narray.ndim, as_index=True)
-            if i in ['end_values', 'constant_values']:
-                kwargs[i] = _as_pairs(kwargs[i], narray.ndim)
-    else:
-        # Drop back to old, slower np.apply_along_axis mode for user-supplied
-        # vector function
-        function = mode
-
-        # Create a new padded array
-        rank = list(range(narray.ndim))
-        total_dim_increase = [np.sum(pad_width[i]) for i in rank]
-        offset_slices = tuple(
-            slice(pad_width[i][0], pad_width[i][0] + narray.shape[i])
-            for i in rank)
-        new_shape = np.array(narray.shape) + total_dim_increase
-        newmat = np.zeros(new_shape, narray.dtype)
-
-        # Insert the original array into the padded array
-        newmat[offset_slices] = narray
-
-        # This is the core of pad ...
-        for iaxis in rank:
-            np.apply_along_axis(function,
-                                iaxis,
-                                newmat,
-                                pad_width[iaxis],
-                                iaxis,
-                                kwargs)
-        return newmat
-
-    # If we get here, use new padding method
-    newmat = narray.copy()
-
-    # API preserved, but completely new algorithm which pads by building the
-    # entire block to pad before/after `arr` with in one step, for each axis.
-    if mode == 'constant':
-        for axis, ((pad_before, pad_after), (before_val, after_val)) \
-                in enumerate(zip(pad_width, kwargs['constant_values'])):
-            newmat = _prepend_const(newmat, pad_before, before_val, axis)
-            newmat = _append_const(newmat, pad_after, after_val, axis)
-
-    elif mode == 'edge':
-        for axis, (pad_before, pad_after) in enumerate(pad_width):
-            newmat = _prepend_edge(newmat, pad_before, axis)
-            newmat = _append_edge(newmat, pad_after, axis)
-
-    elif mode == 'linear_ramp':
-        for axis, ((pad_before, pad_after), (before_val, after_val)) \
-                in enumerate(zip(pad_width, kwargs['end_values'])):
-            newmat = _prepend_ramp(newmat, pad_before, before_val, axis)
-            newmat = _append_ramp(newmat, pad_after, after_val, axis)
-
-    elif mode == 'maximum':
-        for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \
-                in enumerate(zip(pad_width, kwargs['stat_length'])):
-            newmat = _prepend_max(newmat, pad_before, chunk_before, axis)
-            newmat = _append_max(newmat, pad_after, chunk_after, axis)
-
-    elif mode == 'mean':
-        for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \
-                in enumerate(zip(pad_width, kwargs['stat_length'])):
-            newmat = _prepend_mean(newmat, pad_before, chunk_before, axis)
-            newmat = _append_mean(newmat, pad_after, chunk_after, axis)
-
-    elif mode == 'median':
-        for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \
-                in enumerate(zip(pad_width, kwargs['stat_length'])):
-            newmat = _prepend_med(newmat, pad_before, chunk_before, axis)
-            newmat = _append_med(newmat, pad_after, chunk_after, axis)
-
-    elif mode == 'minimum':
-        for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \
-                in enumerate(zip(pad_width, kwargs['stat_length'])):
-            newmat = _prepend_min(newmat, pad_before, chunk_before, axis)
-            newmat = _append_min(newmat, pad_after, chunk_after, axis)
-
-    elif mode == 'reflect':
-        for axis, (pad_before, pad_after) in enumerate(pad_width):
-            if narray.shape[axis] == 0:
-                # Axes with non-zero padding cannot be empty.
-                if pad_before > 0 or pad_after > 0:
-                    raise ValueError("There aren't any elements to reflect"
-                                     " in axis {} of `array`".format(axis))
-                # Skip zero padding on empty axes.
-                continue
-
-            # Recursive padding along any axis where `pad_amt` is too large
-            # for indexing tricks. We can only safely pad the original axis
-            # length, to keep the period of the reflections consistent.
-            if ((pad_before > 0) or
-                    (pad_after > 0)) and newmat.shape[axis] == 1:
+        }
+    try:
+        unsupported_kwargs = set(kwargs) - set(allowed_kwargs[mode])
+    except KeyError:
+        raise ValueError("mode '{}' is not supported".format(mode))
+    if unsupported_kwargs:
+        raise ValueError("unsupported keyword arguments for mode '{}': {}"
+                         .format(mode, unsupported_kwargs))
+
+    stat_functions = {"maximum": np.max, "minimum": np.min,
+                      "mean": np.mean, "median": np.median}
+
+    # Create array with final shape and original values
+    # (padded area is undefined)
+    padded, original_area_slice = _pad_simple(array, pad_width)
+    # And prepare iteration over all dimensions
+    # (zipping may be more readable than using enumerate)
+    axes = range(padded.ndim)
+
+    if mode == "constant":
+        values = kwargs.get("constant_values", 0)
+        values = _as_pairs(values, padded.ndim)
+        for axis, width_pair, value_pair in zip(axes, pad_width, values):
+            roi = _view_roi(padded, original_area_slice, axis)
+            _set_pad_area(roi, axis, width_pair, value_pair)
+
+    elif mode == "empty":
+        pass  # Do nothing as _pad_simple already returned the correct result
+
+    elif array.size == 0:
+        # Only modes "constant" and "empty" can extend empty axes, all other
+        # modes depend on `array` not being empty
+        # -> ensure every empty axis is only "padded with 0"
+        for axis, width_pair in zip(axes, pad_width):
+            if array.shape[axis] == 0 and any(width_pair):
+                raise ValueError(
+                    "can't extend empty axis {} using modes other than "
+                    "'constant' or 'empty'".format(axis)
+                )
+        # passed, don't need to do anything more as _pad_simple already
+        # returned the correct result
+
+    elif mode == "edge":
+        for axis, width_pair in zip(axes, pad_width):
+            roi = _view_roi(padded, original_area_slice, axis)
+            edge_pair = _get_edges(roi, axis, width_pair)
+            _set_pad_area(roi, axis, width_pair, edge_pair)
+
+    elif mode == "linear_ramp":
+        end_values = kwargs.get("end_values", 0)
+        end_values = _as_pairs(end_values, padded.ndim)
+        for axis, width_pair, value_pair in zip(axes, pad_width, end_values):
+            roi = _view_roi(padded, original_area_slice, axis)
+            ramp_pair = _get_linear_ramps(roi, axis, width_pair, value_pair)
+            _set_pad_area(roi, axis, width_pair, ramp_pair)
+
+    elif mode in stat_functions:
+        func = stat_functions[mode]
+        length = kwargs.get("stat_length", None)
+        length = _as_pairs(length, padded.ndim, as_index=True)
+        for axis, width_pair, length_pair in zip(axes, pad_width, length):
+            roi = _view_roi(padded, original_area_slice, axis)
+            stat_pair = _get_stats(roi, axis, width_pair, length_pair, func)
+            _set_pad_area(roi, axis, width_pair, stat_pair)
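Two of the new code paths above are easy to exercise directly; a sketch of the post-patch behavior (the 'empty' result is skipped because its pad values are uninitialized by design, and the error message follows the format string in the hunk)::

    >>> a = np.array([1, 2, 3])
    >>> np.pad(a, 1, mode='empty')  # doctest: +SKIP
    array([..., 1, 2, 3, ...])      # arbitrary, uninitialized values
    >>> np.pad(a, 1, mode='edge', constant_values=9)
    Traceback (most recent call last):
        ...
    ValueError: unsupported keyword arguments for mode 'edge': {'constant_values'}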
- newmat = _prepend_edge(newmat, pad_before, axis) - newmat = _append_edge(newmat, pad_after, axis) + edge_pair = _get_edges(padded, axis, (left_index, right_index)) + _set_pad_area( + padded, axis, (left_index, right_index), edge_pair) continue - method = kwargs['reflect_type'] - safe_pad = newmat.shape[axis] - 1 - while ((pad_before > safe_pad) or (pad_after > safe_pad)): - pad_iter_b = min(safe_pad, - safe_pad * (pad_before // safe_pad)) - pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad)) - newmat = _pad_ref(newmat, (pad_iter_b, - pad_iter_a), method, axis) - pad_before -= pad_iter_b - pad_after -= pad_iter_a - safe_pad += pad_iter_b + pad_iter_a - newmat = _pad_ref(newmat, (pad_before, pad_after), method, axis) - - elif mode == 'symmetric': - for axis, (pad_before, pad_after) in enumerate(pad_width): - # Recursive padding along any axis where `pad_amt` is too large - # for indexing tricks. We can only safely pad the original axis - # length, to keep the period of the reflections consistent. - method = kwargs['reflect_type'] - safe_pad = newmat.shape[axis] - while ((pad_before > safe_pad) or - (pad_after > safe_pad)): - pad_iter_b = min(safe_pad, - safe_pad * (pad_before // safe_pad)) - pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad)) - newmat = _pad_sym(newmat, (pad_iter_b, - pad_iter_a), method, axis) - pad_before -= pad_iter_b - pad_after -= pad_iter_a - safe_pad += pad_iter_b + pad_iter_a - newmat = _pad_sym(newmat, (pad_before, pad_after), method, axis) - - elif mode == 'wrap': - for axis, (pad_before, pad_after) in enumerate(pad_width): - # Recursive padding along any axis where `pad_amt` is too large - # for indexing tricks. We can only safely pad the original axis - # length, to keep the period of the reflections consistent. - safe_pad = newmat.shape[axis] - while ((pad_before > safe_pad) or - (pad_after > safe_pad)): - pad_iter_b = min(safe_pad, - safe_pad * (pad_before // safe_pad)) - pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad)) - newmat = _pad_wrap(newmat, (pad_iter_b, pad_iter_a), axis) - - pad_before -= pad_iter_b - pad_after -= pad_iter_a - safe_pad += pad_iter_b + pad_iter_a - newmat = _pad_wrap(newmat, (pad_before, pad_after), axis) - - return newmat + roi = _view_roi(padded, original_area_slice, axis) + while left_index > 0 or right_index > 0: + # Iteratively pad until dimension is filled with reflected + # values. This is necessary if the pad area is larger than + # the length of the original values in the current dimension. + left_index, right_index = _set_reflect_both( + roi, axis, (left_index, right_index), + method, include_edge + ) + + elif mode == "wrap": + for axis, (left_index, right_index) in zip(axes, pad_width): + roi = _view_roi(padded, original_area_slice, axis) + while left_index > 0 or right_index > 0: + # Iteratively pad until dimension is filled with wrapped + # values. This is necessary if the pad area is larger than + # the length of the original values in the current dimension. + left_index, right_index = _set_wrap_both( + roi, axis, (left_index, right_index)) + + return padded diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index fd64ecbd6..b53d8c03f 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -82,7 +82,7 @@ def ediff1d(ary, to_end=None, to_begin=None): array([ 1, 2, 3, -7]) >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99])) - array([-99, 1, 2, 3, -7, 88, 99]) + array([-99, 1, 2, ..., -7, 88, 99]) The returned array is always 1D. 
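A minimal sketch of the stricter casting introduced in the hunk below: `to_begin` and `to_end` are now cast to the input dtype up front, and a value that changes under the cast raises ``ValueError`` instead of the old ``can_cast``-based ``TypeError`` (illustrative only; the exact message text may differ)::

    >>> x = np.array([1, 2, 4, 7, 0])  # integer input
    >>> np.ediff1d(x, to_begin=-99, to_end=[88, 99])
    array([-99,   1,   2,   3,  -7,  88,  99])
    >>> np.ediff1d(x, to_begin=0.5)  # 0.5 -> 0 under the cast
    Traceback (most recent call last):
        ...
    ValueError: cannot convert 'to_begin' to array with dtype ...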
@@ -94,8 +94,7 @@ def ediff1d(ary, to_end=None, to_begin=None): # force a 1d array ary = np.asanyarray(ary).ravel() - # we have unit tests enforcing - # propagation of the dtype of input + # enforce propagation of the dtype of input # ary to returned result dtype_req = ary.dtype @@ -106,23 +105,22 @@ def ediff1d(ary, to_end=None, to_begin=None): if to_begin is None: l_begin = 0 else: - to_begin = np.asanyarray(to_begin) - if not np.can_cast(to_begin, dtype_req): - raise TypeError("dtype of to_begin must be compatible " - "with input ary") - - to_begin = to_begin.ravel() + _to_begin = np.asanyarray(to_begin, dtype=dtype_req) + if not np.all(_to_begin == to_begin): + raise ValueError("cannot convert 'to_begin' to array with dtype " + "'%r' as required for input ary" % dtype_req) + to_begin = _to_begin.ravel() l_begin = len(to_begin) if to_end is None: l_end = 0 else: - to_end = np.asanyarray(to_end) - if not np.can_cast(to_end, dtype_req): - raise TypeError("dtype of to_end must be compatible " - "with input ary") - - to_end = to_end.ravel() + _to_end = np.asanyarray(to_end, dtype=dtype_req) + # check that casting has not overflowed + if not np.all(_to_end == to_end): + raise ValueError("cannot convert 'to_end' to array with dtype " + "'%r' as required for input ary" % dtype_req) + to_end = _to_end.ravel() l_end = len(to_end) # do the calculation in place and copy to_begin and to_end @@ -241,13 +239,11 @@ def unique(ar, return_index=False, return_inverse=False, >>> a = np.array(['a', 'b', 'b', 'c', 'a']) >>> u, indices = np.unique(a, return_index=True) >>> u - array(['a', 'b', 'c'], - dtype='|S1') + array(['a', 'b', 'c'], dtype='<U1') >>> indices array([0, 1, 3]) >>> a[indices] - array(['a', 'b', 'c'], - dtype='|S1') + array(['a', 'b', 'c'], dtype='<U1') Reconstruct the input array from the unique values: @@ -256,9 +252,9 @@ def unique(ar, return_index=False, return_inverse=False, >>> u array([1, 2, 3, 4, 6]) >>> indices - array([0, 1, 4, 3, 1, 2, 1]) + array([0, 1, 4, ..., 1, 2, 1]) >>> u[indices] - array([1, 2, 6, 4, 2, 3, 2]) + array([1, 2, 6, ..., 2, 3, 2]) """ ar = np.asanyarray(ar) @@ -661,8 +657,8 @@ def isin(element, test_elements, assume_unique=False, invert=False): >>> test_elements = [1, 2, 4, 8] >>> mask = np.isin(element, test_elements) >>> mask - array([[ False, True], - [ True, False]]) + array([[False, True], + [ True, False]]) >>> element[mask] array([2, 4]) @@ -676,7 +672,7 @@ def isin(element, test_elements, assume_unique=False, invert=False): >>> mask = np.isin(element, test_elements, invert=True) >>> mask array([[ True, False], - [ False, True]]) + [False, True]]) >>> element[mask] array([0, 6]) @@ -685,14 +681,14 @@ def isin(element, test_elements, assume_unique=False, invert=False): >>> test_set = {1, 2, 4, 8} >>> np.isin(element, test_set) - array([[ False, False], - [ False, False]]) + array([[False, False], + [False, False]]) Casting the set to a list gives the expected result: >>> np.isin(element, list(test_set)) - array([[ False, True], - [ True, False]]) + array([[False, True], + [ True, False]]) """ element = np.asarray(element) return in1d(element, test_elements, assume_unique=assume_unique, diff --git a/numpy/lib/arrayterator.py b/numpy/lib/arrayterator.py index f2d4fe9fd..c16668582 100644 --- a/numpy/lib/arrayterator.py +++ b/numpy/lib/arrayterator.py @@ -80,9 +80,8 @@ class Arrayterator(object): >>> for subarr in a_itor: ... if not subarr.all(): - ... print(subarr, subarr.shape) - ... - [[[[0 1]]]] (1, 1, 1, 2) + ... 
print(subarr, subarr.shape) # doctest: +SKIP + >>> # [[[[0 1]]]] (1, 1, 1, 2) """ @@ -160,7 +159,7 @@ class Arrayterator(object): ... if not subarr: ... print(subarr, type(subarr)) ... - 0 <type 'numpy.int32'> + 0 <class 'numpy.int64'> """ for block in self: diff --git a/numpy/lib/financial.py b/numpy/lib/financial.py index e1e297492..216687475 100644 --- a/numpy/lib/financial.py +++ b/numpy/lib/financial.py @@ -127,7 +127,7 @@ def fv(rate, nper, pmt, pv, when='end'): >>> a = np.array((0.05, 0.06, 0.07))/12 >>> np.fv(a, 10*12, -100, -100) - array([ 15692.92889434, 16569.87435405, 17509.44688102]) + array([ 15692.92889434, 16569.87435405, 17509.44688102]) # may vary """ when = _convert_when(when) @@ -275,7 +275,7 @@ def nper(rate, pmt, pv, fv=0, when='end'): If you only had $150/month to pay towards the loan, how long would it take to pay-off a loan of $8,000 at 7% annual interest? - >>> print(round(np.nper(0.07/12, -150, 8000), 5)) + >>> print(np.round(np.nper(0.07/12, -150, 8000), 5)) 64.07335 So, over 64 months would be required to pay off the loan. @@ -286,10 +286,10 @@ def nper(rate, pmt, pv, fv=0, when='end'): >>> np.nper(*(np.ogrid[0.07/12: 0.08/12: 0.01/12, ... -150 : -99 : 50 , ... 8000 : 9001 : 1000])) - array([[[ 64.07334877, 74.06368256], - [ 108.07548412, 127.99022654]], - [[ 66.12443902, 76.87897353], - [ 114.70165583, 137.90124779]]]) + array([[[ 64.07334877, 74.06368256], + [108.07548412, 127.99022654]], + [[ 66.12443902, 76.87897353], + [114.70165583, 137.90124779]]]) """ when = _convert_when(when) @@ -539,7 +539,7 @@ def pv(rate, nper, pmt, fv=0, when='end'): >>> a = np.array((0.05, 0.04, 0.03))/12 >>> np.pv(a, 10*12, -100, 15692.93) - array([ -100.00067132, -649.26771385, -1273.78633713]) + array([ -100.00067132, -649.26771385, -1273.78633713]) # may vary So, to end up with the same $15692.93 under the same $100 per month "savings plan," for annual interest rates of 4% and 3%, one would @@ -704,15 +704,15 @@ def irr(values): Examples -------- - >>> round(irr([-100, 39, 59, 55, 20]), 5) + >>> round(np.irr([-100, 39, 59, 55, 20]), 5) 0.28095 - >>> round(irr([-100, 0, 0, 74]), 5) + >>> round(np.irr([-100, 0, 0, 74]), 5) -0.0955 - >>> round(irr([-100, 100, 0, -7]), 5) + >>> round(np.irr([-100, 100, 0, -7]), 5) -0.0833 - >>> round(irr([-100, 100, 0, 7]), 5) + >>> round(np.irr([-100, 100, 0, 7]), 5) 0.06206 - >>> round(irr([-5, 10.5, 1, -8, 1]), 5) + >>> round(np.irr([-5, 10.5, 1, -8, 1]), 5) 0.0886 (Compare with the Example given for numpy.lib.financial.npv) @@ -777,7 +777,7 @@ def npv(rate, values): Examples -------- >>> np.npv(0.281,[-100, 39, 59, 55, 20]) - -0.0084785916384548798 + -0.0084785916384548798 # may vary (Compare with the Example given for numpy.lib.financial.irr) diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 10945e5e8..3bf818812 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -146,10 +146,17 @@ The description of the fourth element of the header therefore has become: "The next 4 bytes form a little-endian unsigned int: the length of the header data HEADER_LEN." +Format Version 3.0 +------------------ + +This version replaces the ASCII string (which in practice was latin1) with +a utf8-encoded string, so supports structured types with any unicode field +names. 
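A short sketch of what this enables: a structured dtype whose field name has no latin1 encoding can now be saved, with ``np.save`` falling back to the lowest format version whose header encoding can represent it (the field name here is purely illustrative)::

    >>> import io
    >>> arr = np.zeros(3, dtype=[('Δt', 'f8')])  # 'Δ' is not latin1-encodable
    >>> buf = io.BytesIO()
    >>> np.save(buf, arr)  # emits a UserWarning: stored in format 3.0
    >>> _ = buf.seek(0)
    >>> np.load(buf).dtype.names
    ('Δt',)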
+ Notes ----- The ``.npy`` format, including motivation for creating it and a comparison of -alternatives, is described in the `"npy-format" NEP +alternatives, is described in the `"npy-format" NEP <https://www.numpy.org/neps/nep-0001-npy-format.html>`_, however details have evolved with time and this document is more current. @@ -162,9 +169,8 @@ import io import warnings from numpy.lib.utils import safe_eval from numpy.compat import ( - asbytes, asstr, isfileobj, long, os_fspath + isfileobj, long, os_fspath, pickle ) -from numpy.core.numeric import pickle MAGIC_PREFIX = b'\x93NUMPY' @@ -174,10 +180,16 @@ BUFFER_SIZE = 2**18 # size of buffer for reading npz files in bytes # difference between version 1.0 and 2.0 is a 4 byte (I) header length # instead of 2 bytes (H) allowing storage of large structured arrays +_header_size_info = { + (1, 0): ('<H', 'latin1'), + (2, 0): ('<I', 'latin1'), + (3, 0): ('<I', 'utf8'), +} + def _check_version(version): - if version not in [(1, 0), (2, 0), None]: - msg = "we only support format version (1,0) and (2, 0), not %s" + if version not in [(1, 0), (2, 0), (3, 0), None]: + msg = "we only support format version (1,0), (2,0), and (3,0), not %s" raise ValueError(msg % (version,)) def magic(major, minor): @@ -262,15 +274,19 @@ def dtype_to_descr(dtype): def descr_to_dtype(descr): ''' descr may be stored as dtype.descr, which is a list of - (name, format, [shape]) tuples. Offsets are not explicitly saved, rather - empty fields with name,format == '', '|Vn' are added as padding. + (name, format, [shape]) tuples where format may be a str or a tuple. + Offsets are not explicitly saved, rather empty fields with + name, format == '', '|Vn' are added as padding. This function reverses the process, eliminating the empty padding fields. ''' - if isinstance(descr, (str, dict)): + if isinstance(descr, str): # No padding removal needed return numpy.dtype(descr) - + elif isinstance(descr, tuple): + # subtype, will always have a shape descr[1] + dt = descr_to_dtype(descr[0]) + return numpy.dtype((dt, descr[1])) fields = [] offset = 0 for field in descr: @@ -323,6 +339,56 @@ def header_data_from_array_1_0(array): d['descr'] = dtype_to_descr(array.dtype) return d + +def _wrap_header(header, version): + """ + Takes a stringified header, and attaches the prefix and padding to it + """ + import struct + assert version is not None + fmt, encoding = _header_size_info[version] + if not isinstance(header, bytes): # always true on python 3 + header = header.encode(encoding) + hlen = len(header) + 1 + padlen = ARRAY_ALIGN - ((MAGIC_LEN + struct.calcsize(fmt) + hlen) % ARRAY_ALIGN) + try: + header_prefix = magic(*version) + struct.pack(fmt, hlen + padlen) + except struct.error: + msg = "Header length {} too big for version={}".format(hlen, version) + raise ValueError(msg) + + # Pad the header with spaces and a final newline such that the magic + # string, the header-length short and the header are aligned on a + # ARRAY_ALIGN byte boundary. This supports memory mapping of dtypes + # aligned up to ARRAY_ALIGN on systems like Linux where mmap() + # offset must be page-aligned (i.e. the beginning of the file). 
+    return header_prefix + header + b' '*padlen + b'\n'
+
+
+def _wrap_header_guess_version(header):
+    """
+    Like `_wrap_header`, but chooses an appropriate version given the contents
+    """
+    try:
+        return _wrap_header(header, (1, 0))
+    except ValueError:
+        pass
+
+    try:
+        ret = _wrap_header(header, (2, 0))
+    except UnicodeEncodeError:
+        pass
+    else:
+        warnings.warn("Stored array in format 2.0. It can only be "
+                      "read by NumPy >= 1.9", UserWarning, stacklevel=2)
+        return ret
+
+    header = _wrap_header(header, (3, 0))
+    warnings.warn("Stored array in format 3.0. It can only be "
+                  "read by NumPy >= 1.17", UserWarning, stacklevel=2)
+    return header
+
+
 def _write_array_header(fp, d, version=None):
     """ Write the header for an array and returns the version used
@@ -336,48 +402,19 @@
         None means use oldest that works
         explicit version will raise a ValueError if the format does not
         allow saving this data. Default: None
-    Returns
-    -------
-    version : tuple of int
-        the file version which needs to be used to store the data
     """
-    import struct
     header = ["{"]
     for key, value in sorted(d.items()):
         # Need to use repr here, since we eval these when reading
         header.append("'%s': %s, " % (key, repr(value)))
     header.append("}")
     header = "".join(header)
-    header = asbytes(_filter_header(header))
-
-    hlen = len(header) + 1 # 1 for newline
-    padlen_v1 = ARRAY_ALIGN - ((MAGIC_LEN + struct.calcsize('<H') + hlen) % ARRAY_ALIGN)
-    padlen_v2 = ARRAY_ALIGN - ((MAGIC_LEN + struct.calcsize('<I') + hlen) % ARRAY_ALIGN)
-
-    # Which version(s) we write depends on the total header size; v1 has a max of 65535
-    if hlen + padlen_v1 < 2**16 and version in (None, (1, 0)):
-        version = (1, 0)
-        header_prefix = magic(1, 0) + struct.pack('<H', hlen + padlen_v1)
-        topad = padlen_v1
-    elif hlen + padlen_v2 < 2**32 and version in (None, (2, 0)):
-        version = (2, 0)
-        header_prefix = magic(2, 0) + struct.pack('<I', hlen + padlen_v2)
-        topad = padlen_v2
+    header = _filter_header(header)
+    if version is None:
+        header = _wrap_header_guess_version(header)
     else:
-        msg = "Header length %s too big for version=%s"
-        msg %= (hlen, version)
-        raise ValueError(msg)
-
-    # Pad the header with spaces and a final newline such that the magic
-    # string, the header-length short and the header are aligned on a
-    # ARRAY_ALIGN byte boundary. This supports memory mapping of dtypes
-    # aligned up to ARRAY_ALIGN on systems like Linux where mmap()
-    # offset must be page-aligned (i.e. the beginning of the file).
-    header = header + b' '*topad + b'\n'
-
-    fp.write(header_prefix)
+        header = _wrap_header(header, version)
     fp.write(header)
-    return version

 def write_array_header_1_0(fp, d):
     """ Write the header for an array using the 1.0 format.
@@ -480,7 +517,7 @@
     Parameters
     ----------
-    s : byte string
+    s : string
         Npy file header.

     Returns
@@ -498,7 +535,7 @@
     tokens = []
     last_token_was_number = False
     # adding newline as python 2.7.5 workaround
-    string = asstr(s) + "\n"
+    string = s + "\n"
     for token in tokenize.generate_tokens(StringIO(string).readline):
         token_type = token[0]
         token_string = token[1]
@@ -520,16 +557,15 @@
     # Read an unsigned, little-endian short int which has the length of the
     # header.
import struct - if version == (1, 0): - hlength_type = '<H' - elif version == (2, 0): - hlength_type = '<I' - else: - raise ValueError("Invalid version %r" % version) + hinfo = _header_size_info.get(version) + if hinfo is None: + raise ValueError("Invalid version {!r}".format(version)) + hlength_type, encoding = hinfo hlength_str = _read_bytes(fp, struct.calcsize(hlength_type), "array header length") header_length = struct.unpack(hlength_type, hlength_str)[0] header = _read_bytes(fp, header_length, "array header") + header = header.decode(encoding) # The header is a pretty-printed string representation of a literal # Python dictionary with trailing newlines padded to a ARRAY_ALIGN byte @@ -541,29 +577,29 @@ def _read_array_header(fp, version): try: d = safe_eval(header) except SyntaxError as e: - msg = "Cannot parse header: %r\nException: %r" - raise ValueError(msg % (header, e)) + msg = "Cannot parse header: {!r}\nException: {!r}" + raise ValueError(msg.format(header, e)) if not isinstance(d, dict): - msg = "Header is not a dictionary: %r" - raise ValueError(msg % d) + msg = "Header is not a dictionary: {!r}" + raise ValueError(msg.format(d)) keys = sorted(d.keys()) if keys != ['descr', 'fortran_order', 'shape']: - msg = "Header does not contain the correct keys: %r" - raise ValueError(msg % (keys,)) + msg = "Header does not contain the correct keys: {!r}" + raise ValueError(msg.format(keys)) # Sanity-check the values. if (not isinstance(d['shape'], tuple) or not numpy.all([isinstance(x, (int, long)) for x in d['shape']])): - msg = "shape is not valid: %r" - raise ValueError(msg % (d['shape'],)) + msg = "shape is not valid: {!r}" + raise ValueError(msg.format(d['shape'])) if not isinstance(d['fortran_order'], bool): - msg = "fortran_order is not a valid bool: %r" - raise ValueError(msg % (d['fortran_order'],)) + msg = "fortran_order is not a valid bool: {!r}" + raise ValueError(msg.format(d['fortran_order'])) try: dtype = descr_to_dtype(d['descr']) except TypeError as e: - msg = "descr is not a valid dtype descriptor: %r" - raise ValueError(msg % (d['descr'],)) + msg = "descr is not a valid dtype descriptor: {!r}" + raise ValueError(msg.format(d['descr'])) return d['shape'], d['fortran_order'], dtype @@ -604,12 +640,7 @@ def write_array(fp, array, version=None, allow_pickle=True, pickle_kwargs=None): """ _check_version(version) - used_ver = _write_array_header(fp, header_data_from_array_1_0(array), - version) - # this warning can be removed when 1.9 has aged enough - if version != (2, 0) and used_ver == (2, 0): - warnings.warn("Stored array in format 2.0. It can only be" - "read by NumPy >= 1.9", UserWarning, stacklevel=2) + _write_array_header(fp, header_data_from_array_1_0(array), version) if array.itemsize == 0: buffersize = 0 @@ -619,14 +650,13 @@ def write_array(fp, array, version=None, allow_pickle=True, pickle_kwargs=None): if array.dtype.hasobject: # We contain Python objects so we cannot write out the data - # directly. Instead, we will pickle it out with version 2 of the - # pickle protocol. + # directly. 
Instead, we will pickle it out
         if not allow_pickle:
             raise ValueError("Object arrays cannot be saved when "
                              "allow_pickle=False")
         if pickle_kwargs is None:
             pickle_kwargs = {}
-        pickle.dump(array, fp, protocol=2, **pickle_kwargs)
+        pickle.dump(array, fp, protocol=3, **pickle_kwargs)
     elif array.flags.f_contiguous and not array.flags.c_contiguous:
         if isfileobj(fp):
             array.T.tofile(fp)
@@ -645,7 +675,7 @@
             fp.write(chunk.tobytes('C'))


-def read_array(fp, allow_pickle=True, pickle_kwargs=None):
+def read_array(fp, allow_pickle=False, pickle_kwargs=None):
     """
     Read an array from an NPY file.

@@ -655,7 +685,11 @@
         If this is not a real file object, then this may take extra memory
         and time.
     allow_pickle : bool, optional
-        Whether to allow reading pickled data. Default: True
+        Whether to allow reading pickled data. Default: False
+
+        .. versionchanged:: 1.16.3
+            Made default False in response to CVE-2019-6446.
+
     pickle_kwargs : dict
         Additional keyword arguments to pass to pickle.load. These are only
         useful when loading object arrays saved on Python 2 when using
@@ -806,20 +840,12 @@
             shape=shape,
         )
         # If we got here, then it should be safe to create the file.
-        fp = open(os_fspath(filename), mode+'b')
-        try:
-            used_ver = _write_array_header(fp, d, version)
-            # this warning can be removed when 1.9 has aged enough
-            if version != (2, 0) and used_ver == (2, 0):
-                warnings.warn("Stored array in format 2.0. It can only be"
-                              "read by NumPy >= 1.9", UserWarning, stacklevel=2)
+        with open(os_fspath(filename), mode+'b') as fp:
+            _write_array_header(fp, d, version)
             offset = fp.tell()
-        finally:
-            fp.close()
     else:
         # Read the header of the file first.
-        fp = open(os_fspath(filename), 'rb')
-        try:
+        with open(os_fspath(filename), 'rb') as fp:
             version = read_magic(fp)
             _check_version(version)

@@ -828,8 +854,6 @@
                 msg = "Array can't be memory-mapped: Python objects in dtype."
raise ValueError(msg) offset = fp.tell() - finally: - fp.close() if fortran_order: order = 'F' diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 5f87c8b2c..cf7246402 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -1,7 +1,7 @@ from __future__ import division, absolute_import, print_function try: - # Accessing collections abstact classes from collections + # Accessing collections abstract classes from collections # has been deprecated since Python 3.3 import collections.abc as collections_abc except ImportError: @@ -31,7 +31,6 @@ from numpy.core.overrides import set_module from numpy.core import overrides from numpy.core.function_base import add_newdoc from numpy.lib.twodim_base import diag -from .utils import deprecate from numpy.core.multiarray import ( _insert, add_docstring, bincount, normalize_axis_index, _monotonicity, interp as compiled_interp, interp_complex as compiled_interp_complex @@ -218,12 +217,12 @@ def flip(m, axis=None): [2, 3]], [[4, 5], [6, 7]]]) - >>> flip(A, 0) + >>> np.flip(A, 0) array([[[4, 5], [6, 7]], [[0, 1], [2, 3]]]) - >>> flip(A, 1) + >>> np.flip(A, 1) array([[[2, 3], [0, 1]], [[6, 7], @@ -239,7 +238,7 @@ def flip(m, axis=None): [[1, 0], [3, 2]]]) >>> A = np.random.randn(3,4,5) - >>> np.all(flip(A,2) == A[:,:,::-1,...]) + >>> np.all(np.flip(A,2) == A[:,:,::-1,...]) True """ if not hasattr(m, 'ndim'): @@ -359,7 +358,7 @@ def average(a, axis=None, weights=None, returned=False): Examples -------- - >>> data = range(1,5) + >>> data = list(range(1,5)) >>> data [1, 2, 3, 4] >>> np.average(data) @@ -373,13 +372,12 @@ def average(a, axis=None, weights=None, returned=False): [2, 3], [4, 5]]) >>> np.average(data, axis=1, weights=[1./4, 3./4]) - array([ 0.75, 2.75, 4.75]) + array([0.75, 2.75, 4.75]) >>> np.average(data, weights=[1./4, 3./4]) - Traceback (most recent call last): - ... + ... TypeError: Axis must be specified when shapes of a and weights differ. - + >>> a = np.ones(5, dtype=np.float128) >>> w = np.ones(5, dtype=np.complex64) >>> avg = np.average(a, weights=w) @@ -586,7 +584,7 @@ def piecewise(x, condlist, funclist, *args, **kw): ``x >= 0``. >>> np.piecewise(x, [x < 0, x >= 0], [lambda x: -x, lambda x: x]) - array([ 2.5, 1.5, 0.5, 0.5, 1.5, 2.5]) + array([2.5, 1.5, 0.5, 0.5, 1.5, 2.5]) Apply the same function to a scalar value. @@ -671,7 +669,7 @@ def select(condlist, choicelist, default=0): >>> condlist = [x<3, x>5] >>> choicelist = [x, x**2] >>> np.select(condlist, choicelist) - array([ 0, 1, 2, 0, 0, 0, 36, 49, 64, 81]) + array([ 0, 1, 2, ..., 49, 64, 81]) """ # Check the size of condlist and choicelist are the same, or abort. @@ -684,7 +682,7 @@ def select(condlist, choicelist, default=0): # 2014-02-24, 1.9 warnings.warn("select with an empty condition list is not possible" "and will be deprecated", - DeprecationWarning, stacklevel=2) + DeprecationWarning, stacklevel=3) return np.asarray(default)[()] choicelist = [np.asarray(choice) for choice in choicelist] @@ -719,7 +717,7 @@ def select(condlist, choicelist, default=0): msg = "select condlists containing integer ndarrays is deprecated " \ "and will be removed in the future. Use `.astype(bool)` to " \ "convert to bools." - warnings.warn(msg, DeprecationWarning, stacklevel=2) + warnings.warn(msg, DeprecationWarning, stacklevel=3) if choicelist[0].ndim == 0: # This may be common, so avoid the call. @@ -854,9 +852,9 @@ def gradient(f, *varargs, **kwargs): -------- >>> f = np.array([1, 2, 4, 7, 11, 16], dtype=float) >>> np.gradient(f) - array([ 1. 
, 1.5, 2.5, 3.5, 4.5, 5. ]) + array([1. , 1.5, 2.5, 3.5, 4.5, 5. ]) >>> np.gradient(f, 2) - array([ 0.5 , 0.75, 1.25, 1.75, 2.25, 2.5 ]) + array([0.5 , 0.75, 1.25, 1.75, 2.25, 2.5 ]) Spacing can be also specified with an array that represents the coordinates of the values F along the dimensions. @@ -864,13 +862,13 @@ def gradient(f, *varargs, **kwargs): >>> x = np.arange(f.size) >>> np.gradient(f, x) - array([ 1. , 1.5, 2.5, 3.5, 4.5, 5. ]) + array([1. , 1.5, 2.5, 3.5, 4.5, 5. ]) Or a non uniform one: >>> x = np.array([0., 1., 1.5, 3.5, 4., 6.], dtype=float) >>> np.gradient(f, x) - array([ 1. , 3. , 3.5, 6.7, 6.9, 2.5]) + array([1. , 3. , 3.5, 6.7, 6.9, 2.5]) For two dimensional arrays, the return will be two arrays ordered by axis. In this example the first array stands for the gradient in @@ -878,8 +876,8 @@ def gradient(f, *varargs, **kwargs): >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=float)) [array([[ 2., 2., -1.], - [ 2., 2., -1.]]), array([[ 1. , 2.5, 4. ], - [ 1. , 1. , 1. ]])] + [ 2., 2., -1.]]), array([[1. , 2.5, 4. ], + [1. , 1. , 1. ]])] In this example the spacing is also specified: uniform for axis=0 and non uniform for axis=1 @@ -888,17 +886,17 @@ def gradient(f, *varargs, **kwargs): >>> y = [1., 1.5, 3.5] >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=float), dx, y) [array([[ 1. , 1. , -0.5], - [ 1. , 1. , -0.5]]), array([[ 2. , 2. , 2. ], - [ 2. , 1.7, 0.5]])] + [ 1. , 1. , -0.5]]), array([[2. , 2. , 2. ], + [2. , 1.7, 0.5]])] It is possible to specify how boundaries are treated using `edge_order` >>> x = np.array([0, 1, 2, 3, 4]) >>> f = x**2 >>> np.gradient(f, edge_order=1) - array([ 1., 2., 4., 6., 7.]) + array([1., 2., 4., 6., 7.]) >>> np.gradient(f, edge_order=2) - array([-0., 2., 4., 6., 8.]) + array([0., 2., 4., 6., 8.]) The `axis` keyword can be used to specify a subset of axes of which the gradient is calculated @@ -1151,7 +1149,7 @@ def diff(a, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue): """ Calculate the n-th discrete difference along the given axis. - The first difference is given by ``out[n] = a[n+1] - a[n]`` along + The first difference is given by ``out[i] = a[i+1] - a[i]`` along the given axis, higher differences are calculated by using `diff` recursively. @@ -1200,7 +1198,7 @@ def diff(a, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue): >>> np.diff(u8_arr) array([255], dtype=uint8) >>> u8_arr[1,...] - u8_arr[0,...] - array(255, np.uint8) + 255 If this is not desirable, then the array should be cast to a larger integer type first: @@ -1237,6 +1235,8 @@ def diff(a, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue): a = asanyarray(a) nd = a.ndim + if nd == 0: + raise ValueError("diff requires input that is at least one dimensional") axis = normalize_axis_index(axis, nd) combined = [] @@ -1340,7 +1340,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): >>> np.interp(2.5, xp, fp) 1.0 >>> np.interp([0, 1, 1.5, 2.72, 3.14], xp, fp) - array([ 3. , 3. , 2.5 , 0.56, 0. ]) + array([3. , 3. , 2.5 , 0.56, 0. ]) >>> UNDEF = -99.0 >>> np.interp(3.14, xp, fp, right=UNDEF) -99.0 @@ -1364,7 +1364,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): >>> xp = [190, -190, 350, -350] >>> fp = [5, 10, 3, 4] >>> np.interp(x, xp, fp, period=360) - array([7.5, 5., 8.75, 6.25, 3., 3.25, 3.5, 3.75]) + array([7.5 , 5. , 8.75, 6.25, 3. 
, 3.25, 3.5 , 3.75]) Complex interpolation: @@ -1372,7 +1372,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): >>> xp = [2,3,5] >>> fp = [1.0j, 0, 2+3j] >>> np.interp(x, xp, fp) - array([ 0.+1.j , 1.+1.5j]) + array([0.+1.j , 1.+1.5j]) """ @@ -1431,9 +1431,9 @@ def angle(z, deg=False): Returns ------- angle : ndarray or scalar - The counterclockwise angle from the positive real axis on - the complex plane, with dtype as numpy.float64. - + The counterclockwise angle from the positive real axis on the complex + plane in the range ``(-pi, pi]``, with dtype as numpy.float64. + ..versionchanged:: 1.16.0 This function works on subclasses of ndarray like `ma.array`. @@ -1445,7 +1445,7 @@ def angle(z, deg=False): Examples -------- >>> np.angle([1.0, 1.0j, 1+1j]) # in radians - array([ 0. , 1.57079633, 0.78539816]) + array([ 0. , 1.57079633, 0.78539816]) # may vary >>> np.angle(1+1j, deg=True) # in degrees 45.0 @@ -1505,9 +1505,9 @@ def unwrap(p, discont=pi, axis=-1): >>> phase = np.linspace(0, np.pi, num=5) >>> phase[3:] += np.pi >>> phase - array([ 0. , 0.78539816, 1.57079633, 5.49778714, 6.28318531]) + array([ 0. , 0.78539816, 1.57079633, 5.49778714, 6.28318531]) # may vary >>> np.unwrap(phase) - array([ 0. , 0.78539816, 1.57079633, -0.78539816, 0. ]) + array([ 0. , 0.78539816, 1.57079633, -0.78539816, 0. ]) # may vary """ p = asarray(p) @@ -1547,10 +1547,10 @@ def sort_complex(a): Examples -------- >>> np.sort_complex([5, 3, 6, 2, 1]) - array([ 1.+0.j, 2.+0.j, 3.+0.j, 5.+0.j, 6.+0.j]) + array([1.+0.j, 2.+0.j, 3.+0.j, 5.+0.j, 6.+0.j]) >>> np.sort_complex([1 + 2j, 2 - 1j, 3 - 2j, 3 - 3j, 3 + 5j]) - array([ 1.+2.j, 2.-1.j, 3.-3.j, 3.-2.j, 3.+5.j]) + array([1.+2.j, 2.-1.j, 3.-3.j, 3.-2.j, 3.+5.j]) """ b = array(a, copy=True) @@ -1596,7 +1596,7 @@ def trim_zeros(filt, trim='fb'): array([1, 2, 3, 0, 2, 1]) >>> np.trim_zeros(a, 'b') - array([0, 0, 0, 1, 2, 3, 0, 2, 1]) + array([0, 0, 0, ..., 0, 2, 1]) The input data type is preserved, list/tuple in means list/tuple out. @@ -1931,6 +1931,30 @@ class vectorize(object): vectorized : callable Vectorized function. + See Also + -------- + frompyfunc : Takes an arbitrary Python function and returns a ufunc + + Notes + ----- + The `vectorize` function is provided primarily for convenience, not for + performance. The implementation is essentially a for loop. + + If `otypes` is not specified, then a call to the function with the + first argument will be used to determine the number of outputs. The + results of this call will be cached if `cache` is `True` to prevent + calling the function twice. However, to implement the cache, the + original function must be wrapped which will slow down subsequent + calls, so only do this if your function is expensive. + + The new keyword argument interface and `excluded` argument support + further degrades performance. + + References + ---------- + .. [1] NumPy Reference, section `Generalized Universal Function API + <https://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_. + Examples -------- >>> def myfunc(a, b): @@ -1958,11 +1982,11 @@ class vectorize(object): >>> out = vfunc([1, 2, 3, 4], 2) >>> type(out[0]) - <type 'numpy.int32'> + <class 'numpy.int64'> >>> vfunc = np.vectorize(myfunc, otypes=[float]) >>> out = vfunc([1, 2, 3, 4], 2) >>> type(out[0]) - <type 'numpy.float64'> + <class 'numpy.float64'> The `excluded` argument can be used to prevent vectorizing over certain arguments. 
This can be useful for array-like arguments of a fixed length @@ -1990,7 +2014,7 @@ class vectorize(object): >>> import scipy.stats >>> pearsonr = np.vectorize(scipy.stats.pearsonr, - ... signature='(n),(n)->(),()') + ... signature='(n),(n)->(),()') >>> pearsonr([[0, 1, 2, 3]], [[1, 2, 3, 4], [4, 3, 2, 1]]) (array([ 1., -1.]), array([ 0., 0.])) @@ -1998,36 +2022,12 @@ class vectorize(object): >>> convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)') >>> convolve(np.eye(4), [1, 2, 1]) - array([[ 1., 2., 1., 0., 0., 0.], - [ 0., 1., 2., 1., 0., 0.], - [ 0., 0., 1., 2., 1., 0.], - [ 0., 0., 0., 1., 2., 1.]]) + array([[1., 2., 1., 0., 0., 0.], + [0., 1., 2., 1., 0., 0.], + [0., 0., 1., 2., 1., 0.], + [0., 0., 0., 1., 2., 1.]]) - See Also - -------- - frompyfunc : Takes an arbitrary Python function and returns a ufunc - - Notes - ----- - The `vectorize` function is provided primarily for convenience, not for - performance. The implementation is essentially a for loop. - - If `otypes` is not specified, then a call to the function with the - first argument will be used to determine the number of outputs. The - results of this call will be cached if `cache` is `True` to prevent - calling the function twice. However, to implement the cache, the - original function must be wrapped which will slow down subsequent - calls, so only do this if your function is expensive. - - The new keyword argument interface and `excluded` argument support - further degrades performance. - - References - ---------- - .. [1] NumPy Reference, section `Generalized Universal Function API - <https://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_. """ - def __init__(self, pyfunc, otypes=None, doc=None, excluded=None, cache=False, signature=None): self.pyfunc = pyfunc @@ -2311,10 +2311,14 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, array `m` and let ``f = fweights`` and ``a = aweights`` for brevity. The steps to compute the weighted covariance are as follows:: + >>> m = np.arange(10, dtype=np.float64) + >>> f = np.arange(10) * 2 + >>> a = np.arange(10) ** 2. 
+ >>> ddof = 9 # N - 1 >>> w = f * a >>> v1 = np.sum(w) >>> v2 = np.sum(w * a) - >>> m -= np.sum(m * w, axis=1, keepdims=True) / v1 + >>> m -= np.sum(m * w, axis=None, keepdims=True) / v1 >>> cov = np.dot(m * w, m.T) * v1 / (v1**2 - ddof * v2) Note that when ``a == 1``, the normalization factor @@ -2346,14 +2350,14 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, >>> x = [-2.1, -1, 4.3] >>> y = [3, 1.1, 0.12] >>> X = np.stack((x, y), axis=0) - >>> print(np.cov(X)) - [[ 11.71 -4.286 ] - [ -4.286 2.14413333]] - >>> print(np.cov(x, y)) - [[ 11.71 -4.286 ] - [ -4.286 2.14413333]] - >>> print(np.cov(x)) - 11.71 + >>> np.cov(X) + array([[11.71 , -4.286 ], # may vary + [-4.286 , 2.144133]]) + >>> np.cov(x, y) + array([[11.71 , -4.286 ], # may vary + [-4.286 , 2.144133]]) + >>> np.cov(x) + array(11.71) """ # Check inputs @@ -2439,7 +2443,7 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, if fact <= 0: warnings.warn("Degrees of freedom <= 0 for slice", - RuntimeWarning, stacklevel=2) + RuntimeWarning, stacklevel=3) fact = 0.0 X -= avg[:, None] @@ -2518,7 +2522,7 @@ def corrcoef(x, y=None, rowvar=True, bias=np._NoValue, ddof=np._NoValue): if bias is not np._NoValue or ddof is not np._NoValue: # 2015-03-15, 1.10 warnings.warn('bias and ddof have no effect and are deprecated', - DeprecationWarning, stacklevel=2) + DeprecationWarning, stacklevel=3) c = cov(x, y, rowvar) try: d = diag(c) @@ -2590,12 +2594,12 @@ def blackman(M): Examples -------- + >>> import matplotlib.pyplot as plt >>> np.blackman(12) - array([ -1.38777878e-17, 3.26064346e-02, 1.59903635e-01, - 4.14397981e-01, 7.36045180e-01, 9.67046769e-01, - 9.67046769e-01, 7.36045180e-01, 4.14397981e-01, - 1.59903635e-01, 3.26064346e-02, -1.38777878e-17]) - + array([-1.38777878e-17, 3.26064346e-02, 1.59903635e-01, # may vary + 4.14397981e-01, 7.36045180e-01, 9.67046769e-01, + 9.67046769e-01, 7.36045180e-01, 4.14397981e-01, + 1.59903635e-01, 3.26064346e-02, -1.38777878e-17]) Plot the window and the frequency response: @@ -2604,30 +2608,31 @@ def blackman(M): >>> plt.plot(window) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Blackman window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Blackman window') >>> plt.ylabel("Amplitude") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Amplitude') >>> plt.xlabel("Sample") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Sample') >>> plt.show() >>> plt.figure() - <matplotlib.figure.Figure object at 0x...> + <Figure size 640x480 with 0 Axes> >>> A = fft(window, 2048) / 25.5 >>> mag = np.abs(fftshift(A)) >>> freq = np.linspace(-0.5, 0.5, len(A)) - >>> response = 20 * np.log10(mag) + >>> with np.errstate(divide='ignore', invalid='ignore'): + ... response = 20 * np.log10(mag) + ... >>> response = np.clip(response, -100, 100) >>> plt.plot(freq, response) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Frequency response of Blackman window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Frequency response of Blackman window') >>> plt.ylabel("Magnitude [dB]") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Magnitude [dB]') >>> plt.xlabel("Normalized frequency [cycles per sample]") - <matplotlib.text.Text object at 0x...> - >>> plt.axis('tight') - (-0.5, 0.5, -100.0, ...) 
+ Text(0.5, 0, 'Normalized frequency [cycles per sample]') + >>> _ = plt.axis('tight') >>> plt.show() """ @@ -2699,8 +2704,9 @@ def bartlett(M): Examples -------- + >>> import matplotlib.pyplot as plt >>> np.bartlett(12) - array([ 0. , 0.18181818, 0.36363636, 0.54545455, 0.72727273, + array([ 0. , 0.18181818, 0.36363636, 0.54545455, 0.72727273, # may vary 0.90909091, 0.90909091, 0.72727273, 0.54545455, 0.36363636, 0.18181818, 0. ]) @@ -2711,30 +2717,31 @@ def bartlett(M): >>> plt.plot(window) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Bartlett window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Bartlett window') >>> plt.ylabel("Amplitude") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Amplitude') >>> plt.xlabel("Sample") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Sample') >>> plt.show() >>> plt.figure() - <matplotlib.figure.Figure object at 0x...> + <Figure size 640x480 with 0 Axes> >>> A = fft(window, 2048) / 25.5 >>> mag = np.abs(fftshift(A)) >>> freq = np.linspace(-0.5, 0.5, len(A)) - >>> response = 20 * np.log10(mag) + >>> with np.errstate(divide='ignore', invalid='ignore'): + ... response = 20 * np.log10(mag) + ... >>> response = np.clip(response, -100, 100) >>> plt.plot(freq, response) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Frequency response of Bartlett window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Frequency response of Bartlett window') >>> plt.ylabel("Magnitude [dB]") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Magnitude [dB]') >>> plt.xlabel("Normalized frequency [cycles per sample]") - <matplotlib.text.Text object at 0x...> - >>> plt.axis('tight') - (-0.5, 0.5, -100.0, ...) + Text(0.5, 0, 'Normalized frequency [cycles per sample]') + >>> _ = plt.axis('tight') >>> plt.show() """ @@ -2801,41 +2808,44 @@ def hanning(M): Examples -------- >>> np.hanning(12) - array([ 0. , 0.07937323, 0.29229249, 0.57115742, 0.82743037, - 0.97974649, 0.97974649, 0.82743037, 0.57115742, 0.29229249, - 0.07937323, 0. ]) + array([0. , 0.07937323, 0.29229249, 0.57115742, 0.82743037, + 0.97974649, 0.97974649, 0.82743037, 0.57115742, 0.29229249, + 0.07937323, 0. ]) Plot the window and its frequency response: + >>> import matplotlib.pyplot as plt >>> from numpy.fft import fft, fftshift >>> window = np.hanning(51) >>> plt.plot(window) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Hann window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Hann window') >>> plt.ylabel("Amplitude") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Amplitude') >>> plt.xlabel("Sample") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Sample') >>> plt.show() >>> plt.figure() - <matplotlib.figure.Figure object at 0x...> + <Figure size 640x480 with 0 Axes> >>> A = fft(window, 2048) / 25.5 >>> mag = np.abs(fftshift(A)) >>> freq = np.linspace(-0.5, 0.5, len(A)) - >>> response = 20 * np.log10(mag) + >>> with np.errstate(divide='ignore', invalid='ignore'): + ... response = 20 * np.log10(mag) + ... 
>>> response = np.clip(response, -100, 100) >>> plt.plot(freq, response) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Frequency response of the Hann window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Frequency response of the Hann window') >>> plt.ylabel("Magnitude [dB]") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Magnitude [dB]') >>> plt.xlabel("Normalized frequency [cycles per sample]") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Normalized frequency [cycles per sample]') >>> plt.axis('tight') - (-0.5, 0.5, -100.0, ...) + ... >>> plt.show() """ @@ -2900,26 +2910,27 @@ def hamming(M): Examples -------- >>> np.hamming(12) - array([ 0.08 , 0.15302337, 0.34890909, 0.60546483, 0.84123594, + array([ 0.08 , 0.15302337, 0.34890909, 0.60546483, 0.84123594, # may vary 0.98136677, 0.98136677, 0.84123594, 0.60546483, 0.34890909, 0.15302337, 0.08 ]) Plot the window and the frequency response: + >>> import matplotlib.pyplot as plt >>> from numpy.fft import fft, fftshift >>> window = np.hamming(51) >>> plt.plot(window) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Hamming window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Hamming window') >>> plt.ylabel("Amplitude") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Amplitude') >>> plt.xlabel("Sample") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Sample') >>> plt.show() >>> plt.figure() - <matplotlib.figure.Figure object at 0x...> + <Figure size 640x480 with 0 Axes> >>> A = fft(window, 2048) / 25.5 >>> mag = np.abs(fftshift(A)) >>> freq = np.linspace(-0.5, 0.5, len(A)) @@ -2928,13 +2939,13 @@ def hamming(M): >>> plt.plot(freq, response) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Frequency response of Hamming window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Frequency response of Hamming window') >>> plt.ylabel("Magnitude [dB]") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Magnitude [dB]') >>> plt.xlabel("Normalized frequency [cycles per sample]") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Normalized frequency [cycles per sample]') >>> plt.axis('tight') - (-0.5, 0.5, -100.0, ...) + ... >>> plt.show() """ @@ -3059,10 +3070,13 @@ def i0(x): See Also -------- - scipy.special.iv, scipy.special.ive + scipy.special.i0, scipy.special.iv, scipy.special.ive Notes ----- + The scipy implementation is recommended over this function: it is a + proper ufunc written in C, and more than an order of magnitude faster. + We use the algorithm published by Clenshaw [1]_ and referenced by Abramowitz and Stegun [2]_, for which the function domain is partitioned into the two intervals [0,8] and (8,inf), and Chebyshev @@ -3082,21 +3096,15 @@ def i0(x): Examples -------- - >>> np.i0([0.]) - array(1.0) + >>> np.i0(0.) + array(1.0) # may vary >>> np.i0([0., 1. 
+ 2j]) - array([ 1.00000000+0.j , 0.18785373+0.64616944j]) + array([ 1.00000000+0.j , 0.18785373+0.64616944j]) # may vary """ - x = atleast_1d(x).copy() - y = empty_like(x) - ind = (x < 0) - x[ind] = -x[ind] - ind = (x <= 8.0) - y[ind] = _i0_1(x[ind]) - ind2 = ~ind - y[ind2] = _i0_2(x[ind2]) - return y.squeeze() + x = np.asanyarray(x) + x = np.abs(x) + return piecewise(x, [x <= 8.0], [_i0_1, _i0_2]) ## End of cephes code for i0 @@ -3180,11 +3188,12 @@ def kaiser(M, beta): Examples -------- + >>> import matplotlib.pyplot as plt >>> np.kaiser(12, 14) - array([ 7.72686684e-06, 3.46009194e-03, 4.65200189e-02, - 2.29737120e-01, 5.99885316e-01, 9.45674898e-01, - 9.45674898e-01, 5.99885316e-01, 2.29737120e-01, - 4.65200189e-02, 3.46009194e-03, 7.72686684e-06]) + array([7.72686684e-06, 3.46009194e-03, 4.65200189e-02, # may vary + 2.29737120e-01, 5.99885316e-01, 9.45674898e-01, + 9.45674898e-01, 5.99885316e-01, 2.29737120e-01, + 4.65200189e-02, 3.46009194e-03, 7.72686684e-06]) Plot the window and the frequency response: @@ -3194,15 +3203,15 @@ def kaiser(M, beta): >>> plt.plot(window) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Kaiser window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Kaiser window') >>> plt.ylabel("Amplitude") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Amplitude') >>> plt.xlabel("Sample") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Sample') >>> plt.show() >>> plt.figure() - <matplotlib.figure.Figure object at 0x...> + <Figure size 640x480 with 0 Axes> >>> A = fft(window, 2048) / 25.5 >>> mag = np.abs(fftshift(A)) >>> freq = np.linspace(-0.5, 0.5, len(A)) @@ -3211,13 +3220,13 @@ def kaiser(M, beta): >>> plt.plot(freq, response) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Frequency response of Kaiser window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Frequency response of Kaiser window') >>> plt.ylabel("Magnitude [dB]") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Magnitude [dB]') >>> plt.xlabel("Normalized frequency [cycles per sample]") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Normalized frequency [cycles per sample]') >>> plt.axis('tight') - (-0.5, 0.5, -100.0, ...) + (-0.5, 0.5, -100.0, ...) 
# may vary >>> plt.show() """ @@ -3273,31 +3282,32 @@ def sinc(x): Examples -------- + >>> import matplotlib.pyplot as plt >>> x = np.linspace(-4, 4, 41) >>> np.sinc(x) - array([ -3.89804309e-17, -4.92362781e-02, -8.40918587e-02, + array([-3.89804309e-17, -4.92362781e-02, -8.40918587e-02, # may vary -8.90384387e-02, -5.84680802e-02, 3.89804309e-17, - 6.68206631e-02, 1.16434881e-01, 1.26137788e-01, - 8.50444803e-02, -3.89804309e-17, -1.03943254e-01, + 6.68206631e-02, 1.16434881e-01, 1.26137788e-01, + 8.50444803e-02, -3.89804309e-17, -1.03943254e-01, -1.89206682e-01, -2.16236208e-01, -1.55914881e-01, - 3.89804309e-17, 2.33872321e-01, 5.04551152e-01, - 7.56826729e-01, 9.35489284e-01, 1.00000000e+00, - 9.35489284e-01, 7.56826729e-01, 5.04551152e-01, - 2.33872321e-01, 3.89804309e-17, -1.55914881e-01, - -2.16236208e-01, -1.89206682e-01, -1.03943254e-01, - -3.89804309e-17, 8.50444803e-02, 1.26137788e-01, - 1.16434881e-01, 6.68206631e-02, 3.89804309e-17, + 3.89804309e-17, 2.33872321e-01, 5.04551152e-01, + 7.56826729e-01, 9.35489284e-01, 1.00000000e+00, + 9.35489284e-01, 7.56826729e-01, 5.04551152e-01, + 2.33872321e-01, 3.89804309e-17, -1.55914881e-01, + -2.16236208e-01, -1.89206682e-01, -1.03943254e-01, + -3.89804309e-17, 8.50444803e-02, 1.26137788e-01, + 1.16434881e-01, 6.68206631e-02, 3.89804309e-17, -5.84680802e-02, -8.90384387e-02, -8.40918587e-02, -4.92362781e-02, -3.89804309e-17]) >>> plt.plot(x, np.sinc(x)) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Sinc Function") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Sinc Function') >>> plt.ylabel("Amplitude") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Amplitude') >>> plt.xlabel("X") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'X') >>> plt.show() It works in 2-D as well: @@ -3469,18 +3479,18 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False): >>> np.median(a) 3.5 >>> np.median(a, axis=0) - array([ 6.5, 4.5, 2.5]) + array([6.5, 4.5, 2.5]) >>> np.median(a, axis=1) - array([ 7., 2.]) + array([7., 2.]) >>> m = np.median(a, axis=0) >>> out = np.zeros_like(m) >>> np.median(a, axis=0, out=m) - array([ 6.5, 4.5, 2.5]) + array([6.5, 4.5, 2.5]) >>> m - array([ 6.5, 4.5, 2.5]) + array([6.5, 4.5, 2.5]) >>> b = a.copy() >>> np.median(b, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a==b) >>> b = a.copy() >>> np.median(b, axis=None, overwrite_input=True) @@ -3647,23 +3657,23 @@ def percentile(a, q, axis=None, out=None, >>> np.percentile(a, 50) 3.5 >>> np.percentile(a, 50, axis=0) - array([[ 6.5, 4.5, 2.5]]) + array([6.5, 4.5, 2.5]) >>> np.percentile(a, 50, axis=1) - array([ 7., 2.]) + array([7., 2.]) >>> np.percentile(a, 50, axis=1, keepdims=True) - array([[ 7.], - [ 2.]]) + array([[7.], + [2.]]) >>> m = np.percentile(a, 50, axis=0) >>> out = np.zeros_like(m) >>> np.percentile(a, 50, axis=0, out=out) - array([[ 6.5, 4.5, 2.5]]) + array([6.5, 4.5, 2.5]) >>> m - array([[ 6.5, 4.5, 2.5]]) + array([6.5, 4.5, 2.5]) >>> b = a.copy() >>> np.percentile(b, 50, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a == b) The different types of interpolation can be visualized graphically: @@ -3695,7 +3705,8 @@ def percentile(a, q, axis=None, out=None, plt.show() """ - q = np.true_divide(q, 100.0) # handles the asarray for us too + q = np.true_divide(q, 100) + q = asanyarray(q) # undo any decay that the ufunc performed (see gh-13105) if not _quantile_is_valid(q): raise ValueError("Percentiles must be in the range 
[0, 100]") return _quantile_unchecked( @@ -3712,7 +3723,8 @@ def quantile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False): """ Compute the q-th quantile of the data along the specified axis. - ..versionadded:: 1.15.0 + + .. versionadded:: 1.15.0 Parameters ---------- @@ -3789,21 +3801,21 @@ def quantile(a, q, axis=None, out=None, >>> np.quantile(a, 0.5) 3.5 >>> np.quantile(a, 0.5, axis=0) - array([[ 6.5, 4.5, 2.5]]) + array([6.5, 4.5, 2.5]) >>> np.quantile(a, 0.5, axis=1) - array([ 7., 2.]) + array([7., 2.]) >>> np.quantile(a, 0.5, axis=1, keepdims=True) - array([[ 7.], - [ 2.]]) + array([[7.], + [2.]]) >>> m = np.quantile(a, 0.5, axis=0) >>> out = np.zeros_like(m) >>> np.quantile(a, 0.5, axis=0, out=out) - array([[ 6.5, 4.5, 2.5]]) + array([6.5, 4.5, 2.5]) >>> m - array([[ 6.5, 4.5, 2.5]]) + array([6.5, 4.5, 2.5]) >>> b = a.copy() >>> np.quantile(b, 0.5, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a == b) """ q = np.asanyarray(q) @@ -3913,7 +3925,7 @@ def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, indices_above = concatenate((indices_above, [-1])) weights_above = indices - indices_below - weights_below = 1.0 - weights_above + weights_below = 1 - weights_above weights_shape = [1, ] * ap.ndim weights_shape[axis] = len(indices) @@ -3950,8 +3962,6 @@ def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, r = add(x1, x2) if np.any(n): - warnings.warn("Invalid value encountered in percentile", - RuntimeWarning, stacklevel=3) if zerod: if ap.ndim == 1: if out is not None: @@ -4032,9 +4042,9 @@ def trapz(y, x=None, dx=1.0, axis=-1): array([[0, 1, 2], [3, 4, 5]]) >>> np.trapz(a, axis=0) - array([ 1.5, 2.5, 3.5]) + array([1.5, 2.5, 3.5]) >>> np.trapz(a, axis=1) - array([ 2., 8.]) + array([2., 8.]) """ y = asanyarray(y) @@ -4152,17 +4162,17 @@ def meshgrid(*xi, **kwargs): >>> y = np.linspace(0, 1, ny) >>> xv, yv = np.meshgrid(x, y) >>> xv - array([[ 0. , 0.5, 1. ], - [ 0. , 0.5, 1. ]]) + array([[0. , 0.5, 1. ], + [0. , 0.5, 1. ]]) >>> yv - array([[ 0., 0., 0.], - [ 1., 1., 1.]]) + array([[0., 0., 0.], + [1., 1., 1.]]) >>> xv, yv = np.meshgrid(x, y, sparse=True) # make sparse output arrays >>> xv - array([[ 0. , 0.5, 1. ]]) + array([[0. , 0.5, 1. ]]) >>> yv - array([[ 0.], - [ 1.]]) + array([[0.], + [1.]]) `meshgrid` is very useful to evaluate functions on a grid. @@ -4224,7 +4234,7 @@ def delete(arr, obj, axis=None): arr : array_like Input array. obj : slice, int or array of ints - Indicate which sub-arrays to remove. + Indicate indices of sub-arrays to remove along the specified axis. axis : int, optional The axis along which to delete the subarray defined by `obj`. If `axis` is None, `obj` is applied to the flattened array. @@ -4245,6 +4255,7 @@ def delete(arr, obj, axis=None): ----- Often it is preferable to use a boolean mask. For example: + >>> arr = np.arange(12) + 1 >>> mask = np.ones(len(arr), dtype=bool) >>> mask[[0,2,4]] = False >>> result = arr[mask,...] 
@@ -4291,7 +4302,7 @@ def delete(arr, obj, axis=None): # 2013-09-24, 1.9 warnings.warn( "in the future the special handling of scalars will be removed " - "from delete and raise an error", DeprecationWarning, stacklevel=2) + "from delete and raise an error", DeprecationWarning, stacklevel=3) if wrap: return wrap(arr) else: @@ -4328,7 +4339,7 @@ def delete(arr, obj, axis=None): else: slobj[axis] = slice(None, start) new[tuple(slobj)] = arr[tuple(slobj)] - # copy end chunck + # copy end chunk if stop == N: pass else: @@ -4360,7 +4371,7 @@ def delete(arr, obj, axis=None): if obj.dtype == bool: warnings.warn("in the future insert will treat boolean arrays and " "array-likes as boolean index instead of casting it " - "to integer", FutureWarning, stacklevel=2) + "to integer", FutureWarning, stacklevel=3) obj = obj.astype(intp) if isinstance(_obj, (int, long, integer)): # optimization for a single value @@ -4388,7 +4399,7 @@ def delete(arr, obj, axis=None): # 2013-09-24, 1.9 warnings.warn( "using a non-integer array as obj in delete will result in an " - "error in the future", DeprecationWarning, stacklevel=2) + "error in the future", DeprecationWarning, stacklevel=3) obj = obj.astype(intp) keep = ones(N, dtype=bool) @@ -4399,13 +4410,13 @@ def delete(arr, obj, axis=None): warnings.warn( "in the future out of bounds indices will raise an error " "instead of being ignored by `numpy.delete`.", - DeprecationWarning, stacklevel=2) + DeprecationWarning, stacklevel=3) obj = obj[inside_bounds] positive_indices = obj >= 0 if not positive_indices.all(): warnings.warn( "in the future negative indices will not be ignored by " - "`numpy.delete`.", FutureWarning, stacklevel=2) + "`numpy.delete`.", FutureWarning, stacklevel=3) obj = obj[positive_indices] keep[obj, ] = False @@ -4476,7 +4487,7 @@ def insert(arr, obj, values, axis=None): [2, 2], [3, 3]]) >>> np.insert(a, 1, 5) - array([1, 5, 1, 2, 2, 3, 3]) + array([1, 5, 1, ..., 2, 3, 3]) >>> np.insert(a, 1, 5, axis=1) array([[1, 5, 1], [2, 5, 2], @@ -4496,13 +4507,13 @@ def insert(arr, obj, values, axis=None): >>> b array([1, 1, 2, 2, 3, 3]) >>> np.insert(b, [2, 2], [5, 6]) - array([1, 1, 5, 6, 2, 2, 3, 3]) + array([1, 1, 5, ..., 2, 3, 3]) >>> np.insert(b, slice(2, 4), [5, 6]) - array([1, 1, 5, 2, 6, 2, 3, 3]) + array([1, 1, 5, ..., 2, 3, 3]) >>> np.insert(b, [2, 2], [7.13, False]) # type casting - array([1, 1, 7, 0, 2, 2, 3, 3]) + array([1, 1, 7, ..., 2, 3, 3]) >>> x = np.arange(8).reshape(2, 4) >>> idx = (1, 3) @@ -4530,7 +4541,7 @@ def insert(arr, obj, values, axis=None): # 2013-09-24, 1.9 warnings.warn( "in the future the special handling of scalars will be removed " - "from insert and raise an error", DeprecationWarning, stacklevel=2) + "from insert and raise an error", DeprecationWarning, stacklevel=3) arr = arr.copy(order=arrorder) arr[...] 
= values if wrap: @@ -4554,7 +4565,7 @@ def insert(arr, obj, values, axis=None): warnings.warn( "in the future insert will treat boolean arrays and " "array-likes as a boolean index instead of casting it to " - "integer", FutureWarning, stacklevel=2) + "integer", FutureWarning, stacklevel=3) indices = indices.astype(intp) # Code after warning period: #if obj.ndim != 1: @@ -4604,7 +4615,7 @@ def insert(arr, obj, values, axis=None): # 2013-09-24, 1.9 warnings.warn( "using a non-integer array as obj in insert will result in an " - "error in the future", DeprecationWarning, stacklevel=2) + "error in the future", DeprecationWarning, stacklevel=3) indices = indices.astype(intp) indices[indices < 0] += N @@ -4666,7 +4677,7 @@ def append(arr, values, axis=None): Examples -------- >>> np.append([1, 2, 3], [[4, 5, 6], [7, 8, 9]]) - array([1, 2, 3, 4, 5, 6, 7, 8, 9]) + array([1, 2, 3, ..., 7, 8, 9]) When `axis` is specified, `values` must have the correct shape. @@ -4676,8 +4687,8 @@ def append(arr, values, axis=None): [7, 8, 9]]) >>> np.append([[1, 2, 3], [4, 5, 6]], [7, 8, 9], axis=0) Traceback (most recent call last): - ... - ValueError: arrays must have same number of dimensions + ... + ValueError: all the input arrays must have same number of dimensions """ arr = asanyarray(arr) diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 482eabe14..8474bd5d3 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -3,6 +3,7 @@ Histogram-related functions """ from __future__ import division, absolute_import, print_function +import contextlib import functools import operator import warnings @@ -148,7 +149,8 @@ def _hist_bin_stone(x, range): nbins_upper_bound = max(100, int(np.sqrt(n))) nbins = min(_range(1, nbins_upper_bound + 1), key=jhat) if nbins == nbins_upper_bound: - warnings.warn("The number of bins estimated may be suboptimal.", RuntimeWarning, stacklevel=2) + warnings.warn("The number of bins estimated may be suboptimal.", + RuntimeWarning, stacklevel=3) return ptp_x / nbins @@ -279,7 +281,7 @@ def _ravel_and_check_weights(a, weights): if a.dtype == np.bool_: warnings.warn("Converting input from {} to {} for compatibility." .format(a.dtype, np.uint8), - RuntimeWarning, stacklevel=2) + RuntimeWarning, stacklevel=3) a = a.astype(np.uint8) if weights is not None: @@ -461,7 +463,8 @@ def _histogram_bin_edges_dispatcher(a, bins=None, range=None, weights=None): @array_function_dispatch(_histogram_bin_edges_dispatcher) def histogram_bin_edges(a, bins=10, range=None, weights=None): r""" - Function to calculate only the edges of the bins used by the `histogram` function. + Function to calculate only the edges of the bins used by the `histogram` + function. Parameters ---------- @@ -554,14 +557,14 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): using the `ptp` of the data. The final bin count is obtained from ``np.round(np.ceil(range / h))``. - 'Auto' (maximum of the 'Sturges' and 'FD' estimators) + 'auto' (maximum of the 'sturges' and 'fd' estimators) A compromise to get a good value. For small datasets the Sturges value will usually be chosen, while larger datasets will usually default to FD. Avoids the overly conservative behaviour of FD and Sturges for small and large datasets respectively. Switchover point is usually :math:`a.size \approx 1000`. - 'FD' (Freedman Diaconis Estimator) + 'fd' (Freedman Diaconis Estimator) .. 
math:: h = 2 \frac{IQR}{n^{1/3}} The binwidth is proportional to the interquartile range (IQR) @@ -569,7 +572,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): conservative for small datasets, but is quite good for large datasets. The IQR is very robust to outliers. - 'Scott' + 'scott' .. math:: h = \sigma \sqrt[3]{\frac{24 * \sqrt{\pi}}{n}} The binwidth is proportional to the standard deviation of the @@ -579,14 +582,14 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): outliers. Values are very similar to the Freedman-Diaconis estimator in the absence of outliers. - 'Rice' + 'rice' .. math:: n_h = 2n^{1/3} The number of bins is only proportional to cube root of ``a.size``. It tends to overestimate the number of bins and it does not take into account data variability. - 'Sturges' + 'sturges' .. math:: n_h = \log _{2}n+1 The number of bins is the base 2 log of ``a.size``. This @@ -594,7 +597,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): larger, non-normal datasets. This is the default method in R's ``hist`` method. - 'Doane' + 'doane' .. math:: n_h = 1 + \log_{2}(n) + \log_{2}(1 + \frac{|g_1|}{\sigma_{g_1}}) @@ -606,8 +609,9 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): estimates for non-normal datasets. This estimator attempts to account for the skew of the data. - 'Sqrt' + 'sqrt' .. math:: n_h = \sqrt n + The simplest and fastest estimator. Only takes into account the data size. @@ -645,7 +649,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): >>> hist_0, bins_0 = np.histogram(arr[group_id == 0], bins='auto') >>> hist_1, bins_1 = np.histogram(arr[group_id == 1], bins='auto') - >>> hist_0; hist1 + >>> hist_0; hist_1 array([1, 1, 1]) array([2, 1, 1, 2]) >>> bins_0; bins_1 @@ -748,14 +752,14 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, >>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3]) (array([0, 2, 1]), array([0, 1, 2, 3])) >>> np.histogram(np.arange(4), bins=np.arange(5), density=True) - (array([ 0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4])) + (array([0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4])) >>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3]) (array([1, 4, 1]), array([0, 1, 2, 3])) >>> a = np.arange(5) >>> hist, bin_edges = np.histogram(a, density=True) >>> hist - array([ 0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5]) + array([0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5]) >>> hist.sum() 2.4999999999999996 >>> np.sum(hist * np.diff(bin_edges)) @@ -770,8 +774,9 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, >>> rng = np.random.RandomState(10) # deterministic random data >>> a = np.hstack((rng.normal(size=1000), ... rng.normal(loc=5, scale=2, size=1000))) - >>> plt.hist(a, bins='auto') # arguments are passed to np.histogram + >>> _ = plt.hist(a, bins='auto') # arguments are passed to np.histogram >>> plt.title("Histogram with 'auto' bins") + Text(0.5, 1.0, "Histogram with 'auto' bins") >>> plt.show() """ @@ -885,7 +890,7 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, warnings.warn( "The normed argument is ignored when density is provided. " "In future passing both will result in an error.", - DeprecationWarning, stacklevel=2) + DeprecationWarning, stacklevel=3) normed = None if density: @@ -901,7 +906,7 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, "density=True will produce the same result anyway. 
" "The argument will be removed in a future version of " "numpy.", - np.VisibleDeprecationWarning, stacklevel=2) + np.VisibleDeprecationWarning, stacklevel=3) # this normalization is incorrect, but db = np.array(np.diff(bin_edges), float) @@ -912,13 +917,19 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, warnings.warn( "Passing normed=False is deprecated, and has no effect. " "Consider passing the density argument instead.", - DeprecationWarning, stacklevel=2) + DeprecationWarning, stacklevel=3) return n, bin_edges def _histogramdd_dispatcher(sample, bins=None, range=None, normed=None, weights=None, density=None): - return (sample, bins, weights) + if hasattr(sample, 'shape'): # same condition as used in histogramdd + yield sample + else: + yield from sample + with contextlib.suppress(TypeError): + yield from bins + yield weights @array_function_dispatch(_histogramdd_dispatcher) diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py index 56abe293a..04384854c 100644 --- a/numpy/lib/index_tricks.py +++ b/numpy/lib/index_tricks.py @@ -94,12 +94,13 @@ def ix_(*args): out = [] nd = len(args) for k, new in enumerate(args): - new = asarray(new) + if not isinstance(new, _nx.ndarray): + new = asarray(new) + if new.size == 0: + # Explicitly type empty arrays to avoid float default + new = new.astype(_nx.intp) if new.ndim != 1: raise ValueError("Cross index must be 1 dimensional") - if new.size == 0: - # Explicitly type empty arrays to avoid float default - new = new.astype(_nx.intp) if issubdtype(new.dtype, _nx.bool_): new, = new.nonzero() new = new.reshape((1,)*k + (new.size,) + (1,)*(nd-k-1)) @@ -269,8 +270,9 @@ class OGridClass(nd_grid): the stop value **is inclusive**. Returns - ---------- - mesh-grid `ndarrays` with only one dimension :math:`\\neq 1` + ------- + mesh-grid + `ndarrays` with only one dimension not equal to 1 See Also -------- @@ -478,7 +480,7 @@ class RClass(AxisConcatenator): Examples -------- >>> np.r_[np.array([1,2,3]), 0, 0, np.array([4,5,6])] - array([1, 2, 3, 0, 0, 4, 5, 6]) + array([1, 2, 3, ..., 4, 5, 6]) >>> np.r_[-1:1:6j, [0]*3, 5, 6] array([-1. , -0.6, -0.2, 0.2, 0.6, 1. , 0. , 0. , 0. , 5. , 6. ]) @@ -538,7 +540,7 @@ class CClass(AxisConcatenator): [2, 5], [3, 6]]) >>> np.c_[np.array([[1,2,3]]), 0, 0, np.array([[4,5,6]])] - array([[1, 2, 3, 0, 0, 4, 5, 6]]) + array([[1, 2, 3, ..., 4, 5, 6]]) """ @@ -812,8 +814,8 @@ def fill_diagonal(a, val, wrap=False): The wrap option affects only tall matrices: >>> # tall matrices no wrap - >>> a = np.zeros((5, 3),int) - >>> fill_diagonal(a, 4) + >>> a = np.zeros((5, 3), int) + >>> np.fill_diagonal(a, 4) >>> a array([[4, 0, 0], [0, 4, 0], @@ -822,8 +824,8 @@ def fill_diagonal(a, val, wrap=False): [0, 0, 0]]) >>> # tall matrices wrap - >>> a = np.zeros((5, 3),int) - >>> fill_diagonal(a, 4, wrap=True) + >>> a = np.zeros((5, 3), int) + >>> np.fill_diagonal(a, 4, wrap=True) >>> a array([[4, 0, 0], [0, 4, 0], @@ -832,13 +834,30 @@ def fill_diagonal(a, val, wrap=False): [4, 0, 0]]) >>> # wide matrices - >>> a = np.zeros((3, 5),int) - >>> fill_diagonal(a, 4, wrap=True) + >>> a = np.zeros((3, 5), int) + >>> np.fill_diagonal(a, 4, wrap=True) >>> a array([[4, 0, 0, 0, 0], [0, 4, 0, 0, 0], [0, 0, 4, 0, 0]]) + The anti-diagonal can be filled by reversing the order of elements + using either `numpy.flipud` or `numpy.fliplr`. 
+ + >>> a = np.zeros((3, 3), int); + >>> np.fill_diagonal(np.fliplr(a), [1,2,3]) # Horizontal flip + >>> a + array([[0, 0, 1], + [0, 2, 0], + [3, 0, 0]]) + >>> np.fill_diagonal(np.flipud(a), [1,2,3]) # Vertical flip + >>> a + array([[0, 0, 3], + [0, 2, 0], + [1, 0, 0]]) + + Note that the order in which the diagonal is filled varies depending + on the flip function. """ if a.ndim < 2: raise ValueError("array must be at least 2-d") diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index d73d84467..9a03d0b39 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -40,6 +40,33 @@ __all__ = [ ] +def _nan_mask(a, out=None): + """ + Parameters + ---------- + a : array-like + Input array with at least 1 dimension. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output and will prevent the allocation of a new array. + + Returns + ------- + y : bool ndarray or True + A bool array where ``np.nan`` positions are marked with ``False`` + and other positions are marked with ``True``. If the type of ``a`` + is such that it can't possibly contain ``np.nan``, returns ``True``. + """ + # we assume that a is an array for this private function + + if a.dtype.kind not in 'fc': + return True + + y = np.isnan(a, out=out) + y = np.invert(y, out=y) + return y + def _replace_nan(a, val): """ If `a` is of inexact type, make a copy of `a`, replace NaNs with @@ -138,7 +165,8 @@ def _remove_nan_1d(arr1d, overwrite_input=False): c = np.isnan(arr1d) s = np.nonzero(c)[0] if s.size == arr1d.size: - warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=4) + warnings.warn("All-NaN slice encountered", RuntimeWarning, + stacklevel=5) return arr1d[:0], True elif s.size == 0: return arr1d, overwrite_input @@ -271,9 +299,9 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): >>> np.nanmin(a) 1.0 >>> np.nanmin(a, axis=0) - array([ 1., 2.]) + array([1., 2.]) >>> np.nanmin(a, axis=1) - array([ 1., 3.]) + array([1., 3.]) When positive infinity and negative infinity are present: @@ -291,7 +319,8 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): # which do not implement isnan (gh-9009), or fmin correctly (gh-8975) res = np.fmin.reduce(a, axis=axis, out=out, **kwargs) if np.isnan(res).any(): - warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2) + warnings.warn("All-NaN slice encountered", RuntimeWarning, + stacklevel=3) else: # Slow, but safe for subclasses of ndarray a, mask = _replace_nan(a, +np.inf) @@ -303,7 +332,8 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): mask = np.all(mask, axis=axis, **kwargs) if np.any(mask): res = _copyto(res, np.nan, mask) - warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2) + warnings.warn("All-NaN axis encountered", RuntimeWarning, + stacklevel=3) return res @@ -384,9 +414,9 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): >>> np.nanmax(a) 3.0 >>> np.nanmax(a, axis=0) - array([ 3., 2.]) + array([3., 2.]) >>> np.nanmax(a, axis=1) - array([ 2., 3.]) + array([2., 3.]) When positive infinity and negative infinity are present: @@ -404,7 +434,8 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): # which do not implement isnan (gh-9009), or fmax correctly (gh-8975) res = np.fmax.reduce(a, axis=axis, out=out, **kwargs) if np.isnan(res).any(): - warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2) + warnings.warn("All-NaN slice 
encountered", RuntimeWarning, + stacklevel=3) else: # Slow, but safe for subclasses of ndarray a, mask = _replace_nan(a, -np.inf) @@ -416,7 +447,8 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): mask = np.all(mask, axis=axis, **kwargs) if np.any(mask): res = _copyto(res, np.nan, mask) - warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2) + warnings.warn("All-NaN axis encountered", RuntimeWarning, + stacklevel=3) return res @@ -601,12 +633,15 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): >>> np.nansum(a) 3.0 >>> np.nansum(a, axis=0) - array([ 2., 1.]) + array([2., 1.]) >>> np.nansum([1, np.nan, np.inf]) inf >>> np.nansum([1, np.nan, np.NINF]) -inf - >>> np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present + >>> from numpy.testing import suppress_warnings + >>> with suppress_warnings() as sup: + ... sup.filter(RuntimeWarning) + ... np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present nan """ @@ -677,7 +712,7 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): >>> np.nanprod(a) 6.0 >>> np.nanprod(a, axis=0) - array([ 3., 2.]) + array([3., 2.]) """ a, mask = _replace_nan(a, 1) @@ -738,16 +773,16 @@ def nancumsum(a, axis=None, dtype=None, out=None): >>> np.nancumsum([1]) array([1]) >>> np.nancumsum([1, np.nan]) - array([ 1., 1.]) + array([1., 1.]) >>> a = np.array([[1, 2], [3, np.nan]]) >>> np.nancumsum(a) - array([ 1., 3., 6., 6.]) + array([1., 3., 6., 6.]) >>> np.nancumsum(a, axis=0) - array([[ 1., 2.], - [ 4., 2.]]) + array([[1., 2.], + [4., 2.]]) >>> np.nancumsum(a, axis=1) - array([[ 1., 3.], - [ 3., 3.]]) + array([[1., 3.], + [3., 3.]]) """ a, mask = _replace_nan(a, 0) @@ -805,16 +840,16 @@ def nancumprod(a, axis=None, dtype=None, out=None): >>> np.nancumprod([1]) array([1]) >>> np.nancumprod([1, np.nan]) - array([ 1., 1.]) + array([1., 1.]) >>> a = np.array([[1, 2], [3, np.nan]]) >>> np.nancumprod(a) - array([ 1., 2., 6., 6.]) + array([1., 2., 6., 6.]) >>> np.nancumprod(a, axis=0) - array([[ 1., 2.], - [ 3., 2.]]) + array([[1., 2.], + [3., 2.]]) >>> np.nancumprod(a, axis=1) - array([[ 1., 2.], - [ 3., 3.]]) + array([[1., 2.], + [3., 3.]]) """ a, mask = _replace_nan(a, 1) @@ -895,9 +930,9 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): >>> np.nanmean(a) 2.6666666666666665 >>> np.nanmean(a, axis=0) - array([ 2., 4.]) + array([2., 4.]) >>> np.nanmean(a, axis=1) - array([ 1., 3.5]) + array([1., 3.5]) # may vary """ arr, mask = _replace_nan(a, 0) @@ -917,7 +952,7 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): isbad = (cnt == 0) if isbad.any(): - warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2) + warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=3) # NaN is the only possible bad value, so no further # action is needed to handle bad results. 
return avg @@ -929,7 +964,7 @@ def _nanmedian1d(arr1d, overwrite_input=False): See nanmedian for parameter usage """ arr1d, overwrite_input = _remove_nan_1d(arr1d, - overwrite_input=overwrite_input) + overwrite_input=overwrite_input) if arr1d.size == 0: return np.nan @@ -972,7 +1007,8 @@ def _nanmedian_small(a, axis=None, out=None, overwrite_input=False): a = np.ma.masked_array(a, np.isnan(a)) m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input) for i in range(np.count_nonzero(m.mask.ravel())): - warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=3) + warnings.warn("All-NaN slice encountered", RuntimeWarning, + stacklevel=4) if out is not None: out[...] = m.filled(np.nan) return out @@ -1049,19 +1085,19 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValu >>> a = np.array([[10.0, 7, 4], [3, 2, 1]]) >>> a[0, 1] = np.nan >>> a - array([[ 10., nan, 4.], - [ 3., 2., 1.]]) + array([[10., nan, 4.], + [ 3., 2., 1.]]) >>> np.median(a) nan >>> np.nanmedian(a) 3.0 >>> np.nanmedian(a, axis=0) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> np.median(a, axis=1) - array([ 7., 2.]) + array([nan, 2.]) >>> b = a.copy() >>> np.nanmedian(b, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a==b) >>> b = a.copy() >>> np.nanmedian(b, axis=None, overwrite_input=True) @@ -1177,27 +1213,27 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) >>> a[0][1] = np.nan >>> a - array([[ 10., nan, 4.], - [ 3., 2., 1.]]) + array([[10., nan, 4.], + [ 3., 2., 1.]]) >>> np.percentile(a, 50) nan >>> np.nanpercentile(a, 50) - 3.5 + 3.0 >>> np.nanpercentile(a, 50, axis=0) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> np.nanpercentile(a, 50, axis=1, keepdims=True) - array([[ 7.], - [ 2.]]) + array([[7.], + [2.]]) >>> m = np.nanpercentile(a, 50, axis=0) >>> out = np.zeros_like(m) >>> np.nanpercentile(a, 50, axis=0, out=out) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> m - array([ 6.5, 2. , 2.5]) + array([6.5, 2. , 2.5]) >>> b = a.copy() >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a==b) """ @@ -1221,6 +1257,7 @@ def nanquantile(a, q, axis=None, out=None, overwrite_input=False, Compute the qth quantile of the data along the specified axis, while ignoring nan values. Returns the qth quantile(s) of the array elements. + .. versionadded:: 1.15.0 Parameters @@ -1291,26 +1328,26 @@ def nanquantile(a, q, axis=None, out=None, overwrite_input=False, >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) >>> a[0][1] = np.nan >>> a - array([[ 10., nan, 4.], - [ 3., 2., 1.]]) + array([[10., nan, 4.], + [ 3., 2., 1.]]) >>> np.quantile(a, 0.5) nan >>> np.nanquantile(a, 0.5) - 3.5 + 3.0 >>> np.nanquantile(a, 0.5, axis=0) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> np.nanquantile(a, 0.5, axis=1, keepdims=True) - array([[ 7.], - [ 2.]]) + array([[7.], + [2.]]) >>> m = np.nanquantile(a, 0.5, axis=0) >>> out = np.zeros_like(m) >>> np.nanquantile(a, 0.5, axis=0, out=out) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> m - array([ 6.5, 2. , 2.5]) + array([6.5, 2. 
, 2.5]) >>> b = a.copy() >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a==b) """ a = np.asanyarray(a) @@ -1465,12 +1502,12 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): Examples -------- >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.var(a) + >>> np.nanvar(a) 1.5555555555555554 >>> np.nanvar(a, axis=0) - array([ 1., 0.]) + array([1., 0.]) >>> np.nanvar(a, axis=1) - array([ 0., 0.25]) + array([0., 0.25]) # may vary """ arr, mask = _replace_nan(a, 0) @@ -1517,7 +1554,8 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): isbad = (dof <= 0) if np.any(isbad): - warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning, stacklevel=2) + warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning, + stacklevel=3) # NaN, inf, or negative numbers are all possible bad # values, so explicitly replace them with NaN. var = _copyto(var, np.nan, isbad) @@ -1619,9 +1657,9 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): >>> np.nanstd(a) 1.247219128924647 >>> np.nanstd(a, axis=0) - array([ 1., 0.]) + array([1., 0.]) >>> np.nanstd(a, axis=1) - array([ 0., 0.5]) + array([0., 0.5]) # may vary """ var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index db6a8e5eb..318dc434a 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -7,6 +7,7 @@ import functools import itertools import warnings import weakref +import contextlib from operator import itemgetter, index as opindex import numpy as np @@ -23,10 +24,9 @@ from ._iotools import ( ) from numpy.compat import ( - asbytes, asstr, asunicode, asbytes_nested, bytes, basestring, unicode, - os_fspath, os_PathLike + asbytes, asstr, asunicode, bytes, basestring, os_fspath, os_PathLike, + pickle, contextlib_nullcontext ) -from numpy.core.numeric import pickle if sys.version_info[0] >= 3: from collections.abc import Mapping @@ -146,7 +146,11 @@ class NpzFile(Mapping): An object on which attribute can be performed as an alternative to getitem access on the `NpzFile` instance itself. allow_pickle : bool, optional - Allow loading pickled data. Default: True + Allow loading pickled data. Default: False + + .. versionchanged:: 1.16.3 + Made default False in response to CVE-2019-6446. + pickle_kwargs : dict, optional Additional keyword arguments to pass on to pickle.load. These are only useful when loading object arrays saved on @@ -168,13 +172,13 @@ class NpzFile(Mapping): >>> x = np.arange(10) >>> y = np.sin(x) >>> np.savez(outfile, x=x, y=y) - >>> outfile.seek(0) + >>> _ = outfile.seek(0) >>> npz = np.load(outfile) >>> isinstance(npz, np.lib.io.NpzFile) True - >>> npz.files - ['y', 'x'] + >>> sorted(npz.files) + ['x', 'y'] >>> npz['x'] # getitem access array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) >>> npz.f.x # attribute lookup @@ -182,7 +186,7 @@ class NpzFile(Mapping): """ - def __init__(self, fid, own_fid=False, allow_pickle=True, + def __init__(self, fid, own_fid=False, allow_pickle=False, pickle_kwargs=None): # Import is postponed to here since zipfile depends on gzip, an # optional component of the so-called standard library. @@ -285,11 +289,17 @@ class NpzFile(Mapping): @set_module('numpy') -def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, +def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII'): """ Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files. + .. 
warning:: Loading files that contain object arrays uses the ``pickle`` + module, which is not secure against erroneous or maliciously + constructed data. Consider passing ``allow_pickle=False`` to + load data that is known not to contain object arrays for the + safer handling of untrusted sources. + Parameters ---------- file : file-like object, string, or pathlib.Path @@ -307,8 +317,11 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, Allow loading pickled object arrays stored in npy files. Reasons for disallowing pickles include security, as loading pickled data can execute arbitrary code. If pickles are disallowed, loading object - arrays will fail. - Default: True + arrays will fail. Default: False + + .. versionchanged:: 1.16.3 + Made default False in response to CVE-2019-6446. + fix_imports : bool, optional Only useful when loading Python 2 generated pickled files on Python 3, which includes npy/npz files containing object arrays. If `fix_imports` @@ -502,7 +515,7 @@ def save(file, arr, allow_pickle=True, fix_imports=True): >>> x = np.arange(10) >>> np.save(outfile, x) - >>> outfile.seek(0) # Only needed here to simulate closing & reopening file + >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file >>> np.load(outfile) array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) @@ -597,10 +610,10 @@ def savez(file, *args, **kwds): Using `savez` with \\*args, the arrays are saved with default names. >>> np.savez(outfile, x, y) - >>> outfile.seek(0) # Only needed here to simulate closing & reopening file + >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file >>> npzfile = np.load(outfile) >>> npzfile.files - ['arr_1', 'arr_0'] + ['arr_0', 'arr_1'] >>> npzfile['arr_0'] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) @@ -608,10 +621,10 @@ def savez(file, *args, **kwds): >>> outfile = TemporaryFile() >>> np.savez(outfile, x=x, y=y) - >>> outfile.seek(0) + >>> _ = outfile.seek(0) >>> npzfile = np.load(outfile) - >>> npzfile.files - ['y', 'x'] + >>> sorted(npzfile.files) + ['x', 'y'] >>> npzfile['x'] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) @@ -721,8 +734,8 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): for key, val in namedict.items(): fname = key + '.npy' val = np.asanyarray(val) - force_zip64 = val.nbytes >= 2**30 - with zipf.open(fname, 'w', force_zip64=force_zip64) as fid: + # always force zip64, gh-10776 + with zipf.open(fname, 'w', force_zip64=True) as fid: format.write_array(fid, val, allow_pickle=allow_pickle, pickle_kwargs=pickle_kwargs) @@ -829,7 +842,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None. skiprows : int, optional - Skip the first `skiprows` lines; default: 0. + Skip the first `skiprows` lines, including comments; default: 0. usecols : int or sequence, optional Which columns to read, with 0 being the first. For example, ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns. @@ -891,21 +904,21 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, >>> from io import StringIO # StringIO behaves like a file object >>> c = StringIO(u"0 1\\n2 3") >>> np.loadtxt(c) - array([[ 0., 1.], - [ 2., 3.]]) + array([[0., 1.], + [2., 3.]]) >>> d = StringIO(u"M 21 72\\nF 35 58") >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'), ... 
'formats': ('S1', 'i4', 'f4')}) - array([('M', 21, 72.0), ('F', 35, 58.0)], - dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')]) + array([(b'M', 21, 72.), (b'F', 35, 58.)], + dtype=[('gender', 'S1'), ('age', '<i4'), ('weight', '<f4')]) >>> c = StringIO(u"1,0,2\\n3,0,4") >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True) >>> x - array([ 1., 3.]) + array([1., 3.]) >>> y - array([ 2., 4.]) + array([2., 4.]) """ # Type conversions for Py3 convenience @@ -1118,7 +1131,6 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, if type(x) is bytes: return conv(x) return conv(x.encode("latin1")) - import functools converters[i] = functools.partial(tobytes_first, conv=conv) else: converters[i] = conv @@ -1376,7 +1388,7 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', # Complex dtype -- each field indicates a separate column else: - ncol = len(X.dtype.descr) + ncol = len(X.dtype.names) else: ncol = X.shape[1] @@ -1387,7 +1399,7 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', if len(fmt) != ncol: raise AttributeError('fmt has wrong shape. %s' % str(fmt)) format = asstr(delimiter).join(map(asstr, fmt)) - elif isinstance(fmt, str): + elif isinstance(fmt, basestring): n_fmt_chars = fmt.count('%') error = ValueError('fmt has wrong number of %% formats: %s' % fmt) if n_fmt_chars == 1: @@ -1481,17 +1493,17 @@ def fromregex(file, regexp, dtype, encoding=None): Examples -------- >>> f = open('test.dat', 'w') - >>> f.write("1312 foo\\n1534 bar\\n444 qux") + >>> _ = f.write("1312 foo\\n1534 bar\\n444 qux") >>> f.close() >>> regexp = r"(\\d+)\\s+(...)" # match [digits, whitespace, anything] >>> output = np.fromregex('test.dat', regexp, ... [('num', np.int64), ('key', 'S3')]) >>> output - array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')], - dtype=[('num', '<i8'), ('key', '|S3')]) + array([(1312, b'foo'), (1534, b'bar'), ( 444, b'qux')], + dtype=[('num', '<i8'), ('key', 'S3')]) >>> output['num'] - array([1312, 1534, 444], dtype=int64) + array([1312, 1534, 444]) """ own_fh = False @@ -1537,7 +1549,8 @@ def fromregex(file, regexp, dtype, encoding=None): def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, - names=None, excludelist=None, deletechars=None, + names=None, excludelist=None, + deletechars=''.join(sorted(NameValidator.defaultdeletechars)), replace_space='_', autostrip=False, case_sensitive=True, defaultfmt="f%i", unpack=None, usemask=False, loose=True, invalid_raise=True, max_rows=None, encoding='bytes'): @@ -1674,26 +1687,26 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'), ... ('mystring','S5')], delimiter=",") >>> data - array((1, 1.3, 'abcde'), - dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')]) + array((1, 1.3, b'abcde'), + dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) Using dtype = None - >>> s.seek(0) # needed for StringIO example only + >>> _ = s.seek(0) # needed for StringIO example only >>> data = np.genfromtxt(s, dtype=None, ... 
names = ['myint','myfloat','mystring'], delimiter=",") >>> data - array((1, 1.3, 'abcde'), - dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')]) + array((1, 1.3, b'abcde'), + dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) Specifying dtype and names - >>> s.seek(0) + >>> _ = s.seek(0) >>> data = np.genfromtxt(s, dtype="i8,f8,S5", ... names=['myint','myfloat','mystring'], delimiter=",") >>> data - array((1, 1.3, 'abcde'), - dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')]) + array((1, 1.3, b'abcde'), + dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) An example with fixed-width columns @@ -1701,8 +1714,18 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'], ... delimiter=[1,3,5]) >>> data - array((1, 1.3, 'abcde'), - dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')]) + array((1, 1.3, b'abcde'), + dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', 'S5')]) + + An example to show comments + + >>> f = StringIO(''' + ... text,# of chars + ... hello world,11 + ... numpy,5''') + >>> np.genfromtxt(f, dtype='S12,S12', delimiter=',') + array([(b'text', b''), (b'hello world', b'11'), (b'numpy', b'5')], + dtype=[('f0', 'S12'), ('f1', 'S12')]) """ if max_rows is not None: @@ -1729,301 +1752,299 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, byte_converters = False # Initialize the filehandle, the LineSplitter and the NameValidator - own_fhd = False try: if isinstance(fname, os_PathLike): fname = os_fspath(fname) if isinstance(fname, basestring): - fhd = iter(np.lib._datasource.open(fname, 'rt', encoding=encoding)) - own_fhd = True + fid = np.lib._datasource.open(fname, 'rt', encoding=encoding) + fid_ctx = contextlib.closing(fid) else: - fhd = iter(fname) + fid = fname + fid_ctx = contextlib_nullcontext(fid) + fhd = iter(fid) except TypeError: raise TypeError( "fname must be a string, filehandle, list of strings, " "or generator. Got %s instead." % type(fname)) - split_line = LineSplitter(delimiter=delimiter, comments=comments, - autostrip=autostrip, encoding=encoding) - validate_names = NameValidator(excludelist=excludelist, - deletechars=deletechars, - case_sensitive=case_sensitive, - replace_space=replace_space) - - # Skip the first `skip_header` rows - for i in range(skip_header): - next(fhd) + with fid_ctx: + split_line = LineSplitter(delimiter=delimiter, comments=comments, + autostrip=autostrip, encoding=encoding) + validate_names = NameValidator(excludelist=excludelist, + deletechars=deletechars, + case_sensitive=case_sensitive, + replace_space=replace_space) - # Keep on until we find the first valid values - first_values = None - try: - while not first_values: - first_line = _decode_line(next(fhd), encoding) - if (names is True) and (comments is not None): - if comments in first_line: - first_line = ( - ''.join(first_line.split(comments)[1:])) - first_values = split_line(first_line) - except StopIteration: - # return an empty array if the datafile is empty - first_line = '' - first_values = [] - warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) - - # Should we take the first values as names ? 
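The ``fid_ctx`` construction above reduces to one ownership rule: wrap handles
opened here in ``closing`` so they are released on exit, and wrap
caller-supplied handles in a null context so they are left open. A minimal
sketch of the same pattern on Python 3.7+, where ``contextlib.nullcontext``
plays the role of the ``contextlib_nullcontext`` compat shim (``_maybe_open``
is a hypothetical name, not part of numpy):

>>> import contextlib, io
>>> def _maybe_open(fname):
...     # hypothetical helper: returns (file-like, context manager)
...     if isinstance(fname, str):
...         fid = open(fname, 'rt')  # opened here, so close it on exit
...         return fid, contextlib.closing(fid)
...     return fname, contextlib.nullcontext(fname)  # caller keeps ownership
>>> fid, ctx = _maybe_open(io.StringIO(u"1 2\n3 4"))
>>> with ctx:
...     _ = fid.read()
>>> fid.closed  # caller-owned handle survives the with-block
False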
- if names is True: - fval = first_values[0].strip() - if comments is not None: - if fval in comments: - del first_values[0] + # Skip the first `skip_header` rows + for i in range(skip_header): + next(fhd) - # Check the columns to use: make sure `usecols` is a list - if usecols is not None: + # Keep on until we find the first valid values + first_values = None try: - usecols = [_.strip() for _ in usecols.split(",")] - except AttributeError: + while not first_values: + first_line = _decode_line(next(fhd), encoding) + if (names is True) and (comments is not None): + if comments in first_line: + first_line = ( + ''.join(first_line.split(comments)[1:])) + first_values = split_line(first_line) + except StopIteration: + # return an empty array if the datafile is empty + first_line = '' + first_values = [] + warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) + + # Should we take the first values as names ? + if names is True: + fval = first_values[0].strip() + if comments is not None: + if fval in comments: + del first_values[0] + + # Check the columns to use: make sure `usecols` is a list + if usecols is not None: try: - usecols = list(usecols) - except TypeError: - usecols = [usecols, ] - nbcols = len(usecols or first_values) - - # Check the names and overwrite the dtype.names if needed - if names is True: - names = validate_names([str(_.strip()) for _ in first_values]) - first_line = '' - elif _is_string_like(names): - names = validate_names([_.strip() for _ in names.split(',')]) - elif names: - names = validate_names(names) - # Get the dtype - if dtype is not None: - dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names, - excludelist=excludelist, - deletechars=deletechars, - case_sensitive=case_sensitive, - replace_space=replace_space) - # Make sure the names is a list (for 2.5) - if names is not None: - names = list(names) - - if usecols: - for (i, current) in enumerate(usecols): - # if usecols is a list of names, convert to a list of indices - if _is_string_like(current): - usecols[i] = names.index(current) - elif current < 0: - usecols[i] = current + len(first_values) - # If the dtype is not None, make sure we update it - if (dtype is not None) and (len(dtype) > nbcols): - descr = dtype.descr - dtype = np.dtype([descr[_] for _ in usecols]) - names = list(dtype.names) - # If `names` is not None, update the names - elif (names is not None) and (len(names) > nbcols): - names = [names[_] for _ in usecols] - elif (names is not None) and (dtype is not None): - names = list(dtype.names) - - # Process the missing values ............................... - # Rename missing_values for convenience - user_missing_values = missing_values or () - if isinstance(user_missing_values, bytes): - user_missing_values = user_missing_values.decode('latin1') - - # Define the list of missing_values (one column: one list) - missing_values = [list(['']) for _ in range(nbcols)] - - # We have a dictionary: process it field by field - if isinstance(user_missing_values, dict): - # Loop on the items - for (key, val) in user_missing_values.items(): - # Is the key a string ? 
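The ``usecols`` normalization above (splitting a comma-separated string,
listifying a scalar, and offsetting negative indices by the column count) is
what makes the following spellings equivalent; a short sketch of documented
`genfromtxt` behavior:

>>> from io import StringIO
>>> np.genfromtxt(StringIO(u"1 2 3\n4 5 6"), usecols=(0, -1))
array([[1., 3.],
       [4., 6.]])
>>> np.genfromtxt(StringIO(u"1 2 3\n4 5 6"), names="a,b,c", usecols="a,c")
array([(1., 3.), (4., 6.)], dtype=[('a', '<f8'), ('c', '<f8')])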
- if _is_string_like(key): - try: - # Transform it into an integer - key = names.index(key) - except ValueError: - # We couldn't find it: the name must have been dropped - continue - # Redefine the key as needed if it's a column number - if usecols: + usecols = [_.strip() for _ in usecols.split(",")] + except AttributeError: try: - key = usecols.index(key) - except ValueError: - pass - # Transform the value as a list of string - if isinstance(val, (list, tuple)): - val = [str(_) for _ in val] + usecols = list(usecols) + except TypeError: + usecols = [usecols, ] + nbcols = len(usecols or first_values) + + # Check the names and overwrite the dtype.names if needed + if names is True: + names = validate_names([str(_.strip()) for _ in first_values]) + first_line = '' + elif _is_string_like(names): + names = validate_names([_.strip() for _ in names.split(',')]) + elif names: + names = validate_names(names) + # Get the dtype + if dtype is not None: + dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names, + excludelist=excludelist, + deletechars=deletechars, + case_sensitive=case_sensitive, + replace_space=replace_space) + # Make sure the names is a list (for 2.5) + if names is not None: + names = list(names) + + if usecols: + for (i, current) in enumerate(usecols): + # if usecols is a list of names, convert to a list of indices + if _is_string_like(current): + usecols[i] = names.index(current) + elif current < 0: + usecols[i] = current + len(first_values) + # If the dtype is not None, make sure we update it + if (dtype is not None) and (len(dtype) > nbcols): + descr = dtype.descr + dtype = np.dtype([descr[_] for _ in usecols]) + names = list(dtype.names) + # If `names` is not None, update the names + elif (names is not None) and (len(names) > nbcols): + names = [names[_] for _ in usecols] + elif (names is not None) and (dtype is not None): + names = list(dtype.names) + + # Process the missing values ............................... + # Rename missing_values for convenience + user_missing_values = missing_values or () + if isinstance(user_missing_values, bytes): + user_missing_values = user_missing_values.decode('latin1') + + # Define the list of missing_values (one column: one list) + missing_values = [list(['']) for _ in range(nbcols)] + + # We have a dictionary: process it field by field + if isinstance(user_missing_values, dict): + # Loop on the items + for (key, val) in user_missing_values.items(): + # Is the key a string ? 
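The string-key test above is what lets ``missing_values`` (and, further down,
``filling_values``) be keyed by column name or column index interchangeably; a
sketch adapted from the `genfromtxt` documentation:

>>> from io import StringIO
>>> data = u"N/A, 2, 3\n4, ,???"
>>> np.genfromtxt(StringIO(data), delimiter=",", dtype=int, names="a,b,c",
...               missing_values={0: "N/A", 'b': " ", 2: "???"},
...               filling_values={0: 0, 'b': 0, 2: -999})
array([(0, 2, 3), (4, 0, -999)],
      dtype=[('a', '<i8'), ('b', '<i8'), ('c', '<i8')])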
+ if _is_string_like(key): + try: + # Transform it into an integer + key = names.index(key) + except ValueError: + # We couldn't find it: the name must have been dropped + continue + # Redefine the key as needed if it's a column number + if usecols: + try: + key = usecols.index(key) + except ValueError: + pass + # Transform the value as a list of string + if isinstance(val, (list, tuple)): + val = [str(_) for _ in val] + else: + val = [str(val), ] + # Add the value(s) to the current list of missing + if key is None: + # None acts as default + for miss in missing_values: + miss.extend(val) + else: + missing_values[key].extend(val) + # We have a sequence : each item matches a column + elif isinstance(user_missing_values, (list, tuple)): + for (value, entry) in zip(user_missing_values, missing_values): + value = str(value) + if value not in entry: + entry.append(value) + # We have a string : apply it to all entries + elif isinstance(user_missing_values, basestring): + user_value = user_missing_values.split(",") + for entry in missing_values: + entry.extend(user_value) + # We have something else: apply it to all entries + else: + for entry in missing_values: + entry.extend([str(user_missing_values)]) + + # Process the filling_values ............................... + # Rename the input for convenience + user_filling_values = filling_values + if user_filling_values is None: + user_filling_values = [] + # Define the default + filling_values = [None] * nbcols + # We have a dictionary : update each entry individually + if isinstance(user_filling_values, dict): + for (key, val) in user_filling_values.items(): + if _is_string_like(key): + try: + # Transform it into an integer + key = names.index(key) + except ValueError: + # We couldn't find it: the name must have been dropped, + continue + # Redefine the key if it's a column number and usecols is defined + if usecols: + try: + key = usecols.index(key) + except ValueError: + pass + # Add the value to the list + filling_values[key] = val + # We have a sequence : update on a one-to-one basis + elif isinstance(user_filling_values, (list, tuple)): + n = len(user_filling_values) + if (n <= nbcols): + filling_values[:n] = user_filling_values else: - val = [str(val), ] - # Add the value(s) to the current list of missing - if key is None: - # None acts as default - for miss in missing_values: - miss.extend(val) + filling_values = user_filling_values[:nbcols] + # We have something else : use it for all entries + else: + filling_values = [user_filling_values] * nbcols + + # Initialize the converters ................................ + if dtype is None: + # Note: we can't use a [...]*nbcols, as we would have 3 times the same + # ... converter, instead of 3 different converters. 
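The comment above is the usual Python aliasing caveat: a
``[converter] * nbcols`` list would hold ``nbcols`` references to a single
``StringConverter``, so upgrading the converter for one column would silently
upgrade them all. The underlying behavior in miniature:

>>> shared = [[]] * 3                  # three references to the same list
>>> shared[0].append(1)
>>> shared
[[1], [1], [1]]
>>> distinct = [[] for _ in range(3)]  # three separate lists
>>> distinct[0].append(1)
>>> distinct
[[1], [], []]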
+ converters = [StringConverter(None, missing_values=miss, default=fill) + for (miss, fill) in zip(missing_values, filling_values)] + else: + dtype_flat = flatten_dtype(dtype, flatten_base=True) + # Initialize the converters + if len(dtype_flat) > 1: + # Flexible type : get a converter from each dtype + zipit = zip(dtype_flat, missing_values, filling_values) + converters = [StringConverter(dt, locked=True, + missing_values=miss, default=fill) + for (dt, miss, fill) in zipit] else: - missing_values[key].extend(val) - # We have a sequence : each item matches a column - elif isinstance(user_missing_values, (list, tuple)): - for (value, entry) in zip(user_missing_values, missing_values): - value = str(value) - if value not in entry: - entry.append(value) - # We have a string : apply it to all entries - elif isinstance(user_missing_values, basestring): - user_value = user_missing_values.split(",") - for entry in missing_values: - entry.extend(user_value) - # We have something else: apply it to all entries - else: - for entry in missing_values: - entry.extend([str(user_missing_values)]) - - # Process the filling_values ............................... - # Rename the input for convenience - user_filling_values = filling_values - if user_filling_values is None: - user_filling_values = [] - # Define the default - filling_values = [None] * nbcols - # We have a dictionary : update each entry individually - if isinstance(user_filling_values, dict): - for (key, val) in user_filling_values.items(): - if _is_string_like(key): + # Set to a default converter (but w/ different missing values) + zipit = zip(missing_values, filling_values) + converters = [StringConverter(dtype, locked=True, + missing_values=miss, default=fill) + for (miss, fill) in zipit] + # Update the converters to use the user-defined ones + uc_update = [] + for (j, conv) in user_converters.items(): + # If the converter is specified by column names, use the index instead + if _is_string_like(j): try: - # Transform it into an integer - key = names.index(key) + j = names.index(j) + i = j except ValueError: - # We couldn't find it: the name must have been dropped, continue - # Redefine the key if it's a column number and usecols is defined - if usecols: + elif usecols: try: - key = usecols.index(key) + i = usecols.index(j) except ValueError: - pass - # Add the value to the list - filling_values[key] = val - # We have a sequence : update on a one-to-one basis - elif isinstance(user_filling_values, (list, tuple)): - n = len(user_filling_values) - if (n <= nbcols): - filling_values[:n] = user_filling_values - else: - filling_values = user_filling_values[:nbcols] - # We have something else : use it for all entries - else: - filling_values = [user_filling_values] * nbcols - - # Initialize the converters ................................ - if dtype is None: - # Note: we can't use a [...]*nbcols, as we would have 3 times the same - # ... converter, instead of 3 different converters. 
- converters = [StringConverter(None, missing_values=miss, default=fill) - for (miss, fill) in zip(missing_values, filling_values)] - else: - dtype_flat = flatten_dtype(dtype, flatten_base=True) - # Initialize the converters - if len(dtype_flat) > 1: - # Flexible type : get a converter from each dtype - zipit = zip(dtype_flat, missing_values, filling_values) - converters = [StringConverter(dt, locked=True, - missing_values=miss, default=fill) - for (dt, miss, fill) in zipit] - else: - # Set to a default converter (but w/ different missing values) - zipit = zip(missing_values, filling_values) - converters = [StringConverter(dtype, locked=True, - missing_values=miss, default=fill) - for (miss, fill) in zipit] - # Update the converters to use the user-defined ones - uc_update = [] - for (j, conv) in user_converters.items(): - # If the converter is specified by column names, use the index instead - if _is_string_like(j): - try: - j = names.index(j) + # Unused converter specified + continue + else: i = j - except ValueError: - continue - elif usecols: - try: - i = usecols.index(j) - except ValueError: - # Unused converter specified + # Find the value to test - first_line is not filtered by usecols: + if len(first_line): + testing_value = first_values[j] + else: + testing_value = None + if conv is bytes: + user_conv = asbytes + elif byte_converters: + # converters may use decode to workaround numpy's old behaviour, + # so encode the string again before passing to the user converter + def tobytes_first(x, conv): + if type(x) is bytes: + return conv(x) + return conv(x.encode("latin1")) + user_conv = functools.partial(tobytes_first, conv=conv) + else: + user_conv = conv + converters[i].update(user_conv, locked=True, + testing_value=testing_value, + default=filling_values[i], + missing_values=missing_values[i],) + uc_update.append((i, user_conv)) + # Make sure we have the corrected keys in user_converters... + user_converters.update(uc_update) + + # Fixme: possible error as following variable never used. + # miss_chars = [_.missing_values for _ in converters] + + # Initialize the output lists ... + # ... rows + rows = [] + append_to_rows = rows.append + # ... masks + if usemask: + masks = [] + append_to_masks = masks.append + # ... invalid + invalid = [] + append_to_invalid = invalid.append + + # Parse each line + for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): + values = split_line(line) + nbvalues = len(values) + # Skip an empty line + if nbvalues == 0: continue - else: - i = j - # Find the value to test - first_line is not filtered by usecols: - if len(first_line): - testing_value = first_values[j] - else: - testing_value = None - if conv is bytes: - user_conv = asbytes - elif byte_converters: - # converters may use decode to workaround numpy's old behaviour, - # so encode the string again before passing to the user converter - def tobytes_first(x, conv): - if type(x) is bytes: - return conv(x) - return conv(x.encode("latin1")) - import functools - user_conv = functools.partial(tobytes_first, conv=conv) - else: - user_conv = conv - converters[i].update(user_conv, locked=True, - testing_value=testing_value, - default=filling_values[i], - missing_values=missing_values[i],) - uc_update.append((i, user_conv)) - # Make sure we have the corrected keys in user_converters... - user_converters.update(uc_update) - - # Fixme: possible error as following variable never used. - # miss_chars = [_.missing_values for _ in converters] - - # Initialize the output lists ... - # ... 
rows - rows = [] - append_to_rows = rows.append - # ... masks - if usemask: - masks = [] - append_to_masks = masks.append - # ... invalid - invalid = [] - append_to_invalid = invalid.append - - # Parse each line - for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): - values = split_line(line) - nbvalues = len(values) - # Skip an empty line - if nbvalues == 0: - continue - if usecols: - # Select only the columns we need - try: - values = [values[_] for _ in usecols] - except IndexError: + if usecols: + # Select only the columns we need + try: + values = [values[_] for _ in usecols] + except IndexError: + append_to_invalid((i + skip_header + 1, nbvalues)) + continue + elif nbvalues != nbcols: append_to_invalid((i + skip_header + 1, nbvalues)) continue - elif nbvalues != nbcols: - append_to_invalid((i + skip_header + 1, nbvalues)) - continue - # Store the values - append_to_rows(tuple(values)) - if usemask: - append_to_masks(tuple([v.strip() in m - for (v, m) in zip(values, - missing_values)])) - if len(rows) == max_rows: - break - - if own_fhd: - fhd.close() + # Store the values + append_to_rows(tuple(values)) + if usemask: + append_to_masks(tuple([v.strip() in m + for (v, m) in zip(values, + missing_values)])) + if len(rows) == max_rows: + break # Upgrade the converters (if needed) if dtype is None: @@ -2223,6 +2244,12 @@ def ndfromtxt(fname, **kwargs): """ Load ASCII data stored in a file and return it as a single array. + .. deprecated:: 1.17 + ndfromtxt` is a deprecated alias of `genfromtxt` which + overwrites the ``usemask`` argument with `False` even when + explicitly called as ``ndfromtxt(..., usemask=True)``. + Use `genfromtxt` instead. + Parameters ---------- fname, kwargs : For a description of input parameters, see `genfromtxt`. @@ -2233,6 +2260,11 @@ def ndfromtxt(fname, **kwargs): """ kwargs['usemask'] = False + # Numpy 1.17 + warnings.warn( + "np.ndfromtxt is a deprecated alias of np.genfromtxt, " + "prefer the latter.", + DeprecationWarning, stacklevel=2) return genfromtxt(fname, **kwargs) @@ -2240,6 +2272,12 @@ def mafromtxt(fname, **kwargs): """ Load ASCII data stored in a text file and return a masked array. + .. deprecated:: 1.17 + np.mafromtxt is a deprecated alias of `genfromtxt` which + overwrites the ``usemask`` argument with `True` even when + explicitly called as ``mafromtxt(..., usemask=False)``. + Use `genfromtxt` instead. + Parameters ---------- fname, kwargs : For a description of input parameters, see `genfromtxt`. @@ -2250,6 +2288,11 @@ def mafromtxt(fname, **kwargs): """ kwargs['usemask'] = True + # Numpy 1.17 + warnings.warn( + "np.mafromtxt is a deprecated alias of np.genfromtxt, " + "prefer the latter.", + DeprecationWarning, stacklevel=2) return genfromtxt(fname, **kwargs) diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py index e3defdca2..2c72f623c 100644 --- a/numpy/lib/polynomial.py +++ b/numpy/lib/polynomial.py @@ -110,7 +110,7 @@ def poly(seq_of_zeros): Given a sequence of a polynomial's zeros: >>> np.poly((0, 0, 0)) # Multiple root example - array([1, 0, 0, 0]) + array([1., 0., 0., 0.]) The line above represents z**3 + 0*z**2 + 0*z + 0. @@ -119,14 +119,14 @@ def poly(seq_of_zeros): The line above represents z**3 - z/4 - >>> np.poly((np.random.random(1.)[0], 0, np.random.random(1.)[0])) - array([ 1. , -0.77086955, 0.08618131, 0. ]) #random + >>> np.poly((np.random.random(1)[0], 0, np.random.random(1)[0])) + array([ 1. , -0.77086955, 0.08618131, 0. 
]) # random Given a square array object: >>> P = np.array([[0, 1./3], [-1./2, 0]]) >>> np.poly(P) - array([ 1. , 0. , 0.16666667]) + array([1. , 0. , 0.16666667]) Note how in all cases the leading coefficient is always 1. @@ -295,7 +295,7 @@ def polyint(p, m=1, k=None): >>> p = np.poly1d([1,1,1]) >>> P = np.polyint(p) >>> P - poly1d([ 0.33333333, 0.5 , 1. , 0. ]) + poly1d([ 0.33333333, 0.5 , 1. , 0. ]) # may vary >>> np.polyder(P) == p True @@ -310,7 +310,7 @@ def polyint(p, m=1, k=None): 0.0 >>> P = np.polyint(p, 3, k=[6,5,3]) >>> P - poly1d([ 0.01666667, 0.04166667, 0.16666667, 3. , 5. , 3. ]) + poly1d([ 0.01666667, 0.04166667, 0.16666667, 3. , 5. , 3. ]) # may vary Note that 3 = 6 / 2!, and that the constants are given in the order of integrations. Constant of the highest-order polynomial term comes first: @@ -404,7 +404,7 @@ def polyder(p, m=1): >>> np.polyder(p, 3) poly1d([6]) >>> np.polyder(p, 4) - poly1d([ 0.]) + poly1d([0.]) """ m = int(m) @@ -548,32 +548,35 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): Examples -------- + >>> import warnings >>> x = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]) >>> y = np.array([0.0, 0.8, 0.9, 0.1, -0.8, -1.0]) >>> z = np.polyfit(x, y, 3) >>> z - array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254]) + array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254]) # may vary It is convenient to use `poly1d` objects for dealing with polynomials: >>> p = np.poly1d(z) >>> p(0.5) - 0.6143849206349179 + 0.6143849206349179 # may vary >>> p(3.5) - -0.34732142857143039 + -0.34732142857143039 # may vary >>> p(10) - 22.579365079365115 + 22.579365079365115 # may vary High-order polynomials may oscillate wildly: - >>> p30 = np.poly1d(np.polyfit(x, y, 30)) - /... RankWarning: Polyfit may be poorly conditioned... + >>> with warnings.catch_warnings(): + ... warnings.simplefilter('ignore', np.RankWarning) + ... p30 = np.poly1d(np.polyfit(x, y, 30)) + ... >>> p30(4) - -0.80000000000000204 + -0.80000000000000204 # may vary >>> p30(5) - -0.99999999999999445 + -0.99999999999999445 # may vary >>> p30(4.5) - -0.10547061179440398 + -0.10547061179440398 # may vary Illustration: @@ -631,7 +634,7 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): # warn on rank reduction, which indicates an ill conditioned matrix if rank != order and not full: msg = "Polyfit may be poorly conditioned" - warnings.warn(msg, RankWarning, stacklevel=2) + warnings.warn(msg, RankWarning, stacklevel=4) if full: return c, resids, rank, s, rcond @@ -703,6 +706,8 @@ def polyval(p, x): for polynomials of high degree the values may be inaccurate due to rounding errors. Use carefully. + If `x` is a subtype of `ndarray` the return value will be of the same type. + References ---------- .. [1] I. N. Bronshtein, K. A. Semendyayev, and K. A. Hirsch (Eng. @@ -714,18 +719,18 @@ def polyval(p, x): >>> np.polyval([3,0,1], 5) # 3 * 5**2 + 0 * 5**1 + 1 76 >>> np.polyval([3,0,1], np.poly1d(5)) - poly1d([ 76.]) + poly1d([76.]) >>> np.polyval(np.poly1d([3,0,1]), 5) 76 >>> np.polyval(np.poly1d([3,0,1]), np.poly1d(5)) - poly1d([ 76.]) + poly1d([76.]) """ p = NX.asarray(p) if isinstance(x, poly1d): y = 0 else: - x = NX.asarray(x) + x = NX.asanyarray(x) y = NX.zeros_like(x) for i in range(len(p)): y = y * x + p[i] @@ -871,8 +876,7 @@ def polymul(a1, a2): See Also -------- poly1d : A one-dimensional polynomial class. 
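The evaluation loop in `polyval` above (``y = y * x + p[i]``) is Horner's
rule; unrolled for the first doctest, ``np.polyval([3, 0, 1], 5)``:

>>> y = 0
>>> for coef in [3, 0, 1]:
...     y = y * 5 + coef   # ((0*5 + 3)*5 + 0)*5 + 1
>>> y
76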
- poly, polyadd, polyder, polydiv, polyfit, polyint, polysub, - polyval + poly, polyadd, polyder, polydiv, polyfit, polyint, polysub, polyval convolve : Array convolution. Same output as polymul, but has parameter for overlap mode. @@ -934,7 +938,7 @@ def polydiv(u, v): See Also -------- - poly, polyadd, polyder, polydiv, polyfit, polyint, polymul, polysub, + poly, polyadd, polyder, polydiv, polyfit, polyint, polymul, polysub polyval Notes @@ -951,7 +955,7 @@ def polydiv(u, v): >>> x = np.array([3.0, 5.0, 2.0]) >>> y = np.array([2.0, 1.0]) >>> np.polydiv(x, y) - (array([ 1.5 , 1.75]), array([ 0.25])) + (array([1.5 , 1.75]), array([0.25])) """ truepoly = (isinstance(u, poly1d) or isinstance(u, poly1d)) @@ -1046,7 +1050,7 @@ class poly1d(object): >>> p.r array([-1.+1.41421356j, -1.-1.41421356j]) >>> p(p.r) - array([ -4.44089210e-16+0.j, -4.44089210e-16+0.j]) + array([ -4.44089210e-16+0.j, -4.44089210e-16+0.j]) # may vary These numbers in the previous line represent (0, 0) to machine precision @@ -1073,7 +1077,7 @@ class poly1d(object): poly1d([ 1, 4, 10, 12, 9]) >>> (p**3 + 4) / p - (poly1d([ 1., 4., 10., 12., 9.]), poly1d([ 4.])) + (poly1d([ 1., 4., 10., 12., 9.]), poly1d([4.])) ``asarray(p)`` gives the coefficient array, so polynomials can be used in all functions that accept arrays: @@ -1095,7 +1099,7 @@ class poly1d(object): Construct a polynomial from its roots: >>> np.poly1d([1, 2], True) - poly1d([ 1, -3, 2]) + poly1d([ 1., -3., 2.]) This is the same polynomial as obtained by: @@ -1107,8 +1111,14 @@ class poly1d(object): @property def coeffs(self): - """ A copy of the polynomial coefficients """ - return self._coeffs.copy() + """ The polynomial coefficients """ + return self._coeffs + + @coeffs.setter + def coeffs(self, value): + # allowing this makes p.coeffs *= 2 legal + if value is not self._coeffs: + raise AttributeError("Cannot set attribute") @property def variable(self): diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index fcc0d9a7a..fabb509ab 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -57,11 +57,10 @@ def recursive_fill_fields(input, output): Examples -------- >>> from numpy.lib import recfunctions as rfn - >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', int), ('B', float)]) + >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', np.int64), ('B', np.float64)]) >>> b = np.zeros((3,), dtype=a.dtype) >>> rfn.recursive_fill_fields(a, b) - array([(1, 10.0), (2, 20.0), (0, 0.0)], - dtype=[('A', '<i4'), ('B', '<f8')]) + array([(1, 10.), (2, 20.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')]) """ newdtype = output.dtype @@ -89,11 +88,11 @@ def get_fieldspec(dtype): Examples -------- - >>> dt = np.dtype([(('a', 'A'), int), ('b', float, 3)]) + >>> dt = np.dtype([(('a', 'A'), np.int64), ('b', np.double, 3)]) >>> dt.descr - [(('a', 'A'), '<i4'), ('b', '<f8', (3,))] + [(('a', 'A'), '<i8'), ('b', '<f8', (3,))] >>> get_fieldspec(dt) - [(('a', 'A'), dtype('int32')), ('b', dtype(('<f8', (3,))))] + [(('a', 'A'), dtype('int64')), ('b', dtype(('<f8', (3,))))] """ if dtype.names is None: @@ -120,10 +119,15 @@ def get_names(adtype): Examples -------- >>> from numpy.lib import recfunctions as rfn - >>> rfn.get_names(np.empty((1,), dtype=int)) is None - True + >>> rfn.get_names(np.empty((1,), dtype=int)) + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' + >>> rfn.get_names(np.empty((1,), dtype=[('A',int), ('B', float)])) - ('A', 'B') + Traceback (most recent call last): + ... 
+ AttributeError: 'numpy.ndarray' object has no attribute 'names' >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])]) >>> rfn.get_names(adtype) ('a', ('b', ('ba', 'bb'))) @@ -142,7 +146,7 @@ def get_names(adtype): def get_names_flat(adtype): """ Returns the field names of the input datatype as a tuple. Nested structure - are flattend beforehand. + are flattened beforehand. Parameters ---------- @@ -153,9 +157,13 @@ def get_names_flat(adtype): -------- >>> from numpy.lib import recfunctions as rfn >>> rfn.get_names_flat(np.empty((1,), dtype=int)) is None - True + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' >>> rfn.get_names_flat(np.empty((1,), dtype=[('A',int), ('B', float)])) - ('A', 'B') + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])]) >>> rfn.get_names_flat(adtype) ('a', 'b', 'ba', 'bb') @@ -403,20 +411,18 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, -------- >>> from numpy.lib import recfunctions as rfn >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.]))) - masked_array(data = [(1, 10.0) (2, 20.0) (--, 30.0)], - mask = [(False, False) (False, False) (True, False)], - fill_value = (999999, 1e+20), - dtype = [('f0', '<i4'), ('f1', '<f8')]) - - >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])), - ... usemask=False) - array([(1, 10.0), (2, 20.0), (-1, 30.0)], - dtype=[('f0', '<i4'), ('f1', '<f8')]) - >>> rfn.merge_arrays((np.array([1, 2]).view([('a', int)]), + array([( 1, 10.), ( 2, 20.), (-1, 30.)], + dtype=[('f0', '<i8'), ('f1', '<f8')]) + + >>> rfn.merge_arrays((np.array([1, 2], dtype=np.int64), + ... np.array([10., 20., 30.])), usemask=False) + array([(1, 10.0), (2, 20.0), (-1, 30.0)], + dtype=[('f0', '<i8'), ('f1', '<f8')]) + >>> rfn.merge_arrays((np.array([1, 2]).view([('a', np.int64)]), ... np.array([10., 20., 30.])), ... usemask=False, asrecarray=True) - rec.array([(1, 10.0), (2, 20.0), (-1, 30.0)], - dtype=[('a', '<i4'), ('f1', '<f8')]) + rec.array([( 1, 10.), ( 2, 20.), (-1, 30.)], + dtype=[('a', '<i8'), ('f1', '<f8')]) Notes ----- @@ -547,16 +553,14 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False): -------- >>> from numpy.lib import recfunctions as rfn >>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], - ... dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + ... dtype=[('a', np.int64), ('b', [('ba', np.double), ('bb', np.int64)])]) >>> rfn.drop_fields(a, 'a') - array([((2.0, 3),), ((5.0, 6),)], - dtype=[('b', [('ba', '<f8'), ('bb', '<i4')])]) + array([((2., 3),), ((5., 6),)], + dtype=[('b', [('ba', '<f8'), ('bb', '<i8')])]) >>> rfn.drop_fields(a, 'ba') - array([(1, (3,)), (4, (6,))], - dtype=[('a', '<i4'), ('b', [('bb', '<i4')])]) + array([(1, (3,)), (4, (6,))], dtype=[('a', '<i8'), ('b', [('bb', '<i8')])]) >>> rfn.drop_fields(a, ['ba', 'bb']) - array([(1,), (4,)], - dtype=[('a', '<i4')]) + array([(1,), (4,)], dtype=[('a', '<i8')]) """ if _is_string_like(drop_names): drop_names = [drop_names] @@ -648,8 +652,8 @@ def rename_fields(base, namemapper): >>> a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))], ... 
dtype=[('a', int),('b', [('ba', float), ('bb', (float, 2))])]) >>> rfn.rename_fields(a, {'a':'A', 'bb':'BB'}) - array([(1, (2.0, [3.0, 30.0])), (4, (5.0, [6.0, 60.0]))], - dtype=[('A', '<i4'), ('b', [('ba', '<f8'), ('BB', '<f8', 2)])]) + array([(1, (2., [ 3., 30.])), (4, (5., [ 6., 60.]))], + dtype=[('A', '<i8'), ('b', [('ba', '<f8'), ('BB', '<f8', (2,))])]) """ def _recursive_rename_fields(ndtype, namemapper): @@ -834,18 +838,18 @@ def repack_fields(a, align=False, recurse=False): ... print("offsets:", [d.fields[name][1] for name in d.names]) ... print("itemsize:", d.itemsize) ... - >>> dt = np.dtype('u1,i4,f4', align=True) + >>> dt = np.dtype('u1,<i4,<f4', align=True) >>> dt - dtype({'names':['f0','f1','f2'], 'formats':['u1','<i4','<f8'], 'offsets':[0,4,8], 'itemsize':16}, align=True) + dtype({'names':['f0','f1','f2'], 'formats':['u1','<i8','<f8'], 'offsets':[0,8,16], 'itemsize':24}, align=True) >>> print_offsets(dt) - offsets: [0, 4, 8] - itemsize: 16 + offsets: [0, 8, 16] + itemsize: 24 >>> packed_dt = repack_fields(dt) >>> packed_dt - dtype([('f0', 'u1'), ('f1', '<i4'), ('f2', '<f8')]) + dtype([('f0', 'u1'), ('f1', '<i8'), ('f2', '<f8')]) >>> print_offsets(packed_dt) - offsets: [0, 1, 5] - itemsize: 13 + offsets: [0, 1, 9] + itemsize: 17 """ if not isinstance(a, np.dtype): @@ -972,11 +976,11 @@ def structured_to_unstructured(arr, dtype=None, copy=False, casting='unsafe'): # next cast to a packed format with all fields converted to new dtype packed_fields = np.dtype({'names': names, - 'formats': [(out_dtype, c) for c in counts]}) + 'formats': [(out_dtype, dt.shape) for dt in dts]}) arr = arr.astype(packed_fields, copy=copy, casting=casting) # finally is it safe to view the packed fields as the unstructured type - return arr.view((out_dtype, sum(counts))) + return arr.view((out_dtype, (sum(counts),))) def _unstructured_to_structured_dispatcher(arr, dtype=None, names=None, align=None, copy=None, casting=None): @@ -1065,7 +1069,7 @@ def unstructured_to_structured(arr, dtype=None, names=None, align=False, # first view as a packed structured array of one dtype packed_fields = np.dtype({'names': names, - 'formats': [(arr.dtype, c) for c in counts]}) + 'formats': [(arr.dtype, dt.shape) for dt in dts]}) arr = np.ascontiguousarray(arr).view(packed_fields) # next cast to an unpacked but flattened format with varied dtypes @@ -1244,15 +1248,16 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, True >>> z = np.array([('A', 1), ('B', 2)], dtype=[('A', '|S3'), ('B', float)]) >>> zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], - ... dtype=[('A', '|S3'), ('B', float), ('C', float)]) + ... 
dtype=[('A', '|S3'), ('B', np.double), ('C', np.double)]) >>> test = rfn.stack_arrays((z,zz)) >>> test - masked_array(data = [('A', 1.0, --) ('B', 2.0, --) ('a', 10.0, 100.0) ('b', 20.0, 200.0) - ('c', 30.0, 300.0)], - mask = [(False, False, True) (False, False, True) (False, False, False) - (False, False, False) (False, False, False)], - fill_value = ('N/A', 1e+20, 1e+20), - dtype = [('A', '|S3'), ('B', '<f8'), ('C', '<f8')]) + masked_array(data=[(b'A', 1.0, --), (b'B', 2.0, --), (b'a', 10.0, 100.0), + (b'b', 20.0, 200.0), (b'c', 30.0, 300.0)], + mask=[(False, False, True), (False, False, True), + (False, False, False), (False, False, False), + (False, False, False)], + fill_value=(b'N/A', 1.e+20, 1.e+20), + dtype=[('A', 'S3'), ('B', '<f8'), ('C', '<f8')]) """ if isinstance(arrays, ndarray): @@ -1331,7 +1336,10 @@ def find_duplicates(a, key=None, ignoremask=True, return_index=False): >>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3], ... mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype) >>> rfn.find_duplicates(a, ignoremask=True, return_index=True) - ... # XXX: judging by the output, the ignoremask flag has no effect + (masked_array(data=[(1,), (1,), (2,), (2,)], + mask=[(False,), (False,), (False,), (False,)], + fill_value=(999999,), + dtype=[('a', '<i8')]), array([0, 1, 3, 4])) """ a = np.asanyarray(a).ravel() # Get a dictionary of fields diff --git a/numpy/lib/scimath.py b/numpy/lib/scimath.py index 9ca006841..5ac790ce9 100644 --- a/numpy/lib/scimath.py +++ b/numpy/lib/scimath.py @@ -59,7 +59,7 @@ def _tocomplex(arr): >>> a = np.array([1,2,3],np.short) >>> ac = np.lib.scimath._tocomplex(a); ac - array([ 1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) + array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) >>> ac.dtype dtype('complex64') @@ -70,7 +70,7 @@ def _tocomplex(arr): >>> b = np.array([1,2,3],np.double) >>> bc = np.lib.scimath._tocomplex(b); bc - array([ 1.+0.j, 2.+0.j, 3.+0.j]) + array([1.+0.j, 2.+0.j, 3.+0.j]) >>> bc.dtype dtype('complex128') @@ -81,13 +81,13 @@ def _tocomplex(arr): >>> c = np.array([1,2,3],np.csingle) >>> cc = np.lib.scimath._tocomplex(c); cc - array([ 1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) + array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) >>> c *= 2; c - array([ 2.+0.j, 4.+0.j, 6.+0.j], dtype=complex64) + array([2.+0.j, 4.+0.j, 6.+0.j], dtype=complex64) >>> cc - array([ 1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) + array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) """ if issubclass(arr.dtype.type, (nt.single, nt.byte, nt.short, nt.ubyte, nt.ushort, nt.csingle)): @@ -170,7 +170,7 @@ def _fix_real_abs_gt_1(x): array([0, 1]) >>> np.lib.scimath._fix_real_abs_gt_1([0,2]) - array([ 0.+0.j, 2.+0.j]) + array([0.+0.j, 2.+0.j]) """ x = asarray(x) if any(isreal(x) & (abs(x) > 1)): @@ -212,14 +212,14 @@ def sqrt(x): >>> np.lib.scimath.sqrt(1) 1.0 >>> np.lib.scimath.sqrt([1, 4]) - array([ 1., 2.]) + array([1., 2.]) But it automatically handles negative inputs: >>> np.lib.scimath.sqrt(-1) - (0.0+1.0j) + 1j >>> np.lib.scimath.sqrt([-1,4]) - array([ 0.+1.j, 2.+0.j]) + array([0.+1.j, 2.+0.j]) """ x = _fix_real_lt_zero(x) @@ -317,7 +317,7 @@ def log10(x): 1.0 >>> np.emath.log10([-10**1, -10**2, 10**2]) - array([ 1.+1.3644j, 2.+1.3644j, 2.+0.j ]) + array([1.+1.3644j, 2.+1.3644j, 2.+0.j ]) """ x = _fix_real_lt_zero(x) @@ -354,9 +354,9 @@ def logn(n, x): >>> np.set_printoptions(precision=4) >>> np.lib.scimath.logn(2, [4, 8]) - array([ 2., 3.]) + array([2., 3.]) >>> np.lib.scimath.logn(2, [-4, -8, 8]) - array([ 2.+4.5324j, 3.+4.5324j, 3.+0.j ]) + array([2.+4.5324j, 3.+4.5324j, 3.+0.j ]) """ x = 
_fix_real_lt_zero(x) @@ -405,7 +405,7 @@ def log2(x): >>> np.emath.log2(8) 3.0 >>> np.emath.log2([-4, -8, 8]) - array([ 2.+4.5324j, 3.+4.5324j, 3.+0.j ]) + array([2.+4.5324j, 3.+4.5324j, 3.+0.j ]) """ x = _fix_real_lt_zero(x) @@ -451,9 +451,9 @@ def power(x, p): >>> np.lib.scimath.power([2, 4], 2) array([ 4, 16]) >>> np.lib.scimath.power([2, 4], -2) - array([ 0.25 , 0.0625]) + array([0.25 , 0.0625]) >>> np.lib.scimath.power([-2, 4], 2) - array([ 4.+0.j, 16.+0.j]) + array([ 4.-0.j, 16.+0.j]) """ x = _fix_real_lt_zero(x) @@ -499,7 +499,7 @@ def arccos(x): 0.0 >>> np.emath.arccos([1,2]) - array([ 0.-0.j , 0.+1.317j]) + array([0.-0.j , 0.-1.317j]) """ x = _fix_real_abs_gt_1(x) @@ -545,7 +545,7 @@ def arcsin(x): 0.0 >>> np.emath.arcsin([0,1]) - array([ 0. , 1.5708]) + array([0. , 1.5708]) """ x = _fix_real_abs_gt_1(x) @@ -589,11 +589,14 @@ def arctanh(x): -------- >>> np.set_printoptions(precision=4) - >>> np.emath.arctanh(np.eye(2)) - array([[ Inf, 0.], - [ 0., Inf]]) + >>> from numpy.testing import suppress_warnings + >>> with suppress_warnings() as sup: + ... sup.filter(RuntimeWarning) + ... np.emath.arctanh(np.eye(2)) + array([[inf, 0.], + [ 0., inf]]) >>> np.emath.arctanh([1j]) - array([ 0.+0.7854j]) + array([0.+0.7854j]) """ x = _fix_real_abs_gt_1(x) diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py index f56c4f4db..a5d0040aa 100644 --- a/numpy/lib/shape_base.py +++ b/numpy/lib/shape_base.py @@ -7,12 +7,11 @@ import numpy.core.numeric as _nx from numpy.core.numeric import ( asarray, zeros, outer, concatenate, array, asanyarray ) -from numpy.core.fromnumeric import product, reshape, transpose +from numpy.core.fromnumeric import reshape, transpose from numpy.core.multiarray import normalize_axis_index from numpy.core import overrides from numpy.core import vstack, atleast_3d -from numpy.core.shape_base import ( - _arrays_for_stack_dispatcher, _warn_for_nonsequence) +from numpy.core.shape_base import _arrays_for_stack_dispatcher from numpy.lib.index_tricks import ndindex from numpy.matrixlib.defmatrix import matrix # this raises all the right alarm bells @@ -95,7 +94,7 @@ def take_along_axis(arr, indices, axis): Ni, M, Nk = a.shape[:axis], a.shape[axis], a.shape[axis+1:] J = indices.shape[axis] # Need not equal M - out = np.empty(Nk + (J,) + Nk) + out = np.empty(Ni + (J,) + Nk) for ii in ndindex(Ni): for kk in ndindex(Nk): @@ -129,7 +128,7 @@ def take_along_axis(arr, indices, axis): [40, 50, 60]]) >>> ai = np.argsort(a, axis=1); ai array([[0, 2, 1], - [1, 2, 0]], dtype=int64) + [1, 2, 0]]) >>> np.take_along_axis(a, ai, axis=1) array([[10, 20, 30], [40, 50, 60]]) @@ -142,7 +141,7 @@ def take_along_axis(arr, indices, axis): >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1) >>> ai array([[1], - [0], dtype=int64) + [0]]) >>> np.take_along_axis(a, ai, axis=1) array([[30], [60]]) @@ -152,10 +151,10 @@ def take_along_axis(arr, indices, axis): >>> ai_min = np.expand_dims(np.argmin(a, axis=1), axis=1) >>> ai_max = np.expand_dims(np.argmax(a, axis=1), axis=1) - >>> ai = np.concatenate([ai_min, ai_max], axis=axis) - >> ai + >>> ai = np.concatenate([ai_min, ai_max], axis=1) + >>> ai array([[0, 1], - [1, 0]], dtype=int64) + [1, 0]]) >>> np.take_along_axis(a, ai, axis=1) array([[10, 30], [40, 60]]) @@ -243,7 +242,7 @@ def put_along_axis(arr, indices, values, axis): >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1) >>> ai array([[1], - [0]], dtype=int64) + [0]]) >>> np.put_along_axis(a, ai, 99, axis=1) >>> a array([[10, 99, 20], @@ -330,9 +329,9 @@ def apply_along_axis(func1d, axis, 
arr, *args, **kwargs): ... return (a[0] + a[-1]) * 0.5 >>> b = np.array([[1,2,3], [4,5,6], [7,8,9]]) >>> np.apply_along_axis(my_func, 0, b) - array([ 4., 5., 6.]) + array([4., 5., 6.]) >>> np.apply_along_axis(my_func, 1, b) - array([ 2., 5., 8.]) + array([2., 5., 8.]) For a function that returns a 1D array, the number of dimensions in `outarr` is the same as `arr`. @@ -533,8 +532,7 @@ def expand_dims(a, axis): Returns ------- res : ndarray - Output array. The number of dimensions is one greater than that of - the input array. + View of `a` with the number of dimensions increased by one. See Also -------- @@ -580,7 +578,7 @@ def expand_dims(a, axis): # 2017-05-17, 1.13.0 warnings.warn("Both axis > a.ndim and axis < -a.ndim - 1 are " "deprecated and will raise an AxisError in the future.", - DeprecationWarning, stacklevel=2) + DeprecationWarning, stacklevel=3) # When the deprecation period expires, delete this if block, if axis < 0: axis = axis + a.ndim + 1 @@ -630,7 +628,10 @@ def column_stack(tup): [3, 4]]) """ - _warn_for_nonsequence(tup) + if not overrides.ARRAY_FUNCTION_ENABLED: + # raise warning if necessary + _arrays_for_stack_dispatcher(tup, stacklevel=2) + arrays = [] for v in tup: arr = array(v, copy=False, subok=True) @@ -695,8 +696,14 @@ def dstack(tup): [[3, 4]]]) """ - _warn_for_nonsequence(tup) - return _nx.concatenate([atleast_3d(_m) for _m in tup], 2) + if not overrides.ARRAY_FUNCTION_ENABLED: + # raise warning if necessary + _arrays_for_stack_dispatcher(tup, stacklevel=2) + + arrs = atleast_3d(*tup) + if not isinstance(arrs, list): + arrs = [arrs] + return _nx.concatenate(arrs, 2) def _replace_zero_by_x_arrays(sub_arys): @@ -732,11 +739,11 @@ def array_split(ary, indices_or_sections, axis=0): -------- >>> x = np.arange(8.0) >>> np.array_split(x, 3) - [array([ 0., 1., 2.]), array([ 3., 4., 5.]), array([ 6., 7.])] + [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7.])] >>> x = np.arange(7.0) >>> np.array_split(x, 3) - [array([ 0., 1., 2.]), array([ 3., 4.]), array([ 5., 6.])] + [array([0., 1., 2.]), array([3., 4.]), array([5., 6.])] """ try: @@ -828,14 +835,14 @@ def split(ary, indices_or_sections, axis=0): -------- >>> x = np.arange(9.0) >>> np.split(x, 3) - [array([ 0., 1., 2.]), array([ 3., 4., 5.]), array([ 6., 7., 8.])] + [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7., 8.])] >>> x = np.arange(8.0) >>> np.split(x, [3, 5, 6, 10]) - [array([ 0., 1., 2.]), - array([ 3., 4.]), - array([ 5.]), - array([ 6., 7.]), + [array([0., 1., 2.]), + array([3., 4.]), + array([5.]), + array([6., 7.]), array([], dtype=float64)] """ @@ -872,43 +879,43 @@ def hsplit(ary, indices_or_sections): -------- >>> x = np.arange(16.0).reshape(4, 4) >>> x - array([[ 0., 1., 2., 3.], - [ 4., 5., 6., 7.], - [ 8., 9., 10., 11.], - [ 12., 13., 14., 15.]]) + array([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.], + [12., 13., 14., 15.]]) >>> np.hsplit(x, 2) [array([[ 0., 1.], [ 4., 5.], [ 8., 9.], - [ 12., 13.]]), + [12., 13.]]), array([[ 2., 3.], [ 6., 7.], - [ 10., 11.], - [ 14., 15.]])] + [10., 11.], + [14., 15.]])] >>> np.hsplit(x, np.array([3, 6])) - [array([[ 0., 1., 2.], - [ 4., 5., 6.], - [ 8., 9., 10.], - [ 12., 13., 14.]]), - array([[ 3.], - [ 7.], - [ 11.], - [ 15.]]), - array([], dtype=float64)] + [array([[ 0., 1., 2.], + [ 4., 5., 6.], + [ 8., 9., 10.], + [12., 13., 14.]]), + array([[ 3.], + [ 7.], + [11.], + [15.]]), + array([], shape=(4, 0), dtype=float64)] With a higher dimensional array the split is still along the second axis. 
>>> x = np.arange(8.0).reshape(2, 2, 2) >>> x - array([[[ 0., 1.], - [ 2., 3.]], - [[ 4., 5.], - [ 6., 7.]]]) + array([[[0., 1.], + [2., 3.]], + [[4., 5.], + [6., 7.]]]) >>> np.hsplit(x, 2) - [array([[[ 0., 1.]], - [[ 4., 5.]]]), - array([[[ 2., 3.]], - [[ 6., 7.]]])] + [array([[[0., 1.]], + [[4., 5.]]]), + array([[[2., 3.]], + [[6., 7.]]])] """ if _nx.ndim(ary) == 0: @@ -936,35 +943,31 @@ def vsplit(ary, indices_or_sections): -------- >>> x = np.arange(16.0).reshape(4, 4) >>> x - array([[ 0., 1., 2., 3.], - [ 4., 5., 6., 7.], - [ 8., 9., 10., 11.], - [ 12., 13., 14., 15.]]) + array([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.], + [12., 13., 14., 15.]]) >>> np.vsplit(x, 2) - [array([[ 0., 1., 2., 3.], - [ 4., 5., 6., 7.]]), - array([[ 8., 9., 10., 11.], - [ 12., 13., 14., 15.]])] + [array([[0., 1., 2., 3.], + [4., 5., 6., 7.]]), array([[ 8., 9., 10., 11.], + [12., 13., 14., 15.]])] >>> np.vsplit(x, np.array([3, 6])) - [array([[ 0., 1., 2., 3.], - [ 4., 5., 6., 7.], - [ 8., 9., 10., 11.]]), - array([[ 12., 13., 14., 15.]]), - array([], dtype=float64)] + [array([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]]), array([[12., 13., 14., 15.]]), array([], shape=(0, 4), dtype=float64)] With a higher dimensional array the split is still along the first axis. >>> x = np.arange(8.0).reshape(2, 2, 2) >>> x - array([[[ 0., 1.], - [ 2., 3.]], - [[ 4., 5.], - [ 6., 7.]]]) + array([[[0., 1.], + [2., 3.]], + [[4., 5.], + [6., 7.]]]) >>> np.vsplit(x, 2) - [array([[[ 0., 1.], - [ 2., 3.]]]), - array([[[ 4., 5.], - [ 6., 7.]]])] + [array([[[0., 1.], + [2., 3.]]]), array([[[4., 5.], + [6., 7.]]])] """ if _nx.ndim(ary) < 2: @@ -989,30 +992,28 @@ def dsplit(ary, indices_or_sections): -------- >>> x = np.arange(16.0).reshape(2, 2, 4) >>> x - array([[[ 0., 1., 2., 3.], - [ 4., 5., 6., 7.]], - [[ 8., 9., 10., 11.], - [ 12., 13., 14., 15.]]]) + array([[[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.]], + [[ 8., 9., 10., 11.], + [12., 13., 14., 15.]]]) >>> np.dsplit(x, 2) - [array([[[ 0., 1.], - [ 4., 5.]], - [[ 8., 9.], - [ 12., 13.]]]), - array([[[ 2., 3.], - [ 6., 7.]], - [[ 10., 11.], - [ 14., 15.]]])] + [array([[[ 0., 1.], + [ 4., 5.]], + [[ 8., 9.], + [12., 13.]]]), array([[[ 2., 3.], + [ 6., 7.]], + [[10., 11.], + [14., 15.]]])] >>> np.dsplit(x, np.array([3, 6])) - [array([[[ 0., 1., 2.], - [ 4., 5., 6.]], - [[ 8., 9., 10.], - [ 12., 13., 14.]]]), - array([[[ 3.], - [ 7.]], - [[ 11.], - [ 15.]]]), - array([], dtype=float64)] - + [array([[[ 0., 1., 2.], + [ 4., 5., 6.]], + [[ 8., 9., 10.], + [12., 13., 14.]]]), + array([[[ 3.], + [ 7.]], + [[11.], + [15.]]]), + array([], shape=(2, 2, 0), dtype=float64)] """ if _nx.ndim(ary) < 3: raise ValueError('dsplit only works on arrays of 3 or more dimensions') @@ -1092,15 +1093,15 @@ def kron(a, b): Examples -------- >>> np.kron([1,10,100], [5,6,7]) - array([ 5, 6, 7, 50, 60, 70, 500, 600, 700]) + array([ 5, 6, 7, ..., 500, 600, 700]) >>> np.kron([5,6,7], [1,10,100]) - array([ 5, 50, 500, 6, 60, 600, 7, 70, 700]) + array([ 5, 50, 500, ..., 7, 70, 700]) >>> np.kron(np.eye(2), np.ones((2,2))) - array([[ 1., 1., 0., 0.], - [ 1., 1., 0., 0.], - [ 0., 0., 1., 1.], - [ 0., 0., 1., 1.]]) + array([[1., 1., 0., 0.], + [1., 1., 0., 0.], + [0., 0., 1., 1.], + [0., 0., 1., 1.]]) >>> a = np.arange(100).reshape((2,5,2,5)) >>> b = np.arange(24).reshape((2,3,4)) diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py index 0dc36e41c..8aafd094b 100644 --- a/numpy/lib/stride_tricks.py +++ b/numpy/lib/stride_tricks.py @@ -121,18 +121,18 @@ def 
_broadcast_to(array, shape, subok, readonly): if any(size < 0 for size in shape): raise ValueError('all elements of broadcast shape must be non-' 'negative') - needs_writeable = not readonly and array.flags.writeable - extras = ['reduce_ok'] if needs_writeable else [] - op_flag = 'readwrite' if needs_writeable else 'readonly' + extras = [] it = np.nditer( (array,), flags=['multi_index', 'refs_ok', 'zerosize_ok'] + extras, - op_flags=[op_flag], itershape=shape, order='C') + op_flags=['readonly'], itershape=shape, order='C') with it: # never really has writebackifcopy semantics broadcast = it.itviews[0] result = _maybe_view_as_subclass(array, broadcast) - if needs_writeable and not result.flags.writeable: + # In a future version this will go away + if not readonly and array.flags._writeable_no_warn: result.flags.writeable = True + result.flags._warn_on_write = True return result @@ -186,8 +186,6 @@ def _broadcast_shape(*args): """Returns the shape of the arrays that would result from broadcasting the supplied arrays against each other. """ - if not args: - return () # use the old-iterator because np.nditer does not handle size 0 arrays # consistently b = np.broadcast(*args[:32]) @@ -224,8 +222,15 @@ def broadcast_arrays(*args, **kwargs): broadcasted : list of arrays These arrays are views on the original arrays. They are typically not contiguous. Furthermore, more than one element of a - broadcasted array may refer to a single memory location. If you - need to write to the arrays, make copies first. + broadcasted array may refer to a single memory location. If you need + to write to the arrays, make copies first. While you can set the + ``writable`` flag True, writing to a single output value may end up + changing more than one location in the output array. + + .. deprecated:: 1.17 + The output is currently marked so that if written to, a deprecation + warning will be emitted. A future version will set the + ``writable`` flag False so writing to it will raise an error. Examples -------- @@ -262,7 +267,5 @@ def broadcast_arrays(*args, **kwargs): # Common case where nothing needs to be broadcasted. return args - # TODO: consider making the results of broadcast_arrays readonly to match - # broadcast_to. This will require a deprecation cycle. 
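    # the removed TODO is addressed by this patch: the views returned below are
    # flagged with _warn_on_write in _broadcast_to, so writing to them emits the
    # DeprecationWarning described in the 1.17 note added to the docstring above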
return [_broadcast_to(array, shape, subok=subok, readonly=False) for array in args] diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py index e04fdc808..15cd3ad9d 100644 --- a/numpy/lib/tests/test__iotools.py +++ b/numpy/lib/tests/test__iotools.py @@ -204,14 +204,18 @@ class TestStringConverter(object): def test_upgrademapper(self): "Tests updatemapper" dateparser = _bytes_to_date - StringConverter.upgrade_mapper(dateparser, date(2000, 1, 1)) - convert = StringConverter(dateparser, date(2000, 1, 1)) - test = convert('2001-01-01') - assert_equal(test, date(2001, 1, 1)) - test = convert('2009-01-01') - assert_equal(test, date(2009, 1, 1)) - test = convert('') - assert_equal(test, date(2000, 1, 1)) + _original_mapper = StringConverter._mapper[:] + try: + StringConverter.upgrade_mapper(dateparser, date(2000, 1, 1)) + convert = StringConverter(dateparser, date(2000, 1, 1)) + test = convert('2001-01-01') + assert_equal(test, date(2001, 1, 1)) + test = convert('2009-01-01') + assert_equal(test, date(2009, 1, 1)) + test = convert('') + assert_equal(test, date(2000, 1, 1)) + finally: + StringConverter._mapper = _original_mapper def test_string_to_object(self): "Make sure that string-to-object functions are properly recognized" diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py index 20f6e4a1b..b7630cdcd 100644 --- a/numpy/lib/tests/test_arraypad.py +++ b/numpy/lib/tests/test_arraypad.py @@ -2,18 +2,31 @@ """ from __future__ import division, absolute_import, print_function +from itertools import chain import pytest import numpy as np -from numpy.testing import (assert_array_equal, assert_raises, assert_allclose, - assert_equal) -from numpy.lib import pad +from numpy.testing import assert_array_equal, assert_allclose, assert_equal from numpy.lib.arraypad import _as_pairs -class TestAsPairs(object): +_all_modes = { + 'constant': {'constant_values': 0}, + 'edge': {}, + 'linear_ramp': {'end_values': 0}, + 'maximum': {'stat_length': None}, + 'mean': {'stat_length': None}, + 'median': {'stat_length': None}, + 'minimum': {'stat_length': None}, + 'reflect': {'reflect_type': 'even'}, + 'symmetric': {'reflect_type': 'even'}, + 'wrap': {}, + 'empty': {} +} + +class TestAsPairs(object): def test_single_value(self): """Test casting for a single value.""" expected = np.array([[3, 3]] * 10) @@ -97,52 +110,31 @@ class TestAsPairs(object): class TestConditionalShortcuts(object): - def test_zero_padding_shortcuts(self): + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_zero_padding_shortcuts(self, mode): test = np.arange(120).reshape(4, 5, 6) - pad_amt = [(0, 0) for axis in test.shape] - modes = ['constant', - 'edge', - 'linear_ramp', - 'maximum', - 'mean', - 'median', - 'minimum', - 'reflect', - 'symmetric', - 'wrap', - ] - for mode in modes: - assert_array_equal(test, pad(test, pad_amt, mode=mode)) - - def test_shallow_statistic_range(self): + pad_amt = [(0, 0) for _ in test.shape] + assert_array_equal(test, np.pad(test, pad_amt, mode=mode)) + + @pytest.mark.parametrize("mode", ['maximum', 'mean', 'median', 'minimum',]) + def test_shallow_statistic_range(self, mode): test = np.arange(120).reshape(4, 5, 6) - pad_amt = [(1, 1) for axis in test.shape] - modes = ['maximum', - 'mean', - 'median', - 'minimum', - ] - for mode in modes: - assert_array_equal(pad(test, pad_amt, mode='edge'), - pad(test, pad_amt, mode=mode, stat_length=1)) - - def test_clip_statistic_range(self): + pad_amt = [(1, 1) for _ in test.shape] + assert_array_equal(np.pad(test, 
pad_amt, mode='edge'), + np.pad(test, pad_amt, mode=mode, stat_length=1)) + + @pytest.mark.parametrize("mode", ['maximum', 'mean', 'median', 'minimum',]) + def test_clip_statistic_range(self, mode): test = np.arange(30).reshape(5, 6) - pad_amt = [(3, 3) for axis in test.shape] - modes = ['maximum', - 'mean', - 'median', - 'minimum', - ] - for mode in modes: - assert_array_equal(pad(test, pad_amt, mode=mode), - pad(test, pad_amt, mode=mode, stat_length=30)) + pad_amt = [(3, 3) for _ in test.shape] + assert_array_equal(np.pad(test, pad_amt, mode=mode), + np.pad(test, pad_amt, mode=mode, stat_length=30)) class TestStatistic(object): def test_check_mean_stat_length(self): a = np.arange(100).astype('f') - a = pad(a, ((25, 20), ), 'mean', stat_length=((2, 3), )) + a = np.pad(a, ((25, 20), ), 'mean', stat_length=((2, 3), )) b = np.array( [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, @@ -166,7 +158,7 @@ class TestStatistic(object): def test_check_maximum_1(self): a = np.arange(100) - a = pad(a, (25, 20), 'maximum') + a = np.pad(a, (25, 20), 'maximum') b = np.array( [99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, @@ -190,7 +182,7 @@ class TestStatistic(object): def test_check_maximum_2(self): a = np.arange(100) + 1 - a = pad(a, (25, 20), 'maximum') + a = np.pad(a, (25, 20), 'maximum') b = np.array( [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, @@ -214,7 +206,7 @@ class TestStatistic(object): def test_check_maximum_stat_length(self): a = np.arange(100) + 1 - a = pad(a, (25, 20), 'maximum', stat_length=10) + a = np.pad(a, (25, 20), 'maximum', stat_length=10) b = np.array( [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, @@ -238,7 +230,7 @@ class TestStatistic(object): def test_check_minimum_1(self): a = np.arange(100) - a = pad(a, (25, 20), 'minimum') + a = np.pad(a, (25, 20), 'minimum') b = np.array( [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -262,7 +254,7 @@ class TestStatistic(object): def test_check_minimum_2(self): a = np.arange(100) + 2 - a = pad(a, (25, 20), 'minimum') + a = np.pad(a, (25, 20), 'minimum') b = np.array( [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -286,7 +278,7 @@ class TestStatistic(object): def test_check_minimum_stat_length(self): a = np.arange(100) + 1 - a = pad(a, (25, 20), 'minimum', stat_length=10) + a = np.pad(a, (25, 20), 'minimum', stat_length=10) b = np.array( [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -310,7 +302,7 @@ class TestStatistic(object): def test_check_median(self): a = np.arange(100).astype('f') - a = pad(a, (25, 20), 'median') + a = np.pad(a, (25, 20), 'median') b = np.array( [49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, @@ -334,7 +326,7 @@ class TestStatistic(object): def test_check_median_01(self): a = np.array([[3, 1, 4], [4, 5, 9], [9, 8, 2]]) - a = pad(a, 1, 'median') + a = np.pad(a, 1, 'median') b = np.array( [[4, 4, 5, 4, 4], @@ -348,7 +340,7 @@ class TestStatistic(object): def test_check_median_02(self): a = np.array([[3, 1, 4], [4, 5, 9], [9, 8, 2]]) - a = pad(a.T, 1, 'median').T + a = np.pad(a.T, 1, 'median').T b = np.array( [[5, 4, 5, 4, 5], @@ -364,7 +356,7 @@ class TestStatistic(object): a = np.arange(100).astype('f') a[1] = 2. a[97] = 96. 
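        # with stat_length=(3, 5), the left pad value is median([0., 2., 2.]) == 2.0
        # and the right pad value is median([95., 96., 96., 98., 99.]) == 96.0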
- a = pad(a, (25, 20), 'median', stat_length=(3, 5)) + a = np.pad(a, (25, 20), 'median', stat_length=(3, 5)) b = np.array( [ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., @@ -388,7 +380,7 @@ class TestStatistic(object): def test_check_mean_shape_one(self): a = [[4, 5, 6]] - a = pad(a, (5, 7), 'mean', stat_length=2) + a = np.pad(a, (5, 7), 'mean', stat_length=2) b = np.array( [[4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], @@ -410,7 +402,7 @@ class TestStatistic(object): def test_check_mean_2(self): a = np.arange(100).astype('f') - a = pad(a, (25, 20), 'mean') + a = np.pad(a, (25, 20), 'mean') b = np.array( [49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, @@ -433,7 +425,7 @@ class TestStatistic(object): assert_array_equal(a, b) @pytest.mark.parametrize("mode", [ - pytest.param("mean", marks=pytest.mark.xfail(reason="gh-11216")), + "mean", "median", "minimum", "maximum" @@ -446,11 +438,42 @@ class TestStatistic(object): a = np.pad(a, (1, 1), mode) assert_equal(a[0], a[-1]) + @pytest.mark.parametrize("mode", ["mean", "median", "minimum", "maximum"]) + @pytest.mark.parametrize( + "stat_length", [-2, (-2,), (3, -1), ((5, 2), (-2, 3)), ((-4,), (2,))] + ) + def test_check_negative_stat_length(self, mode, stat_length): + arr = np.arange(30).reshape((6, 5)) + match = "index can't contain negative values" + with pytest.raises(ValueError, match=match): + np.pad(arr, 2, mode, stat_length=stat_length) + + def test_simple_stat_length(self): + a = np.arange(30) + a = np.reshape(a, (6, 5)) + a = np.pad(a, ((2, 3), (3, 2)), mode='mean', stat_length=(3,)) + b = np.array( + [[6, 6, 6, 5, 6, 7, 8, 9, 8, 8], + [6, 6, 6, 5, 6, 7, 8, 9, 8, 8], + + [1, 1, 1, 0, 1, 2, 3, 4, 3, 3], + [6, 6, 6, 5, 6, 7, 8, 9, 8, 8], + [11, 11, 11, 10, 11, 12, 13, 14, 13, 13], + [16, 16, 16, 15, 16, 17, 18, 19, 18, 18], + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], + [26, 26, 26, 25, 26, 27, 28, 29, 28, 28], + + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23]] + ) + assert_array_equal(a, b) + class TestConstant(object): def test_check_constant(self): a = np.arange(100) - a = pad(a, (25, 20), 'constant', constant_values=(10, 20)) + a = np.pad(a, (25, 20), 'constant', constant_values=(10, 20)) b = np.array( [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, @@ -474,7 +497,7 @@ class TestConstant(object): def test_check_constant_zeros(self): a = np.arange(100) - a = pad(a, (25, 20), 'constant') + a = np.pad(a, (25, 20), 'constant') b = np.array( [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -500,7 +523,7 @@ class TestConstant(object): # If input array is int, but constant_values are float, the dtype of # the array to be padded is kept arr = np.arange(30).reshape(5, 6) - test = pad(arr, (1, 2), mode='constant', + test = np.pad(arr, (1, 2), mode='constant', constant_values=1.1) expected = np.array( [[ 1, 1, 1, 1, 1, 1, 1, 1, 1], @@ -521,7 +544,7 @@ class TestConstant(object): # the array to be padded is kept - here retaining the float constants arr = np.arange(30).reshape(5, 6) arr_float = arr.astype(np.float64) - test = pad(arr_float, ((1, 2), (1, 2)), mode='constant', + test = np.pad(arr_float, ((1, 2), (1, 2)), mode='constant', constant_values=1.1) expected = np.array( [[ 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1], @@ -539,7 +562,7 @@ class 
TestConstant(object): def test_check_constant_float3(self): a = np.arange(100, dtype=float) - a = pad(a, (25, 20), 'constant', constant_values=(-1.1, -1.2)) + a = np.pad(a, (25, 20), 'constant', constant_values=(-1.1, -1.2)) b = np.array( [-1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, @@ -563,7 +586,7 @@ class TestConstant(object): def test_check_constant_odd_pad_amount(self): arr = np.arange(30).reshape(5, 6) - test = pad(arr, ((1,), (2,)), mode='constant', + test = np.pad(arr, ((1,), (2,)), mode='constant', constant_values=3) expected = np.array( [[ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], @@ -620,11 +643,16 @@ class TestConstant(object): assert_array_equal(arr, expected) + def test_pad_empty_dimension(self): + arr = np.zeros((3, 0, 2)) + result = np.pad(arr, [(0,), (2,), (1,)], mode="constant") + assert result.shape == (3, 4, 4) + class TestLinearRamp(object): def test_check_simple(self): a = np.arange(100).astype('f') - a = pad(a, (25, 20), 'linear_ramp', end_values=(4, 5)) + a = np.pad(a, (25, 20), 'linear_ramp', end_values=(4, 5)) b = np.array( [4.00, 3.84, 3.68, 3.52, 3.36, 3.20, 3.04, 2.88, 2.72, 2.56, 2.40, 2.24, 2.08, 1.92, 1.76, 1.60, 1.44, 1.28, 1.12, 0.96, @@ -648,7 +676,7 @@ class TestLinearRamp(object): def test_check_2d(self): arr = np.arange(20).reshape(4, 5).astype(np.float64) - test = pad(arr, (2, 2), mode='linear_ramp', end_values=(0, 0)) + test = np.pad(arr, (2, 2), mode='linear_ramp', end_values=(0, 0)) expected = np.array( [[0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0.5, 1., 1.5, 2., 1., 0.], @@ -679,11 +707,19 @@ class TestLinearRamp(object): ]) assert_equal(actual, expected) + def test_end_values(self): + """Ensure that end values are exact.""" + a = np.pad(np.ones(10).reshape(2, 5), (223, 123), mode="linear_ramp") + assert_equal(a[:, 0], 0.) + assert_equal(a[:, -1], 0.) + assert_equal(a[0, :], 0.) + assert_equal(a[-1, :], 0.) 
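The test_end_values case added above pins down a subtle property of
'linear_ramp': the outermost padded values must equal end_values exactly,
not merely to within floating-point rounding. A minimal sketch of the
public behaviour being guarded, assuming the default end_values=0:

    import numpy as np

    # ramp from the default end value 0.0 up to the edge value 1.0
    padded = np.pad(np.ones(3), 2, mode="linear_ramp")
    # padded == [0. , 0.5, 1. , 1. , 1. , 0.5, 0. ]; endpoints are exactly 0.0
    assert padded[0] == 0.0 and padded[-1] == 0.0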
+ class TestReflect(object): def test_check_simple(self): a = np.arange(100) - a = pad(a, (25, 20), 'reflect') + a = np.pad(a, (25, 20), 'reflect') b = np.array( [25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, @@ -707,7 +743,7 @@ class TestReflect(object): def test_check_odd_method(self): a = np.arange(100) - a = pad(a, (25, 20), 'reflect', reflect_type='odd') + a = np.pad(a, (25, 20), 'reflect', reflect_type='odd') b = np.array( [-25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, @@ -731,7 +767,7 @@ class TestReflect(object): def test_check_large_pad(self): a = [[4, 5, 6], [6, 7, 8]] - a = pad(a, (5, 7), 'reflect') + a = np.pad(a, (5, 7), 'reflect') b = np.array( [[7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], @@ -754,7 +790,7 @@ class TestReflect(object): def test_check_shape(self): a = [[4, 5, 6]] - a = pad(a, (5, 7), 'reflect') + a = np.pad(a, (5, 7), 'reflect') b = np.array( [[5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], @@ -775,30 +811,49 @@ class TestReflect(object): assert_array_equal(a, b) def test_check_01(self): - a = pad([1, 2, 3], 2, 'reflect') + a = np.pad([1, 2, 3], 2, 'reflect') b = np.array([3, 2, 1, 2, 3, 2, 1]) assert_array_equal(a, b) def test_check_02(self): - a = pad([1, 2, 3], 3, 'reflect') + a = np.pad([1, 2, 3], 3, 'reflect') b = np.array([2, 3, 2, 1, 2, 3, 2, 1, 2]) assert_array_equal(a, b) def test_check_03(self): - a = pad([1, 2, 3], 4, 'reflect') + a = np.pad([1, 2, 3], 4, 'reflect') b = np.array([1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3]) assert_array_equal(a, b) - def test_check_padding_an_empty_array(self): - a = pad(np.zeros((0, 3)), ((0,), (1,)), mode='reflect') - b = np.zeros((0, 5)) - assert_array_equal(a, b) + +class TestEmptyArray(object): + """Check how padding behaves on arrays with an empty dimension.""" + + @pytest.mark.parametrize( + # Keep parametrization ordered, otherwise pytest-xdist might believe + # that different tests were collected during parallelization + "mode", sorted(_all_modes.keys() - {"constant", "empty"}) + ) + def test_pad_empty_dimension(self, mode): + match = ("can't extend empty axis 0 using modes other than 'constant' " + "or 'empty'") + with pytest.raises(ValueError, match=match): + np.pad([], 4, mode=mode) + with pytest.raises(ValueError, match=match): + np.pad(np.ndarray(0), 4, mode=mode) + with pytest.raises(ValueError, match=match): + np.pad(np.zeros((0, 3)), ((1,), (0,)), mode=mode) + + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_pad_non_empty_dimension(self, mode): + result = np.pad(np.ones((2, 0, 2)), ((3,), (0,), (1,)), mode=mode) + assert result.shape == (8, 0, 4) class TestSymmetric(object): def test_check_simple(self): a = np.arange(100) - a = pad(a, (25, 20), 'symmetric') + a = np.pad(a, (25, 20), 'symmetric') b = np.array( [24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, @@ -822,7 +877,7 @@ class TestSymmetric(object): def test_check_odd_method(self): a = np.arange(100) - a = pad(a, (25, 20), 'symmetric', reflect_type='odd') + a = np.pad(a, (25, 20), 'symmetric', reflect_type='odd') b = np.array( [-24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, @@ -846,7 +901,7 @@ class TestSymmetric(object): def test_check_large_pad(self): a = [[4, 5, 6], [6, 7, 8]] - a = pad(a, (5, 7), 'symmetric') + a = np.pad(a, (5, 7), 'symmetric') b = np.array( [[5, 6, 6, 5, 4, 4, 5, 6, 6, 
5, 4, 4, 5, 6, 6], [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], @@ -870,7 +925,7 @@ class TestSymmetric(object): def test_check_large_pad_odd(self): a = [[4, 5, 6], [6, 7, 8]] - a = pad(a, (5, 7), 'symmetric', reflect_type='odd') + a = np.pad(a, (5, 7), 'symmetric', reflect_type='odd') b = np.array( [[-3, -2, -2, -1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6], [-3, -2, -2, -1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6], @@ -893,7 +948,7 @@ class TestSymmetric(object): def test_check_shape(self): a = [[4, 5, 6]] - a = pad(a, (5, 7), 'symmetric') + a = np.pad(a, (5, 7), 'symmetric') b = np.array( [[5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], @@ -914,17 +969,17 @@ class TestSymmetric(object): assert_array_equal(a, b) def test_check_01(self): - a = pad([1, 2, 3], 2, 'symmetric') + a = np.pad([1, 2, 3], 2, 'symmetric') b = np.array([2, 1, 1, 2, 3, 3, 2]) assert_array_equal(a, b) def test_check_02(self): - a = pad([1, 2, 3], 3, 'symmetric') + a = np.pad([1, 2, 3], 3, 'symmetric') b = np.array([3, 2, 1, 1, 2, 3, 3, 2, 1]) assert_array_equal(a, b) def test_check_03(self): - a = pad([1, 2, 3], 6, 'symmetric') + a = np.pad([1, 2, 3], 6, 'symmetric') b = np.array([1, 2, 3, 3, 2, 1, 1, 2, 3, 3, 2, 1, 1, 2, 3]) assert_array_equal(a, b) @@ -932,7 +987,7 @@ class TestSymmetric(object): class TestWrap(object): def test_check_simple(self): a = np.arange(100) - a = pad(a, (25, 20), 'wrap') + a = np.pad(a, (25, 20), 'wrap') b = np.array( [75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, @@ -957,7 +1012,7 @@ class TestWrap(object): def test_check_large_pad(self): a = np.arange(12) a = np.reshape(a, (3, 4)) - a = pad(a, (10, 12), 'wrap') + a = np.pad(a, (10, 12), 'wrap') b = np.array( [[10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11], @@ -1015,12 +1070,12 @@ class TestWrap(object): assert_array_equal(a, b) def test_check_01(self): - a = pad([1, 2, 3], 3, 'wrap') + a = np.pad([1, 2, 3], 3, 'wrap') b = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]) assert_array_equal(a, b) def test_check_02(self): - a = pad([1, 2, 3], 4, 'wrap') + a = np.pad([1, 2, 3], 4, 'wrap') b = np.array([3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1]) assert_array_equal(a, b) @@ -1029,35 +1084,25 @@ class TestWrap(object): b = np.pad(a, (0, 5), mode="wrap") assert_array_equal(a, b[:-5, :-5]) + def test_repeated_wrapping(self): + """ + Check wrapping on each side individually if the wrapped area is longer + than the original array. 
+ """ + a = np.arange(5) + b = np.pad(a, (12, 0), mode="wrap") + assert_array_equal(np.r_[a, a, a, a][3:], b) -class TestStatLen(object): - def test_check_simple(self): - a = np.arange(30) - a = np.reshape(a, (6, 5)) - a = pad(a, ((2, 3), (3, 2)), mode='mean', stat_length=(3,)) - b = np.array( - [[6, 6, 6, 5, 6, 7, 8, 9, 8, 8], - [6, 6, 6, 5, 6, 7, 8, 9, 8, 8], - - [1, 1, 1, 0, 1, 2, 3, 4, 3, 3], - [6, 6, 6, 5, 6, 7, 8, 9, 8, 8], - [11, 11, 11, 10, 11, 12, 13, 14, 13, 13], - [16, 16, 16, 15, 16, 17, 18, 19, 18, 18], - [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], - [26, 26, 26, 25, 26, 27, 28, 29, 28, 28], - - [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], - [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], - [21, 21, 21, 20, 21, 22, 23, 24, 23, 23]] - ) - assert_array_equal(a, b) + a = np.arange(5) + b = np.pad(a, (0, 12), mode="wrap") + assert_array_equal(np.r_[a, a, a, a][:-3], b) class TestEdge(object): def test_check_simple(self): a = np.arange(12) a = np.reshape(a, (4, 3)) - a = pad(a, ((2, 3), (3, 2)), 'edge') + a = np.pad(a, ((2, 3), (3, 2)), 'edge') b = np.array( [[0, 0, 0, 0, 1, 2, 2, 2], [0, 0, 0, 0, 1, 2, 2, 2], @@ -1077,56 +1122,123 @@ class TestEdge(object): # Check a pad_width of the form ((1, 2),). # Regression test for issue gh-7808. a = np.array([1, 2, 3]) - padded = pad(a, ((1, 2),), 'edge') + padded = np.pad(a, ((1, 2),), 'edge') expected = np.array([1, 1, 2, 3, 3, 3]) assert_array_equal(padded, expected) a = np.array([[1, 2, 3], [4, 5, 6]]) - padded = pad(a, ((1, 2),), 'edge') - expected = pad(a, ((1, 2), (1, 2)), 'edge') + padded = np.pad(a, ((1, 2),), 'edge') + expected = np.pad(a, ((1, 2), (1, 2)), 'edge') assert_array_equal(padded, expected) a = np.arange(24).reshape(2, 3, 4) - padded = pad(a, ((1, 2),), 'edge') - expected = pad(a, ((1, 2), (1, 2), (1, 2)), 'edge') + padded = np.pad(a, ((1, 2),), 'edge') + expected = np.pad(a, ((1, 2), (1, 2), (1, 2)), 'edge') assert_array_equal(padded, expected) -class TestZeroPadWidth(object): - def test_zero_pad_width(self): - arr = np.arange(30) - arr = np.reshape(arr, (6, 5)) - for pad_width in (0, (0, 0), ((0, 0), (0, 0))): - assert_array_equal(arr, pad(arr, pad_width, mode='constant')) +class TestEmpty(object): + def test_simple(self): + arr = np.arange(24).reshape(4, 6) + result = np.pad(arr, [(2, 3), (3, 1)], mode="empty") + assert result.shape == (9, 10) + assert_equal(arr, result[2:-3, 3:-1]) + def test_pad_empty_dimension(self): + arr = np.zeros((3, 0, 2)) + result = np.pad(arr, [(0,), (2,), (1,)], mode="empty") + assert result.shape == (3, 4, 4) -class TestLegacyVectorFunction(object): - def test_legacy_vector_functionality(self): - def _padwithtens(vector, pad_width, iaxis, kwargs): - vector[:pad_width[0]] = 10 - vector[-pad_width[1]:] = 10 - return vector - a = np.arange(6).reshape(2, 3) - a = pad(a, 2, _padwithtens) - b = np.array( - [[10, 10, 10, 10, 10, 10, 10], - [10, 10, 10, 10, 10, 10, 10], +def test_legacy_vector_functionality(): + def _padwithtens(vector, pad_width, iaxis, kwargs): + vector[:pad_width[0]] = 10 + vector[-pad_width[1]:] = 10 - [10, 10, 0, 1, 2, 10, 10], - [10, 10, 3, 4, 5, 10, 10], + a = np.arange(6).reshape(2, 3) + a = np.pad(a, 2, _padwithtens) + b = np.array( + [[10, 10, 10, 10, 10, 10, 10], + [10, 10, 10, 10, 10, 10, 10], - [10, 10, 10, 10, 10, 10, 10], - [10, 10, 10, 10, 10, 10, 10]] - ) - assert_array_equal(a, b) + [10, 10, 0, 1, 2, 10, 10], + [10, 10, 3, 4, 5, 10, 10], + [10, 10, 10, 10, 10, 10, 10], + [10, 10, 10, 10, 10, 10, 10]] + ) + assert_array_equal(a, b) -class TestNdarrayPadWidth(object): - def 
test_check_simple(self): + +def test_unicode_mode(): + a = np.pad([1], 2, mode=u'constant') + b = np.array([0, 0, 1, 0, 0]) + assert_array_equal(a, b) + + +@pytest.mark.parametrize("mode", ["edge", "symmetric", "reflect", "wrap"]) +def test_object_input(mode): + # Regression test for issue gh-11395. + a = np.full((4, 3), fill_value=None) + pad_amt = ((2, 3), (3, 2)) + b = np.full((9, 8), fill_value=None) + assert_array_equal(np.pad(a, pad_amt, mode=mode), b) + + +class TestPadWidth(object): + @pytest.mark.parametrize("pad_width", [ + (4, 5, 6, 7), + ((1,), (2,), (3,)), + ((1, 2), (3, 4), (5, 6)), + ((3, 4, 5), (0, 1, 2)), + ]) + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_misshaped_pad_width(self, pad_width, mode): + arr = np.arange(30).reshape((6, 5)) + match = "operands could not be broadcast together" + with pytest.raises(ValueError, match=match): + np.pad(arr, pad_width, mode) + + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_misshaped_pad_width_2(self, mode): + arr = np.arange(30).reshape((6, 5)) + match = ("input operand has more dimensions than allowed by the axis " + "remapping") + with pytest.raises(ValueError, match=match): + np.pad(arr, (((3,), (4,), (5,)), ((0,), (1,), (2,))), mode) + + @pytest.mark.parametrize( + "pad_width", [-2, (-2,), (3, -1), ((5, 2), (-2, 3)), ((-4,), (2,))]) + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_negative_pad_width(self, pad_width, mode): + arr = np.arange(30).reshape((6, 5)) + match = "index can't contain negative values" + with pytest.raises(ValueError, match=match): + np.pad(arr, pad_width, mode) + + @pytest.mark.parametrize("pad_width", [ + "3", + "word", + None, + object(), + 3.4, + ((2, 3, 4), (3, 2)), # dtype=object (tuple) + complex(1, -1), + ((-2.1, 3), (3, 2)), + ]) + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_bad_type(self, pad_width, mode): + arr = np.arange(30).reshape((6, 5)) + match = "`pad_width` must be of integral type." + with pytest.raises(TypeError, match=match): + np.pad(arr, pad_width, mode) + with pytest.raises(TypeError, match=match): + np.pad(arr, np.array(pad_width), mode) + + def test_pad_width_as_ndarray(self): a = np.arange(12) a = np.reshape(a, (4, 3)) - a = pad(a, np.array(((2, 3), (3, 2))), 'edge') + a = np.pad(a, np.array(((2, 3), (3, 2))), 'edge') b = np.array( [[0, 0, 0, 0, 1, 2, 2, 2], [0, 0, 0, 0, 1, 2, 2, 2], @@ -1142,121 +1254,68 @@ class TestNdarrayPadWidth(object): ) assert_array_equal(a, b) - -class TestUnicodeInput(object): - def test_unicode_mode(self): - constant_mode = u'constant' - a = np.pad([1], 2, mode=constant_mode) - b = np.array([0, 0, 1, 0, 0]) - assert_array_equal(a, b) - - -class TestObjectInput(object): - def test_object_input(self): - # Regression test for issue gh-11395. 
- a = np.full((4, 3), None) - pad_amt = ((2, 3), (3, 2)) - b = np.full((9, 8), None) - modes = ['edge', - 'symmetric', - 'reflect', - 'wrap', - ] - for mode in modes: - assert_array_equal(pad(a, pad_amt, mode=mode), b) - - -class TestValueError1(object): - def test_check_simple(self): - arr = np.arange(30) - arr = np.reshape(arr, (6, 5)) - kwargs = dict(mode='mean', stat_length=(3, )) - assert_raises(ValueError, pad, arr, ((2, 3), (3, 2), (4, 5)), - **kwargs) - - def test_check_negative_stat_length(self): - arr = np.arange(30) - arr = np.reshape(arr, (6, 5)) - kwargs = dict(mode='mean', stat_length=(-3, )) - assert_raises(ValueError, pad, arr, ((2, 3), (3, 2)), - **kwargs) - - def test_check_negative_pad_width(self): - arr = np.arange(30) - arr = np.reshape(arr, (6, 5)) - kwargs = dict(mode='mean', stat_length=(3, )) - assert_raises(ValueError, pad, arr, ((-2, 3), (3, 2)), - **kwargs) - - def test_check_empty_array(self): - assert_raises(ValueError, pad, [], 4, mode='reflect') - assert_raises(ValueError, pad, np.ndarray(0), 4, mode='reflect') - assert_raises(ValueError, pad, np.zeros((0, 3)), ((1,), (0,)), - mode='reflect') - - -class TestValueError2(object): - def test_check_negative_pad_amount(self): - arr = np.arange(30) - arr = np.reshape(arr, (6, 5)) - kwargs = dict(mode='mean', stat_length=(3, )) - assert_raises(ValueError, pad, arr, ((-2, 3), (3, 2)), - **kwargs) - - -class TestValueError3(object): - def test_check_kwarg_not_allowed(self): - arr = np.arange(30).reshape(5, 6) - assert_raises(ValueError, pad, arr, 4, mode='mean', - reflect_type='odd') - - def test_mode_not_set(self): - arr = np.arange(30).reshape(5, 6) - assert_raises(TypeError, pad, arr, 4) - - def test_malformed_pad_amount(self): - arr = np.arange(30).reshape(5, 6) - assert_raises(ValueError, pad, arr, (4, 5, 6, 7), mode='constant') - - def test_malformed_pad_amount2(self): - arr = np.arange(30).reshape(5, 6) - assert_raises(ValueError, pad, arr, ((3, 4, 5), (0, 1, 2)), - mode='constant') - - def test_pad_too_many_axes(self): - arr = np.arange(30).reshape(5, 6) - - # Attempt to pad using a 3D array equivalent - bad_shape = (((3,), (4,), (5,)), ((0,), (1,), (2,))) - assert_raises(ValueError, pad, arr, bad_shape, - mode='constant') - - -class TestTypeError1(object): - def test_float(self): - arr = np.arange(30) - assert_raises(TypeError, pad, arr, ((-2.1, 3), (3, 2))) - assert_raises(TypeError, pad, arr, np.array(((-2.1, 3), (3, 2)))) - - def test_str(self): - arr = np.arange(30) - assert_raises(TypeError, pad, arr, 'foo') - assert_raises(TypeError, pad, arr, np.array('foo')) - - def test_object(self): - class FooBar(object): - pass - arr = np.arange(30) - assert_raises(TypeError, pad, arr, FooBar()) - - def test_complex(self): - arr = np.arange(30) - assert_raises(TypeError, pad, arr, complex(1, -1)) - assert_raises(TypeError, pad, arr, np.array(complex(1, -1))) - - def test_check_wrong_pad_amount(self): - arr = np.arange(30) - arr = np.reshape(arr, (6, 5)) - kwargs = dict(mode='mean', stat_length=(3, )) - assert_raises(TypeError, pad, arr, ((2, 3, 4), (3, 2)), - **kwargs) + @pytest.mark.parametrize("pad_width", [0, (0, 0), ((0, 0), (0, 0))]) + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_zero_pad_width(self, pad_width, mode): + arr = np.arange(30).reshape(6, 5) + assert_array_equal(arr, np.pad(arr, pad_width, mode=mode)) + + +@pytest.mark.parametrize("mode", _all_modes.keys()) +def test_kwargs(mode): + """Test behavior of pad's kwargs for the given mode.""" + allowed = _all_modes[mode] + 
not_allowed = {} + for kwargs in _all_modes.values(): + if kwargs != allowed: + not_allowed.update(kwargs) + # Test if allowed keyword arguments pass + np.pad([1, 2, 3], 1, mode, **allowed) + # Test if prohibited keyword arguments of other modes raise an error + for key, value in not_allowed.items(): + match = "unsupported keyword arguments for mode '{}'".format(mode) + with pytest.raises(ValueError, match=match): + np.pad([1, 2, 3], 1, mode, **{key: value}) + + +def test_constant_zero_default(): + arr = np.array([1, 1]) + assert_array_equal(np.pad(arr, 2), [0, 0, 1, 1, 0, 0]) + + +@pytest.mark.parametrize("mode", [1, "const", object(), None, True, False]) +def test_unsupported_mode(mode): + match= "mode '{}' is not supported".format(mode) + with pytest.raises(ValueError, match=match): + np.pad([1, 2, 3], 4, mode=mode) + + +@pytest.mark.parametrize("mode", _all_modes.keys()) +def test_non_contiguous_array(mode): + arr = np.arange(24).reshape(4, 6)[::2, ::2] + result = np.pad(arr, (2, 3), mode) + assert result.shape == (7, 8) + assert_equal(result[2:-3, 2:-3], arr) + + +@pytest.mark.parametrize("mode", _all_modes.keys()) +def test_memory_layout_persistence(mode): + """Test if C and F order is preserved for all pad modes.""" + x = np.ones((5, 10), order='C') + assert np.pad(x, 5, mode).flags["C_CONTIGUOUS"] + x = np.ones((5, 10), order='F') + assert np.pad(x, 5, mode).flags["F_CONTIGUOUS"] + + +@pytest.mark.parametrize("dtype", chain( + # Skip "other" dtypes as they are not supported by all modes + np.sctypes["int"], + np.sctypes["uint"], + np.sctypes["float"], + np.sctypes["complex"] +)) +@pytest.mark.parametrize("mode", _all_modes.keys()) +def test_dtype_persistence(dtype, mode): + arr = np.zeros((3, 2, 1), dtype=dtype) + result = np.pad(arr, 1, mode=mode) + assert result.dtype == dtype diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index a17fc66e5..dd8a38248 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -136,8 +136,8 @@ class TestSetOps(object): np.nan), # should fail because attempting # to downcast to smaller int type: - (np.array([1, 2, 3], dtype=np.int32), - np.array([5, 7, 2], dtype=np.int64), + (np.array([1, 2, 3], dtype=np.int16), + np.array([5, 1<<20, 2], dtype=np.int32), None), # should fail because attempting to cast # two special floating point values @@ -152,8 +152,8 @@ class TestSetOps(object): # specifically, raise an appropriate # Exception when attempting to append or # prepend with an incompatible type - msg = 'must be compatible' - with assert_raises_regex(TypeError, msg): + msg = 'cannot convert' + with assert_raises_regex(ValueError, msg): ediff1d(ary=ary, to_end=append, to_begin=prepend) @@ -422,41 +422,41 @@ class TestUnique(object): assert_array_equal(v, b, msg) msg = base_msg.format('return_index', dt) - v, j = unique(a, 1, 0, 0) + v, j = unique(a, True, False, False) assert_array_equal(v, b, msg) assert_array_equal(j, i1, msg) msg = base_msg.format('return_inverse', dt) - v, j = unique(a, 0, 1, 0) + v, j = unique(a, False, True, False) assert_array_equal(v, b, msg) assert_array_equal(j, i2, msg) msg = base_msg.format('return_counts', dt) - v, j = unique(a, 0, 0, 1) + v, j = unique(a, False, False, True) assert_array_equal(v, b, msg) assert_array_equal(j, c, msg) msg = base_msg.format('return_index and return_inverse', dt) - v, j1, j2 = unique(a, 1, 1, 0) + v, j1, j2 = unique(a, True, True, False) assert_array_equal(v, b, msg) assert_array_equal(j1, i1, msg) 
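        # j1 carries the first-occurrence indices (i1) and j2 the inverse
        # mapping (i2) that reconstructs `a` from the unique values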
assert_array_equal(j2, i2, msg) msg = base_msg.format('return_index and return_counts', dt) - v, j1, j2 = unique(a, 1, 0, 1) + v, j1, j2 = unique(a, True, False, True) assert_array_equal(v, b, msg) assert_array_equal(j1, i1, msg) assert_array_equal(j2, c, msg) msg = base_msg.format('return_inverse and return_counts', dt) - v, j1, j2 = unique(a, 0, 1, 1) + v, j1, j2 = unique(a, False, True, True) assert_array_equal(v, b, msg) assert_array_equal(j1, i2, msg) assert_array_equal(j2, c, msg) msg = base_msg.format(('return_index, return_inverse ' 'and return_counts'), dt) - v, j1, j2, j3 = unique(a, 1, 1, 1) + v, j1, j2, j3 = unique(a, True, True, True) assert_array_equal(v, b, msg) assert_array_equal(j1, i1, msg) assert_array_equal(j2, i2, msg) diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py index 077507082..062c21725 100644 --- a/numpy/lib/tests/test_format.py +++ b/numpy/lib/tests/test_format.py @@ -287,6 +287,7 @@ from io import BytesIO import numpy as np from numpy.testing import ( assert_, assert_array_equal, assert_raises, assert_raises_regex, + assert_warns ) from numpy.lib import format @@ -411,6 +412,7 @@ record_arrays = [ np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')), np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')), np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')), + np.zeros(1, dtype=[('c', ('<f8', (5,)), (2,))]) ] @@ -426,7 +428,7 @@ def roundtrip(arr): f = BytesIO() format.write_array(f, arr) f2 = BytesIO(f.getvalue()) - arr2 = format.read_array(f2) + arr2 = format.read_array(f2, allow_pickle=True) return arr2 @@ -576,7 +578,7 @@ def test_pickle_python2_python3(): path = os.path.join(data_dir, fname) for encoding in ['bytes', 'latin1']: - data_f = np.load(path, encoding=encoding) + data_f = np.load(path, allow_pickle=True, encoding=encoding) if fname.endswith('.npz'): data = data_f['x'] data_f.close() @@ -598,16 +600,19 @@ def test_pickle_python2_python3(): if sys.version_info[0] >= 3: if fname.startswith('py2'): if fname.endswith('.npz'): - data = np.load(path) + data = np.load(path, allow_pickle=True) assert_raises(UnicodeError, data.__getitem__, 'x') data.close() - data = np.load(path, fix_imports=False, encoding='latin1') + data = np.load(path, allow_pickle=True, fix_imports=False, + encoding='latin1') assert_raises(ImportError, data.__getitem__, 'x') data.close() else: - assert_raises(UnicodeError, np.load, path) + assert_raises(UnicodeError, np.load, path, + allow_pickle=True) assert_raises(ImportError, np.load, path, - encoding='latin1', fix_imports=False) + allow_pickle=True, fix_imports=False, + encoding='latin1') def test_pickle_disallow(): @@ -625,6 +630,61 @@ def test_pickle_disallow(): assert_raises(ValueError, np.save, path, np.array([None], dtype=object), allow_pickle=False) +@pytest.mark.parametrize('dt', [ + np.dtype(np.dtype([('a', np.int8), + ('b', np.int16), + ('c', np.int32), + ], align=True), + (3,)), + np.dtype([('x', np.dtype({'names':['a','b'], + 'formats':['i1','i1'], + 'offsets':[0,4], + 'itemsize':8, + }, + (3,)), + (4,), + )]), + np.dtype([('x', + ('<f8', (5,)), + (2,), + )]), + np.dtype([('x', np.dtype(( + np.dtype(( + np.dtype({'names':['a','b'], + 'formats':['i1','i1'], + 'offsets':[0,4], + 'itemsize':8}), + (3,) + )), + (4,) + ))) + ]), + np.dtype([ + ('a', np.dtype(( + np.dtype(( + np.dtype(( + np.dtype([ + ('a', int), + ('b', np.dtype({'names':['a','b'], + 'formats':['i1','i1'], + 'offsets':[0,4], + 'itemsize':8})), + ]), + (3,), + )), + (4,), + )), + (5,), + ))) + ]), + ]) + 
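# round-tripping dtype.descr through format.descr_to_dtype must reconstruct the
# original dtype exactly, including subarray shapes and padded/overlapping
# offsets; the parametrized cases above exercise several nested combinations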
+def test_descr_to_dtype(dt): + dt1 = format.descr_to_dtype(dt.descr) + assert_equal_(dt1, dt) + arr1 = np.zeros(3, dt) + arr2 = roundtrip(arr1) + assert_array_equal(arr1, arr2) def test_version_2_0(): f = BytesIO() @@ -879,3 +939,27 @@ def test_empty_npz(): fname = os.path.join(tempdir, "nothing.npz") np.savez(fname) np.load(fname) + + +def test_unicode_field_names(): + # gh-7391 + arr = np.array([ + (1, 3), + (1, 2), + (1, 3), + (1, 2) + ], dtype=[ + ('int', int), + (u'\N{CJK UNIFIED IDEOGRAPH-6574}\N{CJK UNIFIED IDEOGRAPH-5F62}', int) + ]) + fname = os.path.join(tempdir, "unicode.npy") + with open(fname, 'wb') as f: + format.write_array(f, arr, version=(3, 0)) + with open(fname, 'rb') as f: + arr2 = format.read_array(f) + assert_array_equal(arr, arr2) + + # notifies the user that 3.0 is selected + with open(fname, 'wb') as f: + with assert_warns(UserWarning): + format.write_array(f, arr, version=None) diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 3d4b0e3b2..eae52c002 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -4,13 +4,15 @@ import operator import warnings import sys import decimal +import types +from fractions import Fraction import pytest import numpy as np from numpy import ma from numpy.testing import ( assert_, assert_equal, assert_array_equal, assert_almost_equal, - assert_array_almost_equal, assert_raises, assert_allclose, + assert_array_almost_equal, assert_raises, assert_allclose, IS_PYPY, assert_warns, assert_raises_regex, suppress_warnings, HAS_REFCOUNT, ) import numpy.lib.function_base as nfb @@ -24,6 +26,7 @@ from numpy.lib import ( from numpy.compat import long +PY2 = sys.version_info[0] == 2 def get_mat(n): data = np.arange(n) @@ -31,6 +34,17 @@ def get_mat(n): return data +def _make_complex(real, imag): + """ + Like real + 1j * imag, but behaves as expected when imag contains non-finite + values + """ + ret = np.zeros(np.broadcast(real, imag).shape, np.complex_) + ret.real = real + ret.imag = imag + return ret + + class TestRot90(object): def test_basic(self): assert_raises(ValueError, rot90, np.ones(4)) @@ -353,9 +367,9 @@ class TestAverage(object): assert_equal(type(np.average(a, weights=w)), subclass) def test_upcasting(self): - types = [('i4', 'i4', 'f8'), ('i4', 'f4', 'f8'), ('f4', 'i4', 'f8'), + typs = [('i4', 'i4', 'f8'), ('i4', 'f4', 'f8'), ('f4', 'i4', 'f8'), ('f4', 'f4', 'f4'), ('f4', 'f8', 'f8')] - for at, wt, rt in types: + for at, wt, rt in typs: a = np.array([[1,2],[3,4]], dtype=at) w = np.array([[1,2],[3,4]], dtype=wt) assert_equal(np.average(a, weights=w).dtype, np.dtype(rt)) @@ -682,6 +696,9 @@ class TestDiff(object): assert_raises(np.AxisError, diff, x, axis=3) assert_raises(np.AxisError, diff, x, axis=-4) + x = np.array(1.11111111111, np.float64) + assert_raises(ValueError, diff, x) + def test_nd(self): x = 20 * rand(10, 20, 30) out1 = x[:, :, 1:] - x[:, :, :-1] @@ -931,7 +948,7 @@ class TestGradient(object): assert_equal(type(out), type(x)) # And make sure that the output and input don't have aliased mask # arrays - assert_(x.mask is not out.mask) + assert_(x._mask is not out._mask) # Also check that edge_order=2 doesn't alter the original mask x2 = np.ma.arange(5) x2[2] = np.ma.masked @@ -1088,7 +1105,7 @@ class TestAngle(object): np.arctan(3.0 / 1.0), np.arctan(1.0), 0, np.pi / 2, np.pi, -np.pi / 2.0, -np.arctan(3.0 / 1.0), np.pi - np.arctan(3.0 / 1.0)] - z = angle(x, deg=1) + z = angle(x, deg=True) zo = np.array(yo) * 180 / np.pi 
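        # deg=True must reproduce the radian result rescaled by 180/pi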
assert_array_almost_equal(y, yo, 11) assert_array_almost_equal(z, zo, 11) @@ -1498,6 +1515,49 @@ class TestVectorize(object): f(x) +class TestLeaks(object): + class A(object): + iters = 20 + + def bound(self, *args): + return 0 + + @staticmethod + def unbound(*args): + return 0 + + @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") + @pytest.mark.parametrize('name, incr', [ + ('bound', A.iters), + ('unbound', 0), + ]) + def test_frompyfunc_leaks(self, name, incr): + # exposed in gh-11867 as np.vectorized, but the problem stems from + # frompyfunc. + # class.attribute = np.frompyfunc(<method>) creates a + # reference cycle if <method> is a bound class method. It requires a + # gc collection cycle to break the cycle (on CPython 3) + import gc + A_func = getattr(self.A, name) + gc.disable() + try: + refcount = sys.getrefcount(A_func) + for i in range(self.A.iters): + a = self.A() + a.f = np.frompyfunc(getattr(a, name), 1, 1) + out = a.f(np.arange(10)) + a = None + if PY2: + assert_equal(sys.getrefcount(A_func), refcount) + else: + # A.func is part of a reference cycle if incr is non-zero + assert_equal(sys.getrefcount(A_func), refcount + incr) + for i in range(5): + gc.collect() + assert_equal(sys.getrefcount(A_func), refcount) + finally: + gc.enable() + class TestDigitize(object): def test_forward(self): @@ -1860,9 +1920,9 @@ class TestCov(object): [-np.inf, np.inf]])) def test_1D_rowvar(self): - assert_allclose(cov(self.x3), cov(self.x3, rowvar=0)) + assert_allclose(cov(self.x3), cov(self.x3, rowvar=False)) y = np.array([0.0780, 0.3107, 0.2111, 0.0334, 0.8501]) - assert_allclose(cov(self.x3, y), cov(self.x3, y, rowvar=0)) + assert_allclose(cov(self.x3, y), cov(self.x3, y, rowvar=False)) def test_1D_variance(self): assert_allclose(cov(self.x3, ddof=1), np.var(self.x3, ddof=1)) @@ -1924,9 +1984,9 @@ class Test_I0(object): np.array(1.0634833707413234)) A = np.array([0.49842636, 0.6969809, 0.22011976, 0.0155549]) - assert_almost_equal( - i0(A), - np.array([1.06307822, 1.12518299, 1.01214991, 1.00006049])) + expected = np.array([1.06307822, 1.12518299, 1.01214991, 1.00006049]) + assert_almost_equal(i0(A), expected) + assert_almost_equal(i0(-A), expected) B = np.array([[0.827002, 0.99959078], [0.89694769, 0.39298162], @@ -1940,6 +2000,26 @@ class Test_I0(object): [1.03633899, 1.00067775], [1.03352052, 1.13557954], [1.05884290, 1.06432317]])) + # Regression test for gh-11205 + i0_0 = np.i0([0.]) + assert_equal(i0_0.shape, (1,)) + assert_array_equal(np.i0([0.]), np.array([1.])) + + def test_non_array(self): + a = np.arange(4) + + class array_like: + __array_interface__ = a.__array_interface__ + + def __array_wrap__(self, arr): + return self + + # E.g. 
pandas series survive ufunc calls through array-wrap: + assert isinstance(np.abs(array_like()), array_like) + exp = np.i0(a) + res = np.i0(array_like()) + + assert_array_equal(exp, res) class TestKaiser(object): @@ -2309,7 +2389,7 @@ class TestInterp(object): x0 = np.nan assert_almost_equal(np.interp(x0, x, y), x0) - def test_non_finite_behavior(self): + def test_non_finite_behavior_exact_x(self): x = [1, 2, 2.5, 3, 4] xp = [1, 2, 3, 4] fp = [1, 2, np.inf, 4] @@ -2317,6 +2397,64 @@ class TestInterp(object): fp = [1, 2, np.nan, 4] assert_almost_equal(np.interp(x, xp, fp), [1, 2, np.nan, np.nan, 4]) + @pytest.fixture(params=[ + lambda x: np.float_(x), + lambda x: _make_complex(x, 0), + lambda x: _make_complex(0, x), + lambda x: _make_complex(x, np.multiply(x, -2)) + ], ids=[ + 'real', + 'complex-real', + 'complex-imag', + 'complex-both' + ]) + def sc(self, request): + """ scale function used by the below tests """ + return request.param + + def test_non_finite_any_nan(self, sc): + """ test that nans are propagated """ + assert_equal(np.interp(0.5, [np.nan, 1], sc([ 0, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, np.nan], sc([ 0, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, 1], sc([np.nan, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, 1], sc([ 0, np.nan])), sc(np.nan)) + + def test_non_finite_inf(self, sc): + """ Test that interp between opposite infs gives nan """ + assert_equal(np.interp(0.5, [-np.inf, +np.inf], sc([ 0, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, 1], sc([-np.inf, +np.inf])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, 1], sc([+np.inf, -np.inf])), sc(np.nan)) + + # unless the y values are equal + assert_equal(np.interp(0.5, [-np.inf, +np.inf], sc([ 10, 10])), sc(10)) + + def test_non_finite_half_inf_xf(self, sc): + """ Test that interp where both axes have a bound at inf gives nan """ + assert_equal(np.interp(0.5, [-np.inf, 1], sc([-np.inf, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [-np.inf, 1], sc([+np.inf, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [-np.inf, 1], sc([ 0, -np.inf])), sc(np.nan)) + assert_equal(np.interp(0.5, [-np.inf, 1], sc([ 0, +np.inf])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([-np.inf, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([+np.inf, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([ 0, -np.inf])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([ 0, +np.inf])), sc(np.nan)) + + def test_non_finite_half_inf_x(self, sc): + """ Test interp where the x axis has a bound at inf """ + assert_equal(np.interp(0.5, [-np.inf, -np.inf], sc([0, 10])), sc(10)) + assert_equal(np.interp(0.5, [-np.inf, 1 ], sc([0, 10])), sc(10)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([0, 10])), sc(0)) + assert_equal(np.interp(0.5, [+np.inf, +np.inf], sc([0, 10])), sc(0)) + + def test_non_finite_half_inf_f(self, sc): + """ Test interp where the f axis has a bound at inf """ + assert_equal(np.interp(0.5, [0, 1], sc([ 0, -np.inf])), sc(-np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([ 0, +np.inf])), sc(+np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([-np.inf, 10])), sc(-np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([+np.inf, 10])), sc(+np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([-np.inf, -np.inf])), sc(-np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([+np.inf, +np.inf])), sc(+np.inf)) + def test_complex_interp(self): # test complex interpolation x = np.linspace(0, 1, 5) @@ -2391,11 +2529,23 @@ class TestPercentile(object): 
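# Standalone sketch of the np.interp propagation rules the fixture above
# sweeps: a nan endpoint, or interpolating between opposite infinities,
# yields nan, while equal y values are a well-defined limit.
import numpy as np
assert np.isnan(np.interp(0.5, [0, 1], [0, np.nan]))
assert np.isnan(np.interp(0.5, [-np.inf, np.inf], [0, 10]))
assert np.interp(0.5, [-np.inf, np.inf], [10, 10]) == 10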
assert_equal(np.percentile(x, 100), 3.5) assert_equal(np.percentile(x, 50), 1.75) x[1] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(x, 0), np.nan) - assert_equal(np.percentile(x, 0, interpolation='nearest'), np.nan) - assert_(w[0].category is RuntimeWarning) + assert_equal(np.percentile(x, 0), np.nan) + assert_equal(np.percentile(x, 0, interpolation='nearest'), np.nan) + + def test_fraction(self): + x = [Fraction(i, 2) for i in np.arange(8)] + + p = np.percentile(x, Fraction(0)) + assert_equal(p, Fraction(0)) + assert_equal(type(p), Fraction) + + p = np.percentile(x, Fraction(100)) + assert_equal(p, Fraction(7, 2)) + assert_equal(type(p), Fraction) + + p = np.percentile(x, Fraction(50)) + assert_equal(p, Fraction(7, 4)) + assert_equal(type(p), Fraction) def test_api(self): d = np.ones(5) @@ -2733,85 +2883,63 @@ class TestPercentile(object): def test_nan_behavior(self): a = np.arange(24, dtype=float) a[2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, 0.3), np.nan) - assert_equal(np.percentile(a, 0.3, axis=0), np.nan) - assert_equal(np.percentile(a, [0.3, 0.6], axis=0), - np.array([np.nan] * 2)) - assert_(w[0].category is RuntimeWarning) - assert_(w[1].category is RuntimeWarning) - assert_(w[2].category is RuntimeWarning) + assert_equal(np.percentile(a, 0.3), np.nan) + assert_equal(np.percentile(a, 0.3, axis=0), np.nan) + assert_equal(np.percentile(a, [0.3, 0.6], axis=0), + np.array([np.nan] * 2)) a = np.arange(24, dtype=float).reshape(2, 3, 4) a[1, 2, 3] = np.nan a[1, 1, 2] = np.nan # no axis - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, 0.3), np.nan) - assert_equal(np.percentile(a, 0.3).ndim, 0) - assert_(w[0].category is RuntimeWarning) + assert_equal(np.percentile(a, 0.3), np.nan) + assert_equal(np.percentile(a, 0.3).ndim, 0) # axis0 zerod b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 0) b[2, 3] = np.nan b[1, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, 0.3, 0), b) + assert_equal(np.percentile(a, 0.3, 0), b) # axis0 not zerod b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 0) b[:, 2, 3] = np.nan b[:, 1, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, [0.3, 0.6], 0), b) + assert_equal(np.percentile(a, [0.3, 0.6], 0), b) # axis1 zerod b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 1) b[1, 3] = np.nan b[1, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, 0.3, 1), b) + assert_equal(np.percentile(a, 0.3, 1), b) # axis1 not zerod b = np.percentile( np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 1) b[:, 1, 3] = np.nan b[:, 1, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, [0.3, 0.6], 1), b) + assert_equal(np.percentile(a, [0.3, 0.6], 1), b) # axis02 zerod b = np.percentile( np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, (0, 2)) b[1] = np.nan b[2] = np.nan - with warnings.catch_warnings(record=True) as w: - 
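# Sketch of the behaviour behind the scaffolding removals in this hunk:
# a nan in the input propagates to the percentile result, and the
# RuntimeWarning these tests used to capture is no longer emitted.
import numpy as np
a = np.arange(24, dtype=float)
a[2] = np.nan
assert np.isnan(np.percentile(a, 0.3))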
warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, 0.3, (0, 2)), b) + assert_equal(np.percentile(a, 0.3, (0, 2)), b) # axis02 not zerod b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], (0, 2)) b[:, 1] = np.nan b[:, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, [0.3, 0.6], (0, 2)), b) + assert_equal(np.percentile(a, [0.3, 0.6], (0, 2)), b) # axis02 not zerod with nearest interpolation b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], (0, 2), interpolation='nearest') b[:, 1] = np.nan b[:, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile( - a, [0.3, 0.6], (0, 2), interpolation='nearest'), b) + assert_equal(np.percentile( + a, [0.3, 0.6], (0, 2), interpolation='nearest'), b) class TestQuantile(object): @@ -2823,6 +2951,26 @@ class TestQuantile(object): assert_equal(np.quantile(x, 1), 3.5) assert_equal(np.quantile(x, 0.5), 1.75) + def test_fraction(self): + # fractional input, integral quantile + x = [Fraction(i, 2) for i in np.arange(8)] + + q = np.quantile(x, 0) + assert_equal(q, 0) + assert_equal(type(q), Fraction) + + q = np.quantile(x, 1) + assert_equal(q, Fraction(7, 2)) + assert_equal(type(q), Fraction) + + q = np.quantile(x, Fraction(1, 2)) + assert_equal(q, Fraction(7, 4)) + assert_equal(type(q), Fraction) + + # repeat with integral input but fractional quantile + x = np.arange(8) + assert_equal(np.quantile(x, Fraction(1, 2)), Fraction(7, 2)) + def test_no_p_overwrite(self): # this is worth retesting, because quantile does not make a copy p0 = np.array([0, 0.75, 0.25, 0.5, 1.0]) @@ -2858,10 +3006,7 @@ class TestMedian(object): # check array scalar result assert_equal(np.median(a).ndim, 0) a[1] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.median(a).ndim, 0) - assert_(w[0].category is RuntimeWarning) + assert_equal(np.median(a).ndim, 0) def test_axis_keyword(self): a3 = np.array([[2, 3], @@ -2960,58 +3105,43 @@ class TestMedian(object): def test_nan_behavior(self): a = np.arange(24, dtype=float) a[2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.median(a), np.nan) - assert_equal(np.median(a, axis=0), np.nan) - assert_(w[0].category is RuntimeWarning) - assert_(w[1].category is RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_equal(np.median(a, axis=0), np.nan) a = np.arange(24, dtype=float).reshape(2, 3, 4) a[1, 2, 3] = np.nan a[1, 1, 2] = np.nan # no axis - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.median(a), np.nan) - assert_equal(np.median(a).ndim, 0) - assert_(w[0].category is RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_equal(np.median(a).ndim, 0) # axis0 b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 0) b[2, 3] = np.nan b[1, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.median(a, 0), b) - assert_equal(len(w), 1) + assert_equal(np.median(a, 0), b) # axis1 b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 1) b[1, 3] = np.nan b[1, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - 
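# Sketch of the exact-arithmetic path added above: with Fraction inputs
# the interpolation stays exact and the scalar type is preserved.
from fractions import Fraction
import numpy as np
x = [Fraction(i, 2) for i in range(8)]
q = np.quantile(x, Fraction(1, 2))
assert q == Fraction(7, 4) and type(q) is Fraction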
warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.median(a, 1), b) - assert_equal(len(w), 1) + assert_equal(np.median(a, 1), b) # axis02 b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), (0, 2)) b[1] = np.nan b[2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.median(a, (0, 2)), b) - assert_equal(len(w), 1) + assert_equal(np.median(a, (0, 2)), b) def test_empty(self): - # empty arrays + # mean(empty array) emits two warnings: empty slice and divide by 0 a = np.array([], dtype=float) with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', RuntimeWarning) assert_equal(np.median(a), np.nan) assert_(w[0].category is RuntimeWarning) + assert_equal(len(w), 2) # multiple dimensions a = np.array([], dtype=float, ndmin=3) @@ -3106,6 +3236,7 @@ class TestAdd_newdoc_ufunc(object): class TestAdd_newdoc(object): @pytest.mark.skipif(sys.flags.optimize == 2, reason="Python running -OO") + @pytest.mark.xfail(IS_PYPY, reason="PyPy does not modify tp_doc") def test_add_doc(self): # test np.add_newdoc tgt = "Current flat index into the array." diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py index c96b01d42..4895a722c 100644 --- a/numpy/lib/tests/test_histograms.py +++ b/numpy/lib/tests/test_histograms.py @@ -554,15 +554,11 @@ class TestHistogramOptimBinNums(object): return a / (a + b) ll = [[nbins_ratio(seed, size) for size in np.geomspace(start=10, stop=100, num=4).round().astype(int)] - for seed in range(256)] + for seed in range(10)] # the average difference between the two methods decreases as the dataset size increases. - assert_almost_equal(abs(np.mean(ll, axis=0) - 0.5), - [0.1065248, - 0.0968844, - 0.0331818, - 0.0178057], - decimal=3) + avg = abs(np.mean(ll, axis=0) - 0.5) + assert_almost_equal(avg, [0.15, 0.09, 0.08, 0.03], decimal=2) def test_simple_range(self): """ @@ -802,7 +798,7 @@ class TestHistogramdd(object): hist, edges = histogramdd((y, x), bins=(y_edges, x_edges)) assert_equal(hist, relative_areas) - # resulting histogram should be uniform, since counts and areas are propotional + # resulting histogram should be uniform, since counts and areas are proportional hist, edges = histogramdd((y, x), bins=(y_edges, x_edges), density=True) assert_equal(hist, 1 / (8*8)) diff --git a/numpy/lib/tests/test_index_tricks.py b/numpy/lib/tests/test_index_tricks.py index 3246f68ff..a5cdda074 100644 --- a/numpy/lib/tests/test_index_tricks.py +++ b/numpy/lib/tests/test_index_tricks.py @@ -77,6 +77,26 @@ class TestRavelUnravelIndex(object): [[3, 6, 6], [4, 5, 1]]) assert_equal(np.unravel_index(1621, (6, 7, 8, 9)), [3, 1, 4, 1]) + def test_empty_indices(self): + msg1 = 'indices must be integral: the provided empty sequence was' + msg2 = 'only int indices permitted' + assert_raises_regex(TypeError, msg1, np.unravel_index, [], (10, 3, 5)) + assert_raises_regex(TypeError, msg1, np.unravel_index, (), (10, 3, 5)) + assert_raises_regex(TypeError, msg2, np.unravel_index, np.array([]), + (10, 3, 5)) + assert_equal(np.unravel_index(np.array([],dtype=int), (10, 3, 5)), + [[], [], []]) + assert_raises_regex(TypeError, msg1, np.ravel_multi_index, ([], []), + (10, 3)) + assert_raises_regex(TypeError, msg1, np.ravel_multi_index, ([], ['abc']), + (10, 3)) + assert_raises_regex(TypeError, msg2, np.ravel_multi_index, + (np.array([]), np.array([])), (5, 3)) + assert_equal(np.ravel_multi_index( + (np.array([], dtype=int), np.array([], 
dtype=int)), (5, 3)), []) + assert_equal(np.ravel_multi_index(np.array([[], []], dtype=int), + (5, 3)), []) + def test_big_indices(self): # ravel_multi_index for big indices (issue #7546) if np.intp == np.int64: @@ -86,6 +106,9 @@ class TestRavelUnravelIndex(object): np.ravel_multi_index(arr, (41, 7, 120, 36, 2706, 8, 6)), [5627771580, 117259570957]) + # test unravel_index for big indices (issue #9538) + assert_raises(ValueError, np.unravel_index, 1, (2**32-1, 2**31+1)) + # test overflow checking for too big array (issue #7546) dummy_arr = ([0],[0]) half_max = np.iinfo(np.intp).max // 2 @@ -167,7 +190,7 @@ class TestGrid(object): assert_almost_equal(a[1]-a[0], 2.0/9.0, 11) def test_linspace_equivalence(self): - y, st = np.linspace(2, 10, retstep=1) + y, st = np.linspace(2, 10, retstep=True) assert_almost_equal(st, 8/49.0) assert_array_almost_equal(y, mgrid[2:10:50j], 13) @@ -272,11 +295,16 @@ class TestIndexExpression(object): class TestIx_(object): def test_regression_1(self): - # Test empty inputs create outputs of indexing type, gh-5804 - # Test both lists and arrays - for func in (range, np.arange): - a, = np.ix_(func(0)) - assert_equal(a.dtype, np.intp) + # Test empty untyped inputs create outputs of indexing type, gh-5804 + a, = np.ix_(range(0)) + assert_equal(a.dtype, np.intp) + + a, = np.ix_([]) + assert_equal(a.dtype, np.intp) + + # but if the type is specified, don't change it + a, = np.ix_(np.array([], dtype=np.float32)) + assert_equal(a.dtype, np.float32) def test_shape_and_dtype(self): sizes = (4, 5, 3, 2) diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 7ef25538b..38d4c19fa 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -22,7 +22,7 @@ from numpy.ma.testutils import assert_equal from numpy.testing import ( assert_warns, assert_, assert_raises_regex, assert_raises, assert_allclose, assert_array_equal, temppath, tempdir, IS_PYPY, - HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles, + HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles, assert_no_warnings ) @@ -87,7 +87,7 @@ class RoundtripTest(object): """ save_kwds = kwargs.get('save_kwds', {}) - load_kwds = kwargs.get('load_kwds', {}) + load_kwds = kwargs.get('load_kwds', {"allow_pickle": True}) file_on_disk = kwargs.get('file_on_disk', False) if file_on_disk: @@ -347,13 +347,23 @@ class TestSaveTxt(object): assert_raises(ValueError, np.savetxt, c, np.array(1)) assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]])) - def test_record(self): + def test_structured(self): a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) c = BytesIO() np.savetxt(c, a, fmt='%d') c.seek(0) assert_equal(c.readlines(), [b'1 2\n', b'3 4\n']) + def test_structured_padded(self): + # gh-13297 + a = np.array([(1, 2, 3),(4, 5, 6)], dtype=[ + ('foo', 'i4'), ('bar', 'i4'), ('baz', 'i4') + ]) + c = BytesIO() + np.savetxt(c, a[['foo', 'baz']], fmt='%d') + c.seek(0) + assert_equal(c.readlines(), [b'1 3\n', b'4 6\n']) + @pytest.mark.skipif(Path is None, reason="No pathlib.Path") def test_multifield_view(self): a = np.ones(1, dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'f4')]) @@ -494,8 +504,6 @@ class TestSaveTxt(object): b' (3.142e+00-2.718e+00j) (3.142e+00-2.718e+00j)\n']) - - def test_custom_writer(self): class CustomWriter(list): @@ -551,6 +559,33 @@ class TestSaveTxt(object): s.seek(0) assert_equal(s.read(), utf8 + '\n') + @pytest.mark.parametrize("fmt", [u"%f", b"%f"]) + @pytest.mark.parametrize("iotype", [StringIO, BytesIO]) + def test_unicode_and_bytes_fmt(self, fmt, iotype): + 
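# Sketch of the empty-index rules pinned down above: explicitly
# integer-typed empty arrays round-trip, untyped empty sequences raise.
import numpy as np
rows, cols, depth = np.unravel_index(np.array([], dtype=int), (10, 3, 5))
assert rows.size == cols.size == depth.size == 0
flat = np.ravel_multi_index((np.array([], dtype=int),) * 2, (5, 3))
assert flat.size == 0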
# string type of fmt should not matter, see also gh-4053 + a = np.array([1.]) + s = iotype() + np.savetxt(s, a, fmt=fmt) + s.seek(0) + if iotype is StringIO: + assert_equal(s.read(), u"%f\n" % 1.) + else: + assert_equal(s.read(), b"%f\n" % 1.) + + @pytest.mark.skipif(sys.platform=='win32', + reason="large files cause problems") + @pytest.mark.slow + def test_large_zip(self): + # The test takes at least 6GB of memory, writes a file larger than 4GB + try: + a = 'a' * 6 * 1024 * 1024 * 1024 + del a + except (MemoryError, OverflowError): + pytest.skip("Cannot allocate enough memory for test") + test_data = np.asarray([np.random.rand(np.random.randint(50,100),4) + for i in range(800000)]) + with tempdir() as tmpdir: + np.savez(os.path.join(tmpdir, 'test.npz'), test_data=test_data) class LoadTxtBase(object): def check_compressed(self, fopen, suffixes): @@ -1192,7 +1227,7 @@ class TestFromTxt(LoadTxtBase): def test_record(self): # Test w/ explicit dtype data = TextIO('1 2\n3 4') - test = np.ndfromtxt(data, dtype=[('x', np.int32), ('y', np.int32)]) + test = np.genfromtxt(data, dtype=[('x', np.int32), ('y', np.int32)]) control = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) assert_equal(test, control) # @@ -1201,14 +1236,14 @@ class TestFromTxt(LoadTxtBase): 'formats': ('S1', 'i4', 'f4')} control = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)], dtype=descriptor) - test = np.ndfromtxt(data, dtype=descriptor) + test = np.genfromtxt(data, dtype=descriptor) assert_equal(test, control) def test_array(self): # Test outputting a standard ndarray data = TextIO('1 2\n3 4') control = np.array([[1, 2], [3, 4]], dtype=int) - test = np.ndfromtxt(data, dtype=int) + test = np.genfromtxt(data, dtype=int) assert_array_equal(test, control) # data.seek(0) @@ -1221,11 +1256,11 @@ class TestFromTxt(LoadTxtBase): control = np.array([1, 2, 3, 4], int) # data = TextIO('1\n2\n3\n4\n') - test = np.ndfromtxt(data, dtype=int) + test = np.genfromtxt(data, dtype=int) assert_array_equal(test, control) # data = TextIO('1,2,3,4\n') - test = np.ndfromtxt(data, dtype=int, delimiter=',') + test = np.genfromtxt(data, dtype=int, delimiter=',') assert_array_equal(test, control) def test_comments(self): @@ -1233,11 +1268,11 @@ class TestFromTxt(LoadTxtBase): control = np.array([1, 2, 3, 5], int) # Comment on its own line data = TextIO('# comment\n1,2,3,5\n') - test = np.ndfromtxt(data, dtype=int, delimiter=',', comments='#') + test = np.genfromtxt(data, dtype=int, delimiter=',', comments='#') assert_equal(test, control) # Comment at the end of a line data = TextIO('1,2,3,5# comment\n') - test = np.ndfromtxt(data, dtype=int, delimiter=',', comments='#') + test = np.genfromtxt(data, dtype=int, delimiter=',', comments='#') assert_equal(test, control) def test_skiprows(self): @@ -1246,7 +1281,7 @@ class TestFromTxt(LoadTxtBase): kwargs = dict(dtype=int, delimiter=',') # data = TextIO('comment\n1,2,3,5\n') - test = np.ndfromtxt(data, skip_header=1, **kwargs) + test = np.genfromtxt(data, skip_header=1, **kwargs) assert_equal(test, control) # data = TextIO('# comment\n1,2,3,5\n') @@ -1293,7 +1328,7 @@ class TestFromTxt(LoadTxtBase): data = TextIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0') with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) - test = np.ndfromtxt(data, dtype=None, names=True) + test = np.genfromtxt(data, dtype=None, names=True) assert_(w[0].category is np.VisibleDeprecationWarning) control = {'gender': np.array([b'M', b'F']), 'age': 
np.array([64.0, 25.0]), @@ -1307,7 +1342,7 @@ class TestFromTxt(LoadTxtBase): data = TextIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False') with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) - test = np.ndfromtxt(data, dtype=None) + test = np.genfromtxt(data, dtype=None) assert_(w[0].category is np.VisibleDeprecationWarning) control = [np.array([b'A', b'BCD']), np.array([64, 25]), @@ -1321,7 +1356,7 @@ class TestFromTxt(LoadTxtBase): def test_auto_dtype_uniform(self): # Tests whether the output dtype can be uniformized data = TextIO('1 2 3 4\n5 6 7 8\n') - test = np.ndfromtxt(data, dtype=None) + test = np.genfromtxt(data, dtype=None) control = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) assert_equal(test, control) @@ -1329,7 +1364,7 @@ class TestFromTxt(LoadTxtBase): # Check that a nested dtype isn't MIA data = TextIO('1,2,3.0\n4,5,6.0\n') fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) - test = np.ndfromtxt(data, dtype=fancydtype, delimiter=',') + test = np.genfromtxt(data, dtype=fancydtype, delimiter=',') control = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=fancydtype) assert_equal(test, control) @@ -1339,7 +1374,7 @@ class TestFromTxt(LoadTxtBase): 'formats': ('S1', 'i4', 'f4')} data = TextIO(b'M 64.0 75.0\nF 25.0 60.0') names = ('gender', 'age', 'weight') - test = np.ndfromtxt(data, dtype=descriptor, names=names) + test = np.genfromtxt(data, dtype=descriptor, names=names) descriptor['names'] = names control = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)], dtype=descriptor) @@ -1381,12 +1416,25 @@ M 33 21.99 control = np.array([(1, 2), (3, 4)], dtype=[('col1', int), ('col2', int)]) assert_equal(test, control) + def test_file_is_closed_on_error(self): + # gh-13200 + with tempdir() as tmpdir: + fpath = os.path.join(tmpdir, "test.csv") + with open(fpath, "wb") as f: + f.write(u'\N{GREEK PI SYMBOL}'.encode('utf8')) + + # ResourceWarnings are emitted from a destructor, so won't be + # detected by regular propagation to errors. 
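# Sketch of the rename applied throughout this file: np.ndfromtxt was a
# thin wrapper around np.genfromtxt, so the same arguments are a drop-in
# replacement.
import io
import numpy as np
data = io.StringIO(u"1 2\n3 4")
assert np.genfromtxt(data, dtype=int).tolist() == [[1, 2], [3, 4]]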
+ with assert_no_warnings(): + with pytest.raises(UnicodeDecodeError): + np.genfromtxt(fpath, encoding="ascii") + def test_autonames_and_usecols(self): # Tests names and usecols data = TextIO('A B C D\n aaaa 121 45 9.1') with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) - test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), + test = np.genfromtxt(data, usecols=('A', 'C', 'D'), names=True, dtype=None) assert_(w[0].category is np.VisibleDeprecationWarning) control = np.array(('aaaa', 45, 9.1), @@ -1396,7 +1444,7 @@ M 33 21.99 def test_converters_with_usecols(self): # Test the combination user-defined converters and usecol data = TextIO('1,2,3,,5\n6,7,8,9,10\n') - test = np.ndfromtxt(data, dtype=int, delimiter=',', + test = np.genfromtxt(data, dtype=int, delimiter=',', converters={3: lambda s: int(s or - 999)}, usecols=(1, 3,)) control = np.array([[2, -999], [7, 9]], int) @@ -1407,7 +1455,7 @@ M 33 21.99 data = TextIO('A B C D\n aaaa 121 45 9.1') with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) - test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True, + test = np.genfromtxt(data, usecols=('A', 'C', 'D'), names=True, dtype=None, converters={'C': lambda s: 2 * int(s)}) assert_(w[0].category is np.VisibleDeprecationWarning) @@ -1420,7 +1468,7 @@ M 33 21.99 converter = { 'date': lambda s: strptime(s, '%Y-%m-%d %H:%M:%SZ')} data = TextIO('2009-02-03 12:00:00Z, 72214.0') - test = np.ndfromtxt(data, delimiter=',', dtype=None, + test = np.genfromtxt(data, delimiter=',', dtype=None, names=['date', 'stid'], converters=converter) control = np.array((datetime(2009, 2, 3), 72214.), dtype=[('date', np.object_), ('stid', float)]) @@ -1431,7 +1479,7 @@ M 33 21.99 converter = { 'date': lambda s: np.datetime64(strptime(s, '%Y-%m-%d %H:%M:%SZ'))} data = TextIO('2009-02-03 12:00:00Z, 72214.0') - test = np.ndfromtxt(data, delimiter=',', dtype=None, + test = np.genfromtxt(data, delimiter=',', dtype=None, names=['date', 'stid'], converters=converter) control = np.array((datetime(2009, 2, 3), 72214.), dtype=[('date', 'datetime64[us]'), ('stid', float)]) @@ -1440,12 +1488,12 @@ M 33 21.99 def test_unused_converter(self): # Test whether unused converters are forgotten data = TextIO("1 21\n 3 42\n") - test = np.ndfromtxt(data, usecols=(1,), + test = np.genfromtxt(data, usecols=(1,), converters={0: lambda s: int(s, 16)}) assert_equal(test, [21, 42]) # data.seek(0) - test = np.ndfromtxt(data, usecols=(1,), + test = np.genfromtxt(data, usecols=(1,), converters={1: lambda s: int(s, 16)}) assert_equal(test, [33, 66]) @@ -1472,12 +1520,12 @@ M 33 21.99 def test_dtype_with_converters(self): dstr = "2009; 23; 46" - test = np.ndfromtxt(TextIO(dstr,), + test = np.genfromtxt(TextIO(dstr,), delimiter=";", dtype=float, converters={0: bytes}) control = np.array([('2009', 23., 46)], dtype=[('f0', '|S4'), ('f1', float), ('f2', float)]) assert_equal(test, control) - test = np.ndfromtxt(TextIO(dstr,), + test = np.genfromtxt(TextIO(dstr,), delimiter=";", dtype=float, converters={0: float}) control = np.array([2009., 23., 46],) assert_equal(test, control) @@ -1541,7 +1589,7 @@ M 33 21.99 def test_spacedelimiter(self): # Test space delimiter data = TextIO("1 2 3 4 5\n6 7 8 9 10") - test = np.ndfromtxt(data) + test = np.genfromtxt(data) control = np.array([[1., 2., 3., 4., 5.], [6., 7., 8., 9., 10.]]) assert_equal(test, control) @@ -1555,7 +1603,7 @@ M 33 21.99 def test_missing(self): data = 
TextIO('1,2,3,,5\n') - test = np.ndfromtxt(data, dtype=int, delimiter=',', + test = np.genfromtxt(data, dtype=int, delimiter=',', converters={3: lambda s: int(s or - 999)}) control = np.array([1, 2, 3, -999, 5], int) assert_equal(test, control) @@ -1577,18 +1625,18 @@ M 33 21.99 data = TextIO() np.savetxt(data, control) data.seek(0) - test = np.ndfromtxt(data, dtype=float, usecols=(1,)) + test = np.genfromtxt(data, dtype=float, usecols=(1,)) assert_equal(test, control[:, 1]) # control = np.array([[1, 2, 3], [3, 4, 5]], float) data = TextIO() np.savetxt(data, control) data.seek(0) - test = np.ndfromtxt(data, dtype=float, usecols=(1, 2)) + test = np.genfromtxt(data, dtype=float, usecols=(1, 2)) assert_equal(test, control[:, 1:]) # Testing with arrays instead of tuples. data.seek(0) - test = np.ndfromtxt(data, dtype=float, usecols=np.array([1, 2])) + test = np.genfromtxt(data, dtype=float, usecols=np.array([1, 2])) assert_equal(test, control[:, 1:]) def test_usecols_as_css(self): @@ -1604,7 +1652,7 @@ M 33 21.99 data = TextIO("JOE 70.1 25.3\nBOB 60.5 27.9") names = ['stid', 'temp'] dtypes = ['S4', 'f8'] - test = np.ndfromtxt( + test = np.genfromtxt( data, usecols=(0, 2), dtype=list(zip(names, dtypes))) assert_equal(test['stid'], [b"JOE", b"BOB"]) assert_equal(test['temp'], [25.3, 27.9]) @@ -1637,7 +1685,7 @@ M 33 21.99 # Check that a nested dtype isn't MIA data = TextIO('1,2,3.0\n4,5,6.0\n') fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) - test = np.mafromtxt(data, dtype=fancydtype, delimiter=',') + test = np.genfromtxt(data, dtype=fancydtype, delimiter=',', usemask=True) control = ma.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=fancydtype) assert_equal(test, control) @@ -1645,7 +1693,7 @@ M 33 21.99 c = TextIO("aaaa 1.0 8.0 1 2 3 4 5 6") dt = np.dtype([('name', 'S4'), ('x', float), ('y', float), ('block', int, (2, 3))]) - x = np.ndfromtxt(c, dtype=dt) + x = np.genfromtxt(c, dtype=dt) a = np.array([('aaaa', 1.0, 8.0, [[1, 2, 3], [4, 5, 6]])], dtype=dt) assert_array_equal(x, a) @@ -1653,7 +1701,7 @@ M 33 21.99 def test_withmissing(self): data = TextIO('A,B\n0,1\n2,N/A') kwargs = dict(delimiter=",", missing_values="N/A", names=True) - test = np.mafromtxt(data, dtype=None, **kwargs) + test = np.genfromtxt(data, dtype=None, usemask=True, **kwargs) control = ma.array([(0, 1), (2, -1)], mask=[(False, False), (False, True)], dtype=[('A', int), ('B', int)]) @@ -1661,7 +1709,7 @@ M 33 21.99 assert_equal(test.mask, control.mask) # data.seek(0) - test = np.mafromtxt(data, **kwargs) + test = np.genfromtxt(data, usemask=True, **kwargs) control = ma.array([(0, 1), (2, -1)], mask=[(False, False), (False, True)], dtype=[('A', float), ('B', float)]) @@ -1673,7 +1721,7 @@ M 33 21.99 basekwargs = dict(dtype=None, delimiter=",", names=True,) mdtype = [('A', int), ('B', float), ('C', complex)] # - test = np.mafromtxt(TextIO(data), missing_values="N/A", + test = np.genfromtxt(TextIO(data), missing_values="N/A", **basekwargs) control = ma.array([(0, 0.0, 0j), (1, -999, 1j), (-9, 2.2, -999j), (3, -99, 3j)], @@ -1682,16 +1730,17 @@ M 33 21.99 assert_equal(test, control) # basekwargs['dtype'] = mdtype - test = np.mafromtxt(TextIO(data), - missing_values={0: -9, 1: -99, 2: -999j}, **basekwargs) + test = np.genfromtxt(TextIO(data), + missing_values={0: -9, 1: -99, 2: -999j}, usemask=True, **basekwargs) control = ma.array([(0, 0.0, 0j), (1, -999, 1j), (-9, 2.2, -999j), (3, -99, 3j)], mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)], dtype=mdtype) assert_equal(test, control) # - test = 
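# Sketch of the mafromtxt -> genfromtxt(..., usemask=True) translation
# used above: with usemask, missing fields come back masked (hedged: the
# empty field here relies on genfromtxt's default missing-value handling).
import io
import numpy as np
data = io.StringIO(u"1,2,3,,5\n")
m = np.genfromtxt(data, delimiter=",", usemask=True)
assert m.mask.tolist() == [False, False, False, True, False]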
np.mafromtxt(TextIO(data), + test = np.genfromtxt(TextIO(data), missing_values={0: -9, 'B': -99, 'C': -999j}, + usemask=True, **basekwargs) control = ma.array([(0, 0.0, 0j), (1, -999, 1j), (-9, 2.2, -999j), (3, -99, 3j)], @@ -1729,8 +1778,8 @@ M 33 21.99 def test_withmissing_float(self): data = TextIO('A,B\n0,1.5\n2,-999.00') - test = np.mafromtxt(data, dtype=None, delimiter=',', - missing_values='-999.0', names=True,) + test = np.genfromtxt(data, dtype=None, delimiter=',', + missing_values='-999.0', names=True, usemask=True) control = ma.array([(0, 1.5), (2, -1.)], mask=[(False, False), (False, True)], dtype=[('A', int), ('B', float)]) @@ -1769,14 +1818,14 @@ M 33 21.99 ret = {} def f(_ret={}): - _ret['mtest'] = np.ndfromtxt(mdata, invalid_raise=False, **kwargs) + _ret['mtest'] = np.genfromtxt(mdata, invalid_raise=False, **kwargs) assert_warns(ConversionWarning, f, _ret=ret) mtest = ret['mtest'] assert_equal(len(mtest), 45) assert_equal(mtest, np.ones(45, dtype=[(_, int) for _ in 'abcde'])) # mdata.seek(0) - assert_raises(ValueError, np.ndfromtxt, mdata, + assert_raises(ValueError, np.genfromtxt, mdata, delimiter=",", names=True) def test_invalid_raise_with_usecols(self): @@ -1793,14 +1842,14 @@ M 33 21.99 ret = {} def f(_ret={}): - _ret['mtest'] = np.ndfromtxt(mdata, usecols=(0, 4), **kwargs) + _ret['mtest'] = np.genfromtxt(mdata, usecols=(0, 4), **kwargs) assert_warns(ConversionWarning, f, _ret=ret) mtest = ret['mtest'] assert_equal(len(mtest), 45) assert_equal(mtest, np.ones(45, dtype=[(_, int) for _ in 'ae'])) # mdata.seek(0) - mtest = np.ndfromtxt(mdata, usecols=(0, 1), **kwargs) + mtest = np.genfromtxt(mdata, usecols=(0, 1), **kwargs) assert_equal(len(mtest), 50) control = np.ones(50, dtype=[(_, int) for _ in 'ab']) control[[10 * _ for _ in range(5)]] = (2, 2) @@ -1819,7 +1868,7 @@ M 33 21.99 def test_default_field_format(self): # Test default format data = "0, 1, 2.3\n4, 5, 6.7" - mtest = np.ndfromtxt(TextIO(data), + mtest = np.genfromtxt(TextIO(data), delimiter=",", dtype=None, defaultfmt="f%02i") ctrl = np.array([(0, 1, 2.3), (4, 5, 6.7)], dtype=[("f00", int), ("f01", int), ("f02", float)]) @@ -1828,7 +1877,7 @@ M 33 21.99 def test_single_dtype_wo_names(self): # Test single dtype w/o names data = "0, 1, 2.3\n4, 5, 6.7" - mtest = np.ndfromtxt(TextIO(data), + mtest = np.genfromtxt(TextIO(data), delimiter=",", dtype=float, defaultfmt="f%02i") ctrl = np.array([[0., 1., 2.3], [4., 5., 6.7]], dtype=float) assert_equal(mtest, ctrl) @@ -1836,7 +1885,7 @@ M 33 21.99 def test_single_dtype_w_explicit_names(self): # Test single dtype w explicit names data = "0, 1, 2.3\n4, 5, 6.7" - mtest = np.ndfromtxt(TextIO(data), + mtest = np.genfromtxt(TextIO(data), delimiter=",", dtype=float, names="a, b, c") ctrl = np.array([(0., 1., 2.3), (4., 5., 6.7)], dtype=[(_, float) for _ in "abc"]) @@ -1845,7 +1894,7 @@ M 33 21.99 def test_single_dtype_w_implicit_names(self): # Test single dtype w implicit names data = "a, b, c\n0, 1, 2.3\n4, 5, 6.7" - mtest = np.ndfromtxt(TextIO(data), + mtest = np.genfromtxt(TextIO(data), delimiter=",", dtype=float, names=True) ctrl = np.array([(0., 1., 2.3), (4., 5., 6.7)], dtype=[(_, float) for _ in "abc"]) @@ -1854,7 +1903,7 @@ M 33 21.99 def test_easy_structured_dtype(self): # Test easy structured dtype data = "0, 1, 2.3\n4, 5, 6.7" - mtest = np.ndfromtxt(TextIO(data), delimiter=",", + mtest = np.genfromtxt(TextIO(data), delimiter=",", dtype=(int, float, float), defaultfmt="f_%02i") ctrl = np.array([(0, 1., 2.3), (4, 5., 6.7)], dtype=[("f_00", int), ("f_01", float), 
("f_02", float)]) @@ -1866,14 +1915,14 @@ M 33 21.99 kwargs = dict(delimiter=",", dtype=None) with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) - mtest = np.ndfromtxt(TextIO(data), **kwargs) + mtest = np.genfromtxt(TextIO(data), **kwargs) assert_(w[0].category is np.VisibleDeprecationWarning) ctrl = np.array([('01/01/2003 ', 1.3, ' abcde')], dtype=[('f0', '|S12'), ('f1', float), ('f2', '|S8')]) assert_equal(mtest, ctrl) with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', np.VisibleDeprecationWarning) - mtest = np.ndfromtxt(TextIO(data), autostrip=True, **kwargs) + mtest = np.genfromtxt(TextIO(data), autostrip=True, **kwargs) assert_(w[0].category is np.VisibleDeprecationWarning) ctrl = np.array([('01/01/2003', 1.3, 'abcde')], dtype=[('f0', '|S10'), ('f1', float), ('f2', '|S5')]) @@ -1934,12 +1983,12 @@ M 33 21.99 # w/ dtype=None ctrl = np.array([(0, 1, 2), (3, 4, 5)], dtype=[(_, int) for _ in ('A', 'f0', 'C')]) - test = np.ndfromtxt(TextIO(data), dtype=None, **kwargs) + test = np.genfromtxt(TextIO(data), dtype=None, **kwargs) assert_equal(test, ctrl) # w/ default dtype ctrl = np.array([(0, 1, 2), (3, 4, 5)], dtype=[(_, float) for _ in ('A', 'f0', 'C')]) - test = np.ndfromtxt(TextIO(data), **kwargs) + test = np.genfromtxt(TextIO(data), **kwargs) def test_names_auto_completion(self): # Make sure that names are properly completed @@ -1975,13 +2024,13 @@ M 33 21.99 kwargs = dict(delimiter=(5, 5, 4), names=True, dtype=None) ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)], dtype=[('A', int), ('B', int), ('C', float)]) - test = np.ndfromtxt(TextIO(data), **kwargs) + test = np.genfromtxt(TextIO(data), **kwargs) assert_equal(test, ctrl) # kwargs = dict(delimiter=5, names=True, dtype=None) ctrl = np.array([(0, 1, 2.3), (45, 67, 9.)], dtype=[('A', int), ('B', int), ('C', float)]) - test = np.ndfromtxt(TextIO(data), **kwargs) + test = np.genfromtxt(TextIO(data), **kwargs) assert_equal(test, ctrl) def test_filling_values(self): @@ -1989,7 +2038,7 @@ M 33 21.99 data = b"1, 2, 3\n1, , 5\n0, 6, \n" kwargs = dict(delimiter=",", dtype=None, filling_values=-999) ctrl = np.array([[1, 2, 3], [1, -999, 5], [0, 6, -999]], dtype=int) - test = np.ndfromtxt(TextIO(data), **kwargs) + test = np.genfromtxt(TextIO(data), **kwargs) assert_equal(test, ctrl) def test_comments_is_none(self): @@ -2278,7 +2327,7 @@ M 33 21.99 data = TextIO('73786976294838206464 17179869184 1024') - test = np.ndfromtxt(data, dtype=None) + test = np.genfromtxt(data, dtype=None) assert_equal(test.dtype.names, ['f0', 'f1', 'f2']) @@ -2342,7 +2391,7 @@ class TestPathUsage(object): np.savez(path, lab='place holder') with np.load(path) as data: assert_array_equal(data['lab'], 'place holder') - + def test_savez_compressed_load(self): # Test that pathlib.Path instances can be used with savez. 
with temppath(suffix='.npz') as path: @@ -2368,7 +2417,7 @@ class TestPathUsage(object): f.write(u'1 2\n3 4') control = np.array([[1, 2], [3, 4]], dtype=int) - test = np.ndfromtxt(path, dtype=int) + test = np.genfromtxt(path, dtype=int) assert_array_equal(test, control) def test_mafromtxt(self): @@ -2378,7 +2427,7 @@ class TestPathUsage(object): with path.open('w') as f: f.write(u'1,2,3.0\n4,5,6.0\n') - test = np.mafromtxt(path, delimiter=',') + test = np.genfromtxt(path, delimiter=',', usemask=True) control = ma.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)]) assert_equal(test, control) diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py index 504372faf..b7261c63f 100644 --- a/numpy/lib/tests/test_nanfunctions.py +++ b/numpy/lib/tests/test_nanfunctions.py @@ -1,8 +1,10 @@ from __future__ import division, absolute_import, print_function import warnings +import pytest import numpy as np +from numpy.lib.nanfunctions import _nan_mask from numpy.testing import ( assert_, assert_equal, assert_almost_equal, assert_no_warnings, assert_raises, assert_array_equal, suppress_warnings @@ -925,3 +927,29 @@ class TestNanFunctions_Quantile(object): p = p.tolist() np.nanquantile(np.arange(100.), p, interpolation="midpoint") assert_array_equal(p, p0) + +@pytest.mark.parametrize("arr, expected", [ + # array of floats with some nans + (np.array([np.nan, 5.0, np.nan, np.inf]), + np.array([False, True, False, True])), + # int64 array that can't possibly have nans + (np.array([1, 5, 7, 9], dtype=np.int64), + True), + # bool array that can't possibly have nans + (np.array([False, True, False, True]), + True), + # 2-D complex array with nans + (np.array([[np.nan, 5.0], + [np.nan, np.inf]], dtype=np.complex64), + np.array([[False, True], + [False, True]])), + ]) +def test__nan_mask(arr, expected): + for out in [None, np.empty(arr.shape, dtype=np.bool_)]: + actual = _nan_mask(arr, out=out) + assert_equal(actual, expected) + # the above won't distinguish between True proper + # and an array of True values; we want True proper + # for types that can't possibly contain NaN + if type(expected) is not np.ndarray: + assert actual is True diff --git a/numpy/lib/tests/test_packbits.py b/numpy/lib/tests/test_packbits.py index fde5c37f2..95a465c36 100644 --- a/numpy/lib/tests/test_packbits.py +++ b/numpy/lib/tests/test_packbits.py @@ -2,7 +2,8 @@ from __future__ import division, absolute_import, print_function import numpy as np from numpy.testing import assert_array_equal, assert_equal, assert_raises - +import pytest +from itertools import chain def test_packbits(): # Copied from the docstring. 
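# The docstring example the comment above refers to, restated as a
# runnable check: each group of bits along the axis is padded to a byte.
import numpy as np
a = np.array([[[1, 0, 1], [0, 1, 0]],
              [[1, 1, 0], [0, 0, 1]]])
b = np.packbits(a, axis=-1)
assert b.tolist() == [[[160], [64]], [[192], [32]]]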
@@ -50,8 +51,8 @@ def test_packbits_empty_with_axis(): assert_equal(b.dtype, np.uint8) assert_equal(b.shape, out_shape) - -def test_packbits_large(): +@pytest.mark.parametrize('bitorder', ('little', 'big')) +def test_packbits_large(bitorder): # test data large enough for 16 byte vectorization a = np.array([1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, @@ -71,7 +72,7 @@ def test_packbits_large(): a = a.repeat(3) for dtype in '?bBhHiIlLqQ': arr = np.array(a, dtype=dtype) - b = np.packbits(arr, axis=None) + b = np.packbits(arr, axis=None, bitorder=bitorder) assert_equal(b.dtype, np.uint8) r = [252, 127, 192, 3, 254, 7, 252, 0, 7, 31, 240, 0, 28, 1, 255, 252, 113, 248, 3, 255, 192, 28, 15, 192, 28, 126, 0, 224, 127, 255, @@ -81,9 +82,10 @@ def test_packbits_large(): 255, 224, 1, 255, 252, 126, 63, 0, 1, 192, 252, 14, 63, 0, 15, 199, 252, 113, 255, 3, 128, 56, 252, 14, 7, 0, 113, 255, 255, 142, 56, 227, 129, 248, 227, 129, 199, 31, 128] - assert_array_equal(b, r) + if bitorder == 'big': + assert_array_equal(b, r) # equal for size being multiple of 8 - assert_array_equal(np.unpackbits(b)[:-4], a) + assert_array_equal(np.unpackbits(b, bitorder=bitorder)[:-4], a) # check last byte of different remainders (16 byte vectorization) b = [np.packbits(arr[:-i], axis=None)[-1] for i in range(1, 16)] @@ -229,6 +231,20 @@ def test_unpackbits(): [0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 1, 0, 1, 1, 1]])) +def test_pack_unpack_order(): + a = np.array([[2], [7], [23]], dtype=np.uint8) + b = np.unpackbits(a, axis=1) + assert_equal(b.dtype, np.uint8) + b_little = np.unpackbits(a, axis=1, bitorder='little') + b_big = np.unpackbits(a, axis=1, bitorder='big') + assert_array_equal(b, b_big) + assert_array_equal(a, np.packbits(b_little, axis=1, bitorder='little')) + assert_array_equal(b[:,::-1], b_little) + assert_array_equal(a, np.packbits(b_big, axis=1, bitorder='big')) + assert_raises(ValueError, np.unpackbits, a, bitorder='r') + assert_raises(TypeError, np.unpackbits, a, bitorder=10) + + def test_unpackbits_empty(): a = np.empty((0,), dtype=np.uint8) @@ -266,3 +282,97 @@ def test_unpackbits_large(): assert_array_equal(np.packbits(np.unpackbits(d, axis=1), axis=1), d) d = d.T.copy() assert_array_equal(np.packbits(np.unpackbits(d, axis=0), axis=0), d) + + +class TestCount(): + x = np.array([ + [1, 0, 1, 0, 0, 1, 0], + [0, 1, 1, 1, 0, 0, 0], + [0, 0, 1, 0, 0, 1, 1], + [1, 1, 0, 0, 0, 1, 1], + [1, 0, 1, 0, 1, 0, 1], + [0, 0, 1, 1, 1, 0, 0], + [0, 1, 0, 1, 0, 1, 0], + ], dtype=np.uint8) + padded1 = np.zeros(57, dtype=np.uint8) + padded1[:49] = x.ravel() + padded1b = np.zeros(57, dtype=np.uint8) + padded1b[:49] = x[::-1].copy().ravel() + padded2 = np.zeros((9, 9), dtype=np.uint8) + padded2[:7, :7] = x + + @pytest.mark.parametrize('bitorder', ('little', 'big')) + @pytest.mark.parametrize('count', chain(range(58), range(-1, -57, -1))) + def test_roundtrip(self, bitorder, count): + if count < 0: + # one extra zero of padding + cutoff = count - 1 + else: + cutoff = count + # test complete invertibility of packbits and unpackbits with count + packed = np.packbits(self.x, bitorder=bitorder) + unpacked = np.unpackbits(packed, count=count, bitorder=bitorder) + assert_equal(unpacked.dtype, np.uint8) + assert_array_equal(unpacked, self.padded1[:cutoff]) + + @pytest.mark.parametrize('kwargs', [ + {}, {'count': None}, + ]) + def test_count(self, kwargs): + packed = np.packbits(self.x) + unpacked = np.unpackbits(packed, **kwargs) + assert_equal(unpacked.dtype, 
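# Sketch of the bitorder semantics these parametrized tests sweep
# (assumes NumPy >= 1.17, where the keyword was added): 'little' packs
# the first input bit into the least significant position.
import numpy as np
bits = np.array([1, 1, 0, 0, 0, 0, 0, 0], dtype=np.uint8)
assert np.packbits(bits)[0] == 0b11000000               # default is 'big'
assert np.packbits(bits, bitorder='little')[0] == 0b00000011
assert (np.unpackbits(np.array([3], dtype=np.uint8),
                      bitorder='little') == bits).all()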
np.uint8) + assert_array_equal(unpacked, self.padded1[:-1]) + + @pytest.mark.parametrize('bitorder', ('little', 'big')) + # delta==-1 when count<0 because one extra zero of padding + @pytest.mark.parametrize('count', chain(range(8), range(-1, -9, -1))) + def test_roundtrip_axis(self, bitorder, count): + if count < 0: + # one extra zero of padding + cutoff = count - 1 + else: + cutoff = count + packed0 = np.packbits(self.x, axis=0, bitorder=bitorder) + unpacked0 = np.unpackbits(packed0, axis=0, count=count, + bitorder=bitorder) + assert_equal(unpacked0.dtype, np.uint8) + assert_array_equal(unpacked0, self.padded2[:cutoff, :self.x.shape[1]]) + + packed1 = np.packbits(self.x, axis=1, bitorder=bitorder) + unpacked1 = np.unpackbits(packed1, axis=1, count=count, + bitorder=bitorder) + assert_equal(unpacked1.dtype, np.uint8) + assert_array_equal(unpacked1, self.padded2[:self.x.shape[0], :cutoff]) + + @pytest.mark.parametrize('kwargs', [ + {}, {'count': None}, + {'bitorder' : 'little'}, + {'bitorder': 'little', 'count': None}, + {'bitorder' : 'big'}, + {'bitorder': 'big', 'count': None}, + ]) + def test_axis_count(self, kwargs): + packed0 = np.packbits(self.x, axis=0) + unpacked0 = np.unpackbits(packed0, axis=0, **kwargs) + assert_equal(unpacked0.dtype, np.uint8) + if kwargs.get('bitorder', 'big') == 'big': + assert_array_equal(unpacked0, self.padded2[:-1, :self.x.shape[1]]) + else: + assert_array_equal(unpacked0[::-1, :], self.padded2[:-1, :self.x.shape[1]]) + + packed1 = np.packbits(self.x, axis=1) + unpacked1 = np.unpackbits(packed1, axis=1, **kwargs) + assert_equal(unpacked1.dtype, np.uint8) + if kwargs.get('bitorder', 'big') == 'big': + assert_array_equal(unpacked1, self.padded2[:self.x.shape[0], :-1]) + else: + assert_array_equal(unpacked1[:, ::-1], self.padded2[:self.x.shape[0], :-1]) + + def test_bad_count(self): + packed0 = np.packbits(self.x, axis=0) + assert_raises(ValueError, np.unpackbits, packed0, axis=0, count=-9) + packed1 = np.packbits(self.x, axis=1) + assert_raises(ValueError, np.unpackbits, packed1, axis=1, count=-9) + packed = np.packbits(self.x) + assert_raises(ValueError, np.unpackbits, packed, count=-57) diff --git a/numpy/lib/tests/test_polynomial.py b/numpy/lib/tests/test_polynomial.py index 77414ba7c..89759bd83 100644 --- a/numpy/lib/tests/test_polynomial.py +++ b/numpy/lib/tests/test_polynomial.py @@ -246,16 +246,16 @@ class TestPolynomial(object): assert_equal(r.coeffs.dtype, np.complex128) assert_equal(q*a + r, b) - def test_poly_coeffs_immutable(self): - """ Coefficients should not be modifiable """ + def test_poly_coeffs_mutable(self): + """ Coefficients should be modifiable """ p = np.poly1d([1, 2, 3]) - try: - # despite throwing an exception, this used to change state - p.coeffs += 1 - except Exception: - pass - assert_equal(p.coeffs, [1, 2, 3]) + p.coeffs += 1 + assert_equal(p.coeffs, [2, 3, 4]) p.coeffs[2] += 10 - assert_equal(p.coeffs, [1, 2, 3]) + assert_equal(p.coeffs, [2, 3, 14]) + + # this never used to be allowed - let's not add features to deprecated + # APIs + assert_raises(AttributeError, setattr, p, 'coeffs', np.array(1)) diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py index 11f8a5afa..f713fb64d 100644 --- a/numpy/lib/tests/test_recfunctions.py +++ b/numpy/lib/tests/test_recfunctions.py @@ -214,6 +214,8 @@ class TestRecFunctions(object): dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')]) out = np.mean(structured_to_unstructured(b[['x', 'z']]), axis=-1) assert_equal(out, np.array([ 3. , 5.5, 9. , 11. 
])) + out = np.mean(structured_to_unstructured(b[['x']]), axis=-1) + assert_equal(out, np.array([ 1. , 4. , 7. , 10. ])) c = np.arange(20).reshape((4,5)) out = unstructured_to_structured(c, a.dtype) @@ -221,9 +223,9 @@ class TestRecFunctions(object): ( 5, ( 6., 7), [ 8., 9.]), (10, (11., 12), [13., 14.]), (15, (16., 17), [18., 19.])], - dtype=[('a', '<i4'), - ('b', [('f0', '<f4'), ('f1', '<u2')]), - ('c', '<f4', (2,))]) + dtype=[('a', 'i4'), + ('b', [('f0', 'f4'), ('f1', 'u2')]), + ('c', 'f4', (2,))]) assert_equal(out, want) d = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], @@ -241,6 +243,15 @@ class TestRecFunctions(object): assert_(dd.base is d) assert_(ddd.base is d) + # including uniform fields with subarrays unpacked + d = np.array([(1, [2, 3], [[ 4, 5], [ 6, 7]]), + (8, [9, 10], [[11, 12], [13, 14]])], + dtype=[('x0', 'i4'), ('x1', ('i4', 2)), ('x2', ('i4', (2, 2)))]) + dd = structured_to_unstructured(d) + ddd = unstructured_to_structured(dd, d.dtype) + assert_(dd.base is d) + assert_(ddd.base is d) + # test that nested fields with identical names don't break anything point = np.dtype([('x', int), ('y', int)]) triangle = np.dtype([('a', point), ('b', point), ('c', point)]) diff --git a/numpy/lib/tests/test_regression.py b/numpy/lib/tests/test_regression.py index 4c46bc46b..4cd812f5d 100644 --- a/numpy/lib/tests/test_regression.py +++ b/numpy/lib/tests/test_regression.py @@ -21,8 +21,8 @@ class TestRegression(object): # Ticket #91 x = np.random.random((3, 3)) y = x.copy() - np.cov(x, rowvar=1) - np.cov(y, rowvar=0) + np.cov(x, rowvar=True) + np.cov(y, rowvar=False) assert_array_equal(x, y) def test_mem_digitize(self): @@ -56,7 +56,7 @@ class TestRegression(object): def test_poly1d_nan_roots(self): # Ticket #396 - p = np.poly1d([np.nan, np.nan, 1], r=0) + p = np.poly1d([np.nan, np.nan, 1], r=False) assert_raises(np.linalg.LinAlgError, getattr, p, "r") def test_mem_polymul(self): diff --git a/numpy/lib/tests/test_stride_tricks.py b/numpy/lib/tests/test_stride_tricks.py index b2bd7da3e..955fb914c 100644 --- a/numpy/lib/tests/test_stride_tricks.py +++ b/numpy/lib/tests/test_stride_tricks.py @@ -4,7 +4,7 @@ import numpy as np from numpy.core._rational_tests import rational from numpy.testing import ( assert_equal, assert_array_equal, assert_raises, assert_, - assert_raises_regex + assert_raises_regex, assert_warns, ) from numpy.lib.stride_tricks import ( as_strided, broadcast_arrays, _broadcast_shape, broadcast_to @@ -415,12 +415,28 @@ def test_writeable(): assert_equal(result.flags.writeable, False) assert_raises(ValueError, result.__setitem__, slice(None), 0) - # but the result of broadcast_arrays needs to be writeable (for now), to + # but the result of broadcast_arrays needs to be writeable, to # preserve backwards compatibility for results in [broadcast_arrays(original), broadcast_arrays(0, original)]: for result in results: + # This will change to False in a future version + if any([s == 0 for s in result.strides]): + with assert_warns(FutureWarning): + assert_equal(result.flags.writeable, True) + with assert_warns(DeprecationWarning): + result[:] = 0 + # Warning not emitted, writing to the array resets it + assert_equal(result.flags.writeable, True) + for results in [broadcast_arrays(original), + broadcast_arrays(0, original)]: + for result in results: + # resets the warn_on_write DeprecationWarning + result.flags.writeable = True + # check: no warning emitted assert_equal(result.flags.writeable, True) + result[:] = 0 + # keep readonly input readonly 
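# Sketch of the deprecation staged above (assumes the 1.17-era transition
# behaviour): writing to a repeated, stride-0 result of broadcast_arrays
# emits a DeprecationWarning; later releases make it read-only outright.
import warnings
import numpy as np
x, y = np.broadcast_arrays(np.arange(3), np.zeros((2, 1)))
try:
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        x[...] = 0
    assert any(issubclass(i.category, DeprecationWarning) for i in w)
except ValueError:
    pass  # newer releases: the broadcast result is simply read-only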
original.flags.writeable = False _, result = broadcast_arrays(0, original) diff --git a/numpy/lib/tests/test_twodim_base.py b/numpy/lib/tests/test_twodim_base.py index bf93b4adb..bb844e4bd 100644 --- a/numpy/lib/tests/test_twodim_base.py +++ b/numpy/lib/tests/test_twodim_base.py @@ -5,7 +5,7 @@ from __future__ import division, absolute_import, print_function from numpy.testing import ( assert_equal, assert_array_equal, assert_array_max_ulp, - assert_array_almost_equal, assert_raises, + assert_array_almost_equal, assert_raises, assert_ ) from numpy import ( @@ -17,6 +17,9 @@ from numpy import ( import numpy as np +from numpy.core.tests.test_overrides import requires_array_function + + def get_mat(n): data = arange(n) data = add.outer(data, data) @@ -273,6 +276,27 @@ class TestHistogram2d(object): assert_array_equal(H, answer) assert_array_equal(xe, array([0., 0.25, 0.5, 0.75, 1])) + @requires_array_function + def test_dispatch(self): + class ShouldDispatch: + def __array_function__(self, function, types, args, kwargs): + return types, args, kwargs + + xy = [1, 2] + s_d = ShouldDispatch() + r = histogram2d(s_d, xy) + # Cannot use assert_equal since that dispatches... + assert_(r == ((ShouldDispatch,), (s_d, xy), {})) + r = histogram2d(xy, s_d) + assert_(r == ((ShouldDispatch,), (xy, s_d), {})) + r = histogram2d(xy, xy, bins=s_d) + assert_(r, ((ShouldDispatch,), (xy, xy), dict(bins=s_d))) + r = histogram2d(xy, xy, bins=[s_d, 5]) + assert_(r, ((ShouldDispatch,), (xy, xy), dict(bins=[s_d, 5]))) + assert_raises(Exception, histogram2d, xy, xy, bins=[s_d]) + r = histogram2d(xy, xy, weights=s_d) + assert_(r, ((ShouldDispatch,), (xy, xy), dict(weights=s_d))) + class TestTri(object): def test_dtype(self): diff --git a/numpy/lib/tests/test_type_check.py b/numpy/lib/tests/test_type_check.py index 2982ca31a..b3f114b92 100644 --- a/numpy/lib/tests/test_type_check.py +++ b/numpy/lib/tests/test_type_check.py @@ -360,6 +360,14 @@ class TestNanToNum(object): assert_(vals[1] == 0) assert_all(vals[2] > 1e10) and assert_all(np.isfinite(vals[2])) assert_equal(type(vals), np.ndarray) + + # perform the same tests but with nan, posinf and neginf keywords + with np.errstate(divide='ignore', invalid='ignore'): + vals = nan_to_num(np.array((-1., 0, 1))/0., + nan=10, posinf=20, neginf=30) + assert_equal(vals, [30, 10, 20]) + assert_all(np.isfinite(vals[[0, 2]])) + assert_equal(type(vals), np.ndarray) # perform the same test but in-place with np.errstate(divide='ignore', invalid='ignore'): @@ -371,26 +379,48 @@ class TestNanToNum(object): assert_(vals[1] == 0) assert_all(vals[2] > 1e10) and assert_all(np.isfinite(vals[2])) assert_equal(type(vals), np.ndarray) + + # perform the same test but in-place + with np.errstate(divide='ignore', invalid='ignore'): + vals = np.array((-1., 0, 1))/0. 
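# Sketch of the keywords under test (assumes NumPy >= 1.17, where the
# nan/posinf/neginf keywords exist): they override the default
# replacement values.
import numpy as np
with np.errstate(divide='ignore', invalid='ignore'):
    vals = np.array((-1., 0, 1)) / 0.
out = np.nan_to_num(vals, nan=10, posinf=20, neginf=30)
assert out.tolist() == [30.0, 10.0, 20.0]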
+ result = nan_to_num(vals, copy=False, nan=10, posinf=20, neginf=30) + + assert_(result is vals) + assert_equal(vals, [30, 10, 20]) + assert_all(np.isfinite(vals[[0, 2]])) + assert_equal(type(vals), np.ndarray) def test_array(self): vals = nan_to_num([1]) assert_array_equal(vals, np.array([1], int)) assert_equal(type(vals), np.ndarray) + vals = nan_to_num([1], nan=10, posinf=20, neginf=30) + assert_array_equal(vals, np.array([1], int)) + assert_equal(type(vals), np.ndarray) def test_integer(self): vals = nan_to_num(1) assert_all(vals == 1) assert_equal(type(vals), np.int_) + vals = nan_to_num(1, nan=10, posinf=20, neginf=30) + assert_all(vals == 1) + assert_equal(type(vals), np.int_) def test_float(self): vals = nan_to_num(1.0) assert_all(vals == 1.0) assert_equal(type(vals), np.float_) + vals = nan_to_num(1.1, nan=10, posinf=20, neginf=30) + assert_all(vals == 1.1) + assert_equal(type(vals), np.float_) def test_complex_good(self): vals = nan_to_num(1+1j) assert_all(vals == 1+1j) assert_equal(type(vals), np.complex_) + vals = nan_to_num(1+1j, nan=10, posinf=20, neginf=30) + assert_all(vals == 1+1j) + assert_equal(type(vals), np.complex_) def test_complex_bad(self): with np.errstate(divide='ignore', invalid='ignore'): @@ -414,6 +444,16 @@ class TestNanToNum(object): # !! inf. Comment out for now, and see if it # !! changes #assert_all(vals.real < -1e10) and assert_all(np.isfinite(vals)) + + def test_do_not_rewrite_previous_keyword(self): + # This is done to test that when, for instance, nan=np.inf then these + # values are not rewritten by posinf keyword to the posinf value. + with np.errstate(divide='ignore', invalid='ignore'): + vals = nan_to_num(np.array((-1., 0, 1))/0., nan=np.inf, posinf=999) + assert_all(np.isfinite(vals[[0, 2]])) + assert_all(vals[0] < -1e10) + assert_equal(vals[[1, 2]], [np.inf, 999]) + assert_equal(type(vals), np.ndarray) class TestRealIfClose(object): diff --git a/numpy/lib/tests/test_utils.py b/numpy/lib/tests/test_utils.py index 2723f3440..9673a05fa 100644 --- a/numpy/lib/tests/test_utils.py +++ b/numpy/lib/tests/test_utils.py @@ -1,5 +1,6 @@ from __future__ import division, absolute_import, print_function +import inspect import sys import pytest @@ -38,6 +39,32 @@ def old_func3(self, x): new_func3 = deprecate(old_func3, old_name="old_func3", new_name="new_func3") +def old_func4(self, x): + """Summary. + + Further info. + """ + return x +new_func4 = deprecate(old_func4) + + +def old_func5(self, x): + """Summary. + + Bizarre indentation. + """ + return x +new_func5 = deprecate(old_func5) + + +def old_func6(self, x): + """ + Also in PEP-257. 
+ """ + return x +new_func6 = deprecate(old_func6) + + def test_deprecate_decorator(): assert_('deprecated' in old_func.__doc__) @@ -51,6 +78,25 @@ def test_deprecate_fn(): assert_('new_func3' in new_func3.__doc__) +@pytest.mark.skipif(sys.flags.optimize == 2, reason="-OO discards docstrings") +def test_deprecate_help_indentation(): + _compare_docs(old_func4, new_func4) + _compare_docs(old_func5, new_func5) + _compare_docs(old_func6, new_func6) + + +def _compare_docs(old_func, new_func): + old_doc = inspect.getdoc(old_func) + new_doc = inspect.getdoc(new_func) + index = new_doc.index('\n\n') + 2 + assert_equal(new_doc[index:], old_doc) + + +@pytest.mark.skipif(sys.flags.optimize == 2, reason="-OO discards docstrings") +def test_deprecate_preserve_whitespace(): + assert_('\n Bizarre' in new_func5.__doc__) + + def test_safe_eval_nameconstant(): # Test if safe_eval supports Python 3.4 _ast.NameConstant utils.safe_eval('None') diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py index 27d848608..f3dc6c8e1 100644 --- a/numpy/lib/twodim_base.py +++ b/numpy/lib/twodim_base.py @@ -77,13 +77,13 @@ def fliplr(m): -------- >>> A = np.diag([1.,2.,3.]) >>> A - array([[ 1., 0., 0.], - [ 0., 2., 0.], - [ 0., 0., 3.]]) + array([[1., 0., 0.], + [0., 2., 0.], + [0., 0., 3.]]) >>> np.fliplr(A) - array([[ 0., 0., 1.], - [ 0., 2., 0.], - [ 3., 0., 0.]]) + array([[0., 0., 1.], + [0., 2., 0.], + [3., 0., 0.]]) >>> A = np.random.randn(2,3,5) >>> np.all(np.fliplr(A) == A[:,::-1,...]) @@ -129,13 +129,13 @@ def flipud(m): -------- >>> A = np.diag([1.0, 2, 3]) >>> A - array([[ 1., 0., 0.], - [ 0., 2., 0.], - [ 0., 0., 3.]]) + array([[1., 0., 0.], + [0., 2., 0.], + [0., 0., 3.]]) >>> np.flipud(A) - array([[ 0., 0., 3.], - [ 0., 2., 0.], - [ 1., 0., 0.]]) + array([[0., 0., 3.], + [0., 2., 0.], + [1., 0., 0.]]) >>> A = np.random.randn(2,3,5) >>> np.all(np.flipud(A) == A[::-1,...]) @@ -191,9 +191,9 @@ def eye(N, M=None, k=0, dtype=float, order='C'): array([[1, 0], [0, 1]]) >>> np.eye(3, k=1) - array([[ 0., 1., 0.], - [ 0., 0., 1.], - [ 0., 0., 0.]]) + array([[0., 1., 0.], + [0., 0., 1.], + [0., 0., 0.]]) """ if M is None: @@ -378,9 +378,9 @@ def tri(N, M=None, k=0, dtype=float): [1, 1, 1, 1, 1]]) >>> np.tri(3, 5, -1) - array([[ 0., 0., 0., 0., 0.], - [ 1., 0., 0., 0., 0.], - [ 1., 1., 0., 0., 0.]]) + array([[0., 0., 0., 0., 0.], + [1., 0., 0., 0., 0.], + [1., 1., 0., 0., 0.]]) """ if M is None: @@ -540,7 +540,7 @@ def vander(x, N=None, increasing=False): of the differences between the values of the input vector: >>> np.linalg.det(np.vander(x)) - 48.000000000000043 + 48.000000000000043 # may vary >>> (5-3)*(5-2)*(5-1)*(3-2)*(3-1)*(2-1) 48 @@ -565,7 +565,20 @@ def vander(x, N=None, increasing=False): def _histogram2d_dispatcher(x, y, bins=None, range=None, normed=None, weights=None, density=None): - return (x, y, bins, weights) + yield x + yield y + + # This terrible logic is adapted from the checks in histogram2d + try: + N = len(bins) + except TypeError: + N = 1 + if N == 2: + yield from bins # bins=[x, y] + else: + yield bins + + yield weights @array_function_dispatch(_histogram2d_dispatcher) @@ -644,7 +657,7 @@ def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, Examples -------- - >>> import matplotlib as mpl + >>> from matplotlib.image import NonUniformImage >>> import matplotlib.pyplot as plt Construct a 2-D histogram with variable bin width. 
First define the bin @@ -666,6 +679,7 @@ def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, >>> ax = fig.add_subplot(131, title='imshow: square bins') >>> plt.imshow(H, interpolation='nearest', origin='low', ... extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]]) + <matplotlib.image.AxesImage object at 0x...> :func:`pcolormesh <matplotlib.pyplot.pcolormesh>` can display actual edges: @@ -673,13 +687,14 @@ def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, ... aspect='equal') >>> X, Y = np.meshgrid(xedges, yedges) >>> ax.pcolormesh(X, Y, H) + <matplotlib.collections.QuadMesh object at 0x...> :class:`NonUniformImage <matplotlib.image.NonUniformImage>` can be used to display actual bin edges with interpolation: >>> ax = fig.add_subplot(133, title='NonUniformImage: interpolated', ... aspect='equal', xlim=xedges[[0, -1]], ylim=yedges[[0, -1]]) - >>> im = mpl.image.NonUniformImage(ax, interpolation='bilinear') + >>> im = NonUniformImage(ax, interpolation='bilinear') >>> xcenters = (xedges[:-1] + xedges[1:]) / 2 >>> ycenters = (yedges[:-1] + yedges[1:]) / 2 >>> im.set_data(xcenters, ycenters, H) @@ -829,7 +844,7 @@ def tril_indices(n, k=0, m=None): Both for indexing: >>> a[il1] - array([ 0, 4, 5, 8, 9, 10, 12, 13, 14, 15]) + array([ 0, 4, 5, ..., 13, 14, 15]) And for assigning values: @@ -944,7 +959,7 @@ def triu_indices(n, k=0, m=None): Both for indexing: >>> a[iu1] - array([ 0, 1, 2, 3, 5, 6, 7, 10, 11, 15]) + array([ 0, 1, 2, ..., 10, 11, 15]) And for assigning values: diff --git a/numpy/lib/type_check.py b/numpy/lib/type_check.py index 90b1e9a6e..ac4b03a6c 100644 --- a/numpy/lib/type_check.py +++ b/numpy/lib/type_check.py @@ -105,11 +105,11 @@ def asfarray(a, dtype=_nx.float_): Examples -------- >>> np.asfarray([2, 3]) - array([ 2., 3.]) + array([2., 3.]) >>> np.asfarray([2, 3], dtype='float') - array([ 2., 3.]) + array([2., 3.]) >>> np.asfarray([2, 3], dtype='int8') - array([ 2., 3.]) + array([2., 3.]) """ if not _nx.issubdtype(dtype, _nx.inexact): @@ -146,13 +146,13 @@ def real(val): -------- >>> a = np.array([1+2j, 3+4j, 5+6j]) >>> a.real - array([ 1., 3., 5.]) + array([1., 3., 5.]) >>> a.real = 9 >>> a - array([ 9.+2.j, 9.+4.j, 9.+6.j]) + array([9.+2.j, 9.+4.j, 9.+6.j]) >>> a.real = np.array([9, 8, 7]) >>> a - array([ 9.+2.j, 8.+4.j, 7.+6.j]) + array([9.+2.j, 8.+4.j, 7.+6.j]) >>> np.real(1 + 1j) 1.0 @@ -192,10 +192,10 @@ def imag(val): -------- >>> a = np.array([1+2j, 3+4j, 5+6j]) >>> a.imag - array([ 2., 4., 6.]) + array([2., 4., 6.]) >>> a.imag = np.array([8, 10, 12]) >>> a - array([ 1. +8.j, 3.+10.j, 5.+12.j]) + array([1. +8.j, 3.+10.j, 5.+12.j]) >>> np.imag(1 + 1j) 1.0 @@ -363,18 +363,23 @@ def _getmaxmin(t): return f.max, f.min -def _nan_to_num_dispatcher(x, copy=None): +def _nan_to_num_dispatcher(x, copy=None, nan=None, posinf=None, neginf=None): return (x,) @array_function_dispatch(_nan_to_num_dispatcher) -def nan_to_num(x, copy=True): +def nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None): """ - Replace NaN with zero and infinity with large finite numbers. + Replace NaN with zero and infinity with large finite numbers (default + behaviour) or with the numbers defined by the user using the `nan`, + `posinf` and/or `neginf` keywords. - If `x` is inexact, NaN is replaced by zero, and infinity and -infinity - replaced by the respectively largest and most negative finite floating - point values representable by ``x.dtype``. 
+    If `x` is inexact, NaN is replaced by zero or by the value given in the
+    `nan` keyword, infinity is replaced by the largest finite floating point
+    value representable by ``x.dtype`` or by the value given in the `posinf`
+    keyword, and -infinity is replaced by the most negative finite floating
+    point value representable by ``x.dtype`` or by the value given in the
+    `neginf` keyword.

     For complex dtypes, the above is applied to each of the real and
     imaginary components of `x` separately.
@@ -390,6 +395,17 @@ def nan_to_num(x, copy=True):
         in-place (False). The in-place operation only occurs if
         casting to an array does not require a copy.
         Default is True.

         .. versionadded:: 1.13
+    nan : int, float, optional
+        Value to be used to fill NaN values. If no value is passed
+        then NaN values will be replaced with 0.0.
+    posinf : int, float, optional
+        Value to be used to fill positive infinity values. If no value is
+        passed then positive infinity values will be replaced with a very
+        large number.
+    neginf : int, float, optional
+        Value to be used to fill negative infinity values. If no value is
+        passed then negative infinity values will be replaced with a very
+        small (or very negative) number.

@@ -422,13 +438,18 @@ def nan_to_num(x, copy=True):
     0.0
     >>> x = np.array([np.inf, -np.inf, np.nan, -128, 128])
     >>> np.nan_to_num(x)
-    array([  1.79769313e+308,  -1.79769313e+308,   0.00000000e+000,
-            -1.28000000e+002,   1.28000000e+002])
+    array([ 1.79769313e+308, -1.79769313e+308,  0.00000000e+000, # may vary
+           -1.28000000e+002,  1.28000000e+002])
+    >>> np.nan_to_num(x, nan=-9999, posinf=33333333, neginf=33333333)
+    array([ 3.3333333e+07,  3.3333333e+07, -9.9990000e+03,
+           -1.2800000e+02,  1.2800000e+02])
     >>> y = np.array([complex(np.inf, np.nan), np.nan, complex(np.nan, np.inf)])
     >>> np.nan_to_num(y)
-    array([  1.79769313e+308 +0.00000000e+000j,
+    array([  1.79769313e+308 +0.00000000e+000j, # may vary
              0.00000000e+000 +0.00000000e+000j,
              0.00000000e+000 +1.79769313e+308j])
+    >>> np.nan_to_num(y, nan=111111, posinf=222222)
+    array([222222.+111111.j, 111111.     +0.j, 111111.+222222.j])
     """
     x = _nx.array(x, subok=True, copy=copy)
     xtype = x.dtype.type
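The implementation below computes all three masks from the original data before writing any replacement, which is what guarantees the keyword semantics tested by test_do_not_rewrite_previous_keyword above: a value written for `nan` is never re-matched by the `posinf`/`neginf` passes. A rough illustration (assuming a NumPy version that already has these keywords; the printed formatting may vary):

    import numpy as np

    with np.errstate(divide='ignore', invalid='ignore'):
        x = np.array([-1., 0, 1]) / 0.   # [-inf, nan, inf]

    # NaN is filled with np.inf, but because the inf-mask was taken from
    # the original data, the freshly written np.inf is not turned into 999:
    print(np.nan_to_num(x, nan=np.inf, posinf=999))
    # [-1.79769313e+308               inf   9.99000000e+02]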
@@ -442,10 +463,17 @@ def nan_to_num(x, copy=True):

     dest = (x.real, x.imag) if iscomplex else (x,)
     maxf, minf = _getmaxmin(x.real.dtype)
+    if posinf is not None:
+        maxf = posinf
+    if neginf is not None:
+        minf = neginf
     for d in dest:
-        _nx.copyto(d, 0.0, where=isnan(d))
-        _nx.copyto(d, maxf, where=isposinf(d))
-        _nx.copyto(d, minf, where=isneginf(d))
+        idx_nan = isnan(d)
+        idx_posinf = isposinf(d)
+        idx_neginf = isneginf(d)
+        _nx.copyto(d, nan, where=idx_nan)
+        _nx.copyto(d, maxf, where=idx_posinf)
+        _nx.copyto(d, minf, where=idx_neginf)
     return x[()] if isscalar else x

 #-----------------------------------------------------------------------------
@@ -490,12 +518,12 @@ def real_if_close(a, tol=100):
     Examples
     --------
     >>> np.finfo(float).eps
-    2.2204460492503131e-16
+    2.2204460492503131e-16 # may vary

     >>> np.real_if_close([2.1 + 4e-14j], tol=1000)
-    array([ 2.1])
+    array([2.1])
     >>> np.real_if_close([2.1 + 4e-13j], tol=1000)
-    array([ 2.1 +4.00000000e-13j])
+    array([2.1+4.e-13j])

     """
     a = asanyarray(a)
@@ -511,6 +539,9 @@ def real_if_close(a, tol=100):


 def _asscalar_dispatcher(a):
+    # 2018-10-10, 1.16
+    warnings.warn('np.asscalar(a) is deprecated since NumPy v1.16, use '
+                  'a.item() instead', DeprecationWarning, stacklevel=3)
     return (a,)


@@ -538,12 +569,7 @@ def asscalar(a):
     --------
     >>> np.asscalar(np.array([24]))
     24
-    """
-
-    # 2018-10-10, 1.16
-    warnings.warn('np.asscalar(a) is deprecated since NumPy v1.16, use '
-                  'a.item() instead', DeprecationWarning, stacklevel=1)
+    """
     return a.item()

 #-----------------------------------------------------------------------------
@@ -672,11 +698,11 @@ def common_type(*arrays):
     Examples
     --------
     >>> np.common_type(np.arange(2, dtype=np.float32))
-    <type 'numpy.float32'>
+    <class 'numpy.float32'>
     >>> np.common_type(np.arange(2, dtype=np.float32), np.arange(2))
-    <type 'numpy.float64'>
+    <class 'numpy.float64'>
     >>> np.common_type(np.arange(4), np.array([45, 6.j]), np.array([45.0]))
-    <type 'numpy.complex128'>
+    <class 'numpy.complex128'>

     """
     is_complex = False
diff --git a/numpy/lib/ufunclike.py b/numpy/lib/ufunclike.py
index 9a9e6f9dd..96fd5b319 100644
--- a/numpy/lib/ufunclike.py
+++ b/numpy/lib/ufunclike.py
@@ -8,7 +8,9 @@ from __future__ import division, absolute_import, print_function
 __all__ = ['fix', 'isneginf', 'isposinf']

 import numpy.core.numeric as nx
-from numpy.core.overrides import array_function_dispatch, ENABLE_ARRAY_FUNCTION
+from numpy.core.overrides import (
+    array_function_dispatch, ARRAY_FUNCTION_ENABLED,
+)
 import warnings
 import functools

@@ -43,7 +45,7 @@ def _fix_out_named_y(f):
     Allow the out argument to be passed as the name `y` (deprecated)

     This decorator should only be used if _deprecate_out_named_y is used on
-    a corresponding dispatcher fucntion.
+    a corresponding dispatcher function.
     """
     @functools.wraps(f)
     def func(x, out=None, **kwargs):
@@ -55,8 +57,14 @@ def _fix_out_named_y(f):
     return func


-if not ENABLE_ARRAY_FUNCTION:
-    _fix_out_named_y = _deprecate_out_named_y
+def _fix_and_maybe_deprecate_out_named_y(f):
+    """
+    Use the appropriate decorator, depending on whether dispatching is in use.
+ """ + if ARRAY_FUNCTION_ENABLED: + return _fix_out_named_y(f) + else: + return _deprecate_out_named_y(f) @_deprecate_out_named_y @@ -65,7 +73,7 @@ def _dispatcher(x, out=None): @array_function_dispatch(_dispatcher, verify=False, module='numpy') -@_fix_out_named_y +@_fix_and_maybe_deprecate_out_named_y def fix(x, out=None): """ Round to nearest integer towards zero. @@ -112,7 +120,7 @@ def fix(x, out=None): @array_function_dispatch(_dispatcher, verify=False, module='numpy') -@_fix_out_named_y +@_fix_and_maybe_deprecate_out_named_y def isposinf(x, out=None): """ Test element-wise for positive infinity, return result as bool array. @@ -154,11 +162,11 @@ def isposinf(x, out=None): Examples -------- >>> np.isposinf(np.PINF) - array(True, dtype=bool) + True >>> np.isposinf(np.inf) - array(True, dtype=bool) + True >>> np.isposinf(np.NINF) - array(False, dtype=bool) + False >>> np.isposinf([-np.inf, 0., np.inf]) array([False, False, True]) @@ -181,7 +189,7 @@ def isposinf(x, out=None): @array_function_dispatch(_dispatcher, verify=False, module='numpy') -@_fix_out_named_y +@_fix_and_maybe_deprecate_out_named_y def isneginf(x, out=None): """ Test element-wise for negative infinity, return result as bool array. @@ -224,11 +232,11 @@ def isneginf(x, out=None): Examples -------- >>> np.isneginf(np.NINF) - array(True, dtype=bool) + True >>> np.isneginf(np.inf) - array(False, dtype=bool) + False >>> np.isneginf(np.PINF) - array(False, dtype=bool) + False >>> np.isneginf([-np.inf, 0., np.inf]) array([ True, False, False]) diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py index 84edf4021..c7dbcc5f9 100644 --- a/numpy/lib/utils.py +++ b/numpy/lib/utils.py @@ -105,6 +105,20 @@ class _Deprecate(object): if doc is None: doc = depdoc else: + lines = doc.expandtabs().split('\n') + indent = _get_indent(lines[1:]) + if lines[0].lstrip(): + # Indent the original first line to let inspect.cleandoc() + # dedent the docstring despite the deprecation notice. + doc = indent * ' ' + doc + else: + # Remove the same leading blank lines as cleandoc() would. + skip = len(lines[0]) + 1 + for line in lines[1:]: + if len(line) > indent: + break + skip += len(line) + 1 + doc = doc[skip:] doc = '\n\n'.join([depdoc, doc]) newfunc.__doc__ = doc try: @@ -115,6 +129,21 @@ class _Deprecate(object): newfunc.__dict__.update(d) return newfunc + +def _get_indent(lines): + """ + Determines the leading whitespace that could be removed from all the lines. + """ + indent = sys.maxsize + for line in lines: + content = len(line.lstrip()) + if content: + indent = min(indent, len(line) - content) + if indent == sys.maxsize: + indent = 0 + return indent + + def deprecate(*args, **kwargs): """ Issues a DeprecationWarning, adds warning to `old_name`'s @@ -150,10 +179,8 @@ def deprecate(*args, **kwargs): Warning: >>> olduint = np.deprecate(np.uint) + DeprecationWarning: `uint64` is deprecated! 
# may vary
     >>> olduint(6)
-    /usr/lib/python2.5/site-packages/numpy/lib/utils.py:114:
-    DeprecationWarning: uint32 is deprecated
-    warnings.warn(str1, DeprecationWarning, stacklevel=2)
     6

     """
@@ -201,8 +228,8 @@ def byte_bounds(a):
     >>> low, high = np.byte_bounds(I)
     >>> high - low == I.size*I.itemsize
     True
-    >>> I = np.eye(2, dtype='G'); I.dtype
-    dtype('complex192')
+    >>> I = np.eye(2); I.dtype
+    dtype('float64')
     >>> low, high = np.byte_bounds(I)
     >>> high - low == I.size*I.itemsize
     True
@@ -263,17 +290,17 @@ def who(vardict=None):
     >>> np.who()
     Name            Shape      Bytes            Type
     ===========================================================
-    a               10         40               int32
+    a               10         80               int64
     b               20         160              float64
-    Upper bound on total bytes  =       200
+    Upper bound on total bytes  =       240

     >>> d = {'x': np.arange(2.0), 'y': np.arange(3.0), 'txt': 'Some str',
     ...      'idx':5}
     >>> np.who(d)
     Name            Shape      Bytes            Type
     ===========================================================
-    y               3          24               float64
     x               2          16               float64
+    y               3          24               float64

     Upper bound on total bytes  =       40

     """
@@ -733,7 +760,7 @@ def lookfor(what, module=None, import_modules=True, regenerate=False,

     Examples
     --------
-    >>> np.lookfor('binary representation')
+    >>> np.lookfor('binary representation') # doctest: +SKIP
     Search results for 'binary representation'
     ------------------------------------------
     numpy.binary_repr
@@ -1104,12 +1131,11 @@ def safe_eval(source):
     >>> np.safe_eval('open("/home/user/.ssh/id_dsa").read()')
     Traceback (most recent call last):
       ...
-    SyntaxError: Unsupported source construct: compiler.ast.CallFunc
+    ValueError: malformed node or string: <_ast.Call object at 0x...>

     """
     # Local import to speed up numpy's import time.
     import ast
-
     return ast.literal_eval(source)


@@ -1142,17 +1168,12 @@ def _median_nancheck(data, result, axis, out):
         n = n.filled(False)
     if result.ndim == 0:
         if n == True:
-            warnings.warn("Invalid value encountered in median",
-                          RuntimeWarning, stacklevel=3)
             if out is not None:
                 out[...] = data.dtype.type(np.nan)
                 result = out
             else:
                 result = data.dtype.type(np.nan)
     elif np.count_nonzero(n.ravel()) > 0:
-        warnings.warn("Invalid value encountered in median for" +
-                      " %d results" % np.count_nonzero(n.ravel()),
-                      RuntimeWarning, stacklevel=3)
         result[n] = np.nan
     return result
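The `_Deprecate` and `_get_indent` changes in utils.py above exist so that `inspect.getdoc` (which calls `inspect.cleandoc`) can still dedent a wrapped function's docstring after the deprecation notice is prepended. A minimal sketch of the failure mode being guarded against (the docstring text and notice are illustrative, not taken from the patch):

    import inspect

    depdoc = '`old` is deprecated, use `new` instead!'

    # A typical docstring: summary on the first line, body indented 4 spaces.
    original = 'Summary.\n\n    Further info.\n    '

    # Naively joining leaves the body's indent in place, because the
    # unindented 'Summary.' line drags the common margin down to zero:
    naive = '\n\n'.join([depdoc, original])
    print(inspect.cleandoc(naive))   # '    Further info.' keeps its indent

    # Indenting the original first line first (what _Deprecate now does)
    # restores a uniform margin that cleandoc() strips as expected:
    fixed = '\n\n'.join([depdoc, '    ' + original])
    print(inspect.cleandoc(fixed))   # body dedented to match the summary

This is exactly what `test_deprecate_help_indentation` in test_utils.py verifies: after the notice and its separating blank line, the remainder of the cleaned docstring must equal the cleaned original.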