field | value
---|---
author | Eric Wieser <wieser.eric@gmail.com> 2019-04-23 01:33:13 -0700
committer | Eric Wieser <wieser.eric@gmail.com> 2019-04-23 01:33:13 -0700
commit | 20472595f5b9f4b2fcfedcf6aae9684f95af1c8c (patch)
tree | 6e39eabe01a85454c1703b1a1ee201e57d02b1eb /numpy/lib
parent | b5895be146cdc3063ffa9ca8ae27b5bcf7992719 (diff)
parent | f91b033aa35b929610c0db12f16b1b0c1ddc08e6 (diff)
download | numpy-20472595f5b9f4b2fcfedcf6aae9684f95af1c8c.tar.gz
Merge remote-tracking branch 'upstream/master' into fix-1-field-unstructured
Diffstat (limited to 'numpy/lib')
38 files changed, 2356 insertions, 2254 deletions
diff --git a/numpy/lib/_datasource.py b/numpy/lib/_datasource.py
index 30237b76f..816f7624e 100644
--- a/numpy/lib/_datasource.py
+++ b/numpy/lib/_datasource.py
@@ -20,17 +20,18 @@ gzip, bz2 and xz are supported.
 Example::
 
     >>> # Create a DataSource, use os.curdir (default) for local storage.
-    >>> ds = datasource.DataSource()
+    >>> from numpy import DataSource
+    >>> ds = DataSource()
     >>>
     >>> # Open a remote file.
     >>> # DataSource downloads the file, stores it locally in:
     >>> #     './www.google.com/index.html'
     >>> # opens the file and returns a file object.
-    >>> fp = ds.open('http://www.google.com/index.html')
+    >>> fp = ds.open('http://www.google.com/') # doctest: +SKIP
     >>>
     >>> # Use the file as you normally would
-    >>> fp.read()
-    >>> fp.close()
+    >>> fp.read() # doctest: +SKIP
+    >>> fp.close() # doctest: +SKIP
 
 """
 from __future__ import division, absolute_import, print_function
@@ -156,6 +157,7 @@ class _FileOpeners(object):
 
     Examples
     --------
+    >>> import gzip
     >>> np.lib._datasource._file_openers.keys()
     [None, '.bz2', '.gz', '.xz', '.lzma']
     >>> np.lib._datasource._file_openers['.gz'] is gzip.open
@@ -290,7 +292,7 @@ class DataSource(object):
     URLs require a scheme string (``http://``) to be used, without it they
     will fail::
 
-        >>> repos = DataSource()
+        >>> repos = np.DataSource()
        >>> repos.exists('www.google.com/index.html')
        False
        >>> repos.exists('http://www.google.com/index.html')
@@ -302,17 +304,17 @@ class DataSource(object):
     Examples
     --------
     ::
 
-        >>> ds = DataSource('/home/guido')
-        >>> urlname = 'http://www.google.com/index.html'
-        >>> gfile = ds.open('http://www.google.com/index.html') # remote file
+        >>> ds = np.DataSource('/home/guido')
+        >>> urlname = 'http://www.google.com/'
+        >>> gfile = ds.open('http://www.google.com/')
        >>> ds.abspath(urlname)
-        '/home/guido/www.google.com/site/index.html'
+        '/home/guido/www.google.com/index.html'
 
-        >>> ds = DataSource(None)  # use with temporary file
+        >>> ds = np.DataSource(None)  # use with temporary file
        >>> ds.open('/home/guido/foobar.txt')
        <open file '/home/guido.foobar.txt', mode 'r' at 0x91d4430>
        >>> ds.abspath('/home/guido/foobar.txt')
-        '/tmp/tmpy4pgsP/home/guido/foobar.txt'
+        '/tmp/.../home/guido/foobar.txt'
 
     """
@@ -545,6 +547,11 @@ class DataSource(object):
         is accessible if it exists in either location.
 
        """
+
+        # First test for local path
+        if os.path.exists(path):
+            return True
+
        # We import this here because importing urllib2 is slow and
        # a significant fraction of numpy's total import time.
        if sys.version_info[0] >= 3:
@@ -554,10 +561,6 @@ class DataSource(object):
             from urllib2 import urlopen
             from urllib2 import URLError
 
-        # Test local path
-        if os.path.exists(path):
-            return True
-
        # Test cached url
        upath = self.abspath(path)
        if os.path.exists(upath):
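Editor's note: the `exists()` hunk above moves the cheap local-path check ahead of the slow urllib import. A minimal sketch of the observable behavior, assuming a NumPy build with this change applied:

```python
import os
import numpy as np

# DataSource(None) stores any downloads in a temporary directory.
ds = np.DataSource(None)

# A path that exists locally is now answered by os.path.exists() alone,
# before any URL machinery is imported:
print(ds.exists(os.__file__))       # True
print(ds.exists('/no/such/file'))   # False (after the URL checks also fail)
```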
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index b604b8c52..0ebd39b8c 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -8,7 +8,7 @@ __docformat__ = "restructuredtext en"
 import sys
 import numpy as np
 import numpy.core.numeric as nx
-from numpy.compat import asbytes, asunicode, bytes, asbytes_nested, basestring
+from numpy.compat import asbytes, asunicode, bytes, basestring
 
 if sys.version_info[0] >= 3:
     from builtins import bool, int, float, complex, object, str
@@ -146,11 +146,17 @@ def flatten_dtype(ndtype, flatten_base=False):
     >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
     ...                ('block', int, (2, 3))])
     >>> np.lib._iotools.flatten_dtype(dt)
-    [dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32')]
+    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int64')]
     >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
-    [dtype('|S4'), dtype('float64'), dtype('float64'), dtype('int32'),
-     dtype('int32'), dtype('int32'), dtype('int32'), dtype('int32'),
-     dtype('int32')]
+    [dtype('S4'),
+     dtype('float64'),
+     dtype('float64'),
+     dtype('int64'),
+     dtype('int64'),
+     dtype('int64'),
+     dtype('int64'),
+     dtype('int64'),
+     dtype('int64')]
 
     """
     names = ndtype.names
@@ -309,13 +315,13 @@ class NameValidator(object):
     --------
     >>> validator = np.lib._iotools.NameValidator()
     >>> validator(['file', 'field2', 'with space', 'CaSe'])
-    ['file_', 'field2', 'with_space', 'CaSe']
+    ('file_', 'field2', 'with_space', 'CaSe')
     >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
-                                                  deletechars='q',
-                                                  case_sensitive='False')
+    ...                                           deletechars='q',
+    ...                                           case_sensitive=False)
     >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
-    ['excl_', 'field2', 'no_', 'with_space', 'case']
+    ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE')
 
     """
     #
@@ -599,7 +605,7 @@ class StringConverter(object):
     --------
     >>> import dateutil.parser
     >>> import datetime
-    >>> dateparser = datetustil.parser.parse
+    >>> dateparser = dateutil.parser.parse
     >>> defaultdate = datetime.date(2000, 1, 1)
     >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
     """
@@ -693,7 +699,7 @@ class StringConverter(object):
                 self.func = lambda x: int(float(x))
         # Store the list of strings corresponding to missing values.
         if missing_values is None:
-            self.missing_values = set([''])
+            self.missing_values = {''}
         else:
             if isinstance(missing_values, basestring):
                 missing_values = missing_values.split(",")
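Editor's note: the corrected `NameValidator` doctest above encodes two facts worth seeing side by side: the validator returns a tuple (not a list), and `case_sensitive=False` upper-cases names. A quick check mirroring those doctests:

```python
from numpy.lib._iotools import NameValidator

# Default rules: excluded names like 'file' get a trailing '_',
# spaces become underscores, case is preserved.
validator = NameValidator()
print(validator(['file', 'field2', 'with space', 'CaSe']))
# ('file_', 'field2', 'with_space', 'CaSe')

validator = NameValidator(excludelist=['excl'], deletechars='q',
                          case_sensitive=False)
print(validator(['excl', 'field2', 'no_q', 'with space', 'CaSe']))
# ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE')
```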
diff --git a/numpy/lib/_version.py b/numpy/lib/_version.py
index c3563a7fa..8aa999fc9 100644
--- a/numpy/lib/_version.py
+++ b/numpy/lib/_version.py
@@ -47,9 +47,12 @@ class NumpyVersion():
     >>> from numpy.lib import NumpyVersion
     >>> if NumpyVersion(np.__version__) < '1.7.0':
     ...     print('skip')
-    skip
+    >>> # skip
 
     >>> NumpyVersion('1.7')  # raises ValueError, add ".0"
+    Traceback (most recent call last):
+        ...
+    ValueError: Not a valid numpy version string
 
     """
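Editor's note: the fixed doctest above shows both behaviors of `NumpyVersion`; a runnable version of the same:

```python
import numpy as np
from numpy.lib import NumpyVersion

# Version strings are compared numerically, not lexically:
assert NumpyVersion('1.10.0') > '1.9.0'

if NumpyVersion(np.__version__) < '1.7.0':
    print('skip')          # never printed on a modern NumPy

try:
    NumpyVersion('1.7')    # needs the trailing ".0"
except ValueError as e:
    print(e)               # Not a valid numpy version string
```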
diff --git a/numpy/lib/arraypad.py b/numpy/lib/arraypad.py
index 4f6371058..07146f404 100644
--- a/numpy/lib/arraypad.py
+++ b/numpy/lib/arraypad.py
@@ -16,50 +16,67 @@ __all__ = ['pad']
 # Private utility functions.
 
-def _arange_ndarray(arr, shape, axis, reverse=False):
+def _linear_ramp(ndim, axis, start, stop, size, reverse=False):
     """
-    Create an ndarray of `shape` with increments along specified `axis`
+    Create a linear ramp of `size` in `axis` with `ndim`.
+
+    This algorithm behaves like a vectorized version of `numpy.linspace`.
+    The resulting linear ramp is broadcastable to any array that matches the
+    ramp in `shape[axis]` and `ndim`.
 
     Parameters
     ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    shape : tuple of ints
-        Shape of desired array. Should be equivalent to `arr.shape` except
-        `shape[axis]` which may have any positive value.
+    ndim : int
+        Number of dimensions of the resulting array. All dimensions except
+        the one specified by `axis` will have the size 1.
     axis : int
-        Axis to increment along.
+        The dimension that contains the linear ramp of `size`.
+    start : int or ndarray
+        The starting value(s) of the linear ramp. If given as an array, its
+        size must match `size`.
+    stop : int or ndarray
+        The stop value(s) (not included!) of the linear ramp. If given as an
+        array, its size must match `size`.
+    size : int
+        The number of elements in the linear ramp. If this argument is 0 the
+        dimensions of `ramp` will all be of length 1 except for the one given
+        by `axis` which will be 0.
     reverse : bool
-        If False, increment in a positive fashion from 1 to `shape[axis]`,
-        inclusive. If True, the bounds are the same but the order reversed.
+        If False, increment in a positive fashion, otherwise decrement.
 
     Returns
     -------
-    padarr : ndarray
-        Output array sized to pad `arr` along `axis`, with linear range from
-        1 to `shape[axis]` along specified `axis`.
-
-    Notes
-    -----
-    The range is deliberately 1-indexed for this specific use case. Think of
-    this algorithm as broadcasting `np.arange` to a single `axis` of an
-    arbitrarily shaped ndarray.
+    ramp : ndarray
+        Output array of dtype np.float64 that increments or decrements along
+        the given `axis`.
+
+    Examples
+    --------
+    >>> _linear_ramp(ndim=2, axis=0, start=np.arange(3), stop=10, size=2)
+    array([[0. , 1. , 2. ],
+           [5. , 5.5, 6. ]])
+    >>> _linear_ramp(ndim=3, axis=0, start=2, stop=0, size=0)
+    array([], shape=(0, 1, 1), dtype=float64)
     """
-    initshape = tuple(1 if i != axis else shape[axis]
-                      for (i, x) in enumerate(arr.shape))
-    if not reverse:
-        padarr = np.arange(1, shape[axis] + 1)
-    else:
-        padarr = np.arange(shape[axis], 0, -1)
-    padarr = padarr.reshape(initshape)
-    for i, dim in enumerate(shape):
-        if padarr.shape[i] != dim:
-            padarr = padarr.repeat(dim, axis=i)
-    return padarr
+    # Create initial ramp
+    ramp = np.arange(size, dtype=np.float64)
+    if reverse:
+        ramp = ramp[::-1]
+
+    # Make sure that ramp is broadcastable
+    init_shape = (1,) * axis + (size,) + (1,) * (ndim - axis - 1)
+    ramp = ramp.reshape(init_shape)
+
+    if size != 0:
+        # And scale to given start and stop values
+        gain = (stop - start) / float(size)
+        ramp = ramp * gain
+        ramp += start
+
+    return ramp
 
 
-def _round_ifneeded(arr, dtype):
+def _round_if_needed(arr, dtype):
     """
     Rounds arr inplace if destination dtype is integer.
 
@@ -69,821 +86,418 @@
         Input array.
     dtype : dtype
         The dtype of the destination array.
-
     """
     if np.issubdtype(dtype, np.integer):
         arr.round(out=arr)
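Editor's note: for intuition, the new private `_linear_ramp` helper computes the same values as `np.linspace` with `endpoint=False`, just reshaped to broadcast along one axis. A standalone sketch of the arithmetic (not calling the private helper):

```python
import numpy as np

# A 1-D ramp of 4 values from 5 toward 1, with 1 excluded:
size = 4
start, stop = 5.0, 1.0
ramp = np.arange(size, dtype=np.float64) * ((stop - start) / size) + start
print(ramp)                                             # [5. 4. 3. 2.]
print(np.linspace(start, stop, size, endpoint=False))   # same values
```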
 
-def _slice_at_axis(shape, sl, axis):
-    """
-    Construct a slice tuple the length of shape, with sl at the specified axis
-    """
-    slice_tup = (slice(None),)
-    return slice_tup * axis + (sl,) + slice_tup * (len(shape) - axis - 1)
-
-
-def _slice_first(shape, n, axis):
-    """ Construct a slice tuple to take the first n elements along axis """
-    return _slice_at_axis(shape, slice(0, n), axis=axis)
-
-
-def _slice_last(shape, n, axis):
-    """ Construct a slice tuple to take the last n elements along axis """
-    dim = shape[axis]  # doing this explicitly makes n=0 work
-    return _slice_at_axis(shape, slice(dim - n, dim), axis=axis)
-
-
-def _do_prepend(arr, pad_chunk, axis):
-    return np.concatenate(
-        (pad_chunk.astype(arr.dtype, copy=False), arr), axis=axis)
-
-
-def _do_append(arr, pad_chunk, axis):
-    return np.concatenate(
-        (arr, pad_chunk.astype(arr.dtype, copy=False)), axis=axis)
-
-
-def _prepend_const(arr, pad_amt, val, axis=-1):
-    """
-    Prepend constant `val` along `axis` of `arr`.
-
-    Parameters
-    ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to prepend.
-    val : scalar
-        Constant value to use. For best results should be of type `arr.dtype`;
-        if not `arr.dtype` will be cast to `arr.dtype`.
-    axis : int
-        Axis along which to pad `arr`.
-
-    Returns
-    -------
-    padarr : ndarray
-        Output array, with `pad_amt` constant `val` prepended along `axis`.
-
-    """
-    if pad_amt == 0:
-        return arr
-    padshape = tuple(x if i != axis else pad_amt
-                     for (i, x) in enumerate(arr.shape))
-    return _do_prepend(arr, np.full(padshape, val, dtype=arr.dtype), axis)
-
-
-def _append_const(arr, pad_amt, val, axis=-1):
+def _slice_at_axis(sl, axis):
     """
-    Append constant `val` along `axis` of `arr`.
+    Construct tuple of slices to slice an array in the given dimension.
 
     Parameters
     ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to append.
-    val : scalar
-        Constant value to use. For best results should be of type `arr.dtype`;
-        if not `arr.dtype` will be cast to `arr.dtype`.
+    sl : slice
+        The slice for the given dimension.
     axis : int
-        Axis along which to pad `arr`.
+        The axis to which `sl` is applied. All other dimensions are left
+        "unsliced".
 
     Returns
     -------
-    padarr : ndarray
-        Output array, with `pad_amt` constant `val` appended along `axis`.
-
-    """
-    if pad_amt == 0:
-        return arr
-    padshape = tuple(x if i != axis else pad_amt
-                     for (i, x) in enumerate(arr.shape))
-    return _do_append(arr, np.full(padshape, val, dtype=arr.dtype), axis)
+    sl : tuple of slices
+        A tuple with slices matching `shape` in length.
 
-
-def _prepend_edge(arr, pad_amt, axis=-1):
-    """
-    Prepend `pad_amt` to `arr` along `axis` by extending edge values.
-
-    Parameters
-    ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to prepend.
-    axis : int
-        Axis along which to pad `arr`.
-
-    Returns
-    -------
-    padarr : ndarray
-        Output array, extended by `pad_amt` edge values appended along `axis`.
-
-    """
-    if pad_amt == 0:
-        return arr
-
-    edge_slice = _slice_first(arr.shape, 1, axis=axis)
-    edge_arr = arr[edge_slice]
-    return _do_prepend(arr, edge_arr.repeat(pad_amt, axis=axis), axis)
-
-
-def _append_edge(arr, pad_amt, axis=-1):
+    Examples
+    --------
+    >>> _slice_at_axis(slice(None, 3, -1), 1)
+    (slice(None, None, None), slice(None, 3, -1), (...,))
     """
-    Append `pad_amt` to `arr` along `axis` by extending edge values.
-
-    Parameters
-    ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to append.
-    axis : int
-        Axis along which to pad `arr`.
+    return (slice(None),) * axis + (sl,) + (...,)
 
-    Returns
-    -------
-    padarr : ndarray
-        Output array, extended by `pad_amt` edge values prepended along
-        `axis`.
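Editor's note: the new `_slice_at_axis` builds an index tuple that slices one axis and leaves the rest untouched. The same construction, shown directly on an array:

```python
import numpy as np

a = np.arange(12).reshape(3, 4)
# What _slice_at_axis(slice(0, 2), axis=1) would build:
index = (slice(None),) * 1 + (slice(0, 2),) + (...,)
print(a[index])          # first two columns of every row
# Equivalent to a[:, 0:2, ...]
```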
""" - if pad_amt == 0: - return arr - - # Generate shape for final concatenated array - padshape = tuple(x if i != axis else pad_amt - for (i, x) in enumerate(arr.shape)) - - # Generate an n-dimensional array incrementing along `axis` - ramp_arr = _arange_ndarray(arr, padshape, axis, - reverse=False).astype(np.float64) - - # Slice a chunk from the edge to calculate stats on - edge_slice = _slice_last(arr.shape, 1, axis=axis) + # Allocate grown array + new_shape = tuple( + left + size + right + for size, (left, right) in zip(array.shape, pad_width) + ) + order = 'F' if array.flags.fnc else 'C' # Fortran and not also C-order + padded = np.empty(new_shape, dtype=array.dtype, order=order) - # Extract edge, and extend along `axis` - edge_pad = arr[edge_slice].repeat(pad_amt, axis) + if fill_value is not None: + padded.fill(fill_value) - # Linear ramp - slope = (end - edge_pad) / float(pad_amt) - ramp_arr = ramp_arr * slope - ramp_arr += edge_pad - _round_ifneeded(ramp_arr, arr.dtype) + # Copy old array into correct space + original_area_slice = tuple( + slice(left, left + size) + for size, (left, right) in zip(array.shape, pad_width) + ) + padded[original_area_slice] = array - # Ramp values will most likely be float, cast them to the same type as arr - return _do_append(arr, ramp_arr, axis) + return padded, original_area_slice -def _prepend_max(arr, pad_amt, num, axis=-1): +def _set_pad_area(padded, axis, width_pair, value_pair): """ - Prepend `pad_amt` maximum values along `axis`. + Set empty-padded area in given dimension. Parameters ---------- - arr : ndarray - Input array of arbitrary shape. - pad_amt : int - Amount of padding to prepend. - num : int - Depth into `arr` along `axis` to calculate maximum. - Range: [1, `arr.shape[axis]`] or None (entire axis) + padded : ndarray + Array with the pad area which is modified inplace. axis : int - Axis along which to pad `arr`. - - Returns - ------- - padarr : ndarray - Output array, with `pad_amt` values appended along `axis`. The - prepended region is the maximum of the first `num` values along - `axis`. - + Dimension with the pad area to set. + width_pair : (int, int) + Pair of widths that mark the pad area on both sides in the given + dimension. + value_pair : tuple of scalars or ndarrays + Values inserted into the pad area on each side. It must match or be + broadcastable to the shape of `arr`. """ - if pad_amt == 0: - return arr - - # Equivalent to edge padding for single value, so do that instead - if num == 1: - return _prepend_edge(arr, pad_amt, axis) + left_slice = _slice_at_axis(slice(None, width_pair[0]), axis) + padded[left_slice] = value_pair[0] - # Use entire array if `num` is too large - if num is not None: - if num >= arr.shape[axis]: - num = None + right_slice = _slice_at_axis( + slice(padded.shape[axis] - width_pair[1], None), axis) + padded[right_slice] = value_pair[1] - # Slice a chunk from the edge to calculate stats on - max_slice = _slice_first(arr.shape, num, axis=axis) - # Extract slice, calculate max - max_chunk = arr[max_slice].max(axis=axis, keepdims=True) - - # Concatenate `arr` with `max_chunk`, extended along `axis` by `pad_amt` - return _do_prepend(arr, max_chunk.repeat(pad_amt, axis=axis), axis) - - -def _append_max(arr, pad_amt, num, axis=-1): +def _get_edges(padded, axis, width_pair): """ - Pad one `axis` of `arr` with the maximum of the last `num` elements. + Retrieve edge values from empty-padded array in given dimension. Parameters ---------- - arr : ndarray - Input array of arbitrary shape. 
 
-def _prepend_max(arr, pad_amt, num, axis=-1):
+def _set_pad_area(padded, axis, width_pair, value_pair):
     """
-    Prepend `pad_amt` maximum values along `axis`.
+    Set empty-padded area in given dimension.
 
     Parameters
     ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to prepend.
-    num : int
-        Depth into `arr` along `axis` to calculate maximum.
-        Range: [1, `arr.shape[axis]`] or None (entire axis)
+    padded : ndarray
+        Array with the pad area which is modified inplace.
     axis : int
-        Axis along which to pad `arr`.
-
-    Returns
-    -------
-    padarr : ndarray
-        Output array, with `pad_amt` values appended along `axis`. The
-        prepended region is the maximum of the first `num` values along
-        `axis`.
-
+        Dimension with the pad area to set.
+    width_pair : (int, int)
+        Pair of widths that mark the pad area on both sides in the given
+        dimension.
+    value_pair : tuple of scalars or ndarrays
+        Values inserted into the pad area on each side. It must match or be
+        broadcastable to the shape of `arr`.
     """
-    if pad_amt == 0:
-        return arr
-
-    # Equivalent to edge padding for single value, so do that instead
-    if num == 1:
-        return _prepend_edge(arr, pad_amt, axis)
+    left_slice = _slice_at_axis(slice(None, width_pair[0]), axis)
+    padded[left_slice] = value_pair[0]
 
-    # Use entire array if `num` is too large
-    if num is not None:
-        if num >= arr.shape[axis]:
-            num = None
+    right_slice = _slice_at_axis(
+        slice(padded.shape[axis] - width_pair[1], None), axis)
+    padded[right_slice] = value_pair[1]
 
-    # Slice a chunk from the edge to calculate stats on
-    max_slice = _slice_first(arr.shape, num, axis=axis)
-
-    # Extract slice, calculate max
-    max_chunk = arr[max_slice].max(axis=axis, keepdims=True)
-
-    # Concatenate `arr` with `max_chunk`, extended along `axis` by `pad_amt`
-    return _do_prepend(arr, max_chunk.repeat(pad_amt, axis=axis), axis)
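Editor's note: `_set_pad_area` is all that mode='constant' needs, since it just assigns into both pad slices. The resulting user-visible behavior, per the (abbreviated) doctest later in this patch, shown unabbreviated:

```python
import numpy as np

a = [1, 2, 3, 4, 5]
print(np.pad(a, (2, 3), 'constant', constant_values=(4, 6)))
# [4 4 1 2 3 4 5 6 6 6]
```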
""" - if pad_amt == 0: - return arr - - # Equivalent to edge padding for single value, so do that instead - if num == 1: - return _prepend_edge(arr, pad_amt, axis) - - # Use entire array if `num` is too large - if num is not None: - if num >= arr.shape[axis]: - num = None + edge_pair = _get_edges(padded, axis, width_pair) - # Slice a chunk from the edge to calculate stats on - mean_slice = _slice_first(arr.shape, num, axis=axis) + left_ramp = _linear_ramp( + padded.ndim, axis, start=end_value_pair[0], stop=edge_pair[0], + size=width_pair[0], reverse=False + ) + _round_if_needed(left_ramp, padded.dtype) - # Extract slice, calculate mean - mean_chunk = arr[mean_slice].mean(axis, keepdims=True) - _round_ifneeded(mean_chunk, arr.dtype) + right_ramp = _linear_ramp( + padded.ndim, axis, start=end_value_pair[1], stop=edge_pair[1], + size=width_pair[1], reverse=True + ) + _round_if_needed(right_ramp, padded.dtype) - # Concatenate `arr` with `mean_chunk`, extended along `axis` by `pad_amt` - return _do_prepend(arr, mean_chunk.repeat(pad_amt, axis), axis=axis) + return left_ramp, right_ramp -def _append_mean(arr, pad_amt, num, axis=-1): +def _get_stats(padded, axis, width_pair, length_pair, stat_func): """ - Append `pad_amt` mean values along `axis`. + Calculate statistic for the empty-padded array in given dimnsion. Parameters ---------- - arr : ndarray - Input array of arbitrary shape. - pad_amt : int - Amount of padding to append. - num : int - Depth into `arr` along `axis` to calculate mean. - Range: [1, `arr.shape[axis]`] or None (entire axis) + padded : ndarray + Empty-padded array. axis : int - Axis along which to pad `arr`. + Dimension in which the statistic is calculated. + width_pair : (int, int) + Pair of widths that mark the pad area on both sides in the given + dimension. + length_pair : 2-element sequence of None or int + Gives the number of values in valid area from each side that is + taken into account when calculating the statistic. If None the entire + valid area in `padded` is considered. + stat_func : function + Function to compute statistic. The expected signature is + ``stat_func(x: ndarray, axis: int, keepdims: bool) -> ndarray``. Returns ------- - padarr : ndarray - Output array, with `pad_amt` values appended along `axis`. The - appended region is the maximum of the final `num` values along `axis`. - - """ - if pad_amt == 0: - return arr - - # Equivalent to edge padding for single value, so do that instead - if num == 1: - return _append_edge(arr, pad_amt, axis) - - # Use entire array if `num` is too large - if num is not None: - if num >= arr.shape[axis]: - num = None - - # Slice a chunk from the edge to calculate stats on - if num is not None: - mean_slice = _slice_last(arr.shape, num, axis=axis) - else: - mean_slice = tuple(slice(None) for x in arr.shape) - - # Extract slice, calculate mean - mean_chunk = arr[mean_slice].mean(axis=axis, keepdims=True) - _round_ifneeded(mean_chunk, arr.dtype) - - # Concatenate `arr` with `mean_chunk`, extended along `axis` by `pad_amt` - return _do_append(arr, mean_chunk.repeat(pad_amt, axis), axis=axis) - - -def _prepend_med(arr, pad_amt, num, axis=-1): - """ - Prepend `pad_amt` median values along `axis`. + left_stat, right_stat : ndarray + Calculated statistic for both sides of `padded`. 
+ """ + # Calculate indices of the edges of the area with original values + left_index = width_pair[0] + right_index = padded.shape[axis] - width_pair[1] + # as well as its length + max_length = right_index - left_index + + # Limit stat_lengths to max_length + left_length, right_length = length_pair + if left_length is None or max_length < left_length: + left_length = max_length + if right_length is None or max_length < right_length: + right_length = max_length + + # Calculate statistic for the left side + left_slice = _slice_at_axis( + slice(left_index, left_index + left_length), axis) + left_chunk = padded[left_slice] + left_stat = stat_func(left_chunk, axis=axis, keepdims=True) + _round_if_needed(left_stat, padded.dtype) + + if left_length == right_length == max_length: + # return early as right_stat must be identical to left_stat + return left_stat, left_stat + + # Calculate statistic for the right side + right_slice = _slice_at_axis( + slice(right_index - right_length, right_index), axis) + right_chunk = padded[right_slice] + right_stat = stat_func(right_chunk, axis=axis, keepdims=True) + _round_if_needed(right_stat, padded.dtype) + return left_stat, right_stat + + +def _set_reflect_both(padded, axis, width_pair, method, include_edge=False): + """ + Pad `axis` of `arr` with reflection. Parameters ---------- - arr : ndarray + padded : ndarray Input array of arbitrary shape. - pad_amt : int - Amount of padding to prepend. - num : int - Depth into `arr` along `axis` to calculate median. - Range: [1, `arr.shape[axis]`] or None (entire axis) axis : int Axis along which to pad `arr`. - - Returns - ------- - padarr : ndarray - Output array, with `pad_amt` values prepended along `axis`. The - prepended region is the median of the first `num` values along `axis`. - - """ - if pad_amt == 0: - return arr - - # Equivalent to edge padding for single value, so do that instead - if num == 1: - return _prepend_edge(arr, pad_amt, axis) - - # Use entire array if `num` is too large - if num is not None: - if num >= arr.shape[axis]: - num = None - - # Slice a chunk from the edge to calculate stats on - med_slice = _slice_first(arr.shape, num, axis=axis) - - # Extract slice, calculate median - med_chunk = np.median(arr[med_slice], axis=axis, keepdims=True) - _round_ifneeded(med_chunk, arr.dtype) - - # Concatenate `arr` with `med_chunk`, extended along `axis` by `pad_amt` - return _do_prepend(arr, med_chunk.repeat(pad_amt, axis), axis=axis) - - -def _append_med(arr, pad_amt, num, axis=-1): - """ - Append `pad_amt` median values along `axis`. - - Parameters - ---------- - arr : ndarray - Input array of arbitrary shape. - pad_amt : int - Amount of padding to append. - num : int - Depth into `arr` along `axis` to calculate median. - Range: [1, `arr.shape[axis]`] or None (entire axis) - axis : int - Axis along which to pad `arr`. - - Returns - ------- - padarr : ndarray - Output array, with `pad_amt` values appended along `axis`. The - appended region is the median of the final `num` values along `axis`. 
 
-def _append_mean(arr, pad_amt, num, axis=-1):
+def _get_stats(padded, axis, width_pair, length_pair, stat_func):
     """
-    Append `pad_amt` mean values along `axis`.
+    Calculate statistic for the empty-padded array in given dimension.
 
     Parameters
     ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to append.
-    num : int
-        Depth into `arr` along `axis` to calculate mean.
-        Range: [1, `arr.shape[axis]`] or None (entire axis)
+    padded : ndarray
+        Empty-padded array.
     axis : int
-        Axis along which to pad `arr`.
+        Dimension in which the statistic is calculated.
+    width_pair : (int, int)
+        Pair of widths that mark the pad area on both sides in the given
+        dimension.
+    length_pair : 2-element sequence of None or int
+        Gives the number of values in valid area from each side that is
+        taken into account when calculating the statistic. If None the entire
+        valid area in `padded` is considered.
+    stat_func : function
+        Function to compute statistic. The expected signature is
+        ``stat_func(x: ndarray, axis: int, keepdims: bool) -> ndarray``.
 
     Returns
     -------
-    padarr : ndarray
-        Output array, with `pad_amt` values appended along `axis`. The
-        appended region is the maximum of the final `num` values along `axis`.
-
-    """
-    if pad_amt == 0:
-        return arr
-
-    # Equivalent to edge padding for single value, so do that instead
-    if num == 1:
-        return _append_edge(arr, pad_amt, axis)
-
-    # Use entire array if `num` is too large
-    if num is not None:
-        if num >= arr.shape[axis]:
-            num = None
-
-    # Slice a chunk from the edge to calculate stats on
-    if num is not None:
-        mean_slice = _slice_last(arr.shape, num, axis=axis)
-    else:
-        mean_slice = tuple(slice(None) for x in arr.shape)
-
-    # Extract slice, calculate mean
-    mean_chunk = arr[mean_slice].mean(axis=axis, keepdims=True)
-    _round_ifneeded(mean_chunk, arr.dtype)
-
-    # Concatenate `arr` with `mean_chunk`, extended along `axis` by `pad_amt`
-    return _do_append(arr, mean_chunk.repeat(pad_amt, axis), axis=axis)
-
-
-def _prepend_med(arr, pad_amt, num, axis=-1):
-    """
-    Prepend `pad_amt` median values along `axis`.
+    left_stat, right_stat : ndarray
+        Calculated statistic for both sides of `padded`.
     """
+    # Calculate indices of the edges of the area with original values
+    left_index = width_pair[0]
+    right_index = padded.shape[axis] - width_pair[1]
+    # as well as its length
+    max_length = right_index - left_index
+
+    # Limit stat_lengths to max_length
+    left_length, right_length = length_pair
+    if left_length is None or max_length < left_length:
+        left_length = max_length
+    if right_length is None or max_length < right_length:
+        right_length = max_length
+
+    # Calculate statistic for the left side
+    left_slice = _slice_at_axis(
+        slice(left_index, left_index + left_length), axis)
+    left_chunk = padded[left_slice]
+    left_stat = stat_func(left_chunk, axis=axis, keepdims=True)
+    _round_if_needed(left_stat, padded.dtype)
+
+    if left_length == right_length == max_length:
+        # return early as right_stat must be identical to left_stat
+        return left_stat, left_stat
+
+    # Calculate statistic for the right side
+    right_slice = _slice_at_axis(
+        slice(right_index - right_length, right_index), axis)
+    right_chunk = padded[right_slice]
+    right_stat = stat_func(right_chunk, axis=axis, keepdims=True)
+    _round_if_needed(right_stat, padded.dtype)
+    return left_stat, right_stat
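Editor's note: `_get_stats` backs the 'maximum', 'minimum', 'mean', and 'median' modes, with `stat_length` controlling how much of the valid area feeds the statistic. A short illustration of the resulting behavior:

```python
import numpy as np

a = [1, 2, 3, 4, 5]
print(np.pad(a, (2,), 'maximum'))             # [5 5 1 2 3 4 5 5 5]
# stat_length=2: only [1, 2] feeds the left pad and [4, 5] the right pad
# (the means are rounded because the input dtype is integral):
print(np.pad(a, (2,), 'mean', stat_length=2))
```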
 
 
-def _append_med(arr, pad_amt, num, axis=-1):
+def _set_reflect_both(padded, axis, width_pair, method, include_edge=False):
     """
-    Append `pad_amt` median values along `axis`.
+    Pad `axis` of `arr` with reflection.
 
     Parameters
     ----------
-    arr : ndarray
+    padded : ndarray
         Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to append.
-    num : int
-        Depth into `arr` along `axis` to calculate median.
-        Range: [1, `arr.shape[axis]`] or None (entire axis)
     axis : int
         Axis along which to pad `arr`.
-
-    Returns
-    -------
-    padarr : ndarray
-        Output array, with `pad_amt` values appended along `axis`. The
-        appended region is the median of the final `num` values along `axis`.
-
-    """
-    if pad_amt == 0:
-        return arr
-
-    # Equivalent to edge padding for single value, so do that instead
-    if num == 1:
-        return _append_edge(arr, pad_amt, axis)
-
-    # Use entire array if `num` is too large
-    if num is not None:
-        if num >= arr.shape[axis]:
-            num = None
-
-    # Slice a chunk from the edge to calculate stats on
-    if num is not None:
-        med_slice = _slice_last(arr.shape, num, axis=axis)
-    else:
-        med_slice = tuple(slice(None) for x in arr.shape)
-
-    # Extract slice, calculate median
-    med_chunk = np.median(arr[med_slice], axis=axis, keepdims=True)
-    _round_ifneeded(med_chunk, arr.dtype)
-
-    # Concatenate `arr` with `med_chunk`, extended along `axis` by `pad_amt`
-    return _do_append(arr, med_chunk.repeat(pad_amt, axis), axis=axis)
-
-
-def _prepend_min(arr, pad_amt, num, axis=-1):
-    """
-    Prepend `pad_amt` minimum values along `axis`.
-
-    Parameters
-    ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to prepend.
-    num : int
-        Depth into `arr` along `axis` to calculate minimum.
-        Range: [1, `arr.shape[axis]`] or None (entire axis)
-    axis : int
-        Axis along which to pad `arr`.
-
-    Returns
-    -------
-    padarr : ndarray
-        Output array, with `pad_amt` values prepended along `axis`. The
-        prepended region is the minimum of the first `num` values along
-        `axis`.
-
-    """
-    if pad_amt == 0:
-        return arr
-
-    # Equivalent to edge padding for single value, so do that instead
-    if num == 1:
-        return _prepend_edge(arr, pad_amt, axis)
-
-    # Use entire array if `num` is too large
-    if num is not None:
-        if num >= arr.shape[axis]:
-            num = None
-
-    # Slice a chunk from the edge to calculate stats on
-    min_slice = _slice_first(arr.shape, num, axis=axis)
-
-    # Extract slice, calculate min
-    min_chunk = arr[min_slice].min(axis=axis, keepdims=True)
-
-    # Concatenate `arr` with `min_chunk`, extended along `axis` by `pad_amt`
-    return _do_prepend(arr, min_chunk.repeat(pad_amt, axis), axis=axis)
-
-
-def _append_min(arr, pad_amt, num, axis=-1):
-    """
-    Append `pad_amt` median values along `axis`.
-
-    Parameters
-    ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : int
-        Amount of padding to append.
-    num : int
-        Depth into `arr` along `axis` to calculate minimum.
-        Range: [1, `arr.shape[axis]`] or None (entire axis)
-    axis : int
-        Axis along which to pad `arr`.
-
-    Returns
-    -------
-    padarr : ndarray
-        Output array, with `pad_amt` values appended along `axis`. The
-        appended region is the minimum of the final `num` values along `axis`.
-
-    """
-    if pad_amt == 0:
-        return arr
-
-    # Equivalent to edge padding for single value, so do that instead
-    if num == 1:
-        return _append_edge(arr, pad_amt, axis)
-
-    # Use entire array if `num` is too large
-    if num is not None:
-        if num >= arr.shape[axis]:
-            num = None
-
-    # Slice a chunk from the edge to calculate stats on
-    if num is not None:
-        min_slice = _slice_last(arr.shape, num, axis=axis)
-    else:
-        min_slice = tuple(slice(None) for x in arr.shape)
-
-    # Extract slice, calculate min
-    min_chunk = arr[min_slice].min(axis=axis, keepdims=True)
-
-    # Concatenate `arr` with `min_chunk`, extended along `axis` by `pad_amt`
-    return _do_append(arr, min_chunk.repeat(pad_amt, axis), axis=axis)
-
-
-def _pad_ref(arr, pad_amt, method, axis=-1):
-    """
-    Pad `axis` of `arr` by reflection.
-
-    Parameters
-    ----------
-    arr : ndarray
-        Input array of arbitrary shape.
-    pad_amt : tuple of ints, length 2
-        Padding to (prepend, append) along `axis`.
+    width_pair : (int, int)
+        Pair of widths that mark the pad area on both sides in the given
+        dimension.
     method : str
         Controls method of reflection; options are 'even' or 'odd'.
-    axis : int
-        Axis along which to pad `arr`.
+    include_edge : bool
+        If true, edge value is included in reflection, otherwise the edge
+        value forms the symmetric axis to the reflection.
 
     Returns
     -------
-    padarr : ndarray
-        Output array, with `pad_amt[0]` values prepended and `pad_amt[1]`
-        values appended along `axis`. Both regions are padded with reflected
-        values from the original array.
-
-    Notes
-    -----
-    This algorithm does not pad with repetition, i.e. the edges are not
-    repeated in the reflection. For that behavior, use `mode='symmetric'`.
-
-    The modes 'reflect', 'symmetric', and 'wrap' must be padded with a
-    single function, lest the indexing tricks in non-integer multiples of the
-    original shape would violate repetition in the final iteration.
-
-    """
-    # Implicit booleanness to test for zero (or None) in any scalar type
-    if pad_amt[0] == 0 and pad_amt[1] == 0:
-        return arr
-
-    ##########################################################################
-    # Prepended region
-
-    # Slice off a reverse indexed chunk from near edge to pad `arr` before
-    ref_slice = _slice_at_axis(arr.shape, slice(pad_amt[0], 0, -1), axis=axis)
-
-    ref_chunk1 = arr[ref_slice]
-
-    # Memory/computationally more expensive, only do this if `method='odd'`
-    if 'odd' in method and pad_amt[0] > 0:
-        edge_slice1 = _slice_first(arr.shape, 1, axis=axis)
-        edge_chunk = arr[edge_slice1]
-        ref_chunk1 = 2 * edge_chunk - ref_chunk1
-        del edge_chunk
-
-    ##########################################################################
-    # Appended region
-
-    # Slice off a reverse indexed chunk from far edge to pad `arr` after
-    start = arr.shape[axis] - pad_amt[1] - 1
-    end = arr.shape[axis] - 1
-    ref_slice = _slice_at_axis(arr.shape, slice(start, end), axis=axis)
-    rev_idx = _slice_at_axis(arr.shape, slice(None, None, -1), axis=axis)
-    ref_chunk2 = arr[ref_slice][rev_idx]
-
-    if 'odd' in method:
-        edge_slice2 = _slice_last(arr.shape, 1, axis=axis)
-        edge_chunk = arr[edge_slice2]
-        ref_chunk2 = 2 * edge_chunk - ref_chunk2
-        del edge_chunk
-
-    # Concatenate `arr` with both chunks, extending along `axis`
-    return np.concatenate((ref_chunk1, arr, ref_chunk2), axis=axis)
-
-
-def _pad_sym(arr, pad_amt, method, axis=-1):
-    """
-    Pad `axis` of `arr` by symmetry.
-
-    Parameters
-    ----------
-    arr : ndarray
-        Input array of arbitrary shape.
     pad_amt : tuple of ints, length 2
-        Padding to (prepend, append) along `axis`.
-    method : str
-        Controls method of symmetry; options are 'even' or 'odd'.
-    axis : int
-        Axis along which to pad `arr`.
-
-    Returns
-    -------
-    padarr : ndarray
-        Output array, with `pad_amt[0]` values prepended and `pad_amt[1]`
-        values appended along `axis`. Both regions are padded with symmetric
-        values from the original array.
-
-    Notes
-    -----
-    This algorithm DOES pad with repetition, i.e. the edges are repeated.
-    For padding without repeated edges, use `mode='reflect'`.
-
-    The modes 'reflect', 'symmetric', and 'wrap' must be padded with a
-    single function, lest the indexing tricks in non-integer multiples of the
-    original shape would violate repetition in the final iteration.
-
+        New index positions of padding to do along the `axis`. If these are
+        both 0, padding is done in this dimension.
""" - # Implicit booleanness to test for zero (or None) in any scalar type - if pad_amt[0] == 0 and pad_amt[1] == 0: - return arr - - ########################################################################## - # Prepended region - - # Slice off a reverse indexed chunk from near edge to pad `arr` before - sym_slice = _slice_first(arr.shape, pad_amt[0], axis=axis) - rev_idx = _slice_at_axis(arr.shape, slice(None, None, -1), axis=axis) - sym_chunk1 = arr[sym_slice][rev_idx] - - # Memory/computationally more expensive, only do this if `method='odd'` - if 'odd' in method and pad_amt[0] > 0: - edge_slice1 = _slice_first(arr.shape, 1, axis=axis) - edge_chunk = arr[edge_slice1] - sym_chunk1 = 2 * edge_chunk - sym_chunk1 - del edge_chunk + left_pad, right_pad = width_pair + old_length = padded.shape[axis] - right_pad - left_pad - ########################################################################## - # Appended region - - # Slice off a reverse indexed chunk from far edge to pad `arr` after - sym_slice = _slice_last(arr.shape, pad_amt[1], axis=axis) - sym_chunk2 = arr[sym_slice][rev_idx] - - if 'odd' in method: - edge_slice2 = _slice_last(arr.shape, 1, axis=axis) - edge_chunk = arr[edge_slice2] - sym_chunk2 = 2 * edge_chunk - sym_chunk2 - del edge_chunk - - # Concatenate `arr` with both chunks, extending along `axis` - return np.concatenate((sym_chunk1, arr, sym_chunk2), axis=axis) - - -def _pad_wrap(arr, pad_amt, axis=-1): - """ - Pad `axis` of `arr` via wrapping. + if include_edge: + # Edge is included, we need to offset the pad amount by 1 + edge_offset = 1 + else: + edge_offset = 0 # Edge is not included, no need to offset pad amount + old_length -= 1 # but must be omitted from the chunk + + if left_pad > 0: + # Pad with reflected values on left side: + # First limit chunk size which can't be larger than pad area + chunk_length = min(old_length, left_pad) + # Slice right to left, stop on or next to edge, start relative to stop + stop = left_pad - edge_offset + start = stop + chunk_length + left_slice = _slice_at_axis(slice(start, stop, -1), axis) + left_chunk = padded[left_slice] + + if method == "odd": + # Negate chunk and align with edge + edge_slice = _slice_at_axis(slice(left_pad, left_pad + 1), axis) + left_chunk = 2 * padded[edge_slice] - left_chunk + + # Insert chunk into padded area + start = left_pad - chunk_length + stop = left_pad + pad_area = _slice_at_axis(slice(start, stop), axis) + padded[pad_area] = left_chunk + # Adjust pointer to left edge for next iteration + left_pad -= chunk_length + + if right_pad > 0: + # Pad with reflected values on right side: + # First limit chunk size which can't be larger than pad area + chunk_length = min(old_length, right_pad) + # Slice right to left, start on or next to edge, stop relative to start + start = -right_pad + edge_offset - 2 + stop = start - chunk_length + right_slice = _slice_at_axis(slice(start, stop, -1), axis) + right_chunk = padded[right_slice] + + if method == "odd": + # Negate chunk and align with edge + edge_slice = _slice_at_axis( + slice(-right_pad - 1, -right_pad), axis) + right_chunk = 2 * padded[edge_slice] - right_chunk + + # Insert chunk into padded area + start = padded.shape[axis] - right_pad + stop = start + chunk_length + pad_area = _slice_at_axis(slice(start, stop), axis) + padded[pad_area] = right_chunk + # Adjust pointer to right edge for next iteration + right_pad -= chunk_length + + return left_pad, right_pad + + +def _set_wrap_both(padded, axis, width_pair): + """ + Pad `axis` of `arr` with wrapped values. 
 
-def _pad_wrap(arr, pad_amt, axis=-1):
-    """
-    Pad `axis` of `arr` via wrapping.
+def _set_wrap_both(padded, axis, width_pair):
+    """
+    Pad `axis` of `arr` with wrapped values.
 
     Parameters
     ----------
-    arr : ndarray
+    padded : ndarray
         Input array of arbitrary shape.
-    pad_amt : tuple of ints, length 2
-        Padding to (prepend, append) along `axis`.
     axis : int
         Axis along which to pad `arr`.
+    width_pair : (int, int)
+        Pair of widths that mark the pad area on both sides in the given
+        dimension.
 
     Returns
     -------
-    padarr : ndarray
-        Output array, with `pad_amt[0]` values prepended and `pad_amt[1]`
-        values appended along `axis`. Both regions are padded wrapped values
-        from the opposite end of `axis`.
-
-    Notes
-    -----
-    This method of padding is also known as 'tile' or 'tiling'.
-
-    The modes 'reflect', 'symmetric', and 'wrap' must be padded with a
-    single function, lest the indexing tricks in non-integer multiples of the
-    original shape would violate repetition in the final iteration.
-
-    """
-    # Implicit booleanness to test for zero (or None) in any scalar type
-    if pad_amt[0] == 0 and pad_amt[1] == 0:
-        return arr
-
-    ##########################################################################
-    # Prepended region
-
-    # Slice off a reverse indexed chunk from near edge to pad `arr` before
-    wrap_slice = _slice_last(arr.shape, pad_amt[0], axis=axis)
-    wrap_chunk1 = arr[wrap_slice]
-
-    ##########################################################################
-    # Appended region
-
-    # Slice off a reverse indexed chunk from far edge to pad `arr` after
-    wrap_slice = _slice_first(arr.shape, pad_amt[1], axis=axis)
-    wrap_chunk2 = arr[wrap_slice]
-
-    # Concatenate `arr` with both chunks, extending along `axis`
-    return np.concatenate((wrap_chunk1, arr, wrap_chunk2), axis=axis)
+    pad_amt : tuple of ints, length 2
+        New index positions of padding to do along the `axis`. If these are
+        both 0, padding is done in this dimension.
+    """
+    left_pad, right_pad = width_pair
+    period = padded.shape[axis] - right_pad - left_pad
+
+    # If the current dimension of `arr` doesn't contain enough valid values
+    # (not part of the undefined pad area) we need to pad multiple times.
+    # Each time the pad area shrinks on both sides which is communicated with
+    # these variables.
+    new_left_pad = 0
+    new_right_pad = 0
+
+    if left_pad > 0:
+        # Pad with wrapped values on left side
+        # First slice chunk from right side of the non-pad area.
+        # Use min(period, left_pad) to ensure that chunk is not larger than
+        # pad area
+        right_slice = _slice_at_axis(
+            slice(-right_pad - min(period, left_pad),
+                  -right_pad if right_pad != 0 else None),
+            axis
+        )
+        right_chunk = padded[right_slice]
+
+        if left_pad > period:
+            # Chunk is smaller than pad area
+            pad_area = _slice_at_axis(slice(left_pad - period, left_pad), axis)
+            new_left_pad = left_pad - period
+        else:
+            # Chunk matches pad area
+            pad_area = _slice_at_axis(slice(None, left_pad), axis)
+        padded[pad_area] = right_chunk
+
+    if right_pad > 0:
+        # Pad with wrapped values on right side
+        # First slice chunk from left side of the non-pad area.
+        # Use min(period, right_pad) to ensure that chunk is not larger than
+        # pad area
+        left_slice = _slice_at_axis(
+            slice(left_pad, left_pad + min(period, right_pad),), axis)
+        left_chunk = padded[left_slice]
+
+        if right_pad > period:
+            # Chunk is smaller than pad area
+            pad_area = _slice_at_axis(
+                slice(-right_pad, -right_pad + period), axis)
+            new_right_pad = right_pad - period
+        else:
+            # Chunk matches pad area
+            pad_area = _slice_at_axis(slice(-right_pad, None), axis)
+        padded[pad_area] = left_chunk
+
+    return new_left_pad, new_right_pad
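Editor's note: `_set_wrap_both` extends the array periodically, one period per call; the caller loops until both pads reach zero. The user-visible semantics (end values pad the beginning and vice versa):

```python
import numpy as np

a = np.array([1, 2, 3])
print(np.pad(a, 2, mode='wrap'))   # [2 3 1 2 3 1 2]
# Also works when the pad exceeds the period of the signal, thanks to
# the iterative loop in `pad` below:
print(np.pad(a, 7, mode='wrap'))
```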
 
 
@@ -953,23 +567,23 @@ def _as_pairs(x, ndim, as_index=False):
     return np.broadcast_to(x, (ndim, 2)).tolist()
 
 
-###############################################################################
-# Public functions
+def _pad_dispatcher(array, pad_width, mode=None, **kwargs):
+    return (array,)
 
 
-def _pad_dispatcher(array, pad_width, mode, **kwargs):
-    return (array,)
+###############################################################################
+# Public functions
 
 
 @array_function_dispatch(_pad_dispatcher, module='numpy')
-def pad(array, pad_width, mode, **kwargs):
+def pad(array, pad_width, mode='constant', **kwargs):
     """
-    Pads an array.
+    Pad an array.
 
     Parameters
     ----------
     array : array_like of rank N
-        Input array
+        The array to pad.
     pad_width : {sequence, array_like, int}
         Number of values padded to the edges of each axis.
         ((before_1, after_1), ... (before_N, after_N)) unique pad widths
@@ -977,10 +591,10 @@ def pad(array, pad_width, mode, **kwargs):
         ((before, after),) yields same before and after pad for each axis.
         (pad,) or int is a shortcut for before = after = pad width for all
         axes.
-    mode : str or function
+    mode : str or function, optional
         One of the following string values or a user supplied function.
 
-        'constant'
+        'constant' (default)
             Pads with a constant value.
        'edge'
            Pads with the edge values of array.
@@ -1010,6 +624,11 @@ def pad(array, pad_width, mode, **kwargs):
             Pads with the wrap of the vector along the axis.
             The first values are used to pad the end and the
             end values are used to pad the beginning.
+        'empty'
+            Pads with undefined values.
+
+            .. versionadded:: 1.17
+
        <function>
            Padding function, see Notes.
     stat_length : sequence or int, optional
@@ -1026,31 +645,31 @@ def pad(array, pad_width, mode, **kwargs):
        length for all axes.
 
        Default is ``None``, to use the entire axis.
-    constant_values : sequence or int, optional
+    constant_values : sequence or scalar, optional
        Used in 'constant'. The values to set the padded values for each
        axis.
 
-        ((before_1, after_1), ... (before_N, after_N)) unique pad constants
+        ``((before_1, after_1), ... (before_N, after_N))`` unique pad constants
        for each axis.
 
-        ((before, after),) yields same before and after constants for each
+        ``((before, after),)`` yields same before and after constants for each
        axis.
 
-        (constant,) or int is a shortcut for before = after = constant for
+        ``(constant,)`` or ``constant`` is a shortcut for
+        ``before = after = constant`` for
        all axes.
 
        Default is 0.
-    end_values : sequence or int, optional
+    end_values : sequence or scalar, optional
        Used in 'linear_ramp'. The values used for the ending value of the
        linear_ramp and that will form the edge of the padded array.
 
-        ((before_1, after_1), ... (before_N, after_N)) unique end values
+        ``((before_1, after_1), ... (before_N, after_N))`` unique end values
        for each axis.
 
-        ((before, after),) yields same before and after end values for each
+        ``((before, after),)`` yields same before and after end values for each
        axis.
 
-        (constant,) or int is a shortcut for before = after = end value for
+        ``(constant,)`` or ``constant`` is a shortcut for
+        ``before = after = constant`` for
        all axes.
 
        Default is 0.
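Editor's note: two user-facing changes land in this docstring hunk, `mode` becoming optional (defaulting to 'constant') and the new 'empty' mode. A quick demonstration, assuming NumPy 1.17 or later:

```python
import numpy as np

a = np.array([1, 2, 3])
print(np.pad(a, 2))             # mode defaults to 'constant': [0 0 1 2 3 0 0]

e = np.pad(a, 2, mode='empty')  # new in 1.17: pad area is uninitialized
print(e[2:5])                   # only the middle [1 2 3] is defined
```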
@@ -1075,9 +694,8 @@ def pad(array, pad_width, mode, **kwargs):
     think about with a rank 2 array where the corners of the padded array
     are calculated by using padded values from the first axis.
 
-    The padding function, if used, should return a rank 1 array equal in
-    length to the vector argument with padded values replaced. It has the
-    following signature::
+    The padding function, if used, should modify a rank 1 array in-place. It
+    has the following signature::
 
         padding_func(vector, iaxis_pad_width, iaxis, kwargs)
 
@@ -1085,7 +703,7 @@ def pad(array, pad_width, mode, **kwargs):
 
         vector : ndarray
             A rank 1 array already padded with zeros. Padded values are
-            vector[:pad_tuple[0]] and vector[-pad_tuple[1]:].
+            vector[:iaxis_pad_width[0]] and vector[-iaxis_pad_width[1]:].
        iaxis_pad_width : tuple
            A 2-tuple of ints, iaxis_pad_width[0] represents the number of
            values padded at the beginning of vector where
@@ -1099,11 +717,11 @@ def pad(array, pad_width, mode, **kwargs):
     Examples
     --------
     >>> a = [1, 2, 3, 4, 5]
-    >>> np.pad(a, (2,3), 'constant', constant_values=(4, 6))
-    array([4, 4, 1, 2, 3, 4, 5, 6, 6, 6])
+    >>> np.pad(a, (2, 3), 'constant', constant_values=(4, 6))
+    array([4, 4, 1, ..., 6, 6, 6])
 
     >>> np.pad(a, (2, 3), 'edge')
-    array([1, 1, 1, 2, 3, 4, 5, 5, 5, 5])
+    array([1, 1, 1, ..., 5, 5, 5])
 
     >>> np.pad(a, (2, 3), 'linear_ramp', end_values=(5, -4))
     array([ 5,  3,  1,  2,  3,  4,  5,  2, -1, -4])
@@ -1147,7 +765,6 @@ def pad(array, pad_width, mode, **kwargs):
     ...     pad_value = kwargs.get('padder', 10)
     ...     vector[:pad_width[0]] = pad_value
     ...     vector[-pad_width[1]:] = pad_value
-    ...     return vector
     >>> a = np.arange(6)
     >>> a = a.reshape((2, 3))
     >>> np.pad(a, 2, pad_with)
@@ -1165,15 +782,30 @@ def pad(array, pad_width, mode, **kwargs):
            [100, 100, 100, 100, 100, 100, 100],
            [100, 100, 100, 100, 100, 100, 100]])
     """
-    if not np.asarray(pad_width).dtype.kind == 'i':
+    array = np.asarray(array)
+    pad_width = np.asarray(pad_width)
+
+    if not pad_width.dtype.kind == 'i':
         raise TypeError('`pad_width` must be of integral type.')
 
-    narray = np.array(array)
-    pad_width = _as_pairs(pad_width, narray.ndim, as_index=True)
+    # Broadcast to shape (array.ndim, 2)
+    pad_width = _as_pairs(pad_width, array.ndim, as_index=True)
 
-    allowedkwargs = {
+    if callable(mode):
+        # Old behavior: Use user-supplied function with np.apply_along_axis
+        function = mode
+        # Create a new zero padded array
+        padded, _ = _pad_simple(array, pad_width, fill_value=0)
+        # And apply along each axis
+        for axis in range(padded.ndim):
+            np.apply_along_axis(
+                function, axis, padded, pad_width[axis], axis, kwargs)
+        return padded
+
+    # Make sure that no unsupported keywords were passed for the current mode
+    allowed_kwargs = {
+        'empty': [], 'edge': [], 'wrap': [],
         'constant': ['constant_values'],
-        'edge': [],
         'linear_ramp': ['end_values'],
         'maximum': ['stat_length'],
         'mean': ['stat_length'],
@@ -1181,175 +813,101 @@ def pad(array, pad_width, mode, **kwargs):
         'minimum': ['stat_length'],
         'reflect': ['reflect_type'],
         'symmetric': ['reflect_type'],
-        'wrap': [],
-        }
-
-    kwdefaults = {
-        'stat_length': None,
-        'constant_values': 0,
-        'end_values': 0,
-        'reflect_type': 'even',
-        }
-
-    if isinstance(mode, np.compat.basestring):
-        # Make sure have allowed kwargs appropriate for mode
-        for key in kwargs:
-            if key not in allowedkwargs[mode]:
-                raise ValueError('%s keyword not in allowed keywords %s' %
-                                 (key, allowedkwargs[mode]))
-
-        # Set kwarg defaults
-        for kw in allowedkwargs[mode]:
-            kwargs.setdefault(kw, kwdefaults[kw])
-
-        # Need to only normalize particular keywords.
-        for i in kwargs:
-            if i == 'stat_length':
-                kwargs[i] = _as_pairs(kwargs[i], narray.ndim, as_index=True)
-            if i in ['end_values', 'constant_values']:
-                kwargs[i] = _as_pairs(kwargs[i], narray.ndim)
-    else:
-        # Drop back to old, slower np.apply_along_axis mode for user-supplied
-        # vector function
-        function = mode
-
-        # Create a new padded array
-        rank = list(range(narray.ndim))
-        total_dim_increase = [np.sum(pad_width[i]) for i in rank]
-        offset_slices = tuple(
-            slice(pad_width[i][0], pad_width[i][0] + narray.shape[i])
-            for i in rank)
-        new_shape = np.array(narray.shape) + total_dim_increase
-        newmat = np.zeros(new_shape, narray.dtype)
-
-        # Insert the original array into the padded array
-        newmat[offset_slices] = narray
-
-        # This is the core of pad ...
-        for iaxis in rank:
-            np.apply_along_axis(function,
-                                iaxis,
-                                newmat,
-                                pad_width[iaxis],
-                                iaxis,
-                                kwargs)
-        return newmat
-
-    # If we get here, use new padding method
-    newmat = narray.copy()
-
-    # API preserved, but completely new algorithm which pads by building the
-    # entire block to pad before/after `arr` with in one step, for each axis.
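Editor's note: the callable-mode path above also encodes the documented contract change: a user-supplied padding function must now modify the zero-padded `vector` in place, and its return value is ignored. A sketch mirroring the docstring's `pad_with` example:

```python
import numpy as np

def pad_with(vector, pad_width, iaxis, kwargs):
    # Modify `vector` in place; do not rely on a return value.
    pad_value = kwargs.get('padder', 10)
    vector[:pad_width[0]] = pad_value
    vector[-pad_width[1]:] = pad_value

a = np.arange(6).reshape(2, 3)
print(np.pad(a, 2, pad_with, padder=-1))  # a 6x7 array with a -1 border
```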
-    if mode == 'constant':
-        for axis, ((pad_before, pad_after), (before_val, after_val)) \
-                in enumerate(zip(pad_width, kwargs['constant_values'])):
-            newmat = _prepend_const(newmat, pad_before, before_val, axis)
-            newmat = _append_const(newmat, pad_after, after_val, axis)
-
-    elif mode == 'edge':
-        for axis, (pad_before, pad_after) in enumerate(pad_width):
-            newmat = _prepend_edge(newmat, pad_before, axis)
-            newmat = _append_edge(newmat, pad_after, axis)
-
-    elif mode == 'linear_ramp':
-        for axis, ((pad_before, pad_after), (before_val, after_val)) \
-                in enumerate(zip(pad_width, kwargs['end_values'])):
-            newmat = _prepend_ramp(newmat, pad_before, before_val, axis)
-            newmat = _append_ramp(newmat, pad_after, after_val, axis)
-
-    elif mode == 'maximum':
-        for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \
-                in enumerate(zip(pad_width, kwargs['stat_length'])):
-            newmat = _prepend_max(newmat, pad_before, chunk_before, axis)
-            newmat = _append_max(newmat, pad_after, chunk_after, axis)
-
-    elif mode == 'mean':
-        for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \
-                in enumerate(zip(pad_width, kwargs['stat_length'])):
-            newmat = _prepend_mean(newmat, pad_before, chunk_before, axis)
-            newmat = _append_mean(newmat, pad_after, chunk_after, axis)
-
-    elif mode == 'median':
-        for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \
-                in enumerate(zip(pad_width, kwargs['stat_length'])):
-            newmat = _prepend_med(newmat, pad_before, chunk_before, axis)
-            newmat = _append_med(newmat, pad_after, chunk_after, axis)
-
-    elif mode == 'minimum':
-        for axis, ((pad_before, pad_after), (chunk_before, chunk_after)) \
-                in enumerate(zip(pad_width, kwargs['stat_length'])):
-            newmat = _prepend_min(newmat, pad_before, chunk_before, axis)
-            newmat = _append_min(newmat, pad_after, chunk_after, axis)
-
-    elif mode == 'reflect':
-        for axis, (pad_before, pad_after) in enumerate(pad_width):
-            if narray.shape[axis] == 0:
-                # Axes with non-zero padding cannot be empty.
-                if pad_before > 0 or pad_after > 0:
-                    raise ValueError("There aren't any elements to reflect"
-                                     " in axis {} of `array`".format(axis))
-                # Skip zero padding on empty axes.
-                continue
-
-            # Recursive padding along any axis where `pad_amt` is too large
-            # for indexing tricks. We can only safely pad the original axis
-            # length, to keep the period of the reflections consistent.
-            if ((pad_before > 0) or
-                    (pad_after > 0)) and newmat.shape[axis] == 1:
+        }
+    try:
+        unsupported_kwargs = set(kwargs) - set(allowed_kwargs[mode])
+    except KeyError:
+        raise ValueError("mode '{}' is not supported".format(mode))
+    if unsupported_kwargs:
+        raise ValueError("unsupported keyword arguments for mode '{}': {}"
+                         .format(mode, unsupported_kwargs))
+
+    stat_functions = {"maximum": np.max, "minimum": np.min,
+                      "mean": np.mean, "median": np.median}
+
+    # Create array with final shape and original values
+    # (padded area is undefined)
+    padded, original_area_slice = _pad_simple(array, pad_width)
+    # And prepare iteration over all dimensions
+    # (zipping may be more readable than using enumerate)
+    axes = range(padded.ndim)
+
+    if mode == "constant":
+        values = kwargs.get("constant_values", 0)
+        values = _as_pairs(values, padded.ndim)
+        for axis, width_pair, value_pair in zip(axes, pad_width, values):
+            roi = _view_roi(padded, original_area_slice, axis)
+            _set_pad_area(roi, axis, width_pair, value_pair)
+
+    elif mode == "empty":
+        pass  # Do nothing as _pad_simple already returned the correct result
+
+    elif array.size == 0:
+        # Only modes "constant" and "empty" can extend empty axes, all other
+        # modes depend on `array` not being empty
+        # -> ensure every empty axis is only "padded with 0"
+        for axis, width_pair in zip(axes, pad_width):
+            if array.shape[axis] == 0 and any(width_pair):
+                raise ValueError(
+                    "can't extend empty axis {} using modes other than "
+                    "'constant' or 'empty'".format(axis)
+                )
+        # passed, don't need to do anything more as _pad_simple already
+        # returned the correct result
+
+    elif mode == "edge":
+        for axis, width_pair in zip(axes, pad_width):
+            roi = _view_roi(padded, original_area_slice, axis)
+            edge_pair = _get_edges(roi, axis, width_pair)
+            _set_pad_area(roi, axis, width_pair, edge_pair)
+
+    elif mode == "linear_ramp":
+        end_values = kwargs.get("end_values", 0)
+        end_values = _as_pairs(end_values, padded.ndim)
+        for axis, width_pair, value_pair in zip(axes, pad_width, end_values):
+            roi = _view_roi(padded, original_area_slice, axis)
+            ramp_pair = _get_linear_ramps(roi, axis, width_pair, value_pair)
+            _set_pad_area(roi, axis, width_pair, ramp_pair)
+
+    elif mode in stat_functions:
+        func = stat_functions[mode]
+        length = kwargs.get("stat_length", None)
+        length = _as_pairs(length, padded.ndim, as_index=True)
+        for axis, width_pair, length_pair in zip(axes, pad_width, length):
+            roi = _view_roi(padded, original_area_slice, axis)
+            stat_pair = _get_stats(roi, axis, width_pair, length_pair, func)
+            _set_pad_area(roi, axis, width_pair, stat_pair)
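Editor's note: the new keyword validation above replaces the old per-mode checks and produces a clearer error. A quick check of the new failure mode:

```python
import numpy as np

a = np.arange(4)
try:
    np.pad(a, 1, mode='constant', end_values=2)  # wrong kwarg for this mode
except ValueError as e:
    print(e)
# unsupported keyword arguments for mode 'constant': {'end_values'}
```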
- newmat = _prepend_edge(newmat, pad_before, axis) - newmat = _append_edge(newmat, pad_after, axis) + edge_pair = _get_edges(padded, axis, (left_index, right_index)) + _set_pad_area( + padded, axis, (left_index, right_index), edge_pair) continue - method = kwargs['reflect_type'] - safe_pad = newmat.shape[axis] - 1 - while ((pad_before > safe_pad) or (pad_after > safe_pad)): - pad_iter_b = min(safe_pad, - safe_pad * (pad_before // safe_pad)) - pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad)) - newmat = _pad_ref(newmat, (pad_iter_b, - pad_iter_a), method, axis) - pad_before -= pad_iter_b - pad_after -= pad_iter_a - safe_pad += pad_iter_b + pad_iter_a - newmat = _pad_ref(newmat, (pad_before, pad_after), method, axis) - - elif mode == 'symmetric': - for axis, (pad_before, pad_after) in enumerate(pad_width): - # Recursive padding along any axis where `pad_amt` is too large - # for indexing tricks. We can only safely pad the original axis - # length, to keep the period of the reflections consistent. - method = kwargs['reflect_type'] - safe_pad = newmat.shape[axis] - while ((pad_before > safe_pad) or - (pad_after > safe_pad)): - pad_iter_b = min(safe_pad, - safe_pad * (pad_before // safe_pad)) - pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad)) - newmat = _pad_sym(newmat, (pad_iter_b, - pad_iter_a), method, axis) - pad_before -= pad_iter_b - pad_after -= pad_iter_a - safe_pad += pad_iter_b + pad_iter_a - newmat = _pad_sym(newmat, (pad_before, pad_after), method, axis) - - elif mode == 'wrap': - for axis, (pad_before, pad_after) in enumerate(pad_width): - # Recursive padding along any axis where `pad_amt` is too large - # for indexing tricks. We can only safely pad the original axis - # length, to keep the period of the reflections consistent. - safe_pad = newmat.shape[axis] - while ((pad_before > safe_pad) or - (pad_after > safe_pad)): - pad_iter_b = min(safe_pad, - safe_pad * (pad_before // safe_pad)) - pad_iter_a = min(safe_pad, safe_pad * (pad_after // safe_pad)) - newmat = _pad_wrap(newmat, (pad_iter_b, pad_iter_a), axis) - - pad_before -= pad_iter_b - pad_after -= pad_iter_a - safe_pad += pad_iter_b + pad_iter_a - newmat = _pad_wrap(newmat, (pad_before, pad_after), axis) - - return newmat + roi = _view_roi(padded, original_area_slice, axis) + while left_index > 0 or right_index > 0: + # Iteratively pad until dimension is filled with reflected + # values. This is necessary if the pad area is larger than + # the length of the original values in the current dimension. + left_index, right_index = _set_reflect_both( + roi, axis, (left_index, right_index), + method, include_edge + ) + + elif mode == "wrap": + for axis, (left_index, right_index) in zip(axes, pad_width): + roi = _view_roi(padded, original_area_slice, axis) + while left_index > 0 or right_index > 0: + # Iteratively pad until dimension is filled with wrapped + # values. This is necessary if the pad area is larger than + # the length of the original values in the current dimension. + left_index, right_index = _set_wrap_both( + roi, axis, (left_index, right_index)) + + return padded diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index fd64ecbd6..b53d8c03f 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -82,7 +82,7 @@ def ediff1d(ary, to_end=None, to_begin=None): array([ 1, 2, 3, -7]) >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99])) - array([-99, 1, 2, 3, -7, 88, 99]) + array([-99, 1, 2, ..., -7, 88, 99]) The returned array is always 1D. 
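
The np.pad rewrite in arraypad.py above replaces the per-mode _prepend_*/_append_* helpers with a two-phase scheme: _pad_simple allocates an array of the final shape and copies the original values into the middle, after which each mode only has to fill the left/right pad area along one axis at a time (_view_roi plus _set_pad_area). A minimal sketch of the same idea, restricted to the "constant" mode -- the function name and structure below are illustrative, not NumPy's actual internals:

    import numpy as np

    def pad_constant_sketch(array, pad_width, value=0):
        # Phase 1 (the role of _pad_simple): allocate the output at its
        # final shape and copy the original values into the middle.
        array = np.asarray(array)
        new_shape = tuple(left + size + right
                          for size, (left, right) in zip(array.shape, pad_width))
        padded = np.empty(new_shape, dtype=array.dtype)
        original_area = tuple(slice(left, left + size)
                              for size, (left, _) in zip(array.shape, pad_width))
        padded[original_area] = array

        # Phase 2 (the role of _set_pad_area for mode="constant"): fill
        # the still-undefined pad region on both sides of every axis.
        for axis, (left, right) in enumerate(pad_width):
            index = [slice(None)] * padded.ndim
            index[axis] = slice(None, left)
            padded[tuple(index)] = value
            index[axis] = slice(new_shape[axis] - right, None)
            padded[tuple(index)] = value
        return padded

For example, pad_constant_sketch(np.ones((2, 2), int), [(1, 1), (2, 2)], 9) agrees with np.pad(np.ones((2, 2), int), [(1, 1), (2, 2)], mode='constant', constant_values=9). The payoff of the two-phase layout is that mode "empty" is phase 1 alone, and every other mode reduces to a per-axis fill.
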
@@ -94,8 +94,7 @@ def ediff1d(ary, to_end=None, to_begin=None): # force a 1d array ary = np.asanyarray(ary).ravel() - # we have unit tests enforcing - # propagation of the dtype of input + # enforce propagation of the dtype of input # ary to returned result dtype_req = ary.dtype @@ -106,23 +105,22 @@ def ediff1d(ary, to_end=None, to_begin=None): if to_begin is None: l_begin = 0 else: - to_begin = np.asanyarray(to_begin) - if not np.can_cast(to_begin, dtype_req): - raise TypeError("dtype of to_begin must be compatible " - "with input ary") - - to_begin = to_begin.ravel() + _to_begin = np.asanyarray(to_begin, dtype=dtype_req) + if not np.all(_to_begin == to_begin): + raise ValueError("cannot convert 'to_begin' to array with dtype " + "'%r' as required for input ary" % dtype_req) + to_begin = _to_begin.ravel() l_begin = len(to_begin) if to_end is None: l_end = 0 else: - to_end = np.asanyarray(to_end) - if not np.can_cast(to_end, dtype_req): - raise TypeError("dtype of to_end must be compatible " - "with input ary") - - to_end = to_end.ravel() + _to_end = np.asanyarray(to_end, dtype=dtype_req) + # check that casting has not overflowed + if not np.all(_to_end == to_end): + raise ValueError("cannot convert 'to_end' to array with dtype " + "'%r' as required for input ary" % dtype_req) + to_end = _to_end.ravel() l_end = len(to_end) # do the calculation in place and copy to_begin and to_end @@ -241,13 +239,11 @@ def unique(ar, return_index=False, return_inverse=False, >>> a = np.array(['a', 'b', 'b', 'c', 'a']) >>> u, indices = np.unique(a, return_index=True) >>> u - array(['a', 'b', 'c'], - dtype='|S1') + array(['a', 'b', 'c'], dtype='<U1') >>> indices array([0, 1, 3]) >>> a[indices] - array(['a', 'b', 'c'], - dtype='|S1') + array(['a', 'b', 'c'], dtype='<U1') Reconstruct the input array from the unique values: @@ -256,9 +252,9 @@ def unique(ar, return_index=False, return_inverse=False, >>> u array([1, 2, 3, 4, 6]) >>> indices - array([0, 1, 4, 3, 1, 2, 1]) + array([0, 1, 4, ..., 1, 2, 1]) >>> u[indices] - array([1, 2, 6, 4, 2, 3, 2]) + array([1, 2, 6, ..., 2, 3, 2]) """ ar = np.asanyarray(ar) @@ -661,8 +657,8 @@ def isin(element, test_elements, assume_unique=False, invert=False): >>> test_elements = [1, 2, 4, 8] >>> mask = np.isin(element, test_elements) >>> mask - array([[ False, True], - [ True, False]]) + array([[False, True], + [ True, False]]) >>> element[mask] array([2, 4]) @@ -676,7 +672,7 @@ def isin(element, test_elements, assume_unique=False, invert=False): >>> mask = np.isin(element, test_elements, invert=True) >>> mask array([[ True, False], - [ False, True]]) + [False, True]]) >>> element[mask] array([0, 6]) @@ -685,14 +681,14 @@ def isin(element, test_elements, assume_unique=False, invert=False): >>> test_set = {1, 2, 4, 8} >>> np.isin(element, test_set) - array([[ False, False], - [ False, False]]) + array([[False, False], + [False, False]]) Casting the set to a list gives the expected result: >>> np.isin(element, list(test_set)) - array([[ False, True], - [ True, False]]) + array([[False, True], + [ True, False]]) """ element = np.asarray(element) return in1d(element, test_elements, assume_unique=assume_unique, diff --git a/numpy/lib/arrayterator.py b/numpy/lib/arrayterator.py index f2d4fe9fd..c16668582 100644 --- a/numpy/lib/arrayterator.py +++ b/numpy/lib/arrayterator.py @@ -80,9 +80,8 @@ class Arrayterator(object): >>> for subarr in a_itor: ... if not subarr.all(): - ... print(subarr, subarr.shape) - ... - [[[[0 1]]]] (1, 1, 1, 2) + ... 
print(subarr, subarr.shape) # doctest: +SKIP + >>> # [[[[0 1]]]] (1, 1, 1, 2) """ @@ -160,7 +159,7 @@ class Arrayterator(object): ... if not subarr: ... print(subarr, type(subarr)) ... - 0 <type 'numpy.int32'> + 0 <class 'numpy.int64'> """ for block in self: diff --git a/numpy/lib/financial.py b/numpy/lib/financial.py index e1e297492..216687475 100644 --- a/numpy/lib/financial.py +++ b/numpy/lib/financial.py @@ -127,7 +127,7 @@ def fv(rate, nper, pmt, pv, when='end'): >>> a = np.array((0.05, 0.06, 0.07))/12 >>> np.fv(a, 10*12, -100, -100) - array([ 15692.92889434, 16569.87435405, 17509.44688102]) + array([ 15692.92889434, 16569.87435405, 17509.44688102]) # may vary """ when = _convert_when(when) @@ -275,7 +275,7 @@ def nper(rate, pmt, pv, fv=0, when='end'): If you only had $150/month to pay towards the loan, how long would it take to pay-off a loan of $8,000 at 7% annual interest? - >>> print(round(np.nper(0.07/12, -150, 8000), 5)) + >>> print(np.round(np.nper(0.07/12, -150, 8000), 5)) 64.07335 So, over 64 months would be required to pay off the loan. @@ -286,10 +286,10 @@ def nper(rate, pmt, pv, fv=0, when='end'): >>> np.nper(*(np.ogrid[0.07/12: 0.08/12: 0.01/12, ... -150 : -99 : 50 , ... 8000 : 9001 : 1000])) - array([[[ 64.07334877, 74.06368256], - [ 108.07548412, 127.99022654]], - [[ 66.12443902, 76.87897353], - [ 114.70165583, 137.90124779]]]) + array([[[ 64.07334877, 74.06368256], + [108.07548412, 127.99022654]], + [[ 66.12443902, 76.87897353], + [114.70165583, 137.90124779]]]) """ when = _convert_when(when) @@ -539,7 +539,7 @@ def pv(rate, nper, pmt, fv=0, when='end'): >>> a = np.array((0.05, 0.04, 0.03))/12 >>> np.pv(a, 10*12, -100, 15692.93) - array([ -100.00067132, -649.26771385, -1273.78633713]) + array([ -100.00067132, -649.26771385, -1273.78633713]) # may vary So, to end up with the same $15692.93 under the same $100 per month "savings plan," for annual interest rates of 4% and 3%, one would @@ -704,15 +704,15 @@ def irr(values): Examples -------- - >>> round(irr([-100, 39, 59, 55, 20]), 5) + >>> round(np.irr([-100, 39, 59, 55, 20]), 5) 0.28095 - >>> round(irr([-100, 0, 0, 74]), 5) + >>> round(np.irr([-100, 0, 0, 74]), 5) -0.0955 - >>> round(irr([-100, 100, 0, -7]), 5) + >>> round(np.irr([-100, 100, 0, -7]), 5) -0.0833 - >>> round(irr([-100, 100, 0, 7]), 5) + >>> round(np.irr([-100, 100, 0, 7]), 5) 0.06206 - >>> round(irr([-5, 10.5, 1, -8, 1]), 5) + >>> round(np.irr([-5, 10.5, 1, -8, 1]), 5) 0.0886 (Compare with the Example given for numpy.lib.financial.npv) @@ -777,7 +777,7 @@ def npv(rate, values): Examples -------- >>> np.npv(0.281,[-100, 39, 59, 55, 20]) - -0.0084785916384548798 + -0.0084785916384548798 # may vary (Compare with the Example given for numpy.lib.financial.irr) diff --git a/numpy/lib/format.py b/numpy/lib/format.py index 10945e5e8..553c9371d 100644 --- a/numpy/lib/format.py +++ b/numpy/lib/format.py @@ -149,7 +149,7 @@ data HEADER_LEN." Notes ----- The ``.npy`` format, including motivation for creating it and a comparison of -alternatives, is described in the `"npy-format" NEP +alternatives, is described in the `"npy-format" NEP <https://www.numpy.org/neps/nep-0001-npy-format.html>`_, however details have evolved with time and this document is more current. 
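
The header fields that _read_array_header validates below (descr, fortran_order, shape) can also be inspected directly with the reader helpers this module exposes. A short sketch, assuming a small array saved by np.save so that the file carries a version (1, 0) header (a (2, 0) file would use read_array_header_2_0 instead):

    import io
    import numpy as np
    from numpy.lib import format as npy_format

    buf = io.BytesIO()
    np.save(buf, np.zeros((3, 4)))
    buf.seek(0)

    major, minor = npy_format.read_magic(buf)   # (1, 0) here
    shape, fortran_order, dtype = npy_format.read_array_header_1_0(buf)
    # shape == (3, 4), fortran_order is False, dtype == np.dtype('float64')
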
@@ -162,9 +162,8 @@ import io
import warnings
from numpy.lib.utils import safe_eval
from numpy.compat import (
- asbytes, asstr, isfileobj, long, os_fspath
+ asbytes, asstr, isfileobj, long, os_fspath, pickle
)
-from numpy.core.numeric import pickle
MAGIC_PREFIX = b'\x93NUMPY'
@@ -525,7 +524,7 @@ def _read_array_header(fp, version):
elif version == (2, 0):
hlength_type = '<I'
else:
- raise ValueError("Invalid version %r" % version)
+ raise ValueError("Invalid version {!r}".format(version))
hlength_str = _read_bytes(fp, struct.calcsize(hlength_type), "array header length")
header_length = struct.unpack(hlength_type, hlength_str)[0]
@@ -541,29 +540,29 @@ def _read_array_header(fp, version):
try:
d = safe_eval(header)
except SyntaxError as e:
- msg = "Cannot parse header: %r\nException: %r"
- raise ValueError(msg % (header, e))
+ msg = "Cannot parse header: {!r}\nException: {!r}"
+ raise ValueError(msg.format(header, e))
if not isinstance(d, dict):
- msg = "Header is not a dictionary: %r"
- raise ValueError(msg % d)
+ msg = "Header is not a dictionary: {!r}"
+ raise ValueError(msg.format(d))
keys = sorted(d.keys())
if keys != ['descr', 'fortran_order', 'shape']:
- msg = "Header does not contain the correct keys: %r"
- raise ValueError(msg % (keys,))
+ msg = "Header does not contain the correct keys: {!r}"
+ raise ValueError(msg.format(keys))
# Sanity-check the values.
if (not isinstance(d['shape'], tuple) or
not numpy.all([isinstance(x, (int, long)) for x in d['shape']])):
- msg = "shape is not valid: %r"
- raise ValueError(msg % (d['shape'],))
+ msg = "shape is not valid: {!r}"
+ raise ValueError(msg.format(d['shape']))
if not isinstance(d['fortran_order'], bool):
- msg = "fortran_order is not a valid bool: %r"
- raise ValueError(msg % (d['fortran_order'],))
+ msg = "fortran_order is not a valid bool: {!r}"
+ raise ValueError(msg.format(d['fortran_order']))
try:
dtype = descr_to_dtype(d['descr'])
except TypeError as e:
- msg = "descr is not a valid dtype descriptor: %r"
- raise ValueError(msg % (d['descr'],))
+ msg = "descr is not a valid dtype descriptor: {!r}"
+ raise ValueError(msg.format(d['descr']))
return d['shape'], d['fortran_order'], dtype
@@ -645,7 +644,7 @@ def write_array(fp, array, version=None, allow_pickle=True, pickle_kwargs=None):
fp.write(chunk.tobytes('C'))
-def read_array(fp, allow_pickle=True, pickle_kwargs=None):
+def read_array(fp, allow_pickle=False, pickle_kwargs=None):
"""
Read an array from an NPY file.
@@ -655,7 +654,11 @@ def read_array(fp, allow_pickle=True, pickle_kwargs=None):
If this is not a real file object, then this may take extra
memory and time.
allow_pickle : bool, optional
- Whether to allow reading pickled data. Default: True
+ Whether to allow reading pickled data. Default: False
+
+ .. versionchanged:: 1.16.3
+ Made default False in response to CVE-2019-6446.
+
pickle_kwargs : dict
Additional keyword arguments to pass to pickle.load.
These are only useful when loading object arrays saved on Python 2 when using diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 5f87c8b2c..2e82fa075 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -31,7 +31,6 @@ from numpy.core.overrides import set_module from numpy.core import overrides from numpy.core.function_base import add_newdoc from numpy.lib.twodim_base import diag -from .utils import deprecate from numpy.core.multiarray import ( _insert, add_docstring, bincount, normalize_axis_index, _monotonicity, interp as compiled_interp, interp_complex as compiled_interp_complex @@ -218,12 +217,12 @@ def flip(m, axis=None): [2, 3]], [[4, 5], [6, 7]]]) - >>> flip(A, 0) + >>> np.flip(A, 0) array([[[4, 5], [6, 7]], [[0, 1], [2, 3]]]) - >>> flip(A, 1) + >>> np.flip(A, 1) array([[[2, 3], [0, 1]], [[6, 7], @@ -239,7 +238,7 @@ def flip(m, axis=None): [[1, 0], [3, 2]]]) >>> A = np.random.randn(3,4,5) - >>> np.all(flip(A,2) == A[:,:,::-1,...]) + >>> np.all(np.flip(A,2) == A[:,:,::-1,...]) True """ if not hasattr(m, 'ndim'): @@ -359,7 +358,7 @@ def average(a, axis=None, weights=None, returned=False): Examples -------- - >>> data = range(1,5) + >>> data = list(range(1,5)) >>> data [1, 2, 3, 4] >>> np.average(data) @@ -373,13 +372,12 @@ def average(a, axis=None, weights=None, returned=False): [2, 3], [4, 5]]) >>> np.average(data, axis=1, weights=[1./4, 3./4]) - array([ 0.75, 2.75, 4.75]) + array([0.75, 2.75, 4.75]) >>> np.average(data, weights=[1./4, 3./4]) - Traceback (most recent call last): - ... + ... TypeError: Axis must be specified when shapes of a and weights differ. - + >>> a = np.ones(5, dtype=np.float128) >>> w = np.ones(5, dtype=np.complex64) >>> avg = np.average(a, weights=w) @@ -586,7 +584,7 @@ def piecewise(x, condlist, funclist, *args, **kw): ``x >= 0``. >>> np.piecewise(x, [x < 0, x >= 0], [lambda x: -x, lambda x: x]) - array([ 2.5, 1.5, 0.5, 0.5, 1.5, 2.5]) + array([2.5, 1.5, 0.5, 0.5, 1.5, 2.5]) Apply the same function to a scalar value. @@ -671,7 +669,7 @@ def select(condlist, choicelist, default=0): >>> condlist = [x<3, x>5] >>> choicelist = [x, x**2] >>> np.select(condlist, choicelist) - array([ 0, 1, 2, 0, 0, 0, 36, 49, 64, 81]) + array([ 0, 1, 2, ..., 49, 64, 81]) """ # Check the size of condlist and choicelist are the same, or abort. @@ -854,9 +852,9 @@ def gradient(f, *varargs, **kwargs): -------- >>> f = np.array([1, 2, 4, 7, 11, 16], dtype=float) >>> np.gradient(f) - array([ 1. , 1.5, 2.5, 3.5, 4.5, 5. ]) + array([1. , 1.5, 2.5, 3.5, 4.5, 5. ]) >>> np.gradient(f, 2) - array([ 0.5 , 0.75, 1.25, 1.75, 2.25, 2.5 ]) + array([0.5 , 0.75, 1.25, 1.75, 2.25, 2.5 ]) Spacing can be also specified with an array that represents the coordinates of the values F along the dimensions. @@ -864,13 +862,13 @@ def gradient(f, *varargs, **kwargs): >>> x = np.arange(f.size) >>> np.gradient(f, x) - array([ 1. , 1.5, 2.5, 3.5, 4.5, 5. ]) + array([1. , 1.5, 2.5, 3.5, 4.5, 5. ]) Or a non uniform one: >>> x = np.array([0., 1., 1.5, 3.5, 4., 6.], dtype=float) >>> np.gradient(f, x) - array([ 1. , 3. , 3.5, 6.7, 6.9, 2.5]) + array([1. , 3. , 3.5, 6.7, 6.9, 2.5]) For two dimensional arrays, the return will be two arrays ordered by axis. In this example the first array stands for the gradient in @@ -878,8 +876,8 @@ def gradient(f, *varargs, **kwargs): >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=float)) [array([[ 2., 2., -1.], - [ 2., 2., -1.]]), array([[ 1. , 2.5, 4. ], - [ 1. , 1. , 1. ]])] + [ 2., 2., -1.]]), array([[1. , 2.5, 4. ], + [1. , 1. 
, 1. ]])] In this example the spacing is also specified: uniform for axis=0 and non uniform for axis=1 @@ -888,17 +886,17 @@ def gradient(f, *varargs, **kwargs): >>> y = [1., 1.5, 3.5] >>> np.gradient(np.array([[1, 2, 6], [3, 4, 5]], dtype=float), dx, y) [array([[ 1. , 1. , -0.5], - [ 1. , 1. , -0.5]]), array([[ 2. , 2. , 2. ], - [ 2. , 1.7, 0.5]])] + [ 1. , 1. , -0.5]]), array([[2. , 2. , 2. ], + [2. , 1.7, 0.5]])] It is possible to specify how boundaries are treated using `edge_order` >>> x = np.array([0, 1, 2, 3, 4]) >>> f = x**2 >>> np.gradient(f, edge_order=1) - array([ 1., 2., 4., 6., 7.]) + array([1., 2., 4., 6., 7.]) >>> np.gradient(f, edge_order=2) - array([-0., 2., 4., 6., 8.]) + array([0., 2., 4., 6., 8.]) The `axis` keyword can be used to specify a subset of axes of which the gradient is calculated @@ -1151,7 +1149,7 @@ def diff(a, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue): """ Calculate the n-th discrete difference along the given axis. - The first difference is given by ``out[n] = a[n+1] - a[n]`` along + The first difference is given by ``out[i] = a[i+1] - a[i]`` along the given axis, higher differences are calculated by using `diff` recursively. @@ -1200,7 +1198,7 @@ def diff(a, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue): >>> np.diff(u8_arr) array([255], dtype=uint8) >>> u8_arr[1,...] - u8_arr[0,...] - array(255, np.uint8) + 255 If this is not desirable, then the array should be cast to a larger integer type first: @@ -1340,7 +1338,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): >>> np.interp(2.5, xp, fp) 1.0 >>> np.interp([0, 1, 1.5, 2.72, 3.14], xp, fp) - array([ 3. , 3. , 2.5 , 0.56, 0. ]) + array([3. , 3. , 2.5 , 0.56, 0. ]) >>> UNDEF = -99.0 >>> np.interp(3.14, xp, fp, right=UNDEF) -99.0 @@ -1364,7 +1362,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): >>> xp = [190, -190, 350, -350] >>> fp = [5, 10, 3, 4] >>> np.interp(x, xp, fp, period=360) - array([7.5, 5., 8.75, 6.25, 3., 3.25, 3.5, 3.75]) + array([7.5 , 5. , 8.75, 6.25, 3. , 3.25, 3.5 , 3.75]) Complex interpolation: @@ -1372,7 +1370,7 @@ def interp(x, xp, fp, left=None, right=None, period=None): >>> xp = [2,3,5] >>> fp = [1.0j, 0, 2+3j] >>> np.interp(x, xp, fp) - array([ 0.+1.j , 1.+1.5j]) + array([0.+1.j , 1.+1.5j]) """ @@ -1431,9 +1429,9 @@ def angle(z, deg=False): Returns ------- angle : ndarray or scalar - The counterclockwise angle from the positive real axis on - the complex plane, with dtype as numpy.float64. - + The counterclockwise angle from the positive real axis on the complex + plane in the range ``(-pi, pi]``, with dtype as numpy.float64. + ..versionchanged:: 1.16.0 This function works on subclasses of ndarray like `ma.array`. @@ -1445,7 +1443,7 @@ def angle(z, deg=False): Examples -------- >>> np.angle([1.0, 1.0j, 1+1j]) # in radians - array([ 0. , 1.57079633, 0.78539816]) + array([ 0. , 1.57079633, 0.78539816]) # may vary >>> np.angle(1+1j, deg=True) # in degrees 45.0 @@ -1505,9 +1503,9 @@ def unwrap(p, discont=pi, axis=-1): >>> phase = np.linspace(0, np.pi, num=5) >>> phase[3:] += np.pi >>> phase - array([ 0. , 0.78539816, 1.57079633, 5.49778714, 6.28318531]) + array([ 0. , 0.78539816, 1.57079633, 5.49778714, 6.28318531]) # may vary >>> np.unwrap(phase) - array([ 0. , 0.78539816, 1.57079633, -0.78539816, 0. ]) + array([ 0. , 0.78539816, 1.57079633, -0.78539816, 0. 
]) # may vary """ p = asarray(p) @@ -1547,10 +1545,10 @@ def sort_complex(a): Examples -------- >>> np.sort_complex([5, 3, 6, 2, 1]) - array([ 1.+0.j, 2.+0.j, 3.+0.j, 5.+0.j, 6.+0.j]) + array([1.+0.j, 2.+0.j, 3.+0.j, 5.+0.j, 6.+0.j]) >>> np.sort_complex([1 + 2j, 2 - 1j, 3 - 2j, 3 - 3j, 3 + 5j]) - array([ 1.+2.j, 2.-1.j, 3.-3.j, 3.-2.j, 3.+5.j]) + array([1.+2.j, 2.-1.j, 3.-3.j, 3.-2.j, 3.+5.j]) """ b = array(a, copy=True) @@ -1596,7 +1594,7 @@ def trim_zeros(filt, trim='fb'): array([1, 2, 3, 0, 2, 1]) >>> np.trim_zeros(a, 'b') - array([0, 0, 0, 1, 2, 3, 0, 2, 1]) + array([0, 0, 0, ..., 0, 2, 1]) The input data type is preserved, list/tuple in means list/tuple out. @@ -1931,6 +1929,30 @@ class vectorize(object): vectorized : callable Vectorized function. + See Also + -------- + frompyfunc : Takes an arbitrary Python function and returns a ufunc + + Notes + ----- + The `vectorize` function is provided primarily for convenience, not for + performance. The implementation is essentially a for loop. + + If `otypes` is not specified, then a call to the function with the + first argument will be used to determine the number of outputs. The + results of this call will be cached if `cache` is `True` to prevent + calling the function twice. However, to implement the cache, the + original function must be wrapped which will slow down subsequent + calls, so only do this if your function is expensive. + + The new keyword argument interface and `excluded` argument support + further degrades performance. + + References + ---------- + .. [1] NumPy Reference, section `Generalized Universal Function API + <https://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_. + Examples -------- >>> def myfunc(a, b): @@ -1958,11 +1980,11 @@ class vectorize(object): >>> out = vfunc([1, 2, 3, 4], 2) >>> type(out[0]) - <type 'numpy.int32'> + <class 'numpy.int64'> >>> vfunc = np.vectorize(myfunc, otypes=[float]) >>> out = vfunc([1, 2, 3, 4], 2) >>> type(out[0]) - <type 'numpy.float64'> + <class 'numpy.float64'> The `excluded` argument can be used to prevent vectorizing over certain arguments. This can be useful for array-like arguments of a fixed length @@ -1990,7 +2012,7 @@ class vectorize(object): >>> import scipy.stats >>> pearsonr = np.vectorize(scipy.stats.pearsonr, - ... signature='(n),(n)->(),()') + ... signature='(n),(n)->(),()') >>> pearsonr([[0, 1, 2, 3]], [[1, 2, 3, 4], [4, 3, 2, 1]]) (array([ 1., -1.]), array([ 0., 0.])) @@ -1998,36 +2020,12 @@ class vectorize(object): >>> convolve = np.vectorize(np.convolve, signature='(n),(m)->(k)') >>> convolve(np.eye(4), [1, 2, 1]) - array([[ 1., 2., 1., 0., 0., 0.], - [ 0., 1., 2., 1., 0., 0.], - [ 0., 0., 1., 2., 1., 0.], - [ 0., 0., 0., 1., 2., 1.]]) - - See Also - -------- - frompyfunc : Takes an arbitrary Python function and returns a ufunc - - Notes - ----- - The `vectorize` function is provided primarily for convenience, not for - performance. The implementation is essentially a for loop. - - If `otypes` is not specified, then a call to the function with the - first argument will be used to determine the number of outputs. The - results of this call will be cached if `cache` is `True` to prevent - calling the function twice. However, to implement the cache, the - original function must be wrapped which will slow down subsequent - calls, so only do this if your function is expensive. - - The new keyword argument interface and `excluded` argument support - further degrades performance. 
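
As the relocated Notes above stress, vectorize is a convenience rather than a performance tool; the signature keyword extends that convenience to functions that consume whole core dimensions, as in the convolve example that follows. A small sketch in the same spirit:

    import numpy as np

    # '(n)->()' passes each length-n row to the wrapped function whole
    # and broadcasts over all leading dimensions.
    row_mean = np.vectorize(np.mean, signature='(n)->()')
    a = np.arange(6.).reshape(2, 3)
    print(row_mean(a))   # [1. 4.] -- one mean per row
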
+ array([[1., 2., 1., 0., 0., 0.], + [0., 1., 2., 1., 0., 0.], + [0., 0., 1., 2., 1., 0.], + [0., 0., 0., 1., 2., 1.]]) - References - ---------- - .. [1] NumPy Reference, section `Generalized Universal Function API - <https://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_. """ - def __init__(self, pyfunc, otypes=None, doc=None, excluded=None, cache=False, signature=None): self.pyfunc = pyfunc @@ -2311,10 +2309,14 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, array `m` and let ``f = fweights`` and ``a = aweights`` for brevity. The steps to compute the weighted covariance are as follows:: + >>> m = np.arange(10, dtype=np.float64) + >>> f = np.arange(10) * 2 + >>> a = np.arange(10) ** 2. + >>> ddof = 9 # N - 1 >>> w = f * a >>> v1 = np.sum(w) >>> v2 = np.sum(w * a) - >>> m -= np.sum(m * w, axis=1, keepdims=True) / v1 + >>> m -= np.sum(m * w, axis=None, keepdims=True) / v1 >>> cov = np.dot(m * w, m.T) * v1 / (v1**2 - ddof * v2) Note that when ``a == 1``, the normalization factor @@ -2346,14 +2348,14 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, >>> x = [-2.1, -1, 4.3] >>> y = [3, 1.1, 0.12] >>> X = np.stack((x, y), axis=0) - >>> print(np.cov(X)) - [[ 11.71 -4.286 ] - [ -4.286 2.14413333]] - >>> print(np.cov(x, y)) - [[ 11.71 -4.286 ] - [ -4.286 2.14413333]] - >>> print(np.cov(x)) - 11.71 + >>> np.cov(X) + array([[11.71 , -4.286 ], # may vary + [-4.286 , 2.144133]]) + >>> np.cov(x, y) + array([[11.71 , -4.286 ], # may vary + [-4.286 , 2.144133]]) + >>> np.cov(x) + array(11.71) """ # Check inputs @@ -2590,12 +2592,12 @@ def blackman(M): Examples -------- + >>> import matplotlib.pyplot as plt >>> np.blackman(12) - array([ -1.38777878e-17, 3.26064346e-02, 1.59903635e-01, - 4.14397981e-01, 7.36045180e-01, 9.67046769e-01, - 9.67046769e-01, 7.36045180e-01, 4.14397981e-01, - 1.59903635e-01, 3.26064346e-02, -1.38777878e-17]) - + array([-1.38777878e-17, 3.26064346e-02, 1.59903635e-01, # may vary + 4.14397981e-01, 7.36045180e-01, 9.67046769e-01, + 9.67046769e-01, 7.36045180e-01, 4.14397981e-01, + 1.59903635e-01, 3.26064346e-02, -1.38777878e-17]) Plot the window and the frequency response: @@ -2604,30 +2606,31 @@ def blackman(M): >>> plt.plot(window) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Blackman window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Blackman window') >>> plt.ylabel("Amplitude") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Amplitude') >>> plt.xlabel("Sample") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Sample') >>> plt.show() >>> plt.figure() - <matplotlib.figure.Figure object at 0x...> + <Figure size 640x480 with 0 Axes> >>> A = fft(window, 2048) / 25.5 >>> mag = np.abs(fftshift(A)) >>> freq = np.linspace(-0.5, 0.5, len(A)) - >>> response = 20 * np.log10(mag) + >>> with np.errstate(divide='ignore', invalid='ignore'): + ... response = 20 * np.log10(mag) + ... >>> response = np.clip(response, -100, 100) >>> plt.plot(freq, response) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Frequency response of Blackman window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Frequency response of Blackman window') >>> plt.ylabel("Magnitude [dB]") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Magnitude [dB]') >>> plt.xlabel("Normalized frequency [cycles per sample]") - <matplotlib.text.Text object at 0x...> - >>> plt.axis('tight') - (-0.5, 0.5, -100.0, ...) 
+ Text(0.5, 0, 'Normalized frequency [cycles per sample]') + >>> _ = plt.axis('tight') >>> plt.show() """ @@ -2699,8 +2702,9 @@ def bartlett(M): Examples -------- + >>> import matplotlib.pyplot as plt >>> np.bartlett(12) - array([ 0. , 0.18181818, 0.36363636, 0.54545455, 0.72727273, + array([ 0. , 0.18181818, 0.36363636, 0.54545455, 0.72727273, # may vary 0.90909091, 0.90909091, 0.72727273, 0.54545455, 0.36363636, 0.18181818, 0. ]) @@ -2711,30 +2715,31 @@ def bartlett(M): >>> plt.plot(window) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Bartlett window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Bartlett window') >>> plt.ylabel("Amplitude") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Amplitude') >>> plt.xlabel("Sample") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Sample') >>> plt.show() >>> plt.figure() - <matplotlib.figure.Figure object at 0x...> + <Figure size 640x480 with 0 Axes> >>> A = fft(window, 2048) / 25.5 >>> mag = np.abs(fftshift(A)) >>> freq = np.linspace(-0.5, 0.5, len(A)) - >>> response = 20 * np.log10(mag) + >>> with np.errstate(divide='ignore', invalid='ignore'): + ... response = 20 * np.log10(mag) + ... >>> response = np.clip(response, -100, 100) >>> plt.plot(freq, response) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Frequency response of Bartlett window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Frequency response of Bartlett window') >>> plt.ylabel("Magnitude [dB]") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Magnitude [dB]') >>> plt.xlabel("Normalized frequency [cycles per sample]") - <matplotlib.text.Text object at 0x...> - >>> plt.axis('tight') - (-0.5, 0.5, -100.0, ...) + Text(0.5, 0, 'Normalized frequency [cycles per sample]') + >>> _ = plt.axis('tight') >>> plt.show() """ @@ -2801,41 +2806,44 @@ def hanning(M): Examples -------- >>> np.hanning(12) - array([ 0. , 0.07937323, 0.29229249, 0.57115742, 0.82743037, - 0.97974649, 0.97974649, 0.82743037, 0.57115742, 0.29229249, - 0.07937323, 0. ]) + array([0. , 0.07937323, 0.29229249, 0.57115742, 0.82743037, + 0.97974649, 0.97974649, 0.82743037, 0.57115742, 0.29229249, + 0.07937323, 0. ]) Plot the window and its frequency response: + >>> import matplotlib.pyplot as plt >>> from numpy.fft import fft, fftshift >>> window = np.hanning(51) >>> plt.plot(window) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Hann window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Hann window') >>> plt.ylabel("Amplitude") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Amplitude') >>> plt.xlabel("Sample") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Sample') >>> plt.show() >>> plt.figure() - <matplotlib.figure.Figure object at 0x...> + <Figure size 640x480 with 0 Axes> >>> A = fft(window, 2048) / 25.5 >>> mag = np.abs(fftshift(A)) >>> freq = np.linspace(-0.5, 0.5, len(A)) - >>> response = 20 * np.log10(mag) + >>> with np.errstate(divide='ignore', invalid='ignore'): + ... response = 20 * np.log10(mag) + ... 
>>> response = np.clip(response, -100, 100) >>> plt.plot(freq, response) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Frequency response of the Hann window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Frequency response of the Hann window') >>> plt.ylabel("Magnitude [dB]") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Magnitude [dB]') >>> plt.xlabel("Normalized frequency [cycles per sample]") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Normalized frequency [cycles per sample]') >>> plt.axis('tight') - (-0.5, 0.5, -100.0, ...) + ... >>> plt.show() """ @@ -2900,26 +2908,27 @@ def hamming(M): Examples -------- >>> np.hamming(12) - array([ 0.08 , 0.15302337, 0.34890909, 0.60546483, 0.84123594, + array([ 0.08 , 0.15302337, 0.34890909, 0.60546483, 0.84123594, # may vary 0.98136677, 0.98136677, 0.84123594, 0.60546483, 0.34890909, 0.15302337, 0.08 ]) Plot the window and the frequency response: + >>> import matplotlib.pyplot as plt >>> from numpy.fft import fft, fftshift >>> window = np.hamming(51) >>> plt.plot(window) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Hamming window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Hamming window') >>> plt.ylabel("Amplitude") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Amplitude') >>> plt.xlabel("Sample") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Sample') >>> plt.show() >>> plt.figure() - <matplotlib.figure.Figure object at 0x...> + <Figure size 640x480 with 0 Axes> >>> A = fft(window, 2048) / 25.5 >>> mag = np.abs(fftshift(A)) >>> freq = np.linspace(-0.5, 0.5, len(A)) @@ -2928,13 +2937,13 @@ def hamming(M): >>> plt.plot(freq, response) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Frequency response of Hamming window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Frequency response of Hamming window') >>> plt.ylabel("Magnitude [dB]") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Magnitude [dB]') >>> plt.xlabel("Normalized frequency [cycles per sample]") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Normalized frequency [cycles per sample]') >>> plt.axis('tight') - (-0.5, 0.5, -100.0, ...) + ... >>> plt.show() """ @@ -3083,9 +3092,9 @@ def i0(x): Examples -------- >>> np.i0([0.]) - array(1.0) + array(1.0) # may vary >>> np.i0([0., 1. 
+ 2j]) - array([ 1.00000000+0.j , 0.18785373+0.64616944j]) + array([ 1.00000000+0.j , 0.18785373+0.64616944j]) # may vary """ x = atleast_1d(x).copy() @@ -3180,11 +3189,12 @@ def kaiser(M, beta): Examples -------- + >>> import matplotlib.pyplot as plt >>> np.kaiser(12, 14) - array([ 7.72686684e-06, 3.46009194e-03, 4.65200189e-02, - 2.29737120e-01, 5.99885316e-01, 9.45674898e-01, - 9.45674898e-01, 5.99885316e-01, 2.29737120e-01, - 4.65200189e-02, 3.46009194e-03, 7.72686684e-06]) + array([7.72686684e-06, 3.46009194e-03, 4.65200189e-02, # may vary + 2.29737120e-01, 5.99885316e-01, 9.45674898e-01, + 9.45674898e-01, 5.99885316e-01, 2.29737120e-01, + 4.65200189e-02, 3.46009194e-03, 7.72686684e-06]) Plot the window and the frequency response: @@ -3194,15 +3204,15 @@ def kaiser(M, beta): >>> plt.plot(window) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Kaiser window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Kaiser window') >>> plt.ylabel("Amplitude") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Amplitude') >>> plt.xlabel("Sample") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Sample') >>> plt.show() >>> plt.figure() - <matplotlib.figure.Figure object at 0x...> + <Figure size 640x480 with 0 Axes> >>> A = fft(window, 2048) / 25.5 >>> mag = np.abs(fftshift(A)) >>> freq = np.linspace(-0.5, 0.5, len(A)) @@ -3211,13 +3221,13 @@ def kaiser(M, beta): >>> plt.plot(freq, response) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Frequency response of Kaiser window") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Frequency response of Kaiser window') >>> plt.ylabel("Magnitude [dB]") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Magnitude [dB]') >>> plt.xlabel("Normalized frequency [cycles per sample]") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'Normalized frequency [cycles per sample]') >>> plt.axis('tight') - (-0.5, 0.5, -100.0, ...) + (-0.5, 0.5, -100.0, ...) 
# may vary >>> plt.show() """ @@ -3273,31 +3283,32 @@ def sinc(x): Examples -------- + >>> import matplotlib.pyplot as plt >>> x = np.linspace(-4, 4, 41) >>> np.sinc(x) - array([ -3.89804309e-17, -4.92362781e-02, -8.40918587e-02, + array([-3.89804309e-17, -4.92362781e-02, -8.40918587e-02, # may vary -8.90384387e-02, -5.84680802e-02, 3.89804309e-17, - 6.68206631e-02, 1.16434881e-01, 1.26137788e-01, - 8.50444803e-02, -3.89804309e-17, -1.03943254e-01, + 6.68206631e-02, 1.16434881e-01, 1.26137788e-01, + 8.50444803e-02, -3.89804309e-17, -1.03943254e-01, -1.89206682e-01, -2.16236208e-01, -1.55914881e-01, - 3.89804309e-17, 2.33872321e-01, 5.04551152e-01, - 7.56826729e-01, 9.35489284e-01, 1.00000000e+00, - 9.35489284e-01, 7.56826729e-01, 5.04551152e-01, - 2.33872321e-01, 3.89804309e-17, -1.55914881e-01, - -2.16236208e-01, -1.89206682e-01, -1.03943254e-01, - -3.89804309e-17, 8.50444803e-02, 1.26137788e-01, - 1.16434881e-01, 6.68206631e-02, 3.89804309e-17, + 3.89804309e-17, 2.33872321e-01, 5.04551152e-01, + 7.56826729e-01, 9.35489284e-01, 1.00000000e+00, + 9.35489284e-01, 7.56826729e-01, 5.04551152e-01, + 2.33872321e-01, 3.89804309e-17, -1.55914881e-01, + -2.16236208e-01, -1.89206682e-01, -1.03943254e-01, + -3.89804309e-17, 8.50444803e-02, 1.26137788e-01, + 1.16434881e-01, 6.68206631e-02, 3.89804309e-17, -5.84680802e-02, -8.90384387e-02, -8.40918587e-02, -4.92362781e-02, -3.89804309e-17]) >>> plt.plot(x, np.sinc(x)) [<matplotlib.lines.Line2D object at 0x...>] >>> plt.title("Sinc Function") - <matplotlib.text.Text object at 0x...> + Text(0.5, 1.0, 'Sinc Function') >>> plt.ylabel("Amplitude") - <matplotlib.text.Text object at 0x...> + Text(0, 0.5, 'Amplitude') >>> plt.xlabel("X") - <matplotlib.text.Text object at 0x...> + Text(0.5, 0, 'X') >>> plt.show() It works in 2-D as well: @@ -3469,18 +3480,18 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False): >>> np.median(a) 3.5 >>> np.median(a, axis=0) - array([ 6.5, 4.5, 2.5]) + array([6.5, 4.5, 2.5]) >>> np.median(a, axis=1) - array([ 7., 2.]) + array([7., 2.]) >>> m = np.median(a, axis=0) >>> out = np.zeros_like(m) >>> np.median(a, axis=0, out=m) - array([ 6.5, 4.5, 2.5]) + array([6.5, 4.5, 2.5]) >>> m - array([ 6.5, 4.5, 2.5]) + array([6.5, 4.5, 2.5]) >>> b = a.copy() >>> np.median(b, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a==b) >>> b = a.copy() >>> np.median(b, axis=None, overwrite_input=True) @@ -3647,23 +3658,23 @@ def percentile(a, q, axis=None, out=None, >>> np.percentile(a, 50) 3.5 >>> np.percentile(a, 50, axis=0) - array([[ 6.5, 4.5, 2.5]]) + array([6.5, 4.5, 2.5]) >>> np.percentile(a, 50, axis=1) - array([ 7., 2.]) + array([7., 2.]) >>> np.percentile(a, 50, axis=1, keepdims=True) - array([[ 7.], - [ 2.]]) + array([[7.], + [2.]]) >>> m = np.percentile(a, 50, axis=0) >>> out = np.zeros_like(m) >>> np.percentile(a, 50, axis=0, out=out) - array([[ 6.5, 4.5, 2.5]]) + array([6.5, 4.5, 2.5]) >>> m - array([[ 6.5, 4.5, 2.5]]) + array([6.5, 4.5, 2.5]) >>> b = a.copy() >>> np.percentile(b, 50, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a == b) The different types of interpolation can be visualized graphically: @@ -3789,21 +3800,21 @@ def quantile(a, q, axis=None, out=None, >>> np.quantile(a, 0.5) 3.5 >>> np.quantile(a, 0.5, axis=0) - array([[ 6.5, 4.5, 2.5]]) + array([6.5, 4.5, 2.5]) >>> np.quantile(a, 0.5, axis=1) - array([ 7., 2.]) + array([7., 2.]) >>> np.quantile(a, 0.5, axis=1, keepdims=True) - array([[ 7.], - [ 2.]]) + 
array([[7.], + [2.]]) >>> m = np.quantile(a, 0.5, axis=0) >>> out = np.zeros_like(m) >>> np.quantile(a, 0.5, axis=0, out=out) - array([[ 6.5, 4.5, 2.5]]) + array([6.5, 4.5, 2.5]) >>> m - array([[ 6.5, 4.5, 2.5]]) + array([6.5, 4.5, 2.5]) >>> b = a.copy() >>> np.quantile(b, 0.5, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a == b) """ q = np.asanyarray(q) @@ -3950,8 +3961,6 @@ def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, r = add(x1, x2) if np.any(n): - warnings.warn("Invalid value encountered in percentile", - RuntimeWarning, stacklevel=3) if zerod: if ap.ndim == 1: if out is not None: @@ -4032,9 +4041,9 @@ def trapz(y, x=None, dx=1.0, axis=-1): array([[0, 1, 2], [3, 4, 5]]) >>> np.trapz(a, axis=0) - array([ 1.5, 2.5, 3.5]) + array([1.5, 2.5, 3.5]) >>> np.trapz(a, axis=1) - array([ 2., 8.]) + array([2., 8.]) """ y = asanyarray(y) @@ -4152,17 +4161,17 @@ def meshgrid(*xi, **kwargs): >>> y = np.linspace(0, 1, ny) >>> xv, yv = np.meshgrid(x, y) >>> xv - array([[ 0. , 0.5, 1. ], - [ 0. , 0.5, 1. ]]) + array([[0. , 0.5, 1. ], + [0. , 0.5, 1. ]]) >>> yv - array([[ 0., 0., 0.], - [ 1., 1., 1.]]) + array([[0., 0., 0.], + [1., 1., 1.]]) >>> xv, yv = np.meshgrid(x, y, sparse=True) # make sparse output arrays >>> xv - array([[ 0. , 0.5, 1. ]]) + array([[0. , 0.5, 1. ]]) >>> yv - array([[ 0.], - [ 1.]]) + array([[0.], + [1.]]) `meshgrid` is very useful to evaluate functions on a grid. @@ -4224,7 +4233,7 @@ def delete(arr, obj, axis=None): arr : array_like Input array. obj : slice, int or array of ints - Indicate which sub-arrays to remove. + Indicate indices of sub-arrays to remove along the specified axis. axis : int, optional The axis along which to delete the subarray defined by `obj`. If `axis` is None, `obj` is applied to the flattened array. @@ -4245,6 +4254,7 @@ def delete(arr, obj, axis=None): ----- Often it is preferable to use a boolean mask. For example: + >>> arr = np.arange(12) + 1 >>> mask = np.ones(len(arr), dtype=bool) >>> mask[[0,2,4]] = False >>> result = arr[mask,...] @@ -4476,7 +4486,7 @@ def insert(arr, obj, values, axis=None): [2, 2], [3, 3]]) >>> np.insert(a, 1, 5) - array([1, 5, 1, 2, 2, 3, 3]) + array([1, 5, 1, ..., 2, 3, 3]) >>> np.insert(a, 1, 5, axis=1) array([[1, 5, 1], [2, 5, 2], @@ -4496,13 +4506,13 @@ def insert(arr, obj, values, axis=None): >>> b array([1, 1, 2, 2, 3, 3]) >>> np.insert(b, [2, 2], [5, 6]) - array([1, 1, 5, 6, 2, 2, 3, 3]) + array([1, 1, 5, ..., 2, 3, 3]) >>> np.insert(b, slice(2, 4), [5, 6]) - array([1, 1, 5, 2, 6, 2, 3, 3]) + array([1, 1, 5, ..., 2, 3, 3]) >>> np.insert(b, [2, 2], [7.13, False]) # type casting - array([1, 1, 7, 0, 2, 2, 3, 3]) + array([1, 1, 7, ..., 2, 3, 3]) >>> x = np.arange(8).reshape(2, 4) >>> idx = (1, 3) @@ -4666,7 +4676,7 @@ def append(arr, values, axis=None): Examples -------- >>> np.append([1, 2, 3], [[4, 5, 6], [7, 8, 9]]) - array([1, 2, 3, 4, 5, 6, 7, 8, 9]) + array([1, 2, 3, ..., 7, 8, 9]) When `axis` is specified, `values` must have the correct shape. @@ -4676,8 +4686,8 @@ def append(arr, values, axis=None): [7, 8, 9]]) >>> np.append([[1, 2, 3], [4, 5, 6]], [7, 8, 9], axis=0) Traceback (most recent call last): - ... - ValueError: arrays must have same number of dimensions + ... 
+ ValueError: all the input arrays must have same number of dimensions """ arr = asanyarray(arr) diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 482eabe14..bd44d2732 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -461,7 +461,8 @@ def _histogram_bin_edges_dispatcher(a, bins=None, range=None, weights=None): @array_function_dispatch(_histogram_bin_edges_dispatcher) def histogram_bin_edges(a, bins=10, range=None, weights=None): r""" - Function to calculate only the edges of the bins used by the `histogram` function. + Function to calculate only the edges of the bins used by the `histogram` + function. Parameters ---------- @@ -608,6 +609,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): 'Sqrt' .. math:: n_h = \sqrt n + The simplest and fastest estimator. Only takes into account the data size. @@ -645,7 +647,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): >>> hist_0, bins_0 = np.histogram(arr[group_id == 0], bins='auto') >>> hist_1, bins_1 = np.histogram(arr[group_id == 1], bins='auto') - >>> hist_0; hist1 + >>> hist_0; hist_1 array([1, 1, 1]) array([2, 1, 1, 2]) >>> bins_0; bins_1 @@ -748,14 +750,14 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, >>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3]) (array([0, 2, 1]), array([0, 1, 2, 3])) >>> np.histogram(np.arange(4), bins=np.arange(5), density=True) - (array([ 0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4])) + (array([0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4])) >>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3]) (array([1, 4, 1]), array([0, 1, 2, 3])) >>> a = np.arange(5) >>> hist, bin_edges = np.histogram(a, density=True) >>> hist - array([ 0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5]) + array([0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5]) >>> hist.sum() 2.4999999999999996 >>> np.sum(hist * np.diff(bin_edges)) @@ -770,8 +772,9 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, >>> rng = np.random.RandomState(10) # deterministic random data >>> a = np.hstack((rng.normal(size=1000), ... rng.normal(loc=5, scale=2, size=1000))) - >>> plt.hist(a, bins='auto') # arguments are passed to np.histogram + >>> _ = plt.hist(a, bins='auto') # arguments are passed to np.histogram >>> plt.title("Histogram with 'auto' bins") + Text(0.5, 1.0, "Histogram with 'auto' bins") >>> plt.show() """ diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py index 56abe293a..40c1cda05 100644 --- a/numpy/lib/index_tricks.py +++ b/numpy/lib/index_tricks.py @@ -269,8 +269,9 @@ class OGridClass(nd_grid): the stop value **is inclusive**. Returns - ---------- - mesh-grid `ndarrays` with only one dimension :math:`\\neq 1` + ------- + mesh-grid + `ndarrays` with only one dimension not equal to 1 See Also -------- @@ -478,7 +479,7 @@ class RClass(AxisConcatenator): Examples -------- >>> np.r_[np.array([1,2,3]), 0, 0, np.array([4,5,6])] - array([1, 2, 3, 0, 0, 4, 5, 6]) + array([1, 2, 3, ..., 4, 5, 6]) >>> np.r_[-1:1:6j, [0]*3, 5, 6] array([-1. , -0.6, -0.2, 0.2, 0.6, 1. , 0. , 0. , 0. , 5. , 6. 
]) @@ -538,7 +539,7 @@ class CClass(AxisConcatenator): [2, 5], [3, 6]]) >>> np.c_[np.array([[1,2,3]]), 0, 0, np.array([[4,5,6]])] - array([[1, 2, 3, 0, 0, 4, 5, 6]]) + array([[1, 2, 3, ..., 4, 5, 6]]) """ @@ -812,8 +813,8 @@ def fill_diagonal(a, val, wrap=False): The wrap option affects only tall matrices: >>> # tall matrices no wrap - >>> a = np.zeros((5, 3),int) - >>> fill_diagonal(a, 4) + >>> a = np.zeros((5, 3), int) + >>> np.fill_diagonal(a, 4) >>> a array([[4, 0, 0], [0, 4, 0], @@ -822,8 +823,8 @@ def fill_diagonal(a, val, wrap=False): [0, 0, 0]]) >>> # tall matrices wrap - >>> a = np.zeros((5, 3),int) - >>> fill_diagonal(a, 4, wrap=True) + >>> a = np.zeros((5, 3), int) + >>> np.fill_diagonal(a, 4, wrap=True) >>> a array([[4, 0, 0], [0, 4, 0], @@ -832,13 +833,30 @@ def fill_diagonal(a, val, wrap=False): [4, 0, 0]]) >>> # wide matrices - >>> a = np.zeros((3, 5),int) - >>> fill_diagonal(a, 4, wrap=True) + >>> a = np.zeros((3, 5), int) + >>> np.fill_diagonal(a, 4, wrap=True) >>> a array([[4, 0, 0, 0, 0], [0, 4, 0, 0, 0], [0, 0, 4, 0, 0]]) + The anti-diagonal can be filled by reversing the order of elements + using either `numpy.flipud` or `numpy.fliplr`. + + >>> a = np.zeros((3, 3), int); + >>> np.fill_diagonal(np.fliplr(a), [1,2,3]) # Horizontal flip + >>> a + array([[0, 0, 1], + [0, 2, 0], + [3, 0, 0]]) + >>> np.fill_diagonal(np.flipud(a), [1,2,3]) # Vertical flip + >>> a + array([[0, 0, 3], + [0, 2, 0], + [1, 0, 0]]) + + Note that the order in which the diagonal is filled varies depending + on the flip function. """ if a.ndim < 2: raise ValueError("array must be at least 2-d") diff --git a/numpy/lib/mixins.py b/numpy/lib/mixins.py index 0379ecb1a..52ad45b68 100644 --- a/numpy/lib/mixins.py +++ b/numpy/lib/mixins.py @@ -69,9 +69,6 @@ class NDArrayOperatorsMixin(object): deferring to the ``__array_ufunc__`` method, which subclasses must implement. - This class does not yet implement the special operators corresponding - to ``matmul`` (``@``), because ``np.matmul`` is not yet a NumPy ufunc. - It is useful for writing classes that do not inherit from `numpy.ndarray`, but that should support arithmetic and numpy universal functions like arrays as described in `A Mechanism for Overriding Ufuncs @@ -155,6 +152,8 @@ class NDArrayOperatorsMixin(object): __add__, __radd__, __iadd__ = _numeric_methods(um.add, 'add') __sub__, __rsub__, __isub__ = _numeric_methods(um.subtract, 'sub') __mul__, __rmul__, __imul__ = _numeric_methods(um.multiply, 'mul') + __matmul__, __rmatmul__, __imatmul__ = _numeric_methods( + um.matmul, 'matmul') if sys.version_info.major < 3: # Python 3 uses only __truediv__ and __floordiv__ __div__, __rdiv__, __idiv__ = _numeric_methods(um.divide, 'div') diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index d73d84467..77c851fcf 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -40,6 +40,33 @@ __all__ = [ ] +def _nan_mask(a, out=None): + """ + Parameters + ---------- + a : array-like + Input array with at least 1 dimension. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output and will prevent the allocation of a new array. + + Returns + ------- + y : bool ndarray or True + A bool array where ``np.nan`` positions are marked with ``False`` + and other positions are marked with ``True``. If the type of ``a`` + is such that it can't possibly contain ``np.nan``, returns ``True``. 
+ """ + # we assume that a is an array for this private function + + if a.dtype.kind not in 'fc': + return True + + y = np.isnan(a, out=out) + y = np.invert(y, out=y) + return y + def _replace_nan(a, val): """ If `a` is of inexact type, make a copy of `a`, replace NaNs with @@ -271,9 +298,9 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue): >>> np.nanmin(a) 1.0 >>> np.nanmin(a, axis=0) - array([ 1., 2.]) + array([1., 2.]) >>> np.nanmin(a, axis=1) - array([ 1., 3.]) + array([1., 3.]) When positive infinity and negative infinity are present: @@ -384,9 +411,9 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue): >>> np.nanmax(a) 3.0 >>> np.nanmax(a, axis=0) - array([ 3., 2.]) + array([3., 2.]) >>> np.nanmax(a, axis=1) - array([ 2., 3.]) + array([2., 3.]) When positive infinity and negative infinity are present: @@ -601,12 +628,15 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): >>> np.nansum(a) 3.0 >>> np.nansum(a, axis=0) - array([ 2., 1.]) + array([2., 1.]) >>> np.nansum([1, np.nan, np.inf]) inf >>> np.nansum([1, np.nan, np.NINF]) -inf - >>> np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present + >>> from numpy.testing import suppress_warnings + >>> with suppress_warnings() as sup: + ... sup.filter(RuntimeWarning) + ... np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present nan """ @@ -677,7 +707,7 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): >>> np.nanprod(a) 6.0 >>> np.nanprod(a, axis=0) - array([ 3., 2.]) + array([3., 2.]) """ a, mask = _replace_nan(a, 1) @@ -738,16 +768,16 @@ def nancumsum(a, axis=None, dtype=None, out=None): >>> np.nancumsum([1]) array([1]) >>> np.nancumsum([1, np.nan]) - array([ 1., 1.]) + array([1., 1.]) >>> a = np.array([[1, 2], [3, np.nan]]) >>> np.nancumsum(a) - array([ 1., 3., 6., 6.]) + array([1., 3., 6., 6.]) >>> np.nancumsum(a, axis=0) - array([[ 1., 2.], - [ 4., 2.]]) + array([[1., 2.], + [4., 2.]]) >>> np.nancumsum(a, axis=1) - array([[ 1., 3.], - [ 3., 3.]]) + array([[1., 3.], + [3., 3.]]) """ a, mask = _replace_nan(a, 0) @@ -805,16 +835,16 @@ def nancumprod(a, axis=None, dtype=None, out=None): >>> np.nancumprod([1]) array([1]) >>> np.nancumprod([1, np.nan]) - array([ 1., 1.]) + array([1., 1.]) >>> a = np.array([[1, 2], [3, np.nan]]) >>> np.nancumprod(a) - array([ 1., 2., 6., 6.]) + array([1., 2., 6., 6.]) >>> np.nancumprod(a, axis=0) - array([[ 1., 2.], - [ 3., 2.]]) + array([[1., 2.], + [3., 2.]]) >>> np.nancumprod(a, axis=1) - array([[ 1., 2.], - [ 3., 3.]]) + array([[1., 2.], + [3., 3.]]) """ a, mask = _replace_nan(a, 1) @@ -895,9 +925,9 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): >>> np.nanmean(a) 2.6666666666666665 >>> np.nanmean(a, axis=0) - array([ 2., 4.]) + array([2., 4.]) >>> np.nanmean(a, axis=1) - array([ 1., 3.5]) + array([1., 3.5]) # may vary """ arr, mask = _replace_nan(a, 0) @@ -1049,19 +1079,19 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValu >>> a = np.array([[10.0, 7, 4], [3, 2, 1]]) >>> a[0, 1] = np.nan >>> a - array([[ 10., nan, 4.], - [ 3., 2., 1.]]) + array([[10., nan, 4.], + [ 3., 2., 1.]]) >>> np.median(a) nan >>> np.nanmedian(a) 3.0 >>> np.nanmedian(a, axis=0) - array([ 6.5, 2., 2.5]) + array([6.5, 2. 
, 2.5]) >>> np.median(a, axis=1) - array([ 7., 2.]) + array([nan, 2.]) >>> b = a.copy() >>> np.nanmedian(b, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a==b) >>> b = a.copy() >>> np.nanmedian(b, axis=None, overwrite_input=True) @@ -1177,27 +1207,27 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) >>> a[0][1] = np.nan >>> a - array([[ 10., nan, 4.], - [ 3., 2., 1.]]) + array([[10., nan, 4.], + [ 3., 2., 1.]]) >>> np.percentile(a, 50) nan >>> np.nanpercentile(a, 50) - 3.5 + 3.0 >>> np.nanpercentile(a, 50, axis=0) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> np.nanpercentile(a, 50, axis=1, keepdims=True) - array([[ 7.], - [ 2.]]) + array([[7.], + [2.]]) >>> m = np.nanpercentile(a, 50, axis=0) >>> out = np.zeros_like(m) >>> np.nanpercentile(a, 50, axis=0, out=out) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> m - array([ 6.5, 2. , 2.5]) + array([6.5, 2. , 2.5]) >>> b = a.copy() >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a==b) """ @@ -1291,26 +1321,26 @@ def nanquantile(a, q, axis=None, out=None, overwrite_input=False, >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) >>> a[0][1] = np.nan >>> a - array([[ 10., nan, 4.], - [ 3., 2., 1.]]) + array([[10., nan, 4.], + [ 3., 2., 1.]]) >>> np.quantile(a, 0.5) nan >>> np.nanquantile(a, 0.5) - 3.5 + 3.0 >>> np.nanquantile(a, 0.5, axis=0) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> np.nanquantile(a, 0.5, axis=1, keepdims=True) - array([[ 7.], - [ 2.]]) + array([[7.], + [2.]]) >>> m = np.nanquantile(a, 0.5, axis=0) >>> out = np.zeros_like(m) >>> np.nanquantile(a, 0.5, axis=0, out=out) - array([ 6.5, 2., 2.5]) + array([6.5, 2. , 2.5]) >>> m - array([ 6.5, 2. , 2.5]) + array([6.5, 2. 
, 2.5]) >>> b = a.copy() >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True) - array([ 7., 2.]) + array([7., 2.]) >>> assert not np.all(a==b) """ a = np.asanyarray(a) @@ -1465,12 +1495,12 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): Examples -------- >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.var(a) + >>> np.nanvar(a) 1.5555555555555554 >>> np.nanvar(a, axis=0) - array([ 1., 0.]) + array([1., 0.]) >>> np.nanvar(a, axis=1) - array([ 0., 0.25]) + array([0., 0.25]) # may vary """ arr, mask = _replace_nan(a, 0) @@ -1619,9 +1649,9 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): >>> np.nanstd(a) 1.247219128924647 >>> np.nanstd(a, axis=0) - array([ 1., 0.]) + array([1., 0.]) >>> np.nanstd(a, axis=1) - array([ 0., 0.5]) + array([0., 0.5]) # may vary """ var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index f623c58e7..ed2e26aac 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -7,6 +7,7 @@ import functools import itertools import warnings import weakref +import contextlib from operator import itemgetter, index as opindex import numpy as np @@ -23,10 +24,9 @@ from ._iotools import ( ) from numpy.compat import ( - asbytes, asstr, asunicode, asbytes_nested, bytes, basestring, unicode, - os_fspath, os_PathLike + asbytes, asstr, asunicode, bytes, basestring, os_fspath, os_PathLike, + pickle, contextlib_nullcontext ) -from numpy.core.numeric import pickle if sys.version_info[0] >= 3: from collections.abc import Mapping @@ -146,7 +146,11 @@ class NpzFile(Mapping): An object on which attribute can be performed as an alternative to getitem access on the `NpzFile` instance itself. allow_pickle : bool, optional - Allow loading pickled data. Default: True + Allow loading pickled data. Default: False + + .. versionchanged:: 1.16.3 + Made default False in response to CVE-2019-6446. + pickle_kwargs : dict, optional Additional keyword arguments to pass on to pickle.load. These are only useful when loading object arrays saved on @@ -168,13 +172,13 @@ class NpzFile(Mapping): >>> x = np.arange(10) >>> y = np.sin(x) >>> np.savez(outfile, x=x, y=y) - >>> outfile.seek(0) + >>> _ = outfile.seek(0) >>> npz = np.load(outfile) >>> isinstance(npz, np.lib.io.NpzFile) True - >>> npz.files - ['y', 'x'] + >>> sorted(npz.files) + ['x', 'y'] >>> npz['x'] # getitem access array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) >>> npz.f.x # attribute lookup @@ -182,7 +186,7 @@ class NpzFile(Mapping): """ - def __init__(self, fid, own_fid=False, allow_pickle=True, + def __init__(self, fid, own_fid=False, allow_pickle=False, pickle_kwargs=None): # Import is postponed to here since zipfile depends on gzip, an # optional component of the so-called standard library. @@ -285,11 +289,17 @@ class NpzFile(Mapping): @set_module('numpy') -def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, +def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII'): """ Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files. + .. warning:: Loading files that contain object arrays uses the ``pickle`` + module, which is not secure against erroneous or maliciously + constructed data. Consider passing ``allow_pickle=False`` to + load data that is known not to contain object arrays for the + safer handling of untrusted sources. 
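
With the flipped default, loading an object array now requires an explicit opt-in. A short sketch of both sides of the change (the exact exception text varies between releases):

    import io
    import numpy as np

    buf = io.BytesIO()
    np.save(buf, np.array([{'answer': 42}], dtype=object))  # save still pickles
    buf.seek(0)

    try:
        np.load(buf)              # allow_pickle defaults to False as of 1.16.3
    except ValueError:            # "Object arrays cannot be loaded ..."
        buf.seek(0)
        data = np.load(buf, allow_pickle=True)  # opt in only for trusted files
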
+ Parameters ---------- file : file-like object, string, or pathlib.Path @@ -307,8 +317,11 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True, Allow loading pickled object arrays stored in npy files. Reasons for disallowing pickles include security, as loading pickled data can execute arbitrary code. If pickles are disallowed, loading object - arrays will fail. - Default: True + arrays will fail. Default: False + + .. versionchanged:: 1.16.3 + Made default False in response to CVE-2019-6446. + fix_imports : bool, optional Only useful when loading Python 2 generated pickled files on Python 3, which includes npy/npz files containing object arrays. If `fix_imports` @@ -502,7 +515,7 @@ def save(file, arr, allow_pickle=True, fix_imports=True): >>> x = np.arange(10) >>> np.save(outfile, x) - >>> outfile.seek(0) # Only needed here to simulate closing & reopening file + >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file >>> np.load(outfile) array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) @@ -597,10 +610,10 @@ def savez(file, *args, **kwds): Using `savez` with \\*args, the arrays are saved with default names. >>> np.savez(outfile, x, y) - >>> outfile.seek(0) # Only needed here to simulate closing & reopening file + >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file >>> npzfile = np.load(outfile) >>> npzfile.files - ['arr_1', 'arr_0'] + ['arr_0', 'arr_1'] >>> npzfile['arr_0'] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) @@ -608,10 +621,10 @@ def savez(file, *args, **kwds): >>> outfile = TemporaryFile() >>> np.savez(outfile, x=x, y=y) - >>> outfile.seek(0) + >>> _ = outfile.seek(0) >>> npzfile = np.load(outfile) - >>> npzfile.files - ['y', 'x'] + >>> sorted(npzfile.files) + ['x', 'y'] >>> npzfile['x'] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) @@ -829,7 +842,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``. Default: None. skiprows : int, optional - Skip the first `skiprows` lines; default: 0. + Skip the first `skiprows` lines, including comments; default: 0. usecols : int or sequence, optional Which columns to read, with 0 being the first. For example, ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns. @@ -891,21 +904,21 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, >>> from io import StringIO # StringIO behaves like a file object >>> c = StringIO(u"0 1\\n2 3") >>> np.loadtxt(c) - array([[ 0., 1.], - [ 2., 3.]]) + array([[0., 1.], + [2., 3.]]) >>> d = StringIO(u"M 21 72\\nF 35 58") >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'), ... 
'formats': ('S1', 'i4', 'f4')}) - array([('M', 21, 72.0), ('F', 35, 58.0)], - dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')]) + array([(b'M', 21, 72.), (b'F', 35, 58.)], + dtype=[('gender', 'S1'), ('age', '<i4'), ('weight', '<f4')]) >>> c = StringIO(u"1,0,2\\n3,0,4") >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True) >>> x - array([ 1., 3.]) + array([1., 3.]) >>> y - array([ 2., 4.]) + array([2., 4.]) """ # Type conversions for Py3 convenience @@ -1118,7 +1131,6 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, if type(x) is bytes: return conv(x) return conv(x.encode("latin1")) - import functools converters[i] = functools.partial(tobytes_first, conv=conv) else: converters[i] = conv @@ -1376,7 +1388,7 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', # Complex dtype -- each field indicates a separate column else: - ncol = len(X.dtype.descr) + ncol = len(X.dtype.names) else: ncol = X.shape[1] @@ -1481,17 +1493,17 @@ def fromregex(file, regexp, dtype, encoding=None): Examples -------- >>> f = open('test.dat', 'w') - >>> f.write("1312 foo\\n1534 bar\\n444 qux") + >>> _ = f.write("1312 foo\\n1534 bar\\n444 qux") >>> f.close() >>> regexp = r"(\\d+)\\s+(...)" # match [digits, whitespace, anything] >>> output = np.fromregex('test.dat', regexp, ... [('num', np.int64), ('key', 'S3')]) >>> output - array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')], - dtype=[('num', '<i8'), ('key', '|S3')]) + array([(1312, b'foo'), (1534, b'bar'), ( 444, b'qux')], + dtype=[('num', '<i8'), ('key', 'S3')]) >>> output['num'] - array([1312, 1534, 444], dtype=int64) + array([1312, 1534, 444]) """ own_fh = False @@ -1674,26 +1686,26 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'), ... ('mystring','S5')], delimiter=",") >>> data - array((1, 1.3, 'abcde'), - dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')]) + array((1, 1.3, b'abcde'), + dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) Using dtype = None - >>> s.seek(0) # needed for StringIO example only + >>> _ = s.seek(0) # needed for StringIO example only >>> data = np.genfromtxt(s, dtype=None, ... names = ['myint','myfloat','mystring'], delimiter=",") >>> data - array((1, 1.3, 'abcde'), - dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')]) + array((1, 1.3, b'abcde'), + dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) Specifying dtype and names - >>> s.seek(0) + >>> _ = s.seek(0) >>> data = np.genfromtxt(s, dtype="i8,f8,S5", ... names=['myint','myfloat','mystring'], delimiter=",") >>> data - array((1, 1.3, 'abcde'), - dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')]) + array((1, 1.3, b'abcde'), + dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) An example with fixed-width columns @@ -1701,8 +1713,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'], ... 
delimiter=[1,3,5]) >>> data - array((1, 1.3, 'abcde'), - dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')]) + array((1, 1.3, b'abcde'), + dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', 'S5')]) """ if max_rows is not None: @@ -1729,301 +1741,299 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, byte_converters = False # Initialize the filehandle, the LineSplitter and the NameValidator - own_fhd = False try: if isinstance(fname, os_PathLike): fname = os_fspath(fname) if isinstance(fname, basestring): - fhd = iter(np.lib._datasource.open(fname, 'rt', encoding=encoding)) - own_fhd = True + fid = np.lib._datasource.open(fname, 'rt', encoding=encoding) + fid_ctx = contextlib.closing(fid) else: - fhd = iter(fname) + fid = fname + fid_ctx = contextlib_nullcontext(fid) + fhd = iter(fid) except TypeError: raise TypeError( "fname must be a string, filehandle, list of strings, " "or generator. Got %s instead." % type(fname)) - split_line = LineSplitter(delimiter=delimiter, comments=comments, - autostrip=autostrip, encoding=encoding) - validate_names = NameValidator(excludelist=excludelist, - deletechars=deletechars, - case_sensitive=case_sensitive, - replace_space=replace_space) + with fid_ctx: + split_line = LineSplitter(delimiter=delimiter, comments=comments, + autostrip=autostrip, encoding=encoding) + validate_names = NameValidator(excludelist=excludelist, + deletechars=deletechars, + case_sensitive=case_sensitive, + replace_space=replace_space) - # Skip the first `skip_header` rows - for i in range(skip_header): - next(fhd) - - # Keep on until we find the first valid values - first_values = None - try: - while not first_values: - first_line = _decode_line(next(fhd), encoding) - if (names is True) and (comments is not None): - if comments in first_line: - first_line = ( - ''.join(first_line.split(comments)[1:])) - first_values = split_line(first_line) - except StopIteration: - # return an empty array if the datafile is empty - first_line = '' - first_values = [] - warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) - - # Should we take the first values as names ? - if names is True: - fval = first_values[0].strip() - if comments is not None: - if fval in comments: - del first_values[0] + # Skip the first `skip_header` rows + for i in range(skip_header): + next(fhd) - # Check the columns to use: make sure `usecols` is a list - if usecols is not None: + # Keep on until we find the first valid values + first_values = None try: - usecols = [_.strip() for _ in usecols.split(",")] - except AttributeError: + while not first_values: + first_line = _decode_line(next(fhd), encoding) + if (names is True) and (comments is not None): + if comments in first_line: + first_line = ( + ''.join(first_line.split(comments)[1:])) + first_values = split_line(first_line) + except StopIteration: + # return an empty array if the datafile is empty + first_line = '' + first_values = [] + warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) + + # Should we take the first values as names ? 
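The refactor above routes both cases (a path we opened ourselves, and a handle the caller owns) through a single ``with`` block; ``contextlib_nullcontext`` from ``numpy.compat`` plays the role of ``contextlib.nullcontext`` (Python 3.7+) on older interpreters. A minimal sketch of the pattern, with a hypothetical ``open_for_read`` helper:

import contextlib

try:
    nullcontext = contextlib.nullcontext  # Python >= 3.7
except AttributeError:
    class nullcontext(object):
        # No-op context manager: hands back its argument, closes nothing.
        def __init__(self, enter_result=None):
            self.enter_result = enter_result
        def __enter__(self):
            return self.enter_result
        def __exit__(self, *exc_info):
            pass

def open_for_read(fname):
    # Return (handle, context); the context closes the handle only
    # if we opened it here, mirroring the fid/fid_ctx pairing above.
    if isinstance(fname, str):
        fid = open(fname, 'rt')
        return fid, contextlib.closing(fid)
    return fname, nullcontext(fname)

Unlike the old ``own_fhd`` flag, whose explicit ``close()`` is dropped further down, the context manager also releases the file when parsing raises.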
+ if names is True: + fval = first_values[0].strip() + if comments is not None: + if fval in comments: + del first_values[0] + + # Check the columns to use: make sure `usecols` is a list + if usecols is not None: try: - usecols = list(usecols) - except TypeError: - usecols = [usecols, ] - nbcols = len(usecols or first_values) - - # Check the names and overwrite the dtype.names if needed - if names is True: - names = validate_names([str(_.strip()) for _ in first_values]) - first_line = '' - elif _is_string_like(names): - names = validate_names([_.strip() for _ in names.split(',')]) - elif names: - names = validate_names(names) - # Get the dtype - if dtype is not None: - dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names, - excludelist=excludelist, - deletechars=deletechars, - case_sensitive=case_sensitive, - replace_space=replace_space) - # Make sure the names is a list (for 2.5) - if names is not None: - names = list(names) - - if usecols: - for (i, current) in enumerate(usecols): - # if usecols is a list of names, convert to a list of indices - if _is_string_like(current): - usecols[i] = names.index(current) - elif current < 0: - usecols[i] = current + len(first_values) - # If the dtype is not None, make sure we update it - if (dtype is not None) and (len(dtype) > nbcols): - descr = dtype.descr - dtype = np.dtype([descr[_] for _ in usecols]) - names = list(dtype.names) - # If `names` is not None, update the names - elif (names is not None) and (len(names) > nbcols): - names = [names[_] for _ in usecols] - elif (names is not None) and (dtype is not None): - names = list(dtype.names) - - # Process the missing values ............................... - # Rename missing_values for convenience - user_missing_values = missing_values or () - if isinstance(user_missing_values, bytes): - user_missing_values = user_missing_values.decode('latin1') - - # Define the list of missing_values (one column: one list) - missing_values = [list(['']) for _ in range(nbcols)] - - # We have a dictionary: process it field by field - if isinstance(user_missing_values, dict): - # Loop on the items - for (key, val) in user_missing_values.items(): - # Is the key a string ? 
- if _is_string_like(key): - try: - # Transform it into an integer - key = names.index(key) - except ValueError: - # We couldn't find it: the name must have been dropped - continue - # Redefine the key as needed if it's a column number - if usecols: + usecols = [_.strip() for _ in usecols.split(",")] + except AttributeError: try: - key = usecols.index(key) - except ValueError: - pass - # Transform the value as a list of string - if isinstance(val, (list, tuple)): - val = [str(_) for _ in val] + usecols = list(usecols) + except TypeError: + usecols = [usecols, ] + nbcols = len(usecols or first_values) + + # Check the names and overwrite the dtype.names if needed + if names is True: + names = validate_names([str(_.strip()) for _ in first_values]) + first_line = '' + elif _is_string_like(names): + names = validate_names([_.strip() for _ in names.split(',')]) + elif names: + names = validate_names(names) + # Get the dtype + if dtype is not None: + dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names, + excludelist=excludelist, + deletechars=deletechars, + case_sensitive=case_sensitive, + replace_space=replace_space) + # Make sure the names is a list (for 2.5) + if names is not None: + names = list(names) + + if usecols: + for (i, current) in enumerate(usecols): + # if usecols is a list of names, convert to a list of indices + if _is_string_like(current): + usecols[i] = names.index(current) + elif current < 0: + usecols[i] = current + len(first_values) + # If the dtype is not None, make sure we update it + if (dtype is not None) and (len(dtype) > nbcols): + descr = dtype.descr + dtype = np.dtype([descr[_] for _ in usecols]) + names = list(dtype.names) + # If `names` is not None, update the names + elif (names is not None) and (len(names) > nbcols): + names = [names[_] for _ in usecols] + elif (names is not None) and (dtype is not None): + names = list(dtype.names) + + # Process the missing values ............................... + # Rename missing_values for convenience + user_missing_values = missing_values or () + if isinstance(user_missing_values, bytes): + user_missing_values = user_missing_values.decode('latin1') + + # Define the list of missing_values (one column: one list) + missing_values = [list(['']) for _ in range(nbcols)] + + # We have a dictionary: process it field by field + if isinstance(user_missing_values, dict): + # Loop on the items + for (key, val) in user_missing_values.items(): + # Is the key a string ? 
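This dict handling (shown twice as it is reindented into the ``with`` block) is what lets ``missing_values`` and ``filling_values`` be keyed by column name or index. An illustrative use, assuming the default float dtype and modern array repr:

>>> from io import StringIO
>>> txt = u"1,N/A,3\n4,5,N/A"
>>> np.genfromtxt(StringIO(txt), delimiter=",",
...               missing_values={1: "N/A", 2: "N/A"},
...               filling_values={1: -1.0, 2: -9.0})
array([[ 1., -1.,  3.],
       [ 4.,  5., -9.]])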
+ if _is_string_like(key): + try: + # Transform it into an integer + key = names.index(key) + except ValueError: + # We couldn't find it: the name must have been dropped + continue + # Redefine the key as needed if it's a column number + if usecols: + try: + key = usecols.index(key) + except ValueError: + pass + # Transform the value as a list of string + if isinstance(val, (list, tuple)): + val = [str(_) for _ in val] + else: + val = [str(val), ] + # Add the value(s) to the current list of missing + if key is None: + # None acts as default + for miss in missing_values: + miss.extend(val) + else: + missing_values[key].extend(val) + # We have a sequence : each item matches a column + elif isinstance(user_missing_values, (list, tuple)): + for (value, entry) in zip(user_missing_values, missing_values): + value = str(value) + if value not in entry: + entry.append(value) + # We have a string : apply it to all entries + elif isinstance(user_missing_values, basestring): + user_value = user_missing_values.split(",") + for entry in missing_values: + entry.extend(user_value) + # We have something else: apply it to all entries + else: + for entry in missing_values: + entry.extend([str(user_missing_values)]) + + # Process the filling_values ............................... + # Rename the input for convenience + user_filling_values = filling_values + if user_filling_values is None: + user_filling_values = [] + # Define the default + filling_values = [None] * nbcols + # We have a dictionary : update each entry individually + if isinstance(user_filling_values, dict): + for (key, val) in user_filling_values.items(): + if _is_string_like(key): + try: + # Transform it into an integer + key = names.index(key) + except ValueError: + # We couldn't find it: the name must have been dropped, + continue + # Redefine the key if it's a column number and usecols is defined + if usecols: + try: + key = usecols.index(key) + except ValueError: + pass + # Add the value to the list + filling_values[key] = val + # We have a sequence : update on a one-to-one basis + elif isinstance(user_filling_values, (list, tuple)): + n = len(user_filling_values) + if (n <= nbcols): + filling_values[:n] = user_filling_values else: - val = [str(val), ] - # Add the value(s) to the current list of missing - if key is None: - # None acts as default - for miss in missing_values: - miss.extend(val) + filling_values = user_filling_values[:nbcols] + # We have something else : use it for all entries + else: + filling_values = [user_filling_values] * nbcols + + # Initialize the converters ................................ + if dtype is None: + # Note: we can't use a [...]*nbcols, as we would have 3 times the same + # ... converter, instead of 3 different converters. 
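The comment just above is about ordinary Python list aliasing: ``[converter] * nbcols`` would create ``nbcols`` references to one converter, not ``nbcols`` converters. A quick illustration:

>>> shared = [[]] * 3  # three names for the same list
>>> shared[0].append('x')
>>> shared
[['x'], ['x'], ['x']]
>>> distinct = [[] for _ in range(3)]  # three separate lists
>>> distinct[0].append('x')
>>> distinct
[['x'], [], []]

Since each ``StringConverter`` is mutated as its column is parsed, sharing one instance would silently couple all columns.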
+ converters = [StringConverter(None, missing_values=miss, default=fill) + for (miss, fill) in zip(missing_values, filling_values)] + else: + dtype_flat = flatten_dtype(dtype, flatten_base=True) + # Initialize the converters + if len(dtype_flat) > 1: + # Flexible type : get a converter from each dtype + zipit = zip(dtype_flat, missing_values, filling_values) + converters = [StringConverter(dt, locked=True, + missing_values=miss, default=fill) + for (dt, miss, fill) in zipit] else: - missing_values[key].extend(val) - # We have a sequence : each item matches a column - elif isinstance(user_missing_values, (list, tuple)): - for (value, entry) in zip(user_missing_values, missing_values): - value = str(value) - if value not in entry: - entry.append(value) - # We have a string : apply it to all entries - elif isinstance(user_missing_values, basestring): - user_value = user_missing_values.split(",") - for entry in missing_values: - entry.extend(user_value) - # We have something else: apply it to all entries - else: - for entry in missing_values: - entry.extend([str(user_missing_values)]) - - # Process the filling_values ............................... - # Rename the input for convenience - user_filling_values = filling_values - if user_filling_values is None: - user_filling_values = [] - # Define the default - filling_values = [None] * nbcols - # We have a dictionary : update each entry individually - if isinstance(user_filling_values, dict): - for (key, val) in user_filling_values.items(): - if _is_string_like(key): + # Set to a default converter (but w/ different missing values) + zipit = zip(missing_values, filling_values) + converters = [StringConverter(dtype, locked=True, + missing_values=miss, default=fill) + for (miss, fill) in zipit] + # Update the converters to use the user-defined ones + uc_update = [] + for (j, conv) in user_converters.items(): + # If the converter is specified by column names, use the index instead + if _is_string_like(j): try: - # Transform it into an integer - key = names.index(key) + j = names.index(j) + i = j except ValueError: - # We couldn't find it: the name must have been dropped, continue - # Redefine the key if it's a column number and usecols is defined - if usecols: + elif usecols: try: - key = usecols.index(key) + i = usecols.index(j) except ValueError: - pass - # Add the value to the list - filling_values[key] = val - # We have a sequence : update on a one-to-one basis - elif isinstance(user_filling_values, (list, tuple)): - n = len(user_filling_values) - if (n <= nbcols): - filling_values[:n] = user_filling_values - else: - filling_values = user_filling_values[:nbcols] - # We have something else : use it for all entries - else: - filling_values = [user_filling_values] * nbcols - - # Initialize the converters ................................ - if dtype is None: - # Note: we can't use a [...]*nbcols, as we would have 3 times the same - # ... converter, instead of 3 different converters. 
- converters = [StringConverter(None, missing_values=miss, default=fill) - for (miss, fill) in zip(missing_values, filling_values)] - else: - dtype_flat = flatten_dtype(dtype, flatten_base=True) - # Initialize the converters - if len(dtype_flat) > 1: - # Flexible type : get a converter from each dtype - zipit = zip(dtype_flat, missing_values, filling_values) - converters = [StringConverter(dt, locked=True, - missing_values=miss, default=fill) - for (dt, miss, fill) in zipit] - else: - # Set to a default converter (but w/ different missing values) - zipit = zip(missing_values, filling_values) - converters = [StringConverter(dtype, locked=True, - missing_values=miss, default=fill) - for (miss, fill) in zipit] - # Update the converters to use the user-defined ones - uc_update = [] - for (j, conv) in user_converters.items(): - # If the converter is specified by column names, use the index instead - if _is_string_like(j): - try: - j = names.index(j) + # Unused converter specified + continue + else: i = j - except ValueError: - continue - elif usecols: - try: - i = usecols.index(j) - except ValueError: - # Unused converter specified + # Find the value to test - first_line is not filtered by usecols: + if len(first_line): + testing_value = first_values[j] + else: + testing_value = None + if conv is bytes: + user_conv = asbytes + elif byte_converters: + # converters may use decode to workaround numpy's old behaviour, + # so encode the string again before passing to the user converter + def tobytes_first(x, conv): + if type(x) is bytes: + return conv(x) + return conv(x.encode("latin1")) + user_conv = functools.partial(tobytes_first, conv=conv) + else: + user_conv = conv + converters[i].update(user_conv, locked=True, + testing_value=testing_value, + default=filling_values[i], + missing_values=missing_values[i],) + uc_update.append((i, user_conv)) + # Make sure we have the corrected keys in user_converters... + user_converters.update(uc_update) + + # Fixme: possible error as following variable never used. + # miss_chars = [_.missing_values for _ in converters] + + # Initialize the output lists ... + # ... rows + rows = [] + append_to_rows = rows.append + # ... masks + if usemask: + masks = [] + append_to_masks = masks.append + # ... invalid + invalid = [] + append_to_invalid = invalid.append + + # Parse each line + for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): + values = split_line(line) + nbvalues = len(values) + # Skip an empty line + if nbvalues == 0: continue - else: - i = j - # Find the value to test - first_line is not filtered by usecols: - if len(first_line): - testing_value = first_values[j] - else: - testing_value = None - if conv is bytes: - user_conv = asbytes - elif byte_converters: - # converters may use decode to workaround numpy's old behaviour, - # so encode the string again before passing to the user converter - def tobytes_first(x, conv): - if type(x) is bytes: - return conv(x) - return conv(x.encode("latin1")) - import functools - user_conv = functools.partial(tobytes_first, conv=conv) - else: - user_conv = conv - converters[i].update(user_conv, locked=True, - testing_value=testing_value, - default=filling_values[i], - missing_values=missing_values[i],) - uc_update.append((i, user_conv)) - # Make sure we have the corrected keys in user_converters... - user_converters.update(uc_update) - - # Fixme: possible error as following variable never used. - # miss_chars = [_.missing_values for _ in converters] - - # Initialize the output lists ... - # ... 
rows - rows = [] - append_to_rows = rows.append - # ... masks - if usemask: - masks = [] - append_to_masks = masks.append - # ... invalid - invalid = [] - append_to_invalid = invalid.append - - # Parse each line - for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): - values = split_line(line) - nbvalues = len(values) - # Skip an empty line - if nbvalues == 0: - continue - if usecols: - # Select only the columns we need - try: - values = [values[_] for _ in usecols] - except IndexError: + if usecols: + # Select only the columns we need + try: + values = [values[_] for _ in usecols] + except IndexError: + append_to_invalid((i + skip_header + 1, nbvalues)) + continue + elif nbvalues != nbcols: append_to_invalid((i + skip_header + 1, nbvalues)) continue - elif nbvalues != nbcols: - append_to_invalid((i + skip_header + 1, nbvalues)) - continue - # Store the values - append_to_rows(tuple(values)) - if usemask: - append_to_masks(tuple([v.strip() in m - for (v, m) in zip(values, - missing_values)])) - if len(rows) == max_rows: - break - - if own_fhd: - fhd.close() + # Store the values + append_to_rows(tuple(values)) + if usemask: + append_to_masks(tuple([v.strip() in m + for (v, m) in zip(values, + missing_values)])) + if len(rows) == max_rows: + break # Upgrade the converters (if needed) if dtype is None: @@ -2126,10 +2136,10 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if names is None: # If the dtype is uniform (before sizing strings) - base = set([ + base = { c_type for c, c_type in zip(converters, column_types) - if c._checked]) + if c._checked} if len(base) == 1: uniform_type, = base (ddtype, mdtype) = (uniform_type, bool) diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py index 81af185eb..1f08abf36 100644 --- a/numpy/lib/polynomial.py +++ b/numpy/lib/polynomial.py @@ -110,7 +110,7 @@ def poly(seq_of_zeros): Given a sequence of a polynomial's zeros: >>> np.poly((0, 0, 0)) # Multiple root example - array([1, 0, 0, 0]) + array([1., 0., 0., 0.]) The line above represents z**3 + 0*z**2 + 0*z + 0. @@ -119,14 +119,14 @@ def poly(seq_of_zeros): The line above represents z**3 - z/4 - >>> np.poly((np.random.random(1.)[0], 0, np.random.random(1.)[0])) - array([ 1. , -0.77086955, 0.08618131, 0. ]) #random + >>> np.poly((np.random.random(1)[0], 0, np.random.random(1)[0])) + array([ 1. , -0.77086955, 0.08618131, 0. ]) # random Given a square array object: >>> P = np.array([[0, 1./3], [-1./2, 0]]) >>> np.poly(P) - array([ 1. , 0. , 0.16666667]) + array([1. , 0. , 0.16666667]) Note how in all cases the leading coefficient is always 1. @@ -295,7 +295,7 @@ def polyint(p, m=1, k=None): >>> p = np.poly1d([1,1,1]) >>> P = np.polyint(p) >>> P - poly1d([ 0.33333333, 0.5 , 1. , 0. ]) + poly1d([ 0.33333333, 0.5 , 1. , 0. ]) # may vary >>> np.polyder(P) == p True @@ -310,7 +310,7 @@ def polyint(p, m=1, k=None): 0.0 >>> P = np.polyint(p, 3, k=[6,5,3]) >>> P - poly1d([ 0.01666667, 0.04166667, 0.16666667, 3. , 5. , 3. ]) + poly1d([ 0.01666667, 0.04166667, 0.16666667, 3. , 5. , 3. ]) # may vary Note that 3 = 6 / 2!, and that the constants are given in the order of integrations. Constant of the highest-order polynomial term comes first: @@ -404,7 +404,7 @@ def polyder(p, m=1): >>> np.polyder(p, 3) poly1d([6]) >>> np.polyder(p, 4) - poly1d([ 0.]) + poly1d([0.]) """ m = int(m) @@ -463,9 +463,14 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): w : array_like, shape (M,), optional Weights to apply to the y-coordinates of the sample points. 
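Putting the ``w`` and ``cov`` changes in this file together, an indicative sketch of a weighted fit with known per-point uncertainty ``sigma`` (the numbers are illustrative only; the sentence that follows explains the recommended weighting):

>>> x = np.linspace(0., 9., 10)
>>> y = 3.0 * x + 1.0 + np.random.normal(scale=0.5, size=x.shape)
>>> sigma = np.full_like(y, 0.5)
>>> coef, V = np.polyfit(x, y, 1, w=1/sigma, cov='unscaled')
>>> coef  # doctest: +SKIP
array([2.99, 1.04])  # may vary
>>> np.sqrt(np.diag(V))  # 1-sigma errors on slope and intercept  # doctest: +SKIP
array([0.055, 0.294])  # may vary

``cov='unscaled'`` is the new spelling introduced below for weights that are true 1/sigma values.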
For Gaussian uncertainties, use 1/sigma (not 1/sigma**2). - cov : bool, optional - Return the estimate and the covariance matrix of the estimate - If full is True, then cov is not returned. + cov : bool or str, optional + If given and not `False`, return not just the estimate but also its + covariance matrix. By default, the covariance is scaled by + chi2/sqrt(N-dof), i.e., the weights are presumed to be unreliable + except in a relative sense and everything is scaled such that the + reduced chi2 is unity. This scaling is omitted if ``cov='unscaled'``, + as is relevant for the case that the weights are 1/sigma**2, with + sigma known to be a reliable estimate of the uncertainty. Returns ------- @@ -543,32 +548,35 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): Examples -------- + >>> import warnings >>> x = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]) >>> y = np.array([0.0, 0.8, 0.9, 0.1, -0.8, -1.0]) >>> z = np.polyfit(x, y, 3) >>> z - array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254]) + array([ 0.08703704, -0.81349206, 1.69312169, -0.03968254]) # may vary It is convenient to use `poly1d` objects for dealing with polynomials: >>> p = np.poly1d(z) >>> p(0.5) - 0.6143849206349179 + 0.6143849206349179 # may vary >>> p(3.5) - -0.34732142857143039 + -0.34732142857143039 # may vary >>> p(10) - 22.579365079365115 + 22.579365079365115 # may vary High-order polynomials may oscillate wildly: - >>> p30 = np.poly1d(np.polyfit(x, y, 30)) - /... RankWarning: Polyfit may be poorly conditioned... + >>> with warnings.catch_warnings(): ... warnings.simplefilter('ignore', np.RankWarning) ... p30 = np.poly1d(np.polyfit(x, y, 30)) ... >>> p30(4) - -0.80000000000000204 + -0.80000000000000204 # may vary >>> p30(5) - -0.99999999999999445 + -0.99999999999999445 # may vary >>> p30(4.5) - -0.10547061179440398 + -0.10547061179440398 # may vary Illustration: @@ -626,21 +634,24 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): # warn on rank reduction, which indicates an ill conditioned matrix if rank != order and not full: msg = "Polyfit may be poorly conditioned" - warnings.warn(msg, RankWarning, stacklevel=2) + warnings.warn(msg, RankWarning, stacklevel=3) if full: return c, resids, rank, s, rcond elif cov: Vbase = inv(dot(lhs.T, lhs)) Vbase /= NX.outer(scale, scale) - # Some literature ignores the extra -2.0 factor in the denominator, but - # it is included here because the covariance of Multivariate Student-T - # (which is implied by a Bayesian uncertainty analysis) includes it. - # Plus, it gives a slightly more conservative estimate of uncertainty. - if len(x) <= order + 2: - raise ValueError("the number of data points must exceed order + 2 " - "for Bayesian estimate the covariance matrix") - fac = resids / (len(x) - order - 2.0) + if cov == "unscaled": + fac = 1 + else: + if len(x) <= order: + raise ValueError("the number of data points must exceed order " + "to scale the covariance matrix") + # note, this used to be: fac = resids / (len(x) - order - 2.0) + # it was decided that the "- 2" (originally justified by "Bayesian + # uncertainty analysis") is not what the user expects + # (see gh-11196 and gh-11197) + fac = resids / (len(x) - order) if y.ndim == 1: return c, Vbase * fac else: @@ -695,6 +706,8 @@ def polyval(p, x): for polynomials of high degree the values may be inaccurate due to rounding errors. Use carefully. + If `x` is a subtype of `ndarray`, the return value will be of the same type. + References ---------- .. [1] I. N. Bronshtein, K. A.
Semendyayev, and K. A. Hirsch (Eng. @@ -706,18 +719,18 @@ def polyval(p, x): >>> np.polyval([3,0,1], 5) # 3 * 5**2 + 0 * 5**1 + 1 76 >>> np.polyval([3,0,1], np.poly1d(5)) - poly1d([ 76.]) + poly1d([76.]) >>> np.polyval(np.poly1d([3,0,1]), 5) 76 >>> np.polyval(np.poly1d([3,0,1]), np.poly1d(5)) - poly1d([ 76.]) + poly1d([76.]) """ p = NX.asarray(p) if isinstance(x, poly1d): y = 0 else: - x = NX.asarray(x) + x = NX.asanyarray(x) y = NX.zeros_like(x) for i in range(len(p)): y = y * x + p[i] @@ -863,8 +876,7 @@ def polymul(a1, a2): See Also -------- poly1d : A one-dimensional polynomial class. - poly, polyadd, polyder, polydiv, polyfit, polyint, polysub, - polyval + poly, polyadd, polyder, polydiv, polyfit, polyint, polysub, polyval convolve : Array convolution. Same output as polymul, but has parameter for overlap mode. @@ -926,7 +938,7 @@ def polydiv(u, v): See Also -------- - poly, polyadd, polyder, polydiv, polyfit, polyint, polymul, polysub, + poly, polyadd, polyder, polydiv, polyfit, polyint, polymul, polysub polyval Notes @@ -943,7 +955,7 @@ def polydiv(u, v): >>> x = np.array([3.0, 5.0, 2.0]) >>> y = np.array([2.0, 1.0]) >>> np.polydiv(x, y) - (array([ 1.5 , 1.75]), array([ 0.25])) + (array([1.5 , 1.75]), array([0.25])) """ truepoly = (isinstance(u, poly1d) or isinstance(u, poly1d)) @@ -1038,7 +1050,7 @@ class poly1d(object): >>> p.r array([-1.+1.41421356j, -1.-1.41421356j]) >>> p(p.r) - array([ -4.44089210e-16+0.j, -4.44089210e-16+0.j]) + array([ -4.44089210e-16+0.j, -4.44089210e-16+0.j]) # may vary These numbers in the previous line represent (0, 0) to machine precision @@ -1065,7 +1077,7 @@ class poly1d(object): poly1d([ 1, 4, 10, 12, 9]) >>> (p**3 + 4) / p - (poly1d([ 1., 4., 10., 12., 9.]), poly1d([ 4.])) + (poly1d([ 1., 4., 10., 12., 9.]), poly1d([4.])) ``asarray(p)`` gives the coefficient array, so polynomials can be used in all functions that accept arrays: @@ -1087,7 +1099,7 @@ class poly1d(object): Construct a polynomial from its roots: >>> np.poly1d([1, 2], True) - poly1d([ 1, -3, 2]) + poly1d([ 1., -3., 2.]) This is the same polynomial as obtained by: @@ -1099,8 +1111,14 @@ class poly1d(object): @property def coeffs(self): - """ A copy of the polynomial coefficients """ - return self._coeffs.copy() + """ The polynomial coefficients """ + return self._coeffs + + @coeffs.setter + def coeffs(self, value): + # allowing this makes p.coeffs *= 2 legal + if value is not self._coeffs: + raise AttributeError("Cannot set attribute") @property def variable(self): diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py index bf588a490..ccbcfad91 100644 --- a/numpy/lib/recfunctions.py +++ b/numpy/lib/recfunctions.py @@ -57,11 +57,10 @@ def recursive_fill_fields(input, output): Examples -------- >>> from numpy.lib import recfunctions as rfn - >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', int), ('B', float)]) + >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', np.int64), ('B', np.float64)]) >>> b = np.zeros((3,), dtype=a.dtype) >>> rfn.recursive_fill_fields(a, b) - array([(1, 10.0), (2, 20.0), (0, 0.0)], - dtype=[('A', '<i4'), ('B', '<f8')]) + array([(1, 10.), (2, 20.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')]) """ newdtype = output.dtype @@ -89,11 +88,11 @@ def get_fieldspec(dtype): Examples -------- - >>> dt = np.dtype([(('a', 'A'), int), ('b', float, 3)]) + >>> dt = np.dtype([(('a', 'A'), np.int64), ('b', np.double, 3)]) >>> dt.descr - [(('a', 'A'), '<i4'), ('b', '<f8', (3,))] + [(('a', 'A'), '<i8'), ('b', '<f8', (3,))] >>> get_fieldspec(dt) - [(('a', 'A'), 
dtype('int32')), ('b', dtype(('<f8', (3,))))] + [(('a', 'A'), dtype('int64')), ('b', dtype(('<f8', (3,))))] """ if dtype.names is None: @@ -120,10 +119,15 @@ def get_names(adtype): Examples -------- >>> from numpy.lib import recfunctions as rfn - >>> rfn.get_names(np.empty((1,), dtype=int)) is None - True + >>> rfn.get_names(np.empty((1,), dtype=int)) + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' + >>> rfn.get_names(np.empty((1,), dtype=[('A',int), ('B', float)])) - ('A', 'B') + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])]) >>> rfn.get_names(adtype) ('a', ('b', ('ba', 'bb'))) @@ -153,9 +157,13 @@ def get_names_flat(adtype): -------- >>> from numpy.lib import recfunctions as rfn >>> rfn.get_names_flat(np.empty((1,), dtype=int)) is None - True + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' >>> rfn.get_names_flat(np.empty((1,), dtype=[('A',int), ('B', float)])) - ('A', 'B') + Traceback (most recent call last): + ... + AttributeError: 'numpy.ndarray' object has no attribute 'names' >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])]) >>> rfn.get_names_flat(adtype) ('a', 'b', 'ba', 'bb') @@ -403,20 +411,18 @@ def merge_arrays(seqarrays, fill_value=-1, flatten=False, -------- >>> from numpy.lib import recfunctions as rfn >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.]))) - masked_array(data = [(1, 10.0) (2, 20.0) (--, 30.0)], - mask = [(False, False) (False, False) (True, False)], - fill_value = (999999, 1e+20), - dtype = [('f0', '<i4'), ('f1', '<f8')]) - - >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])), - ... usemask=False) - array([(1, 10.0), (2, 20.0), (-1, 30.0)], - dtype=[('f0', '<i4'), ('f1', '<f8')]) - >>> rfn.merge_arrays((np.array([1, 2]).view([('a', int)]), + array([( 1, 10.), ( 2, 20.), (-1, 30.)], + dtype=[('f0', '<i8'), ('f1', '<f8')]) + + >>> rfn.merge_arrays((np.array([1, 2], dtype=np.int64), + ... np.array([10., 20., 30.])), usemask=False) + array([(1, 10.0), (2, 20.0), (-1, 30.0)], + dtype=[('f0', '<i8'), ('f1', '<f8')]) + >>> rfn.merge_arrays((np.array([1, 2]).view([('a', np.int64)]), ... np.array([10., 20., 30.])), ... usemask=False, asrecarray=True) - rec.array([(1, 10.0), (2, 20.0), (-1, 30.0)], - dtype=[('a', '<i4'), ('f1', '<f8')]) + rec.array([( 1, 10.), ( 2, 20.), (-1, 30.)], + dtype=[('a', '<i8'), ('f1', '<f8')]) Notes ----- @@ -547,16 +553,14 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False): -------- >>> from numpy.lib import recfunctions as rfn >>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], - ... dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + ... 
dtype=[('a', np.int64), ('b', [('ba', np.double), ('bb', np.int64)])]) >>> rfn.drop_fields(a, 'a') - array([((2.0, 3),), ((5.0, 6),)], - dtype=[('b', [('ba', '<f8'), ('bb', '<i4')])]) + array([((2., 3),), ((5., 6),)], + dtype=[('b', [('ba', '<f8'), ('bb', '<i8')])]) >>> rfn.drop_fields(a, 'ba') - array([(1, (3,)), (4, (6,))], - dtype=[('a', '<i4'), ('b', [('bb', '<i4')])]) + array([(1, (3,)), (4, (6,))], dtype=[('a', '<i8'), ('b', [('bb', '<i8')])]) >>> rfn.drop_fields(a, ['ba', 'bb']) - array([(1,), (4,)], - dtype=[('a', '<i4')]) + array([(1,), (4,)], dtype=[('a', '<i8')]) """ if _is_string_like(drop_names): drop_names = [drop_names] @@ -648,8 +652,8 @@ def rename_fields(base, namemapper): >>> a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))], ... dtype=[('a', int),('b', [('ba', float), ('bb', (float, 2))])]) >>> rfn.rename_fields(a, {'a':'A', 'bb':'BB'}) - array([(1, (2.0, [3.0, 30.0])), (4, (5.0, [6.0, 60.0]))], - dtype=[('A', '<i4'), ('b', [('ba', '<f8'), ('BB', '<f8', 2)])]) + array([(1, (2., [ 3., 30.])), (4, (5., [ 6., 60.]))], + dtype=[('A', '<i8'), ('b', [('ba', '<f8'), ('BB', '<f8', (2,))])]) """ def _recursive_rename_fields(ndtype, namemapper): @@ -834,18 +838,18 @@ def repack_fields(a, align=False, recurse=False): ... print("offsets:", [d.fields[name][1] for name in d.names]) ... print("itemsize:", d.itemsize) ... - >>> dt = np.dtype('u1,i4,f4', align=True) + >>> dt = np.dtype('u1,<i4,<f4', align=True) >>> dt - dtype({'names':['f0','f1','f2'], 'formats':['u1','<i4','<f8'], 'offsets':[0,4,8], 'itemsize':16}, align=True) + dtype({'names':['f0','f1','f2'], 'formats':['u1','<i8','<f8'], 'offsets':[0,8,16], 'itemsize':24}, align=True) >>> print_offsets(dt) - offsets: [0, 4, 8] - itemsize: 16 + offsets: [0, 8, 16] + itemsize: 24 >>> packed_dt = repack_fields(dt) >>> packed_dt - dtype([('f0', 'u1'), ('f1', '<i4'), ('f2', '<f8')]) + dtype([('f0', 'u1'), ('f1', '<i8'), ('f2', '<f8')]) >>> print_offsets(packed_dt) - offsets: [0, 1, 5] - itemsize: 13 + offsets: [0, 1, 9] + itemsize: 17 """ if not isinstance(a, np.dtype): @@ -1244,15 +1248,16 @@ def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False, True >>> z = np.array([('A', 1), ('B', 2)], dtype=[('A', '|S3'), ('B', float)]) >>> zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], - ... dtype=[('A', '|S3'), ('B', float), ('C', float)]) + ... dtype=[('A', '|S3'), ('B', np.double), ('C', np.double)]) >>> test = rfn.stack_arrays((z,zz)) >>> test - masked_array(data = [('A', 1.0, --) ('B', 2.0, --) ('a', 10.0, 100.0) ('b', 20.0, 200.0) - ('c', 30.0, 300.0)], - mask = [(False, False, True) (False, False, True) (False, False, False) - (False, False, False) (False, False, False)], - fill_value = ('N/A', 1e+20, 1e+20), - dtype = [('A', '|S3'), ('B', '<f8'), ('C', '<f8')]) + masked_array(data=[(b'A', 1.0, --), (b'B', 2.0, --), (b'a', 10.0, 100.0), + (b'b', 20.0, 200.0), (b'c', 30.0, 300.0)], + mask=[(False, False, True), (False, False, True), + (False, False, False), (False, False, False), + (False, False, False)], + fill_value=(b'N/A', 1.e+20, 1.e+20), + dtype=[('A', 'S3'), ('B', '<f8'), ('C', '<f8')]) """ if isinstance(arrays, ndarray): @@ -1331,7 +1336,10 @@ def find_duplicates(a, key=None, ignoremask=True, return_index=False): >>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3], ... mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype) >>> rfn.find_duplicates(a, ignoremask=True, return_index=True) - ... 
# XXX: judging by the output, the ignoremask flag has no effect + (masked_array(data=[(1,), (1,), (2,), (2,)], + mask=[(False,), (False,), (False,), (False,)], + fill_value=(999999,), + dtype=[('a', '<i8')]), array([0, 1, 3, 4])) """ a = np.asanyarray(a).ravel() # Get a dictionary of fields diff --git a/numpy/lib/scimath.py b/numpy/lib/scimath.py index 9ca006841..5ac790ce9 100644 --- a/numpy/lib/scimath.py +++ b/numpy/lib/scimath.py @@ -59,7 +59,7 @@ def _tocomplex(arr): >>> a = np.array([1,2,3],np.short) >>> ac = np.lib.scimath._tocomplex(a); ac - array([ 1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) + array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) >>> ac.dtype dtype('complex64') @@ -70,7 +70,7 @@ def _tocomplex(arr): >>> b = np.array([1,2,3],np.double) >>> bc = np.lib.scimath._tocomplex(b); bc - array([ 1.+0.j, 2.+0.j, 3.+0.j]) + array([1.+0.j, 2.+0.j, 3.+0.j]) >>> bc.dtype dtype('complex128') @@ -81,13 +81,13 @@ def _tocomplex(arr): >>> c = np.array([1,2,3],np.csingle) >>> cc = np.lib.scimath._tocomplex(c); cc - array([ 1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) + array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) >>> c *= 2; c - array([ 2.+0.j, 4.+0.j, 6.+0.j], dtype=complex64) + array([2.+0.j, 4.+0.j, 6.+0.j], dtype=complex64) >>> cc - array([ 1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) + array([1.+0.j, 2.+0.j, 3.+0.j], dtype=complex64) """ if issubclass(arr.dtype.type, (nt.single, nt.byte, nt.short, nt.ubyte, nt.ushort, nt.csingle)): @@ -170,7 +170,7 @@ def _fix_real_abs_gt_1(x): array([0, 1]) >>> np.lib.scimath._fix_real_abs_gt_1([0,2]) - array([ 0.+0.j, 2.+0.j]) + array([0.+0.j, 2.+0.j]) """ x = asarray(x) if any(isreal(x) & (abs(x) > 1)): @@ -212,14 +212,14 @@ def sqrt(x): >>> np.lib.scimath.sqrt(1) 1.0 >>> np.lib.scimath.sqrt([1, 4]) - array([ 1., 2.]) + array([1., 2.]) But it automatically handles negative inputs: >>> np.lib.scimath.sqrt(-1) - (0.0+1.0j) + 1j >>> np.lib.scimath.sqrt([-1,4]) - array([ 0.+1.j, 2.+0.j]) + array([0.+1.j, 2.+0.j]) """ x = _fix_real_lt_zero(x) @@ -317,7 +317,7 @@ def log10(x): 1.0 >>> np.emath.log10([-10**1, -10**2, 10**2]) - array([ 1.+1.3644j, 2.+1.3644j, 2.+0.j ]) + array([1.+1.3644j, 2.+1.3644j, 2.+0.j ]) """ x = _fix_real_lt_zero(x) @@ -354,9 +354,9 @@ def logn(n, x): >>> np.set_printoptions(precision=4) >>> np.lib.scimath.logn(2, [4, 8]) - array([ 2., 3.]) + array([2., 3.]) >>> np.lib.scimath.logn(2, [-4, -8, 8]) - array([ 2.+4.5324j, 3.+4.5324j, 3.+0.j ]) + array([2.+4.5324j, 3.+4.5324j, 3.+0.j ]) """ x = _fix_real_lt_zero(x) @@ -405,7 +405,7 @@ def log2(x): >>> np.emath.log2(8) 3.0 >>> np.emath.log2([-4, -8, 8]) - array([ 2.+4.5324j, 3.+4.5324j, 3.+0.j ]) + array([2.+4.5324j, 3.+4.5324j, 3.+0.j ]) """ x = _fix_real_lt_zero(x) @@ -451,9 +451,9 @@ def power(x, p): >>> np.lib.scimath.power([2, 4], 2) array([ 4, 16]) >>> np.lib.scimath.power([2, 4], -2) - array([ 0.25 , 0.0625]) + array([0.25 , 0.0625]) >>> np.lib.scimath.power([-2, 4], 2) - array([ 4.+0.j, 16.+0.j]) + array([ 4.-0.j, 16.+0.j]) """ x = _fix_real_lt_zero(x) @@ -499,7 +499,7 @@ def arccos(x): 0.0 >>> np.emath.arccos([1,2]) - array([ 0.-0.j , 0.+1.317j]) + array([0.-0.j , 0.-1.317j]) """ x = _fix_real_abs_gt_1(x) @@ -545,7 +545,7 @@ def arcsin(x): 0.0 >>> np.emath.arcsin([0,1]) - array([ 0. , 1.5708]) + array([0. 
, 1.5708]) """ x = _fix_real_abs_gt_1(x) @@ -589,11 +589,14 @@ def arctanh(x): -------- >>> np.set_printoptions(precision=4) - >>> np.emath.arctanh(np.eye(2)) - array([[ Inf, 0.], - [ 0., Inf]]) + >>> from numpy.testing import suppress_warnings + >>> with suppress_warnings() as sup: + ... sup.filter(RuntimeWarning) + ... np.emath.arctanh(np.eye(2)) + array([[inf, 0.], + [ 0., inf]]) >>> np.emath.arctanh([1j]) - array([ 0.+0.7854j]) + array([0.+0.7854j]) """ x = _fix_real_abs_gt_1(x) diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py index f56c4f4db..ac2a25604 100644 --- a/numpy/lib/shape_base.py +++ b/numpy/lib/shape_base.py @@ -11,8 +11,7 @@ from numpy.core.fromnumeric import product, reshape, transpose from numpy.core.multiarray import normalize_axis_index from numpy.core import overrides from numpy.core import vstack, atleast_3d -from numpy.core.shape_base import ( - _arrays_for_stack_dispatcher, _warn_for_nonsequence) +from numpy.core.shape_base import _arrays_for_stack_dispatcher from numpy.lib.index_tricks import ndindex from numpy.matrixlib.defmatrix import matrix # this raises all the right alarm bells @@ -129,7 +128,7 @@ def take_along_axis(arr, indices, axis): [40, 50, 60]]) >>> ai = np.argsort(a, axis=1); ai array([[0, 2, 1], - [1, 2, 0]], dtype=int64) + [1, 2, 0]]) >>> np.take_along_axis(a, ai, axis=1) array([[10, 20, 30], [40, 50, 60]]) @@ -142,7 +141,7 @@ def take_along_axis(arr, indices, axis): >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1) >>> ai array([[1], - [0], dtype=int64) + [0]]) >>> np.take_along_axis(a, ai, axis=1) array([[30], [60]]) @@ -152,10 +151,10 @@ def take_along_axis(arr, indices, axis): >>> ai_min = np.expand_dims(np.argmin(a, axis=1), axis=1) >>> ai_max = np.expand_dims(np.argmax(a, axis=1), axis=1) - >>> ai = np.concatenate([ai_min, ai_max], axis=axis) - >> ai + >>> ai = np.concatenate([ai_min, ai_max], axis=1) + >>> ai array([[0, 1], - [1, 0]], dtype=int64) + [1, 0]]) >>> np.take_along_axis(a, ai, axis=1) array([[10, 30], [40, 60]]) @@ -243,7 +242,7 @@ def put_along_axis(arr, indices, values, axis): >>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1) >>> ai array([[1], - [0]], dtype=int64) + [0]]) >>> np.put_along_axis(a, ai, 99, axis=1) >>> a array([[10, 99, 20], @@ -330,9 +329,9 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs): ... return (a[0] + a[-1]) * 0.5 >>> b = np.array([[1,2,3], [4,5,6], [7,8,9]]) >>> np.apply_along_axis(my_func, 0, b) - array([ 4., 5., 6.]) + array([4., 5., 6.]) >>> np.apply_along_axis(my_func, 1, b) - array([ 2., 5., 8.]) + array([2., 5., 8.]) For a function that returns a 1D array, the number of dimensions in `outarr` is the same as `arr`. 
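To make that last point concrete, an illustrative case where ``func1d`` maps a length-3 row to a length-2 row, so the result stays 2-D:

>>> def pair_means(a):
...     # 1-D in, 1-D out: means of adjacent entries
...     return (a[:-1] + a[1:]) * 0.5
>>> b = np.array([[1., 2., 3.], [4., 5., 6.]])
>>> np.apply_along_axis(pair_means, 1, b)
array([[1.5, 2.5],
       [4.5, 5.5]])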
@@ -630,7 +629,6 @@ def column_stack(tup): [3, 4]]) """ - _warn_for_nonsequence(tup) arrays = [] for v in tup: arr = array(v, copy=False, subok=True) @@ -695,7 +693,6 @@ def dstack(tup): [[3, 4]]]) """ - _warn_for_nonsequence(tup) return _nx.concatenate([atleast_3d(_m) for _m in tup], 2) @@ -732,11 +729,11 @@ def array_split(ary, indices_or_sections, axis=0): -------- >>> x = np.arange(8.0) >>> np.array_split(x, 3) - [array([ 0., 1., 2.]), array([ 3., 4., 5.]), array([ 6., 7.])] + [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7.])] >>> x = np.arange(7.0) >>> np.array_split(x, 3) - [array([ 0., 1., 2.]), array([ 3., 4.]), array([ 5., 6.])] + [array([0., 1., 2.]), array([3., 4.]), array([5., 6.])] """ try: @@ -828,14 +825,14 @@ def split(ary, indices_or_sections, axis=0): -------- >>> x = np.arange(9.0) >>> np.split(x, 3) - [array([ 0., 1., 2.]), array([ 3., 4., 5.]), array([ 6., 7., 8.])] + [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7., 8.])] >>> x = np.arange(8.0) >>> np.split(x, [3, 5, 6, 10]) - [array([ 0., 1., 2.]), - array([ 3., 4.]), - array([ 5.]), - array([ 6., 7.]), + [array([0., 1., 2.]), + array([3., 4.]), + array([5.]), + array([6., 7.]), array([], dtype=float64)] """ @@ -872,43 +869,43 @@ def hsplit(ary, indices_or_sections): -------- >>> x = np.arange(16.0).reshape(4, 4) >>> x - array([[ 0., 1., 2., 3.], - [ 4., 5., 6., 7.], - [ 8., 9., 10., 11.], - [ 12., 13., 14., 15.]]) + array([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.], + [12., 13., 14., 15.]]) >>> np.hsplit(x, 2) [array([[ 0., 1.], [ 4., 5.], [ 8., 9.], - [ 12., 13.]]), + [12., 13.]]), array([[ 2., 3.], [ 6., 7.], - [ 10., 11.], - [ 14., 15.]])] + [10., 11.], + [14., 15.]])] >>> np.hsplit(x, np.array([3, 6])) - [array([[ 0., 1., 2.], - [ 4., 5., 6.], - [ 8., 9., 10.], - [ 12., 13., 14.]]), - array([[ 3.], - [ 7.], - [ 11.], - [ 15.]]), - array([], dtype=float64)] + [array([[ 0., 1., 2.], + [ 4., 5., 6.], + [ 8., 9., 10.], + [12., 13., 14.]]), + array([[ 3.], + [ 7.], + [11.], + [15.]]), + array([], shape=(4, 0), dtype=float64)] With a higher dimensional array the split is still along the second axis. >>> x = np.arange(8.0).reshape(2, 2, 2) >>> x - array([[[ 0., 1.], - [ 2., 3.]], - [[ 4., 5.], - [ 6., 7.]]]) + array([[[0., 1.], + [2., 3.]], + [[4., 5.], + [6., 7.]]]) >>> np.hsplit(x, 2) - [array([[[ 0., 1.]], - [[ 4., 5.]]]), - array([[[ 2., 3.]], - [[ 6., 7.]]])] + [array([[[0., 1.]], + [[4., 5.]]]), + array([[[2., 3.]], + [[6., 7.]]])] """ if _nx.ndim(ary) == 0: @@ -936,35 +933,31 @@ def vsplit(ary, indices_or_sections): -------- >>> x = np.arange(16.0).reshape(4, 4) >>> x - array([[ 0., 1., 2., 3.], - [ 4., 5., 6., 7.], - [ 8., 9., 10., 11.], - [ 12., 13., 14., 15.]]) + array([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.], + [12., 13., 14., 15.]]) >>> np.vsplit(x, 2) - [array([[ 0., 1., 2., 3.], - [ 4., 5., 6., 7.]]), - array([[ 8., 9., 10., 11.], - [ 12., 13., 14., 15.]])] + [array([[0., 1., 2., 3.], + [4., 5., 6., 7.]]), array([[ 8., 9., 10., 11.], + [12., 13., 14., 15.]])] >>> np.vsplit(x, np.array([3, 6])) - [array([[ 0., 1., 2., 3.], - [ 4., 5., 6., 7.], - [ 8., 9., 10., 11.]]), - array([[ 12., 13., 14., 15.]]), - array([], dtype=float64)] + [array([[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.], + [ 8., 9., 10., 11.]]), array([[12., 13., 14., 15.]]), array([], shape=(0, 4), dtype=float64)] With a higher dimensional array the split is still along the first axis. 
>>> x = np.arange(8.0).reshape(2, 2, 2) >>> x - array([[[ 0., 1.], - [ 2., 3.]], - [[ 4., 5.], - [ 6., 7.]]]) + array([[[0., 1.], + [2., 3.]], + [[4., 5.], + [6., 7.]]]) >>> np.vsplit(x, 2) - [array([[[ 0., 1.], - [ 2., 3.]]]), - array([[[ 4., 5.], - [ 6., 7.]]])] + [array([[[0., 1.], + [2., 3.]]]), array([[[4., 5.], + [6., 7.]]])] """ if _nx.ndim(ary) < 2: @@ -989,30 +982,28 @@ def dsplit(ary, indices_or_sections): -------- >>> x = np.arange(16.0).reshape(2, 2, 4) >>> x - array([[[ 0., 1., 2., 3.], - [ 4., 5., 6., 7.]], - [[ 8., 9., 10., 11.], - [ 12., 13., 14., 15.]]]) + array([[[ 0., 1., 2., 3.], + [ 4., 5., 6., 7.]], + [[ 8., 9., 10., 11.], + [12., 13., 14., 15.]]]) >>> np.dsplit(x, 2) - [array([[[ 0., 1.], - [ 4., 5.]], - [[ 8., 9.], - [ 12., 13.]]]), - array([[[ 2., 3.], - [ 6., 7.]], - [[ 10., 11.], - [ 14., 15.]]])] + [array([[[ 0., 1.], + [ 4., 5.]], + [[ 8., 9.], + [12., 13.]]]), array([[[ 2., 3.], + [ 6., 7.]], + [[10., 11.], + [14., 15.]]])] >>> np.dsplit(x, np.array([3, 6])) - [array([[[ 0., 1., 2.], - [ 4., 5., 6.]], - [[ 8., 9., 10.], - [ 12., 13., 14.]]]), - array([[[ 3.], - [ 7.]], - [[ 11.], - [ 15.]]]), - array([], dtype=float64)] - + [array([[[ 0., 1., 2.], + [ 4., 5., 6.]], + [[ 8., 9., 10.], + [12., 13., 14.]]]), + array([[[ 3.], + [ 7.]], + [[11.], + [15.]]]), + array([], shape=(2, 2, 0), dtype=float64)] """ if _nx.ndim(ary) < 3: raise ValueError('dsplit only works on arrays of 3 or more dimensions') @@ -1092,15 +1083,15 @@ def kron(a, b): Examples -------- >>> np.kron([1,10,100], [5,6,7]) - array([ 5, 6, 7, 50, 60, 70, 500, 600, 700]) + array([ 5, 6, 7, ..., 500, 600, 700]) >>> np.kron([5,6,7], [1,10,100]) - array([ 5, 50, 500, 6, 60, 600, 7, 70, 700]) + array([ 5, 50, 500, ..., 7, 70, 700]) >>> np.kron(np.eye(2), np.ones((2,2))) - array([[ 1., 1., 0., 0.], - [ 1., 1., 0., 0.], - [ 0., 0., 1., 1.], - [ 0., 0., 1., 1.]]) + array([[1., 1., 0., 0.], + [1., 1., 0., 0.], + [0., 0., 1., 1.], + [0., 0., 1., 1.]]) >>> a = np.arange(100).reshape((2,5,2,5)) >>> b = np.arange(24).reshape((2,3,4)) diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py index b4888f1bd..e04fdc808 100644 --- a/numpy/lib/tests/test__iotools.py +++ b/numpy/lib/tests/test__iotools.py @@ -1,6 +1,5 @@ from __future__ import division, absolute_import, print_function -import sys import time from datetime import date @@ -246,7 +245,7 @@ class TestStringConverter(object): converter = StringConverter(int, default=0, missing_values="N/A") assert_equal( - converter.missing_values, set(['', 'N/A'])) + converter.missing_values, {'', 'N/A'}) def test_int64_dtype(self): "Check that int64 integer types can be specified" diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py index 20f6e4a1b..b7630cdcd 100644 --- a/numpy/lib/tests/test_arraypad.py +++ b/numpy/lib/tests/test_arraypad.py @@ -2,18 +2,31 @@ """ from __future__ import division, absolute_import, print_function +from itertools import chain import pytest import numpy as np -from numpy.testing import (assert_array_equal, assert_raises, assert_allclose, - assert_equal) -from numpy.lib import pad +from numpy.testing import assert_array_equal, assert_allclose, assert_equal from numpy.lib.arraypad import _as_pairs -class TestAsPairs(object): +_all_modes = { + 'constant': {'constant_values': 0}, + 'edge': {}, + 'linear_ramp': {'end_values': 0}, + 'maximum': {'stat_length': None}, + 'mean': {'stat_length': None}, + 'median': {'stat_length': None}, + 'minimum': {'stat_length': None}, + 'reflect': {'reflect_type': 
'even'}, + 'symmetric': {'reflect_type': 'even'}, + 'wrap': {}, + 'empty': {} +} + +class TestAsPairs(object): def test_single_value(self): """Test casting for a single value.""" expected = np.array([[3, 3]] * 10) @@ -97,52 +110,31 @@ class TestAsPairs(object): class TestConditionalShortcuts(object): - def test_zero_padding_shortcuts(self): + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_zero_padding_shortcuts(self, mode): test = np.arange(120).reshape(4, 5, 6) - pad_amt = [(0, 0) for axis in test.shape] - modes = ['constant', - 'edge', - 'linear_ramp', - 'maximum', - 'mean', - 'median', - 'minimum', - 'reflect', - 'symmetric', - 'wrap', - ] - for mode in modes: - assert_array_equal(test, pad(test, pad_amt, mode=mode)) - - def test_shallow_statistic_range(self): + pad_amt = [(0, 0) for _ in test.shape] + assert_array_equal(test, np.pad(test, pad_amt, mode=mode)) + + @pytest.mark.parametrize("mode", ['maximum', 'mean', 'median', 'minimum',]) + def test_shallow_statistic_range(self, mode): test = np.arange(120).reshape(4, 5, 6) - pad_amt = [(1, 1) for axis in test.shape] - modes = ['maximum', - 'mean', - 'median', - 'minimum', - ] - for mode in modes: - assert_array_equal(pad(test, pad_amt, mode='edge'), - pad(test, pad_amt, mode=mode, stat_length=1)) - - def test_clip_statistic_range(self): + pad_amt = [(1, 1) for _ in test.shape] + assert_array_equal(np.pad(test, pad_amt, mode='edge'), + np.pad(test, pad_amt, mode=mode, stat_length=1)) + + @pytest.mark.parametrize("mode", ['maximum', 'mean', 'median', 'minimum',]) + def test_clip_statistic_range(self, mode): test = np.arange(30).reshape(5, 6) - pad_amt = [(3, 3) for axis in test.shape] - modes = ['maximum', - 'mean', - 'median', - 'minimum', - ] - for mode in modes: - assert_array_equal(pad(test, pad_amt, mode=mode), - pad(test, pad_amt, mode=mode, stat_length=30)) + pad_amt = [(3, 3) for _ in test.shape] + assert_array_equal(np.pad(test, pad_amt, mode=mode), + np.pad(test, pad_amt, mode=mode, stat_length=30)) class TestStatistic(object): def test_check_mean_stat_length(self): a = np.arange(100).astype('f') - a = pad(a, ((25, 20), ), 'mean', stat_length=((2, 3), )) + a = np.pad(a, ((25, 20), ), 'mean', stat_length=((2, 3), )) b = np.array( [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, @@ -166,7 +158,7 @@ class TestStatistic(object): def test_check_maximum_1(self): a = np.arange(100) - a = pad(a, (25, 20), 'maximum') + a = np.pad(a, (25, 20), 'maximum') b = np.array( [99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, @@ -190,7 +182,7 @@ class TestStatistic(object): def test_check_maximum_2(self): a = np.arange(100) + 1 - a = pad(a, (25, 20), 'maximum') + a = np.pad(a, (25, 20), 'maximum') b = np.array( [100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, @@ -214,7 +206,7 @@ class TestStatistic(object): def test_check_maximum_stat_length(self): a = np.arange(100) + 1 - a = pad(a, (25, 20), 'maximum', stat_length=10) + a = np.pad(a, (25, 20), 'maximum', stat_length=10) b = np.array( [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, @@ -238,7 +230,7 @@ class TestStatistic(object): def test_check_minimum_1(self): a = np.arange(100) - a = pad(a, (25, 20), 'minimum') + a = np.pad(a, (25, 20), 'minimum') b = np.array( [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -262,7 +254,7 @@ class TestStatistic(object): def test_check_minimum_2(self): a = 
np.arange(100) + 2 - a = pad(a, (25, 20), 'minimum') + a = np.pad(a, (25, 20), 'minimum') b = np.array( [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -286,7 +278,7 @@ class TestStatistic(object): def test_check_minimum_stat_length(self): a = np.arange(100) + 1 - a = pad(a, (25, 20), 'minimum', stat_length=10) + a = np.pad(a, (25, 20), 'minimum', stat_length=10) b = np.array( [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -310,7 +302,7 @@ class TestStatistic(object): def test_check_median(self): a = np.arange(100).astype('f') - a = pad(a, (25, 20), 'median') + a = np.pad(a, (25, 20), 'median') b = np.array( [49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, @@ -334,7 +326,7 @@ class TestStatistic(object): def test_check_median_01(self): a = np.array([[3, 1, 4], [4, 5, 9], [9, 8, 2]]) - a = pad(a, 1, 'median') + a = np.pad(a, 1, 'median') b = np.array( [[4, 4, 5, 4, 4], @@ -348,7 +340,7 @@ class TestStatistic(object): def test_check_median_02(self): a = np.array([[3, 1, 4], [4, 5, 9], [9, 8, 2]]) - a = pad(a.T, 1, 'median').T + a = np.pad(a.T, 1, 'median').T b = np.array( [[5, 4, 5, 4, 5], @@ -364,7 +356,7 @@ class TestStatistic(object): a = np.arange(100).astype('f') a[1] = 2. a[97] = 96. - a = pad(a, (25, 20), 'median', stat_length=(3, 5)) + a = np.pad(a, (25, 20), 'median', stat_length=(3, 5)) b = np.array( [ 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., @@ -388,7 +380,7 @@ class TestStatistic(object): def test_check_mean_shape_one(self): a = [[4, 5, 6]] - a = pad(a, (5, 7), 'mean', stat_length=2) + a = np.pad(a, (5, 7), 'mean', stat_length=2) b = np.array( [[4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], [4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6], @@ -410,7 +402,7 @@ class TestStatistic(object): def test_check_mean_2(self): a = np.arange(100).astype('f') - a = pad(a, (25, 20), 'mean') + a = np.pad(a, (25, 20), 'mean') b = np.array( [49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, @@ -433,7 +425,7 @@ class TestStatistic(object): assert_array_equal(a, b) @pytest.mark.parametrize("mode", [ - pytest.param("mean", marks=pytest.mark.xfail(reason="gh-11216")), + "mean", "median", "minimum", "maximum" @@ -446,11 +438,42 @@ class TestStatistic(object): a = np.pad(a, (1, 1), mode) assert_equal(a[0], a[-1]) + @pytest.mark.parametrize("mode", ["mean", "median", "minimum", "maximum"]) + @pytest.mark.parametrize( + "stat_length", [-2, (-2,), (3, -1), ((5, 2), (-2, 3)), ((-4,), (2,))] + ) + def test_check_negative_stat_length(self, mode, stat_length): + arr = np.arange(30).reshape((6, 5)) + match = "index can't contain negative values" + with pytest.raises(ValueError, match=match): + np.pad(arr, 2, mode, stat_length=stat_length) + + def test_simple_stat_length(self): + a = np.arange(30) + a = np.reshape(a, (6, 5)) + a = np.pad(a, ((2, 3), (3, 2)), mode='mean', stat_length=(3,)) + b = np.array( + [[6, 6, 6, 5, 6, 7, 8, 9, 8, 8], + [6, 6, 6, 5, 6, 7, 8, 9, 8, 8], + + [1, 1, 1, 0, 1, 2, 3, 4, 3, 3], + [6, 6, 6, 5, 6, 7, 8, 9, 8, 8], + [11, 11, 11, 10, 11, 12, 13, 14, 13, 13], + [16, 16, 16, 15, 16, 17, 18, 19, 18, 18], + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], + [26, 26, 26, 25, 26, 27, 28, 29, 28, 28], + + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], + [21, 21, 21, 20, 21, 22, 23, 24, 23, 23]] + ) + assert_array_equal(a, b) + class 
TestConstant(object): def test_check_constant(self): a = np.arange(100) - a = pad(a, (25, 20), 'constant', constant_values=(10, 20)) + a = np.pad(a, (25, 20), 'constant', constant_values=(10, 20)) b = np.array( [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, @@ -474,7 +497,7 @@ class TestConstant(object): def test_check_constant_zeros(self): a = np.arange(100) - a = pad(a, (25, 20), 'constant') + a = np.pad(a, (25, 20), 'constant') b = np.array( [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -500,7 +523,7 @@ class TestConstant(object): # If input array is int, but constant_values are float, the dtype of # the array to be padded is kept arr = np.arange(30).reshape(5, 6) - test = pad(arr, (1, 2), mode='constant', + test = np.pad(arr, (1, 2), mode='constant', constant_values=1.1) expected = np.array( [[ 1, 1, 1, 1, 1, 1, 1, 1, 1], @@ -521,7 +544,7 @@ class TestConstant(object): # the array to be padded is kept - here retaining the float constants arr = np.arange(30).reshape(5, 6) arr_float = arr.astype(np.float64) - test = pad(arr_float, ((1, 2), (1, 2)), mode='constant', + test = np.pad(arr_float, ((1, 2), (1, 2)), mode='constant', constant_values=1.1) expected = np.array( [[ 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1], @@ -539,7 +562,7 @@ class TestConstant(object): def test_check_constant_float3(self): a = np.arange(100, dtype=float) - a = pad(a, (25, 20), 'constant', constant_values=(-1.1, -1.2)) + a = np.pad(a, (25, 20), 'constant', constant_values=(-1.1, -1.2)) b = np.array( [-1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, -1.1, @@ -563,7 +586,7 @@ class TestConstant(object): def test_check_constant_odd_pad_amount(self): arr = np.arange(30).reshape(5, 6) - test = pad(arr, ((1,), (2,)), mode='constant', + test = np.pad(arr, ((1,), (2,)), mode='constant', constant_values=3) expected = np.array( [[ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], @@ -620,11 +643,16 @@ class TestConstant(object): assert_array_equal(arr, expected) + def test_pad_empty_dimension(self): + arr = np.zeros((3, 0, 2)) + result = np.pad(arr, [(0,), (2,), (1,)], mode="constant") + assert result.shape == (3, 4, 4) + class TestLinearRamp(object): def test_check_simple(self): a = np.arange(100).astype('f') - a = pad(a, (25, 20), 'linear_ramp', end_values=(4, 5)) + a = np.pad(a, (25, 20), 'linear_ramp', end_values=(4, 5)) b = np.array( [4.00, 3.84, 3.68, 3.52, 3.36, 3.20, 3.04, 2.88, 2.72, 2.56, 2.40, 2.24, 2.08, 1.92, 1.76, 1.60, 1.44, 1.28, 1.12, 0.96, @@ -648,7 +676,7 @@ class TestLinearRamp(object): def test_check_2d(self): arr = np.arange(20).reshape(4, 5).astype(np.float64) - test = pad(arr, (2, 2), mode='linear_ramp', end_values=(0, 0)) + test = np.pad(arr, (2, 2), mode='linear_ramp', end_values=(0, 0)) expected = np.array( [[0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0.5, 1., 1.5, 2., 1., 0.], @@ -679,11 +707,19 @@ class TestLinearRamp(object): ]) assert_equal(actual, expected) + def test_end_values(self): + """Ensure that end values are exact.""" + a = np.pad(np.ones(10).reshape(2, 5), (223, 123), mode="linear_ramp") + assert_equal(a[:, 0], 0.) + assert_equal(a[:, -1], 0.) + assert_equal(a[0, :], 0.) + assert_equal(a[-1, :], 0.) 
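Aside, not part of the patch: `linear_ramp` interpolates from each array edge to the matching `end_values` entry, so the outermost padded samples land exactly on the requested end values; that is the behaviour the new `test_end_values` above pins down. An illustrative doctest-style sketch, mirroring the `np.pad` docstring:

>>> np.pad([1, 2, 3, 4, 5], (2, 3), 'linear_ramp', end_values=(5, -4))
array([ 5,  3,  1,  2,  3,  4,  5,  2, -1, -4])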
+ class TestReflect(object): def test_check_simple(self): a = np.arange(100) - a = pad(a, (25, 20), 'reflect') + a = np.pad(a, (25, 20), 'reflect') b = np.array( [25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, @@ -707,7 +743,7 @@ class TestReflect(object): def test_check_odd_method(self): a = np.arange(100) - a = pad(a, (25, 20), 'reflect', reflect_type='odd') + a = np.pad(a, (25, 20), 'reflect', reflect_type='odd') b = np.array( [-25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, @@ -731,7 +767,7 @@ class TestReflect(object): def test_check_large_pad(self): a = [[4, 5, 6], [6, 7, 8]] - a = pad(a, (5, 7), 'reflect') + a = np.pad(a, (5, 7), 'reflect') b = np.array( [[7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7, 8, 7, 6, 7], [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], @@ -754,7 +790,7 @@ class TestReflect(object): def test_check_shape(self): a = [[4, 5, 6]] - a = pad(a, (5, 7), 'reflect') + a = np.pad(a, (5, 7), 'reflect') b = np.array( [[5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], [5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5, 6, 5, 4, 5], @@ -775,30 +811,49 @@ class TestReflect(object): assert_array_equal(a, b) def test_check_01(self): - a = pad([1, 2, 3], 2, 'reflect') + a = np.pad([1, 2, 3], 2, 'reflect') b = np.array([3, 2, 1, 2, 3, 2, 1]) assert_array_equal(a, b) def test_check_02(self): - a = pad([1, 2, 3], 3, 'reflect') + a = np.pad([1, 2, 3], 3, 'reflect') b = np.array([2, 3, 2, 1, 2, 3, 2, 1, 2]) assert_array_equal(a, b) def test_check_03(self): - a = pad([1, 2, 3], 4, 'reflect') + a = np.pad([1, 2, 3], 4, 'reflect') b = np.array([1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3]) assert_array_equal(a, b) - def test_check_padding_an_empty_array(self): - a = pad(np.zeros((0, 3)), ((0,), (1,)), mode='reflect') - b = np.zeros((0, 5)) - assert_array_equal(a, b) + +class TestEmptyArray(object): + """Check how padding behaves on arrays with an empty dimension.""" + + @pytest.mark.parametrize( + # Keep parametrization ordered, otherwise pytest-xdist might believe + # that different tests were collected during parallelization + "mode", sorted(_all_modes.keys() - {"constant", "empty"}) + ) + def test_pad_empty_dimension(self, mode): + match = ("can't extend empty axis 0 using modes other than 'constant' " + "or 'empty'") + with pytest.raises(ValueError, match=match): + np.pad([], 4, mode=mode) + with pytest.raises(ValueError, match=match): + np.pad(np.ndarray(0), 4, mode=mode) + with pytest.raises(ValueError, match=match): + np.pad(np.zeros((0, 3)), ((1,), (0,)), mode=mode) + + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_pad_non_empty_dimension(self, mode): + result = np.pad(np.ones((2, 0, 2)), ((3,), (0,), (1,)), mode=mode) + assert result.shape == (8, 0, 4) class TestSymmetric(object): def test_check_simple(self): a = np.arange(100) - a = pad(a, (25, 20), 'symmetric') + a = np.pad(a, (25, 20), 'symmetric') b = np.array( [24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, @@ -822,7 +877,7 @@ class TestSymmetric(object): def test_check_odd_method(self): a = np.arange(100) - a = pad(a, (25, 20), 'symmetric', reflect_type='odd') + a = np.pad(a, (25, 20), 'symmetric', reflect_type='odd') b = np.array( [-24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, @@ -846,7 +901,7 @@ class TestSymmetric(object): def test_check_large_pad(self): a = [[4, 5, 6], [6, 7, 8]] - a = pad(a, (5, 7), 'symmetric') + a = np.pad(a, (5, 7), 'symmetric') b = np.array( [[5, 6, 6, 5, 4, 4, 5, 6, 6, 
5, 4, 4, 5, 6, 6], [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], @@ -870,7 +925,7 @@ class TestSymmetric(object): def test_check_large_pad_odd(self): a = [[4, 5, 6], [6, 7, 8]] - a = pad(a, (5, 7), 'symmetric', reflect_type='odd') + a = np.pad(a, (5, 7), 'symmetric', reflect_type='odd') b = np.array( [[-3, -2, -2, -1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6], [-3, -2, -2, -1, 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6], @@ -893,7 +948,7 @@ class TestSymmetric(object): def test_check_shape(self): a = [[4, 5, 6]] - a = pad(a, (5, 7), 'symmetric') + a = np.pad(a, (5, 7), 'symmetric') b = np.array( [[5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], [5, 6, 6, 5, 4, 4, 5, 6, 6, 5, 4, 4, 5, 6, 6], @@ -914,17 +969,17 @@ class TestSymmetric(object): assert_array_equal(a, b) def test_check_01(self): - a = pad([1, 2, 3], 2, 'symmetric') + a = np.pad([1, 2, 3], 2, 'symmetric') b = np.array([2, 1, 1, 2, 3, 3, 2]) assert_array_equal(a, b) def test_check_02(self): - a = pad([1, 2, 3], 3, 'symmetric') + a = np.pad([1, 2, 3], 3, 'symmetric') b = np.array([3, 2, 1, 1, 2, 3, 3, 2, 1]) assert_array_equal(a, b) def test_check_03(self): - a = pad([1, 2, 3], 6, 'symmetric') + a = np.pad([1, 2, 3], 6, 'symmetric') b = np.array([1, 2, 3, 3, 2, 1, 1, 2, 3, 3, 2, 1, 1, 2, 3]) assert_array_equal(a, b) @@ -932,7 +987,7 @@ class TestSymmetric(object): class TestWrap(object): def test_check_simple(self): a = np.arange(100) - a = pad(a, (25, 20), 'wrap') + a = np.pad(a, (25, 20), 'wrap') b = np.array( [75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, @@ -957,7 +1012,7 @@ class TestWrap(object): def test_check_large_pad(self): a = np.arange(12) a = np.reshape(a, (3, 4)) - a = pad(a, (10, 12), 'wrap') + a = np.pad(a, (10, 12), 'wrap') b = np.array( [[10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11], @@ -1015,12 +1070,12 @@ class TestWrap(object): assert_array_equal(a, b) def test_check_01(self): - a = pad([1, 2, 3], 3, 'wrap') + a = np.pad([1, 2, 3], 3, 'wrap') b = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]) assert_array_equal(a, b) def test_check_02(self): - a = pad([1, 2, 3], 4, 'wrap') + a = np.pad([1, 2, 3], 4, 'wrap') b = np.array([3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1]) assert_array_equal(a, b) @@ -1029,35 +1084,25 @@ class TestWrap(object): b = np.pad(a, (0, 5), mode="wrap") assert_array_equal(a, b[:-5, :-5]) + def test_repeated_wrapping(self): + """ + Check wrapping on each side individually if the wrapped area is longer + than the original array. 
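For illustration (values assume the fixed periodic behaviour asserted
below, and are not part of the patch): np.pad(np.arange(3), (7, 0),
mode="wrap") continues the period across the whole pad, giving
array([2, 0, 1, 2, 0, 1, 2, 0, 1, 2]).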
+ """ + a = np.arange(5) + b = np.pad(a, (12, 0), mode="wrap") + assert_array_equal(np.r_[a, a, a, a][3:], b) -class TestStatLen(object): - def test_check_simple(self): - a = np.arange(30) - a = np.reshape(a, (6, 5)) - a = pad(a, ((2, 3), (3, 2)), mode='mean', stat_length=(3,)) - b = np.array( - [[6, 6, 6, 5, 6, 7, 8, 9, 8, 8], - [6, 6, 6, 5, 6, 7, 8, 9, 8, 8], - - [1, 1, 1, 0, 1, 2, 3, 4, 3, 3], - [6, 6, 6, 5, 6, 7, 8, 9, 8, 8], - [11, 11, 11, 10, 11, 12, 13, 14, 13, 13], - [16, 16, 16, 15, 16, 17, 18, 19, 18, 18], - [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], - [26, 26, 26, 25, 26, 27, 28, 29, 28, 28], - - [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], - [21, 21, 21, 20, 21, 22, 23, 24, 23, 23], - [21, 21, 21, 20, 21, 22, 23, 24, 23, 23]] - ) - assert_array_equal(a, b) + a = np.arange(5) + b = np.pad(a, (0, 12), mode="wrap") + assert_array_equal(np.r_[a, a, a, a][:-3], b) class TestEdge(object): def test_check_simple(self): a = np.arange(12) a = np.reshape(a, (4, 3)) - a = pad(a, ((2, 3), (3, 2)), 'edge') + a = np.pad(a, ((2, 3), (3, 2)), 'edge') b = np.array( [[0, 0, 0, 0, 1, 2, 2, 2], [0, 0, 0, 0, 1, 2, 2, 2], @@ -1077,56 +1122,123 @@ class TestEdge(object): # Check a pad_width of the form ((1, 2),). # Regression test for issue gh-7808. a = np.array([1, 2, 3]) - padded = pad(a, ((1, 2),), 'edge') + padded = np.pad(a, ((1, 2),), 'edge') expected = np.array([1, 1, 2, 3, 3, 3]) assert_array_equal(padded, expected) a = np.array([[1, 2, 3], [4, 5, 6]]) - padded = pad(a, ((1, 2),), 'edge') - expected = pad(a, ((1, 2), (1, 2)), 'edge') + padded = np.pad(a, ((1, 2),), 'edge') + expected = np.pad(a, ((1, 2), (1, 2)), 'edge') assert_array_equal(padded, expected) a = np.arange(24).reshape(2, 3, 4) - padded = pad(a, ((1, 2),), 'edge') - expected = pad(a, ((1, 2), (1, 2), (1, 2)), 'edge') + padded = np.pad(a, ((1, 2),), 'edge') + expected = np.pad(a, ((1, 2), (1, 2), (1, 2)), 'edge') assert_array_equal(padded, expected) -class TestZeroPadWidth(object): - def test_zero_pad_width(self): - arr = np.arange(30) - arr = np.reshape(arr, (6, 5)) - for pad_width in (0, (0, 0), ((0, 0), (0, 0))): - assert_array_equal(arr, pad(arr, pad_width, mode='constant')) +class TestEmpty(object): + def test_simple(self): + arr = np.arange(24).reshape(4, 6) + result = np.pad(arr, [(2, 3), (3, 1)], mode="empty") + assert result.shape == (9, 10) + assert_equal(arr, result[2:-3, 3:-1]) + def test_pad_empty_dimension(self): + arr = np.zeros((3, 0, 2)) + result = np.pad(arr, [(0,), (2,), (1,)], mode="empty") + assert result.shape == (3, 4, 4) -class TestLegacyVectorFunction(object): - def test_legacy_vector_functionality(self): - def _padwithtens(vector, pad_width, iaxis, kwargs): - vector[:pad_width[0]] = 10 - vector[-pad_width[1]:] = 10 - return vector - a = np.arange(6).reshape(2, 3) - a = pad(a, 2, _padwithtens) - b = np.array( - [[10, 10, 10, 10, 10, 10, 10], - [10, 10, 10, 10, 10, 10, 10], +def test_legacy_vector_functionality(): + def _padwithtens(vector, pad_width, iaxis, kwargs): + vector[:pad_width[0]] = 10 + vector[-pad_width[1]:] = 10 - [10, 10, 0, 1, 2, 10, 10], - [10, 10, 3, 4, 5, 10, 10], + a = np.arange(6).reshape(2, 3) + a = np.pad(a, 2, _padwithtens) + b = np.array( + [[10, 10, 10, 10, 10, 10, 10], + [10, 10, 10, 10, 10, 10, 10], - [10, 10, 10, 10, 10, 10, 10], - [10, 10, 10, 10, 10, 10, 10]] - ) - assert_array_equal(a, b) + [10, 10, 0, 1, 2, 10, 10], + [10, 10, 3, 4, 5, 10, 10], + [10, 10, 10, 10, 10, 10, 10], + [10, 10, 10, 10, 10, 10, 10]] + ) + assert_array_equal(a, b) -class TestNdarrayPadWidth(object): - def 
test_check_simple(self): + +def test_unicode_mode(): + a = np.pad([1], 2, mode=u'constant') + b = np.array([0, 0, 1, 0, 0]) + assert_array_equal(a, b) + + +@pytest.mark.parametrize("mode", ["edge", "symmetric", "reflect", "wrap"]) +def test_object_input(mode): + # Regression test for issue gh-11395. + a = np.full((4, 3), fill_value=None) + pad_amt = ((2, 3), (3, 2)) + b = np.full((9, 8), fill_value=None) + assert_array_equal(np.pad(a, pad_amt, mode=mode), b) + + +class TestPadWidth(object): + @pytest.mark.parametrize("pad_width", [ + (4, 5, 6, 7), + ((1,), (2,), (3,)), + ((1, 2), (3, 4), (5, 6)), + ((3, 4, 5), (0, 1, 2)), + ]) + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_misshaped_pad_width(self, pad_width, mode): + arr = np.arange(30).reshape((6, 5)) + match = "operands could not be broadcast together" + with pytest.raises(ValueError, match=match): + np.pad(arr, pad_width, mode) + + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_misshaped_pad_width_2(self, mode): + arr = np.arange(30).reshape((6, 5)) + match = ("input operand has more dimensions than allowed by the axis " + "remapping") + with pytest.raises(ValueError, match=match): + np.pad(arr, (((3,), (4,), (5,)), ((0,), (1,), (2,))), mode) + + @pytest.mark.parametrize( + "pad_width", [-2, (-2,), (3, -1), ((5, 2), (-2, 3)), ((-4,), (2,))]) + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_negative_pad_width(self, pad_width, mode): + arr = np.arange(30).reshape((6, 5)) + match = "index can't contain negative values" + with pytest.raises(ValueError, match=match): + np.pad(arr, pad_width, mode) + + @pytest.mark.parametrize("pad_width", [ + "3", + "word", + None, + object(), + 3.4, + ((2, 3, 4), (3, 2)), # dtype=object (tuple) + complex(1, -1), + ((-2.1, 3), (3, 2)), + ]) + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_bad_type(self, pad_width, mode): + arr = np.arange(30).reshape((6, 5)) + match = "`pad_width` must be of integral type." + with pytest.raises(TypeError, match=match): + np.pad(arr, pad_width, mode) + with pytest.raises(TypeError, match=match): + np.pad(arr, np.array(pad_width), mode) + + def test_pad_width_as_ndarray(self): a = np.arange(12) a = np.reshape(a, (4, 3)) - a = pad(a, np.array(((2, 3), (3, 2))), 'edge') + a = np.pad(a, np.array(((2, 3), (3, 2))), 'edge') b = np.array( [[0, 0, 0, 0, 1, 2, 2, 2], [0, 0, 0, 0, 1, 2, 2, 2], @@ -1142,121 +1254,68 @@ class TestNdarrayPadWidth(object): ) assert_array_equal(a, b) - -class TestUnicodeInput(object): - def test_unicode_mode(self): - constant_mode = u'constant' - a = np.pad([1], 2, mode=constant_mode) - b = np.array([0, 0, 1, 0, 0]) - assert_array_equal(a, b) - - -class TestObjectInput(object): - def test_object_input(self): - # Regression test for issue gh-11395. 
- a = np.full((4, 3), None) - pad_amt = ((2, 3), (3, 2)) - b = np.full((9, 8), None) - modes = ['edge', - 'symmetric', - 'reflect', - 'wrap', - ] - for mode in modes: - assert_array_equal(pad(a, pad_amt, mode=mode), b) - - -class TestValueError1(object): - def test_check_simple(self): - arr = np.arange(30) - arr = np.reshape(arr, (6, 5)) - kwargs = dict(mode='mean', stat_length=(3, )) - assert_raises(ValueError, pad, arr, ((2, 3), (3, 2), (4, 5)), - **kwargs) - - def test_check_negative_stat_length(self): - arr = np.arange(30) - arr = np.reshape(arr, (6, 5)) - kwargs = dict(mode='mean', stat_length=(-3, )) - assert_raises(ValueError, pad, arr, ((2, 3), (3, 2)), - **kwargs) - - def test_check_negative_pad_width(self): - arr = np.arange(30) - arr = np.reshape(arr, (6, 5)) - kwargs = dict(mode='mean', stat_length=(3, )) - assert_raises(ValueError, pad, arr, ((-2, 3), (3, 2)), - **kwargs) - - def test_check_empty_array(self): - assert_raises(ValueError, pad, [], 4, mode='reflect') - assert_raises(ValueError, pad, np.ndarray(0), 4, mode='reflect') - assert_raises(ValueError, pad, np.zeros((0, 3)), ((1,), (0,)), - mode='reflect') - - -class TestValueError2(object): - def test_check_negative_pad_amount(self): - arr = np.arange(30) - arr = np.reshape(arr, (6, 5)) - kwargs = dict(mode='mean', stat_length=(3, )) - assert_raises(ValueError, pad, arr, ((-2, 3), (3, 2)), - **kwargs) - - -class TestValueError3(object): - def test_check_kwarg_not_allowed(self): - arr = np.arange(30).reshape(5, 6) - assert_raises(ValueError, pad, arr, 4, mode='mean', - reflect_type='odd') - - def test_mode_not_set(self): - arr = np.arange(30).reshape(5, 6) - assert_raises(TypeError, pad, arr, 4) - - def test_malformed_pad_amount(self): - arr = np.arange(30).reshape(5, 6) - assert_raises(ValueError, pad, arr, (4, 5, 6, 7), mode='constant') - - def test_malformed_pad_amount2(self): - arr = np.arange(30).reshape(5, 6) - assert_raises(ValueError, pad, arr, ((3, 4, 5), (0, 1, 2)), - mode='constant') - - def test_pad_too_many_axes(self): - arr = np.arange(30).reshape(5, 6) - - # Attempt to pad using a 3D array equivalent - bad_shape = (((3,), (4,), (5,)), ((0,), (1,), (2,))) - assert_raises(ValueError, pad, arr, bad_shape, - mode='constant') - - -class TestTypeError1(object): - def test_float(self): - arr = np.arange(30) - assert_raises(TypeError, pad, arr, ((-2.1, 3), (3, 2))) - assert_raises(TypeError, pad, arr, np.array(((-2.1, 3), (3, 2)))) - - def test_str(self): - arr = np.arange(30) - assert_raises(TypeError, pad, arr, 'foo') - assert_raises(TypeError, pad, arr, np.array('foo')) - - def test_object(self): - class FooBar(object): - pass - arr = np.arange(30) - assert_raises(TypeError, pad, arr, FooBar()) - - def test_complex(self): - arr = np.arange(30) - assert_raises(TypeError, pad, arr, complex(1, -1)) - assert_raises(TypeError, pad, arr, np.array(complex(1, -1))) - - def test_check_wrong_pad_amount(self): - arr = np.arange(30) - arr = np.reshape(arr, (6, 5)) - kwargs = dict(mode='mean', stat_length=(3, )) - assert_raises(TypeError, pad, arr, ((2, 3, 4), (3, 2)), - **kwargs) + @pytest.mark.parametrize("pad_width", [0, (0, 0), ((0, 0), (0, 0))]) + @pytest.mark.parametrize("mode", _all_modes.keys()) + def test_zero_pad_width(self, pad_width, mode): + arr = np.arange(30).reshape(6, 5) + assert_array_equal(arr, np.pad(arr, pad_width, mode=mode)) + + +@pytest.mark.parametrize("mode", _all_modes.keys()) +def test_kwargs(mode): + """Test behavior of pad's kwargs for the given mode.""" + allowed = _all_modes[mode] + 
not_allowed = {} + for kwargs in _all_modes.values(): + if kwargs != allowed: + not_allowed.update(kwargs) + # Test if allowed keyword arguments pass + np.pad([1, 2, 3], 1, mode, **allowed) + # Test if prohibited keyword arguments of other modes raise an error + for key, value in not_allowed.items(): + match = "unsupported keyword arguments for mode '{}'".format(mode) + with pytest.raises(ValueError, match=match): + np.pad([1, 2, 3], 1, mode, **{key: value}) + + +def test_constant_zero_default(): + arr = np.array([1, 1]) + assert_array_equal(np.pad(arr, 2), [0, 0, 1, 1, 0, 0]) + + +@pytest.mark.parametrize("mode", [1, "const", object(), None, True, False]) +def test_unsupported_mode(mode): + match= "mode '{}' is not supported".format(mode) + with pytest.raises(ValueError, match=match): + np.pad([1, 2, 3], 4, mode=mode) + + +@pytest.mark.parametrize("mode", _all_modes.keys()) +def test_non_contiguous_array(mode): + arr = np.arange(24).reshape(4, 6)[::2, ::2] + result = np.pad(arr, (2, 3), mode) + assert result.shape == (7, 8) + assert_equal(result[2:-3, 2:-3], arr) + + +@pytest.mark.parametrize("mode", _all_modes.keys()) +def test_memory_layout_persistence(mode): + """Test if C and F order is preserved for all pad modes.""" + x = np.ones((5, 10), order='C') + assert np.pad(x, 5, mode).flags["C_CONTIGUOUS"] + x = np.ones((5, 10), order='F') + assert np.pad(x, 5, mode).flags["F_CONTIGUOUS"] + + +@pytest.mark.parametrize("dtype", chain( + # Skip "other" dtypes as they are not supported by all modes + np.sctypes["int"], + np.sctypes["uint"], + np.sctypes["float"], + np.sctypes["complex"] +)) +@pytest.mark.parametrize("mode", _all_modes.keys()) +def test_dtype_persistence(dtype, mode): + arr = np.zeros((3, 2, 1), dtype=dtype) + result = np.pad(arr, 1, mode=mode) + assert result.dtype == dtype diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index fef06ba53..93d4b279f 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -4,7 +4,6 @@ from __future__ import division, absolute_import, print_function import numpy as np -import sys from numpy.testing import (assert_array_equal, assert_equal, assert_raises, assert_raises_regex) @@ -137,8 +136,8 @@ class TestSetOps(object): np.nan), # should fail because attempting # to downcast to smaller int type: - (np.array([1, 2, 3], dtype=np.int32), - np.array([5, 7, 2], dtype=np.int64), + (np.array([1, 2, 3], dtype=np.int16), + np.array([5, 1<<20, 2], dtype=np.int32), None), # should fail because attempting to cast # two special floating point values @@ -153,8 +152,8 @@ class TestSetOps(object): # specifically, raise an appropriate # Exception when attempting to append or # prepend with an incompatible type - msg = 'must be compatible' - with assert_raises_regex(TypeError, msg): + msg = 'cannot convert' + with assert_raises_regex(ValueError, msg): ediff1d(ary=ary, to_end=append, to_begin=prepend) diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py index 0d7c7f7e0..2ebd483d5 100644 --- a/numpy/lib/tests/test_format.py +++ b/numpy/lib/tests/test_format.py @@ -287,7 +287,6 @@ from io import BytesIO import numpy as np from numpy.testing import ( assert_, assert_array_equal, assert_raises, assert_raises_regex, - raises ) from numpy.lib import format @@ -427,7 +426,7 @@ def roundtrip(arr): f = BytesIO() format.write_array(f, arr) f2 = BytesIO(f.getvalue()) - arr2 = format.read_array(f2) + arr2 = format.read_array(f2, allow_pickle=True) return arr2 @@ -577,7 +576,7 @@ 
def test_pickle_python2_python3(): path = os.path.join(data_dir, fname) for encoding in ['bytes', 'latin1']: - data_f = np.load(path, encoding=encoding) + data_f = np.load(path, allow_pickle=True, encoding=encoding) if fname.endswith('.npz'): data = data_f['x'] data_f.close() @@ -599,16 +598,19 @@ def test_pickle_python2_python3(): if sys.version_info[0] >= 3: if fname.startswith('py2'): if fname.endswith('.npz'): - data = np.load(path) + data = np.load(path, allow_pickle=True) assert_raises(UnicodeError, data.__getitem__, 'x') data.close() - data = np.load(path, fix_imports=False, encoding='latin1') + data = np.load(path, allow_pickle=True, fix_imports=False, + encoding='latin1') assert_raises(ImportError, data.__getitem__, 'x') data.close() else: - assert_raises(UnicodeError, np.load, path) + assert_raises(UnicodeError, np.load, path, + allow_pickle=True) assert_raises(ImportError, np.load, path, - encoding='latin1', fix_imports=False) + allow_pickle=True, fix_imports=False, + encoding='latin1') def test_pickle_disallow(): diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 0c789e012..e2c24a123 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -4,28 +4,28 @@ import operator import warnings import sys import decimal +import types import pytest import numpy as np from numpy import ma from numpy.testing import ( assert_, assert_equal, assert_array_equal, assert_almost_equal, - assert_array_almost_equal, assert_raises, assert_allclose, - assert_array_max_ulp, assert_warns, assert_raises_regex, suppress_warnings, - HAS_REFCOUNT, + assert_array_almost_equal, assert_raises, assert_allclose, IS_PYPY, + assert_warns, assert_raises_regex, suppress_warnings, HAS_REFCOUNT, ) import numpy.lib.function_base as nfb from numpy.random import rand from numpy.lib import ( add_newdoc_ufunc, angle, average, bartlett, blackman, corrcoef, cov, delete, diff, digitize, extract, flipud, gradient, hamming, hanning, - histogram, histogramdd, i0, insert, interp, kaiser, meshgrid, msort, - piecewise, place, rot90, select, setxor1d, sinc, split, trapz, trim_zeros, - unwrap, unique, vectorize + i0, insert, interp, kaiser, meshgrid, msort, piecewise, place, rot90, + select, setxor1d, sinc, trapz, trim_zeros, unwrap, unique, vectorize ) from numpy.compat import long +PY2 = sys.version_info[0] == 2 def get_mat(n): data = np.arange(n) @@ -33,6 +33,17 @@ def get_mat(n): return data +def _make_complex(real, imag): + """ + Like real + 1j * imag, but behaves as expected when imag contains non-finite + values + """ + ret = np.zeros(np.broadcast(real, imag).shape, np.complex_) + ret.real = real + ret.imag = imag + return ret + + class TestRot90(object): def test_basic(self): assert_raises(ValueError, rot90, np.ones(4)) @@ -355,9 +366,9 @@ class TestAverage(object): assert_equal(type(np.average(a, weights=w)), subclass) def test_upcasting(self): - types = [('i4', 'i4', 'f8'), ('i4', 'f4', 'f8'), ('f4', 'i4', 'f8'), + typs = [('i4', 'i4', 'f8'), ('i4', 'f4', 'f8'), ('f4', 'i4', 'f8'), ('f4', 'f4', 'f4'), ('f4', 'f8', 'f8')] - for at, wt, rt in types: + for at, wt, rt in typs: a = np.array([[1,2],[3,4]], dtype=at) w = np.array([[1,2],[3,4]], dtype=wt) assert_equal(np.average(a, weights=w).dtype, np.dtype(rt)) @@ -1500,6 +1511,49 @@ class TestVectorize(object): f(x) +class TestLeaks(object): + class A(object): + iters = 20 + + def bound(self, *args): + return 0 + + @staticmethod + def unbound(*args): + return 0 + + @pytest.mark.skipif(not 
HAS_REFCOUNT, reason="Python lacks refcounts") + @pytest.mark.parametrize('name, incr', [ + ('bound', A.iters), + ('unbound', 0), + ]) + def test_frompyfunc_leaks(self, name, incr): + # exposed in gh-11867 as np.vectorized, but the problem stems from + # frompyfunc. + # class.attribute = np.frompyfunc(<method>) creates a + # reference cycle if <method> is a bound class method. It requires a + # gc collection cycle to break the cycle (on CPython 3) + import gc + A_func = getattr(self.A, name) + gc.disable() + try: + refcount = sys.getrefcount(A_func) + for i in range(self.A.iters): + a = self.A() + a.f = np.frompyfunc(getattr(a, name), 1, 1) + out = a.f(np.arange(10)) + a = None + if PY2: + assert_equal(sys.getrefcount(A_func), refcount) + else: + # A.func is part of a reference cycle if incr is non-zero + assert_equal(sys.getrefcount(A_func), refcount + incr) + for i in range(5): + gc.collect() + assert_equal(sys.getrefcount(A_func), refcount) + finally: + gc.enable() + class TestDigitize(object): def test_forward(self): @@ -2311,7 +2365,7 @@ class TestInterp(object): x0 = np.nan assert_almost_equal(np.interp(x0, x, y), x0) - def test_non_finite_behavior(self): + def test_non_finite_behavior_exact_x(self): x = [1, 2, 2.5, 3, 4] xp = [1, 2, 3, 4] fp = [1, 2, np.inf, 4] @@ -2319,6 +2373,64 @@ class TestInterp(object): fp = [1, 2, np.nan, 4] assert_almost_equal(np.interp(x, xp, fp), [1, 2, np.nan, np.nan, 4]) + @pytest.fixture(params=[ + lambda x: np.float_(x), + lambda x: _make_complex(x, 0), + lambda x: _make_complex(0, x), + lambda x: _make_complex(x, np.multiply(x, -2)) + ], ids=[ + 'real', + 'complex-real', + 'complex-imag', + 'complex-both' + ]) + def sc(self, request): + """ scale function used by the below tests """ + return request.param + + def test_non_finite_any_nan(self, sc): + """ test that nans are propagated """ + assert_equal(np.interp(0.5, [np.nan, 1], sc([ 0, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, np.nan], sc([ 0, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, 1], sc([np.nan, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, 1], sc([ 0, np.nan])), sc(np.nan)) + + def test_non_finite_inf(self, sc): + """ Test that interp between opposite infs gives nan """ + assert_equal(np.interp(0.5, [-np.inf, +np.inf], sc([ 0, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, 1], sc([-np.inf, +np.inf])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, 1], sc([+np.inf, -np.inf])), sc(np.nan)) + + # unless the y values are equal + assert_equal(np.interp(0.5, [-np.inf, +np.inf], sc([ 10, 10])), sc(10)) + + def test_non_finite_half_inf_xf(self, sc): + """ Test that interp where both axes have a bound at inf gives nan """ + assert_equal(np.interp(0.5, [-np.inf, 1], sc([-np.inf, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [-np.inf, 1], sc([+np.inf, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [-np.inf, 1], sc([ 0, -np.inf])), sc(np.nan)) + assert_equal(np.interp(0.5, [-np.inf, 1], sc([ 0, +np.inf])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([-np.inf, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([+np.inf, 10])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([ 0, -np.inf])), sc(np.nan)) + assert_equal(np.interp(0.5, [ 0, +np.inf], sc([ 0, +np.inf])), sc(np.nan)) + + def test_non_finite_half_inf_x(self, sc): + """ Test interp where the x axis has a bound at inf """ + assert_equal(np.interp(0.5, [-np.inf, -np.inf], sc([0, 10])), sc(10)) + assert_equal(np.interp(0.5, [-np.inf, 1 ], sc([0, 10])), sc(10)) + 
assert_equal(np.interp(0.5, [ 0, +np.inf], sc([0, 10])), sc(0)) + assert_equal(np.interp(0.5, [+np.inf, +np.inf], sc([0, 10])), sc(0)) + + def test_non_finite_half_inf_f(self, sc): + """ Test interp where the f axis has a bound at inf """ + assert_equal(np.interp(0.5, [0, 1], sc([ 0, -np.inf])), sc(-np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([ 0, +np.inf])), sc(+np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([-np.inf, 10])), sc(-np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([+np.inf, 10])), sc(+np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([-np.inf, -np.inf])), sc(-np.inf)) + assert_equal(np.interp(0.5, [0, 1], sc([+np.inf, +np.inf])), sc(+np.inf)) + def test_complex_interp(self): # test complex interpolation x = np.linspace(0, 1, 5) @@ -2393,11 +2505,8 @@ class TestPercentile(object): assert_equal(np.percentile(x, 100), 3.5) assert_equal(np.percentile(x, 50), 1.75) x[1] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(x, 0), np.nan) - assert_equal(np.percentile(x, 0, interpolation='nearest'), np.nan) - assert_(w[0].category is RuntimeWarning) + assert_equal(np.percentile(x, 0), np.nan) + assert_equal(np.percentile(x, 0, interpolation='nearest'), np.nan) def test_api(self): d = np.ones(5) @@ -2735,85 +2844,63 @@ class TestPercentile(object): def test_nan_behavior(self): a = np.arange(24, dtype=float) a[2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, 0.3), np.nan) - assert_equal(np.percentile(a, 0.3, axis=0), np.nan) - assert_equal(np.percentile(a, [0.3, 0.6], axis=0), - np.array([np.nan] * 2)) - assert_(w[0].category is RuntimeWarning) - assert_(w[1].category is RuntimeWarning) - assert_(w[2].category is RuntimeWarning) + assert_equal(np.percentile(a, 0.3), np.nan) + assert_equal(np.percentile(a, 0.3, axis=0), np.nan) + assert_equal(np.percentile(a, [0.3, 0.6], axis=0), + np.array([np.nan] * 2)) a = np.arange(24, dtype=float).reshape(2, 3, 4) a[1, 2, 3] = np.nan a[1, 1, 2] = np.nan # no axis - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, 0.3), np.nan) - assert_equal(np.percentile(a, 0.3).ndim, 0) - assert_(w[0].category is RuntimeWarning) + assert_equal(np.percentile(a, 0.3), np.nan) + assert_equal(np.percentile(a, 0.3).ndim, 0) # axis0 zerod b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 0) b[2, 3] = np.nan b[1, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, 0.3, 0), b) + assert_equal(np.percentile(a, 0.3, 0), b) # axis0 not zerod b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 0) b[:, 2, 3] = np.nan b[:, 1, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, [0.3, 0.6], 0), b) + assert_equal(np.percentile(a, [0.3, 0.6], 0), b) # axis1 zerod b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, 1) b[1, 3] = np.nan b[1, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, 0.3, 1), b) + assert_equal(np.percentile(a, 0.3, 1), b) # axis1 not zerod b = np.percentile( np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], 1) b[:, 1, 3] = 
np.nan b[:, 1, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, [0.3, 0.6], 1), b) + assert_equal(np.percentile(a, [0.3, 0.6], 1), b) # axis02 zerod b = np.percentile( np.arange(24, dtype=float).reshape(2, 3, 4), 0.3, (0, 2)) b[1] = np.nan b[2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, 0.3, (0, 2)), b) + assert_equal(np.percentile(a, 0.3, (0, 2)), b) # axis02 not zerod b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], (0, 2)) b[:, 1] = np.nan b[:, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile(a, [0.3, 0.6], (0, 2)), b) + assert_equal(np.percentile(a, [0.3, 0.6], (0, 2)), b) # axis02 not zerod with nearest interpolation b = np.percentile(np.arange(24, dtype=float).reshape(2, 3, 4), [0.3, 0.6], (0, 2), interpolation='nearest') b[:, 1] = np.nan b[:, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.percentile( - a, [0.3, 0.6], (0, 2), interpolation='nearest'), b) + assert_equal(np.percentile( + a, [0.3, 0.6], (0, 2), interpolation='nearest'), b) class TestQuantile(object): @@ -2860,10 +2947,7 @@ class TestMedian(object): # check array scalar result assert_equal(np.median(a).ndim, 0) a[1] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.median(a).ndim, 0) - assert_(w[0].category is RuntimeWarning) + assert_equal(np.median(a).ndim, 0) def test_axis_keyword(self): a3 = np.array([[2, 3], @@ -2962,58 +3046,43 @@ class TestMedian(object): def test_nan_behavior(self): a = np.arange(24, dtype=float) a[2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.median(a), np.nan) - assert_equal(np.median(a, axis=0), np.nan) - assert_(w[0].category is RuntimeWarning) - assert_(w[1].category is RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_equal(np.median(a, axis=0), np.nan) a = np.arange(24, dtype=float).reshape(2, 3, 4) a[1, 2, 3] = np.nan a[1, 1, 2] = np.nan # no axis - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.median(a), np.nan) - assert_equal(np.median(a).ndim, 0) - assert_(w[0].category is RuntimeWarning) + assert_equal(np.median(a), np.nan) + assert_equal(np.median(a).ndim, 0) # axis0 b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 0) b[2, 3] = np.nan b[1, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.median(a, 0), b) - assert_equal(len(w), 1) + assert_equal(np.median(a, 0), b) # axis1 b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), 1) b[1, 3] = np.nan b[1, 2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.median(a, 1), b) - assert_equal(len(w), 1) + assert_equal(np.median(a, 1), b) # axis02 b = np.median(np.arange(24, dtype=float).reshape(2, 3, 4), (0, 2)) b[1] = np.nan b[2] = np.nan - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - assert_equal(np.median(a, (0, 2)), b) - 
assert_equal(len(w), 1) + assert_equal(np.median(a, (0, 2)), b) def test_empty(self): - # empty arrays + # mean(empty array) emits two warnings: empty slice and divide by 0 a = np.array([], dtype=float) with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', RuntimeWarning) assert_equal(np.median(a), np.nan) assert_(w[0].category is RuntimeWarning) + assert_equal(len(w), 2) # multiple dimensions a = np.array([], dtype=float, ndmin=3) @@ -3108,6 +3177,7 @@ class TestAdd_newdoc_ufunc(object): class TestAdd_newdoc(object): @pytest.mark.skipif(sys.flags.optimize == 2, reason="Python running -OO") + @pytest.mark.xfail(IS_PYPY, reason="PyPy does not modify tp_doc") def test_add_doc(self): # test np.add_newdoc tgt = "Current flat index into the array." diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py index 49c0d9720..c96b01d42 100644 --- a/numpy/lib/tests/test_histograms.py +++ b/numpy/lib/tests/test_histograms.py @@ -6,7 +6,7 @@ from numpy.lib.histograms import histogram, histogramdd, histogram_bin_edges from numpy.testing import ( assert_, assert_equal, assert_array_equal, assert_almost_equal, assert_array_almost_equal, assert_raises, assert_allclose, - assert_array_max_ulp, assert_warns, assert_raises_regex, suppress_warnings, + assert_array_max_ulp, assert_raises_regex, suppress_warnings, ) @@ -289,13 +289,13 @@ class TestHistogram(object): def test_object_array_of_0d(self): # gh-7864 assert_raises(ValueError, - histogram, [np.array([0.4]) for i in range(10)] + [-np.inf]) + histogram, [np.array(0.4) for i in range(10)] + [-np.inf]) assert_raises(ValueError, - histogram, [np.array([0.4]) for i in range(10)] + [np.inf]) + histogram, [np.array(0.4) for i in range(10)] + [np.inf]) # these should not crash - np.histogram([np.array([0.5]) for i in range(10)] + [.500000000000001]) - np.histogram([np.array([0.5]) for i in range(10)] + [.5]) + np.histogram([np.array(0.5) for i in range(10)] + [.500000000000001]) + np.histogram([np.array(0.5) for i in range(10)] + [.5]) def test_some_nan_values(self): # gh-7503 diff --git a/numpy/lib/tests/test_index_tricks.py b/numpy/lib/tests/test_index_tricks.py index 3246f68ff..028bba37d 100644 --- a/numpy/lib/tests/test_index_tricks.py +++ b/numpy/lib/tests/test_index_tricks.py @@ -77,6 +77,26 @@ class TestRavelUnravelIndex(object): [[3, 6, 6], [4, 5, 1]]) assert_equal(np.unravel_index(1621, (6, 7, 8, 9)), [3, 1, 4, 1]) + def test_empty_indices(self): + msg1 = 'indices must be integral: the provided empty sequence was' + msg2 = 'only int indices permitted' + assert_raises_regex(TypeError, msg1, np.unravel_index, [], (10, 3, 5)) + assert_raises_regex(TypeError, msg1, np.unravel_index, (), (10, 3, 5)) + assert_raises_regex(TypeError, msg2, np.unravel_index, np.array([]), + (10, 3, 5)) + assert_equal(np.unravel_index(np.array([],dtype=int), (10, 3, 5)), + [[], [], []]) + assert_raises_regex(TypeError, msg1, np.ravel_multi_index, ([], []), + (10, 3)) + assert_raises_regex(TypeError, msg1, np.ravel_multi_index, ([], ['abc']), + (10, 3)) + assert_raises_regex(TypeError, msg2, np.ravel_multi_index, + (np.array([]), np.array([])), (5, 3)) + assert_equal(np.ravel_multi_index( + (np.array([], dtype=int), np.array([], dtype=int)), (5, 3)), []) + assert_equal(np.ravel_multi_index(np.array([[], []], dtype=int), + (5, 3)), []) + def test_big_indices(self): # ravel_multi_index for big indices (issue #7546) if np.intp == np.int64: diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 
b746937b9..9d6111e37 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -6,7 +6,6 @@ import os import threading import time import warnings -import gc import io import re import pytest @@ -18,12 +17,12 @@ import locale import numpy as np import numpy.ma as ma from numpy.lib._iotools import ConverterError, ConversionWarning -from numpy.compat import asbytes, bytes, unicode, Path +from numpy.compat import asbytes, bytes, Path from numpy.ma.testutils import assert_equal from numpy.testing import ( assert_warns, assert_, assert_raises_regex, assert_raises, assert_allclose, assert_array_equal, temppath, tempdir, IS_PYPY, - HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles, + HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles, assert_no_warnings ) @@ -88,7 +87,7 @@ class RoundtripTest(object): """ save_kwds = kwargs.get('save_kwds', {}) - load_kwds = kwargs.get('load_kwds', {}) + load_kwds = kwargs.get('load_kwds', {"allow_pickle": True}) file_on_disk = kwargs.get('file_on_disk', False) if file_on_disk: @@ -348,13 +347,33 @@ class TestSaveTxt(object): assert_raises(ValueError, np.savetxt, c, np.array(1)) assert_raises(ValueError, np.savetxt, c, np.array([[[1], [2]]])) - def test_record(self): + def test_structured(self): a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')]) c = BytesIO() np.savetxt(c, a, fmt='%d') c.seek(0) assert_equal(c.readlines(), [b'1 2\n', b'3 4\n']) + def test_structured_padded(self): + # gh-13297 + a = np.array([(1, 2, 3),(4, 5, 6)], dtype=[ + ('foo', 'i4'), ('bar', 'i4'), ('baz', 'i4') + ]) + c = BytesIO() + np.savetxt(c, a[['foo', 'baz']], fmt='%d') + c.seek(0) + assert_equal(c.readlines(), [b'1 3\n', b'4 6\n']) + + @pytest.mark.skipif(Path is None, reason="No pathlib.Path") + def test_multifield_view(self): + a = np.ones(1, dtype=[('x', 'i4'), ('y', 'i4'), ('z', 'f4')]) + v = a[['x', 'z']] + with temppath(suffix='.npy') as path: + path = Path(path) + np.save(path, v) + data = np.load(path) + assert_array_equal(data, v) + def test_delimiter(self): a = np.array([[1., 2.], [3., 4.]]) c = BytesIO() @@ -1372,6 +1391,19 @@ M 33 21.99 control = np.array([(1, 2), (3, 4)], dtype=[('col1', int), ('col2', int)]) assert_equal(test, control) + def test_file_is_closed_on_error(self): + # gh-13200 + with tempdir() as tmpdir: + fpath = os.path.join(tmpdir, "test.csv") + with open(fpath, "wb") as f: + f.write(u'\N{GREEK PI SYMBOL}'.encode('utf8')) + + # ResourceWarnings are emitted from a destructor, so won't be + # detected by regular propagation to errors. 
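# (Aside, not part of the patch: the ResourceWarning for the leaked file
# is raised by the file object's finalizer during garbage collection,
# outside any try/except, so pytest.raises cannot observe it;
# assert_no_warnings records every warning emitted in the block and
# fails the test if one appears.)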
+ with assert_no_warnings(): + with pytest.raises(UnicodeDecodeError): + np.genfromtxt(fpath, encoding="ascii") + def test_autonames_and_usecols(self): # Tests names and usecols data = TextIO('A B C D\n aaaa 121 45 9.1') @@ -2049,7 +2081,6 @@ M 33 21.99 def test_utf8_file(self): utf8 = b"\xcf\x96" - latin1 = b"\xf6\xfc\xf6" with temppath() as path: with open(path, "wb") as f: f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2) diff --git a/numpy/lib/tests/test_mixins.py b/numpy/lib/tests/test_mixins.py index f2d915502..3dd5346b6 100644 --- a/numpy/lib/tests/test_mixins.py +++ b/numpy/lib/tests/test_mixins.py @@ -199,6 +199,17 @@ class TestNDArrayOperatorsMixin(object): err_msg = 'failed for operator {}'.format(op) _assert_equal_type_and_value(expected, actual, err_msg=err_msg) + def test_matmul(self): + array = np.array([1, 2], dtype=np.float64) + array_like = ArrayLike(array) + expected = ArrayLike(np.float64(5)) + _assert_equal_type_and_value(expected, np.matmul(array_like, array)) + if not PY2: + _assert_equal_type_and_value( + expected, operator.matmul(array_like, array)) + _assert_equal_type_and_value( + expected, operator.matmul(array, array_like)) + def test_ufunc_at(self): array = ArrayLike(np.array([1, 2, 3, 4])) assert_(np.negative.at(array, np.array([0, 1])) is None) diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py index 504372faf..b7261c63f 100644 --- a/numpy/lib/tests/test_nanfunctions.py +++ b/numpy/lib/tests/test_nanfunctions.py @@ -1,8 +1,10 @@ from __future__ import division, absolute_import, print_function import warnings +import pytest import numpy as np +from numpy.lib.nanfunctions import _nan_mask from numpy.testing import ( assert_, assert_equal, assert_almost_equal, assert_no_warnings, assert_raises, assert_array_equal, suppress_warnings @@ -925,3 +927,29 @@ class TestNanFunctions_Quantile(object): p = p.tolist() np.nanquantile(np.arange(100.), p, interpolation="midpoint") assert_array_equal(p, p0) + +@pytest.mark.parametrize("arr, expected", [ + # array of floats with some nans + (np.array([np.nan, 5.0, np.nan, np.inf]), + np.array([False, True, False, True])), + # int64 array that can't possibly have nans + (np.array([1, 5, 7, 9], dtype=np.int64), + True), + # bool array that can't possibly have nans + (np.array([False, True, False, True]), + True), + # 2-D complex array with nans + (np.array([[np.nan, 5.0], + [np.nan, np.inf]], dtype=np.complex64), + np.array([[False, True], + [False, True]])), + ]) +def test__nan_mask(arr, expected): + for out in [None, np.empty(arr.shape, dtype=np.bool_)]: + actual = _nan_mask(arr, out=out) + assert_equal(actual, expected) + # the above won't distinguish between True proper + # and an array of True values; we want True proper + # for types that can't possibly contain NaN + if type(expected) is not np.ndarray: + assert actual is True diff --git a/numpy/lib/tests/test_packbits.py b/numpy/lib/tests/test_packbits.py index fde5c37f2..00d5ca827 100644 --- a/numpy/lib/tests/test_packbits.py +++ b/numpy/lib/tests/test_packbits.py @@ -266,3 +266,66 @@ def test_unpackbits_large(): assert_array_equal(np.packbits(np.unpackbits(d, axis=1), axis=1), d) d = d.T.copy() assert_array_equal(np.packbits(np.unpackbits(d, axis=0), axis=0), d) + + +def test_unpackbits_count(): + # test complete invertibility of packbits and unpackbits with count + x = np.array([ + [1, 0, 1, 0, 0, 1, 0], + [0, 1, 1, 1, 0, 0, 0], + [0, 0, 1, 0, 0, 1, 1], + [1, 1, 0, 0, 0, 1, 1], + [1, 0, 1, 0, 1, 0, 1], + [0, 0, 1, 1, 1, 0, 
0], + [0, 1, 0, 1, 0, 1, 0], + ], dtype=np.uint8) + + padded1 = np.zeros(57, dtype=np.uint8) + padded1[:49] = x.ravel() + + packed = np.packbits(x) + for count in range(58): + unpacked = np.unpackbits(packed, count=count) + assert_equal(unpacked.dtype, np.uint8) + assert_array_equal(unpacked, padded1[:count]) + for count in range(-1, -57, -1): + unpacked = np.unpackbits(packed, count=count) + assert_equal(unpacked.dtype, np.uint8) + # count -1 because padded1 has 57 instead of 56 elements + assert_array_equal(unpacked, padded1[:count-1]) + for kwargs in [{}, {'count': None}]: + unpacked = np.unpackbits(packed, **kwargs) + assert_equal(unpacked.dtype, np.uint8) + assert_array_equal(unpacked, padded1[:-1]) + assert_raises(ValueError, np.unpackbits, packed, count=-57) + + padded2 = np.zeros((9, 9), dtype=np.uint8) + padded2[:7, :7] = x + + packed0 = np.packbits(x, axis=0) + packed1 = np.packbits(x, axis=1) + for count in range(10): + unpacked0 = np.unpackbits(packed0, axis=0, count=count) + assert_equal(unpacked0.dtype, np.uint8) + assert_array_equal(unpacked0, padded2[:count, :x.shape[1]]) + unpacked1 = np.unpackbits(packed1, axis=1, count=count) + assert_equal(unpacked1.dtype, np.uint8) + assert_array_equal(unpacked1, padded2[:x.shape[1], :count]) + for count in range(-1, -9, -1): + unpacked0 = np.unpackbits(packed0, axis=0, count=count) + assert_equal(unpacked0.dtype, np.uint8) + # count -1 because one extra zero of padding + assert_array_equal(unpacked0, padded2[:count-1, :x.shape[1]]) + unpacked1 = np.unpackbits(packed1, axis=1, count=count) + assert_equal(unpacked1.dtype, np.uint8) + assert_array_equal(unpacked1, padded2[:x.shape[0], :count-1]) + for kwargs in [{}, {'count': None}]: + unpacked0 = np.unpackbits(packed0, axis=0, **kwargs) + assert_equal(unpacked0.dtype, np.uint8) + assert_array_equal(unpacked0, padded2[:-1, :x.shape[1]]) + unpacked1 = np.unpackbits(packed1, axis=1, **kwargs) + assert_equal(unpacked1.dtype, np.uint8) + assert_array_equal(unpacked1, padded2[:x.shape[0], :-1]) + assert_raises(ValueError, np.unpackbits, packed0, axis=0, count=-9) + assert_raises(ValueError, np.unpackbits, packed1, axis=1, count=-9) + diff --git a/numpy/lib/tests/test_polynomial.py b/numpy/lib/tests/test_polynomial.py index 9f7c117a2..89759bd83 100644 --- a/numpy/lib/tests/test_polynomial.py +++ b/numpy/lib/tests/test_polynomial.py @@ -3,7 +3,7 @@ from __future__ import division, absolute_import, print_function import numpy as np from numpy.testing import ( assert_, assert_equal, assert_array_equal, assert_almost_equal, - assert_array_almost_equal, assert_raises + assert_array_almost_equal, assert_raises, assert_allclose ) @@ -122,27 +122,34 @@ class TestPolynomial(object): weights = np.arange(8, 1, -1)**2/7.0 # Check exception when too few points for variance estimate. Note that - # the Bayesian estimate requires the number of data points to exceed - # degree + 3. 
+ # the estimate requires the number of data points to exceed + # degree + 1 assert_raises(ValueError, np.polyfit, - [0, 1, 3], [0, 1, 3], deg=0, cov=True) + [1], [1], deg=0, cov=True) # check 1D case m, cov = np.polyfit(x, y+err, 2, cov=True) est = [3.8571, 0.2857, 1.619] assert_almost_equal(est, m, decimal=4) - val0 = [[2.9388, -5.8776, 1.6327], - [-5.8776, 12.7347, -4.2449], - [1.6327, -4.2449, 2.3220]] + val0 = [[ 1.4694, -2.9388, 0.8163], + [-2.9388, 6.3673, -2.1224], + [ 0.8163, -2.1224, 1.161 ]] assert_almost_equal(val0, cov, decimal=4) m2, cov2 = np.polyfit(x, y+err, 2, w=weights, cov=True) assert_almost_equal([4.8927, -1.0177, 1.7768], m2, decimal=4) - val = [[8.7929, -10.0103, 0.9756], - [-10.0103, 13.6134, -1.8178], - [0.9756, -1.8178, 0.6674]] + val = [[ 4.3964, -5.0052, 0.4878], + [-5.0052, 6.8067, -0.9089], + [ 0.4878, -0.9089, 0.3337]] assert_almost_equal(val, cov2, decimal=4) + m3, cov3 = np.polyfit(x, y+err, 2, w=weights, cov="unscaled") + assert_almost_equal([4.8927, -1.0177, 1.7768], m3, decimal=4) + val = [[ 0.1473, -0.1677, 0.0163], + [-0.1677, 0.228 , -0.0304], + [ 0.0163, -0.0304, 0.0112]] + assert_almost_equal(val, cov3, decimal=4) + # check 2D (n,1) case y = y[:, np.newaxis] c = c[:, np.newaxis] @@ -158,6 +165,29 @@ class TestPolynomial(object): assert_almost_equal(val0, cov[:, :, 0], decimal=4) assert_almost_equal(val0, cov[:, :, 1], decimal=4) + # check order 1 (deg=0) case, were the analytic results are simple + np.random.seed(123) + y = np.random.normal(size=(4, 10000)) + mean, cov = np.polyfit(np.zeros(y.shape[0]), y, deg=0, cov=True) + # Should get sigma_mean = sigma/sqrt(N) = 1./sqrt(4) = 0.5. + assert_allclose(mean.std(), 0.5, atol=0.01) + assert_allclose(np.sqrt(cov.mean()), 0.5, atol=0.01) + # Without scaling, since reduced chi2 is 1, the result should be the same. + mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=np.ones(y.shape[0]), + deg=0, cov="unscaled") + assert_allclose(mean.std(), 0.5, atol=0.01) + assert_almost_equal(np.sqrt(cov.mean()), 0.5) + # If we estimate our errors wrong, no change with scaling: + w = np.full(y.shape[0], 1./0.5) + mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=w, deg=0, cov=True) + assert_allclose(mean.std(), 0.5, atol=0.01) + assert_allclose(np.sqrt(cov.mean()), 0.5, atol=0.01) + # But if we do not scale, our estimate for the error in the mean will + # differ. 
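# (Aside, not part of the patch: for deg=0 the unscaled weighted
# covariance of the mean is 1/sum(w**2); with w = 1/0.5 = 2 for each of
# the 4 samples that is 1/16, so sqrt(cov) = 0.25 rather than the true
# scatter of 0.5, which is exactly what the final assertion checks.)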
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=w, deg=0, cov="unscaled") + assert_allclose(mean.std(), 0.5, atol=0.01) + assert_almost_equal(np.sqrt(cov.mean()), 0.25) + def test_objects(self): from decimal import Decimal p = np.poly1d([Decimal('4.0'), Decimal('3.0'), Decimal('2.0')]) @@ -216,16 +246,16 @@ class TestPolynomial(object): assert_equal(r.coeffs.dtype, np.complex128) assert_equal(q*a + r, b) - def test_poly_coeffs_immutable(self): - """ Coefficients should not be modifiable """ + def test_poly_coeffs_mutable(self): + """ Coefficients should be modifiable """ p = np.poly1d([1, 2, 3]) - try: - # despite throwing an exception, this used to change state - p.coeffs += 1 - except Exception: - pass - assert_equal(p.coeffs, [1, 2, 3]) + p.coeffs += 1 + assert_equal(p.coeffs, [2, 3, 4]) p.coeffs[2] += 10 - assert_equal(p.coeffs, [1, 2, 3]) + assert_equal(p.coeffs, [2, 3, 14]) + + # this never used to be allowed - let's not add features to deprecated + # APIs + assert_raises(AttributeError, setattr, p, 'coeffs', np.array(1)) diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py index d1fcf2153..112678294 100644 --- a/numpy/lib/tests/test_recfunctions.py +++ b/numpy/lib/tests/test_recfunctions.py @@ -223,9 +223,9 @@ class TestRecFunctions(object): ( 5, ( 6., 7), [ 8., 9.]), (10, (11., 12), [13., 14.]), (15, (16., 17), [18., 19.])], - dtype=[('a', '<i4'), - ('b', [('f0', '<f4'), ('f1', '<u2')]), - ('c', '<f4', (2,))]) + dtype=[('a', 'i4'), + ('b', [('f0', 'f4'), ('f1', 'u2')]), + ('c', 'f4', (2,))]) assert_equal(out, want) d = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], diff --git a/numpy/lib/tests/test_shape_base.py b/numpy/lib/tests/test_shape_base.py index e338467f9..01ea028bb 100644 --- a/numpy/lib/tests/test_shape_base.py +++ b/numpy/lib/tests/test_shape_base.py @@ -260,8 +260,8 @@ class TestApplyAlongAxis(object): def test_with_iterable_object(self): # from issue 5248 d = np.array([ - [set([1, 11]), set([2, 22]), set([3, 33])], - [set([4, 44]), set([5, 55]), set([6, 66])] + [{1, 11}, {2, 22}, {3, 33}], + [{4, 44}, {5, 55}, {6, 66}] ]) actual = np.apply_along_axis(lambda a: set.union(*a), 0, d) expected = np.array([{1, 11, 4, 44}, {2, 22, 5, 55}, {3, 33, 6, 66}]) diff --git a/numpy/lib/tests/test_type_check.py b/numpy/lib/tests/test_type_check.py index 2982ca31a..b3f114b92 100644 --- a/numpy/lib/tests/test_type_check.py +++ b/numpy/lib/tests/test_type_check.py @@ -360,6 +360,14 @@ class TestNanToNum(object): assert_(vals[1] == 0) assert_all(vals[2] > 1e10) and assert_all(np.isfinite(vals[2])) assert_equal(type(vals), np.ndarray) + + # perform the same tests but with nan, posinf and neginf keywords + with np.errstate(divide='ignore', invalid='ignore'): + vals = nan_to_num(np.array((-1., 0, 1))/0., + nan=10, posinf=20, neginf=30) + assert_equal(vals, [30, 10, 20]) + assert_all(np.isfinite(vals[[0, 2]])) + assert_equal(type(vals), np.ndarray) # perform the same test but in-place with np.errstate(divide='ignore', invalid='ignore'): @@ -371,26 +379,48 @@ class TestNanToNum(object): assert_(vals[1] == 0) assert_all(vals[2] > 1e10) and assert_all(np.isfinite(vals[2])) assert_equal(type(vals), np.ndarray) + + # perform the same test but in-place + with np.errstate(divide='ignore', invalid='ignore'): + vals = np.array((-1., 0, 1))/0. 
+ result = nan_to_num(vals, copy=False, nan=10, posinf=20, neginf=30) + + assert_(result is vals) + assert_equal(vals, [30, 10, 20]) + assert_all(np.isfinite(vals[[0, 2]])) + assert_equal(type(vals), np.ndarray) def test_array(self): vals = nan_to_num([1]) assert_array_equal(vals, np.array([1], int)) assert_equal(type(vals), np.ndarray) + vals = nan_to_num([1], nan=10, posinf=20, neginf=30) + assert_array_equal(vals, np.array([1], int)) + assert_equal(type(vals), np.ndarray) def test_integer(self): vals = nan_to_num(1) assert_all(vals == 1) assert_equal(type(vals), np.int_) + vals = nan_to_num(1, nan=10, posinf=20, neginf=30) + assert_all(vals == 1) + assert_equal(type(vals), np.int_) def test_float(self): vals = nan_to_num(1.0) assert_all(vals == 1.0) assert_equal(type(vals), np.float_) + vals = nan_to_num(1.1, nan=10, posinf=20, neginf=30) + assert_all(vals == 1.1) + assert_equal(type(vals), np.float_) def test_complex_good(self): vals = nan_to_num(1+1j) assert_all(vals == 1+1j) assert_equal(type(vals), np.complex_) + vals = nan_to_num(1+1j, nan=10, posinf=20, neginf=30) + assert_all(vals == 1+1j) + assert_equal(type(vals), np.complex_) def test_complex_bad(self): with np.errstate(divide='ignore', invalid='ignore'): @@ -414,6 +444,16 @@ class TestNanToNum(object): # !! inf. Comment out for now, and see if it # !! changes #assert_all(vals.real < -1e10) and assert_all(np.isfinite(vals)) + + def test_do_not_rewrite_previous_keyword(self): + # This is done to test that when, for instance, nan=np.inf then these + # values are not rewritten by posinf keyword to the posinf value. + with np.errstate(divide='ignore', invalid='ignore'): + vals = nan_to_num(np.array((-1., 0, 1))/0., nan=np.inf, posinf=999) + assert_all(np.isfinite(vals[[0, 2]])) + assert_all(vals[0] < -1e10) + assert_equal(vals[[1, 2]], [np.inf, 999]) + assert_equal(type(vals), np.ndarray) class TestRealIfClose(object): diff --git a/numpy/lib/tests/test_utils.py b/numpy/lib/tests/test_utils.py index 2723f3440..9673a05fa 100644 --- a/numpy/lib/tests/test_utils.py +++ b/numpy/lib/tests/test_utils.py @@ -1,5 +1,6 @@ from __future__ import division, absolute_import, print_function +import inspect import sys import pytest @@ -38,6 +39,32 @@ def old_func3(self, x): new_func3 = deprecate(old_func3, old_name="old_func3", new_name="new_func3") +def old_func4(self, x): + """Summary. + + Further info. + """ + return x +new_func4 = deprecate(old_func4) + + +def old_func5(self, x): + """Summary. + + Bizarre indentation. + """ + return x +new_func5 = deprecate(old_func5) + + +def old_func6(self, x): + """ + Also in PEP-257. 
+ """ + return x +new_func6 = deprecate(old_func6) + + def test_deprecate_decorator(): assert_('deprecated' in old_func.__doc__) @@ -51,6 +78,25 @@ def test_deprecate_fn(): assert_('new_func3' in new_func3.__doc__) +@pytest.mark.skipif(sys.flags.optimize == 2, reason="-OO discards docstrings") +def test_deprecate_help_indentation(): + _compare_docs(old_func4, new_func4) + _compare_docs(old_func5, new_func5) + _compare_docs(old_func6, new_func6) + + +def _compare_docs(old_func, new_func): + old_doc = inspect.getdoc(old_func) + new_doc = inspect.getdoc(new_func) + index = new_doc.index('\n\n') + 2 + assert_equal(new_doc[index:], old_doc) + + +@pytest.mark.skipif(sys.flags.optimize == 2, reason="-OO discards docstrings") +def test_deprecate_preserve_whitespace(): + assert_('\n Bizarre' in new_func5.__doc__) + + def test_safe_eval_nameconstant(): # Test if safe_eval supports Python 3.4 _ast.NameConstant utils.safe_eval('None') diff --git a/numpy/lib/twodim_base.py b/numpy/lib/twodim_base.py index 27d848608..e165c9b02 100644 --- a/numpy/lib/twodim_base.py +++ b/numpy/lib/twodim_base.py @@ -77,13 +77,13 @@ def fliplr(m): -------- >>> A = np.diag([1.,2.,3.]) >>> A - array([[ 1., 0., 0.], - [ 0., 2., 0.], - [ 0., 0., 3.]]) + array([[1., 0., 0.], + [0., 2., 0.], + [0., 0., 3.]]) >>> np.fliplr(A) - array([[ 0., 0., 1.], - [ 0., 2., 0.], - [ 3., 0., 0.]]) + array([[0., 0., 1.], + [0., 2., 0.], + [3., 0., 0.]]) >>> A = np.random.randn(2,3,5) >>> np.all(np.fliplr(A) == A[:,::-1,...]) @@ -129,13 +129,13 @@ def flipud(m): -------- >>> A = np.diag([1.0, 2, 3]) >>> A - array([[ 1., 0., 0.], - [ 0., 2., 0.], - [ 0., 0., 3.]]) + array([[1., 0., 0.], + [0., 2., 0.], + [0., 0., 3.]]) >>> np.flipud(A) - array([[ 0., 0., 3.], - [ 0., 2., 0.], - [ 1., 0., 0.]]) + array([[0., 0., 3.], + [0., 2., 0.], + [1., 0., 0.]]) >>> A = np.random.randn(2,3,5) >>> np.all(np.flipud(A) == A[::-1,...]) @@ -191,9 +191,9 @@ def eye(N, M=None, k=0, dtype=float, order='C'): array([[1, 0], [0, 1]]) >>> np.eye(3, k=1) - array([[ 0., 1., 0.], - [ 0., 0., 1.], - [ 0., 0., 0.]]) + array([[0., 1., 0.], + [0., 0., 1.], + [0., 0., 0.]]) """ if M is None: @@ -378,9 +378,9 @@ def tri(N, M=None, k=0, dtype=float): [1, 1, 1, 1, 1]]) >>> np.tri(3, 5, -1) - array([[ 0., 0., 0., 0., 0.], - [ 1., 0., 0., 0., 0.], - [ 1., 1., 0., 0., 0.]]) + array([[0., 0., 0., 0., 0.], + [1., 0., 0., 0., 0.], + [1., 1., 0., 0., 0.]]) """ if M is None: @@ -540,7 +540,7 @@ def vander(x, N=None, increasing=False): of the differences between the values of the input vector: >>> np.linalg.det(np.vander(x)) - 48.000000000000043 + 48.000000000000043 # may vary >>> (5-3)*(5-2)*(5-1)*(3-2)*(3-1)*(2-1) 48 @@ -644,7 +644,7 @@ def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, Examples -------- - >>> import matplotlib as mpl + >>> from matplotlib.image import NonUniformImage >>> import matplotlib.pyplot as plt Construct a 2-D histogram with variable bin width. First define the bin @@ -666,6 +666,7 @@ def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, >>> ax = fig.add_subplot(131, title='imshow: square bins') >>> plt.imshow(H, interpolation='nearest', origin='low', ... extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]]) + <matplotlib.image.AxesImage object at 0x...> :func:`pcolormesh <matplotlib.pyplot.pcolormesh>` can display actual edges: @@ -673,13 +674,14 @@ def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, ... 
     >>> X, Y = np.meshgrid(xedges, yedges)
     >>> ax.pcolormesh(X, Y, H)
+    <matplotlib.collections.QuadMesh object at 0x...>
 
     :class:`NonUniformImage <matplotlib.image.NonUniformImage>` can be
     used to display actual bin edges with interpolation:
 
     >>> ax = fig.add_subplot(133, title='NonUniformImage: interpolated',
     ...         aspect='equal', xlim=xedges[[0, -1]], ylim=yedges[[0, -1]])
-    >>> im = mpl.image.NonUniformImage(ax, interpolation='bilinear')
+    >>> im = NonUniformImage(ax, interpolation='bilinear')
     >>> xcenters = (xedges[:-1] + xedges[1:]) / 2
     >>> ycenters = (yedges[:-1] + yedges[1:]) / 2
     >>> im.set_data(xcenters, ycenters, H)
@@ -829,7 +831,7 @@ def tril_indices(n, k=0, m=None):
     Both for indexing:
 
     >>> a[il1]
-    array([ 0,  4,  5,  8,  9, 10, 12, 13, 14, 15])
+    array([ 0,  4,  5, ..., 13, 14, 15])
 
     And for assigning values:
 
@@ -944,7 +946,7 @@ def triu_indices(n, k=0, m=None):
     Both for indexing:
 
     >>> a[iu1]
-    array([ 0,  1,  2,  3,  5,  6,  7, 10, 11, 15])
+    array([ 0,  1,  2, ..., 10, 11, 15])
 
     And for assigning values:
 
diff --git a/numpy/lib/type_check.py b/numpy/lib/type_check.py
index 1073613c9..2b254b6c0 100644
--- a/numpy/lib/type_check.py
+++ b/numpy/lib/type_check.py
@@ -75,10 +75,7 @@ def mintypecode(typechars, typeset='GDFgdf', default='d'):
         return default
     if 'F' in intersection and 'd' in intersection:
         return 'D'
-    l = []
-    for t in intersection:
-        i = _typecodes_by_elsize.index(t)
-        l.append((i, t))
+    l = [(_typecodes_by_elsize.index(t), t) for t in intersection]
     l.sort()
     return l[0][1]
 
@@ -108,11 +105,11 @@ def asfarray(a, dtype=_nx.float_):
     Examples
     --------
     >>> np.asfarray([2, 3])
-    array([ 2.,  3.])
+    array([2., 3.])
     >>> np.asfarray([2, 3], dtype='float')
-    array([ 2.,  3.])
+    array([2., 3.])
     >>> np.asfarray([2, 3], dtype='int8')
-    array([ 2.,  3.])
+    array([2., 3.])
 
     """
     if not _nx.issubdtype(dtype, _nx.inexact):
@@ -149,13 +146,13 @@ def real(val):
     --------
     >>> a = np.array([1+2j, 3+4j, 5+6j])
     >>> a.real
-    array([ 1.,  3.,  5.])
+    array([1., 3., 5.])
     >>> a.real = 9
     >>> a
-    array([ 9.+2.j,  9.+4.j,  9.+6.j])
+    array([9.+2.j, 9.+4.j, 9.+6.j])
     >>> a.real = np.array([9, 8, 7])
     >>> a
-    array([ 9.+2.j,  8.+4.j,  7.+6.j])
+    array([9.+2.j, 8.+4.j, 7.+6.j])
     >>> np.real(1 + 1j)
     1.0
 
@@ -195,10 +192,10 @@ def imag(val):
     --------
     >>> a = np.array([1+2j, 3+4j, 5+6j])
     >>> a.imag
-    array([ 2.,  4.,  6.])
+    array([2., 4., 6.])
     >>> a.imag = np.array([8, 10, 12])
     >>> a
-    array([ 1. +8.j,  3.+10.j,  5.+12.j])
+    array([1. +8.j, 3.+10.j, 5.+12.j])
     >>> np.imag(1 + 1j)
     1.0
 
@@ -366,18 +363,23 @@ def _getmaxmin(t):
     return f.max, f.min
 
 
-def _nan_to_num_dispatcher(x, copy=None):
+def _nan_to_num_dispatcher(x, copy=None, nan=None, posinf=None, neginf=None):
     return (x,)
 
 
 @array_function_dispatch(_nan_to_num_dispatcher)
-def nan_to_num(x, copy=True):
+def nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None):
     """
-    Replace NaN with zero and infinity with large finite numbers.
+    Replace NaN with zero and infinity with large finite numbers (default
+    behaviour) or with the numbers defined by the user using the `nan`,
+    `posinf` and/or `neginf` keywords.
 
-    If `x` is inexact, NaN is replaced by zero, and infinity and -infinity
-    replaced by the respectively largest and most negative finite floating
-    point values representable by ``x.dtype``.
+    If `x` is inexact, NaN is replaced by zero or by the user defined value in
+    `nan` keyword, infinity is replaced by the largest finite floating point
+    values representable by ``x.dtype`` or by the user defined value in
+    `posinf` keyword and -infinity is replaced by the most negative finite
+    floating point values representable by ``x.dtype`` or by the user defined
+    value in `neginf` keyword.
 
     For complex dtypes, the above is applied to each of the real and
     imaginary components of `x` separately.
@@ -393,6 +395,17 @@ def nan_to_num(x, copy=True):
         in-place (False). The in-place operation only occurs if
         casting to an array does not require a copy.
         Default is True.
+    nan : int, float, optional
+        Value to be used to fill NaN values. If no value is passed
+        then NaN values will be replaced with 0.0.
+    posinf : int, float, optional
+        Value to be used to fill positive infinity values. If no value is
+        passed then positive infinity values will be replaced with a very
+        large number.
+    neginf : int, float, optional
+        Value to be used to fill negative infinity values. If no value is
+        passed then negative infinity values will be replaced with a very
+        small (or negative) number.
 
         .. versionadded:: 1.13
 
@@ -425,13 +438,18 @@ def nan_to_num(x, copy=True):
     0.0
     >>> x = np.array([np.inf, -np.inf, np.nan, -128, 128])
     >>> np.nan_to_num(x)
-    array([  1.79769313e+308,  -1.79769313e+308,   0.00000000e+000,
-            -1.28000000e+002,   1.28000000e+002])
+    array([ 1.79769313e+308, -1.79769313e+308,  0.00000000e+000, # may vary
+           -1.28000000e+002,  1.28000000e+002])
+    >>> np.nan_to_num(x, nan=-9999, posinf=33333333, neginf=33333333)
+    array([ 3.3333333e+07,  3.3333333e+07, -9.9990000e+03,
+           -1.2800000e+02,  1.2800000e+02])
     >>> y = np.array([complex(np.inf, np.nan), np.nan, complex(np.nan, np.inf)])
     >>> np.nan_to_num(y)
-    array([  1.79769313e+308 +0.00000000e+000j,
+    array([  1.79769313e+308 +0.00000000e+000j, # may vary
              0.00000000e+000 +0.00000000e+000j,
              0.00000000e+000 +1.79769313e+308j])
+    >>> np.nan_to_num(y, nan=111111, posinf=222222)
+    array([222222.+111111.j, 111111.     +0.j, 111111.+222222.j])
 
     """
     x = _nx.array(x, subok=True, copy=copy)
     xtype = x.dtype.type
@@ -445,10 +465,17 @@ def nan_to_num(x, copy=True):
 
     dest = (x.real, x.imag) if iscomplex else (x,)
     maxf, minf = _getmaxmin(x.real.dtype)
+    if posinf is not None:
+        maxf = posinf
+    if neginf is not None:
+        minf = neginf
     for d in dest:
-        _nx.copyto(d, 0.0, where=isnan(d))
-        _nx.copyto(d, maxf, where=isposinf(d))
-        _nx.copyto(d, minf, where=isneginf(d))
+        idx_nan = isnan(d)
+        idx_posinf = isposinf(d)
+        idx_neginf = isneginf(d)
+        _nx.copyto(d, nan, where=idx_nan)
+        _nx.copyto(d, maxf, where=idx_posinf)
+        _nx.copyto(d, minf, where=idx_neginf)
     return x[()] if isscalar else x
 
 #-----------------------------------------------------------------------------
 
@@ -493,12 +520,12 @@ def real_if_close(a, tol=100):
     Examples
     --------
     >>> np.finfo(float).eps
-    2.2204460492503131e-16
+    2.2204460492503131e-16 # may vary
 
     >>> np.real_if_close([2.1 + 4e-14j], tol=1000)
-    array([ 2.1])
+    array([2.1])
     >>> np.real_if_close([2.1 + 4e-13j], tol=1000)
-    array([ 2.1 +4.00000000e-13j])
+    array([2.1+4.e-13j])
 
     """
     a = asanyarray(a)
@@ -541,7 +568,6 @@ def asscalar(a):
     --------
     >>> np.asscalar(np.array([24]))
     24
-
     """
 
     # 2018-10-10, 1.16
@@ -675,11 +701,11 @@ def common_type(*arrays):
     Examples
     --------
     >>> np.common_type(np.arange(2, dtype=np.float32))
-    <type 'numpy.float32'>
+    <class 'numpy.float32'>
     >>> np.common_type(np.arange(2, dtype=np.float32), np.arange(2))
-    <type 'numpy.float64'>
+    <class 'numpy.float64'>
     >>> np.common_type(np.arange(4), np.array([45, 6.j]), np.array([45.0]))
-    <type 'numpy.complex128'>
+    <class 'numpy.complex128'>
 
     """
     is_complex = False
diff --git a/numpy/lib/ufunclike.py b/numpy/lib/ufunclike.py
index 9a9e6f9dd..8452604d9 100644
--- a/numpy/lib/ufunclike.py
+++ b/numpy/lib/ufunclike.py
@@ -8,7 +8,7 @@ from __future__ import division, absolute_import, print_function
 __all__ = ['fix', 'isneginf', 'isposinf']
 
 import numpy.core.numeric as nx
-from numpy.core.overrides import array_function_dispatch, ENABLE_ARRAY_FUNCTION
+from numpy.core.overrides import array_function_dispatch
 import warnings
 import functools
 
@@ -55,10 +55,6 @@ def _fix_out_named_y(f):
     return func
 
 
-if not ENABLE_ARRAY_FUNCTION:
-    _fix_out_named_y = _deprecate_out_named_y
-
-
 @_deprecate_out_named_y
 def _dispatcher(x, out=None):
     return (x, out)
@@ -154,11 +150,11 @@ def isposinf(x, out=None):
     Examples
     --------
     >>> np.isposinf(np.PINF)
-    array(True, dtype=bool)
+    True
     >>> np.isposinf(np.inf)
-    array(True, dtype=bool)
+    True
     >>> np.isposinf(np.NINF)
-    array(False, dtype=bool)
+    False
     >>> np.isposinf([-np.inf, 0., np.inf])
     array([False, False,  True])
 
@@ -224,11 +220,11 @@ def isneginf(x, out=None):
     Examples
     --------
     >>> np.isneginf(np.NINF)
-    array(True, dtype=bool)
+    True
     >>> np.isneginf(np.inf)
-    array(False, dtype=bool)
+    False
     >>> np.isneginf(np.PINF)
-    array(False, dtype=bool)
+    False
     >>> np.isneginf([-np.inf, 0., np.inf])
     array([ True, False, False])
 
diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py
index 84edf4021..718b55c4b 100644
--- a/numpy/lib/utils.py
+++ b/numpy/lib/utils.py
@@ -105,6 +105,20 @@ class _Deprecate(object):
         if doc is None:
             doc = depdoc
         else:
+            lines = doc.expandtabs().split('\n')
+            indent = _get_indent(lines[1:])
+            if lines[0].lstrip():
+                # Indent the original first line to let inspect.cleandoc()
+                # dedent the docstring despite the deprecation notice.
+                doc = indent * ' ' + doc
+            else:
+                # Remove the same leading blank lines as cleandoc() would.
+                skip = len(lines[0]) + 1
+                for line in lines[1:]:
+                    if len(line) > indent:
+                        break
+                    skip += len(line) + 1
+                doc = doc[skip:]
             doc = '\n\n'.join([depdoc, doc])
         newfunc.__doc__ = doc
         try:
@@ -115,6 +129,21 @@ class _Deprecate(object):
         newfunc.__dict__.update(d)
         return newfunc
 
+
+def _get_indent(lines):
+    """
+    Determines the leading whitespace that could be removed from all the lines.
+    """
+    indent = sys.maxsize
+    for line in lines:
+        content = len(line.lstrip())
+        if content:
+            indent = min(indent, len(line) - content)
+    if indent == sys.maxsize:
+        indent = 0
+    return indent
+
+
 def deprecate(*args, **kwargs):
     """
     Issues a DeprecationWarning, adds warning to `old_name`'s
@@ -150,10 +179,8 @@ def deprecate(*args, **kwargs):
     Warning:
 
     >>> olduint = np.deprecate(np.uint)
+    DeprecationWarning: `uint64` is deprecated! # may vary
     >>> olduint(6)
-    /usr/lib/python2.5/site-packages/numpy/lib/utils.py:114:
-    DeprecationWarning: uint32 is deprecated
-    warnings.warn(str1, DeprecationWarning, stacklevel=2)
     6
 
     """
@@ -201,8 +228,8 @@ def byte_bounds(a):
     >>> low, high = np.byte_bounds(I)
     >>> high - low == I.size*I.itemsize
     True
-    >>> I = np.eye(2, dtype='G'); I.dtype
-    dtype('complex192')
+    >>> I = np.eye(2); I.dtype
+    dtype('float64')
     >>> low, high = np.byte_bounds(I)
     >>> high - low == I.size*I.itemsize
     True
@@ -263,17 +290,17 @@ def who(vardict=None):
     >>> np.who()
     Name            Shape      Bytes            Type
     ===========================================================
-    a               10         40               int32
+    a               10         80               int64
     b               20         160              float64
-    Upper bound on total bytes  =       200
+    Upper bound on total bytes  =       240
 
     >>> d = {'x': np.arange(2.0), 'y': np.arange(3.0), 'txt': 'Some str',
     ... 'idx':5}
     >>> np.who(d)
     Name            Shape      Bytes            Type
     ===========================================================
-    y               3          24               float64
     x               2          16               float64
+    y               3          24               float64
     Upper bound on total bytes  =       40
 
     """
@@ -733,7 +760,7 @@ def lookfor(what, module=None, import_modules=True, regenerate=False,
     Examples
     --------
-    >>> np.lookfor('binary representation')
+    >>> np.lookfor('binary representation') # doctest: +SKIP
     Search results for 'binary representation'
     ------------------------------------------
     numpy.binary_repr
@@ -1104,7 +1131,7 @@ def safe_eval(source):
     >>> np.safe_eval('open("/home/user/.ssh/id_dsa").read()')
     Traceback (most recent call last):
       ...
-    SyntaxError: Unsupported source construct: compiler.ast.CallFunc
+    ValueError: malformed node or string: <_ast.Call object at 0x...>
 
     """
     # Local import to speed up numpy's import time.
@@ -1142,17 +1169,12 @@ def _median_nancheck(data, result, axis, out):
         n = n.filled(False)
     if result.ndim == 0:
         if n == True:
-            warnings.warn("Invalid value encountered in median",
-                          RuntimeWarning, stacklevel=3)
             if out is not None:
                 out[...] = data.dtype.type(np.nan)
                 result = out
             else:
                 result = data.dtype.type(np.nan)
     elif np.count_nonzero(n.ravel()) > 0:
-        warnings.warn("Invalid value encountered in median for" +
-                      " %d results" % np.count_nonzero(n.ravel()),
-                      RuntimeWarning, stacklevel=3)
        result[n] = np.nan
    return result
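
The polyfit fragment at the top of this section exercises the ``cov="unscaled"`` option, under which the returned covariance matrix is not rescaled by ``chi2/dof``; that is the appropriate choice when the weights are ``1/sigma`` for known per-point uncertainties. A minimal sketch of that behaviour (the sigma and sample size here are illustrative, not the values used by the test)::

    import numpy as np

    sigma = 0.5                             # known 1-sigma error per point
    rng = np.random.RandomState(0)
    y = rng.normal(0.0, sigma, size=2000)
    x = np.zeros_like(y)                    # deg=0 fit: estimate a constant

    w = np.full_like(y, 1.0 / sigma)        # weights = 1/sigma
    mean, cov = np.polyfit(x, y, deg=0, w=w, cov="unscaled")

    # With no chi2/dof rescaling, the covariance of the fitted constant is
    # exactly sigma**2 / n, independent of the residuals.
    assert np.allclose(np.sqrt(cov[0, 0]), sigma / np.sqrt(y.size))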
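
The reversed ``test_poly_coeffs_immutable`` → ``test_poly_coeffs_mutable`` hunk records a behaviour change in the legacy ``poly1d`` class: in-place modification of ``coeffs`` now sticks, while rebinding the attribute outright is still rejected. In use::

    import numpy as np

    p = np.poly1d([1, 2, 3])

    p.coeffs += 1              # in-place arithmetic now updates the poly1d
    assert list(p.coeffs) == [2, 3, 4]

    p.coeffs[2] += 10          # so does item assignment on the coeffs array
    assert list(p.coeffs) == [2, 3, 14]

    try:                       # but p.coeffs = <new array> still raises
        p.coeffs = np.array(1)
    except AttributeError:
        pass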
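
The ``nan_to_num`` changes are the bulk of this diff: new ``nan``, ``posinf`` and ``neginf`` keywords, with the replacement masks computed up front so that one fill value is never rewritten by a later one (the property ``test_do_not_rewrite_previous_keyword`` pins down). A short sketch, assuming a NumPy build that includes this change::

    import numpy as np

    with np.errstate(divide='ignore', invalid='ignore'):
        x = np.array([-1., 0., 1.]) / 0.    # [-inf, nan, inf]

    # Defaults: nan -> 0.0, +/-inf -> the extreme finite values of the dtype.
    np.nan_to_num(x)

    # User-chosen fill values.
    assert list(np.nan_to_num(x, nan=10., posinf=20., neginf=30.)) == [30., 10., 20.]

    # Masks are taken before any filling: nan=np.inf stays inf, because
    # posinf only applies to values that were +inf in the input.
    vals = np.nan_to_num(x, nan=np.inf, posinf=999.)
    assert np.isinf(vals[1]) and vals[2] == 999.

    # copy=False fills in place when no cast is required.
    out = np.nan_to_num(x, copy=False, nan=10., posinf=20., neginf=30.)
    assert out is x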
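
The ``_Deprecate``/``_get_indent`` changes in ``numpy/lib/utils.py`` re-indent the wrapped function's docstring so that ``inspect.getdoc`` can still dedent it once the deprecation notice is prepended, which is what ``test_deprecate_help_indentation`` verifies. The observable effect (``old``/``new`` are hypothetical names for this sketch)::

    import inspect
    import numpy as np

    def old(x):
        """Summary.

        Further info.
        """
        return x

    new = np.deprecate(old)    # the warning fires when `new` is called

    notice, _, rest = inspect.getdoc(new).partition('\n\n')
    assert notice == '`old` is deprecated!'
    # Everything after the notice dedents back to the original docstring.
    assert rest == inspect.getdoc(old)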
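
Finally, the updated ``safe_eval`` doctest reflects that the helper now defers to ``ast.literal_eval``, so rejected constructs surface as ``ValueError`` rather than the old ``compiler``-module ``SyntaxError``. For example::

    from numpy.lib.utils import safe_eval

    assert safe_eval('[1, 2, {"three": 3}]') == [1, 2, {"three": 3}]

    try:
        safe_eval('open("/etc/passwd").read()')  # calls are not literals
    except ValueError:
        pass                                     # "malformed node or string"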