diff options
Diffstat (limited to 'benchmarks')
-rw-r--r-- | benchmarks/README.rst | 37 | ||||
-rw-r--r-- | benchmarks/asv.conf.json | 3 | ||||
-rw-r--r-- | benchmarks/asv_compare.conf.json.tpl | 3 | ||||
-rw-r--r-- | benchmarks/benchmarks/__init__.py | 2 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_core.py | 53 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_creation.py | 81 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_function_base.py | 2 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_io.py | 13 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_itemselection.py | 22 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_lib.py | 21 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_linalg.py | 24 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_manipulate.py | 107 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_reduce.py | 36 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_scalar.py | 12 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_ufunc.py | 223 | ||||
-rw-r--r-- | benchmarks/benchmarks/bench_ufunc_strides.py | 285 | ||||
-rw-r--r-- | benchmarks/benchmarks/common.py | 157 |
17 files changed, 884 insertions, 197 deletions
diff --git a/benchmarks/README.rst b/benchmarks/README.rst index 2700e95e7..ef841a818 100644 --- a/benchmarks/README.rst +++ b/benchmarks/README.rst @@ -22,8 +22,8 @@ By default, `asv` ships with support for anaconda and virtualenv:: pip install asv pip install virtualenv -After contributing new benchmarks, you should test them locally -before submitting a pull request. +After contributing new benchmarks, you should test them locally before +submitting a pull request. To run all benchmarks, navigate to the root NumPy directory at the command line and execute:: @@ -31,11 +31,21 @@ the command line and execute:: python runtests.py --bench where ``--bench`` activates the benchmark suite instead of the -test suite. This builds NumPy and runs all available benchmarks +test suite. This builds NumPy and runs all available benchmarks defined in ``benchmarks/``. (Note: this could take a while. Each benchmark is run multiple times to measure the distribution in execution times.) +For **testing** benchmarks locally, it may be better to run these without +replications:: + + cd benchmarks/ + export REGEXP="bench.*Ufunc" + asv run --dry-run --show-stderr --python=same --quick -b $REGEXP + +Where the regular expression used to match benchmarks is stored in ``$REGEXP``, +and ``--quick`` is used to avoid repetitions. + To run benchmarks from a particular benchmark module, such as ``bench_core.py``, simply append the filename without the extension:: @@ -69,6 +79,27 @@ Command-line help is available as usual via ``asv --help`` and .. 
_ASV documentation: https://asv.readthedocs.io/ +Benchmarking versions +--------------------- + +To benchmark or visualize only releases, locally and on different machines, first generate the list of commits that the release tags point to, then run ``asv`` on that list:: + + cd benchmarks + # Get commits for tags + # delete tag_commits.txt before re-runs + for gtag in $(git tag --list --sort taggerdate | grep "^v"); do + git log $gtag --oneline -n1 --decorate=no | awk '{print $1;}' >> tag_commits.txt + done + # Use the last 20 + tail --lines=20 tag_commits.txt > 20_vers.txt + asv run HASHFILE:20_vers.txt + # Publish and view + asv publish + asv preview + +For details on contributing these results, see the `benchmark results repository`_. + +.. _benchmark results repository: https://github.com/HaoZeke/asv-numpy Writing benchmarks ------------------ diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index b60135524..267450448 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -43,7 +43,8 @@ // version. "matrix": { "Cython": [], - "setuptools": ["59.2.0"] + "setuptools": ["59.2.0"], + "packaging": [] }, // The directory (relative to the current directory) that benchmarks are diff --git a/benchmarks/asv_compare.conf.json.tpl b/benchmarks/asv_compare.conf.json.tpl index 01f4e41de..f0ef0bf49 100644 --- a/benchmarks/asv_compare.conf.json.tpl +++ b/benchmarks/asv_compare.conf.json.tpl @@ -47,7 +47,8 @@ // version. 
"matrix": { "Cython": [], - "setuptools": ["59.2.0"] + "setuptools": ["59.2.0"], + "packaging": [] }, // The directory (relative to the current directory) that benchmarks are diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index 7b9f1d3e6..35fc87eac 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -26,7 +26,7 @@ def dirty_lock(lock_name, lock_on_count=1): lock_path = os.path.abspath(os.path.join( os.path.dirname(__file__), "..", "env", lock_name) ) - # ASV load the 'benchmark_dir' to discovering the available benchmarks + # ASV loads the 'benchmark_dir' to discover the available benchmarks # the issue here is ASV doesn't capture any strings from stdout or stderr # during this stage so we escape it and lock on the second increment try: diff --git a/benchmarks/benchmarks/bench_core.py b/benchmarks/benchmarks/bench_core.py index 4fcd7ace5..fe1cd37b6 100644 --- a/benchmarks/benchmarks/bench_core.py +++ b/benchmarks/benchmarks/bench_core.py @@ -45,6 +45,12 @@ class Core(Benchmark): def time_array_l_view(self): np.array(self.l_view) + def time_can_cast(self): + np.can_cast(self.l10x10, self.float64_dtype) + + def time_can_cast_same_kind(self): + np.can_cast(self.l10x10, self.float64_dtype, casting="same_kind") + def time_vstack_l(self): np.vstack(self.l) @@ -66,6 +72,9 @@ class Core(Benchmark): def time_empty_100(self): np.empty(100) + def time_empty_like(self): + np.empty_like(self.l10x10) + def time_eye_100(self): np.eye(100) @@ -206,13 +215,41 @@ class Indices(Benchmark): def time_indices(self): np.indices((1000, 500)) -class VarComplex(Benchmark): - params = [10**n for n in range(0, 9)] - def setup(self, n): - self.arr = np.random.randn(n) + 1j * np.random.randn(n) - def teardown(self, n): - del self.arr +class StatsMethods(Benchmark): + # Not testing, but in array_api (redundant) + # 8, 16, 32 bit variants, and 128 complexes + params = [['int64', 'uint64', 'float64', 'intp', + 'complex64', 
'bool', 'float', 'int', + 'complex', 'complex256'], + [100**n for n in range(0, 2)]] + param_names = ['dtype', 'size'] + + def setup(self, dtype, size): + try: + self.data = np.ones(size, dtype=getattr(np, dtype)) + except AttributeError: # builtins throw AttributeError after 1.20 + self.data = np.ones(size, dtype=dtype) + if dtype.startswith('complex'): + self.data = np.random.randn(size) + 1j * np.random.randn(size) + + def time_min(self, dtype, size): + self.data.min() + + def time_max(self, dtype, size): + self.data.max() + + def time_mean(self, dtype, size): + self.data.mean() + + def time_std(self, dtype, size): + self.data.std() + + def time_prod(self, dtype, size): + self.data.prod() + + def time_var(self, dtype, size): + self.data.var() - def time_var(self, n): - self.arr.var() + def time_sum(self, dtype, size): + self.data.sum() diff --git a/benchmarks/benchmarks/bench_creation.py b/benchmarks/benchmarks/bench_creation.py new file mode 100644 index 000000000..3a577df7a --- /dev/null +++ b/benchmarks/benchmarks/bench_creation.py @@ -0,0 +1,81 @@ +from .common import Benchmark, TYPES1 + +import numpy as np + + +class MeshGrid(Benchmark): + """ Benchmark meshgrid generation + """ + params = [[16, 32], + [2, 3, 4], + ['ij', 'xy'], TYPES1] + param_names = ['size', 'ndims', 'ind', 'ndtype'] + timeout = 10 + + def setup(self, size, ndims, ind, ndtype): + self.grid_dims = [(np.random.ranf(size)).astype(ndtype) for + x in range(ndims)] + + def time_meshgrid(self, size, ndims, ind, ndtype): + np.meshgrid(*self.grid_dims, indexing=ind) + + +class Create(Benchmark): + """ Benchmark for creation functions + """ + # (64, 64), (128, 128), (256, 256) + # , (512, 512), (1024, 1024) + params = [[16, 32, 128, 256, 512, + (16, 16), (32, 32)], + ['C', 'F'], + TYPES1] + param_names = ['shape', 'order', 'npdtypes'] + timeout = 10 + + def setup(self, shape, order, npdtypes): + values = get_squares_() + self.xarg = values.get(npdtypes)[0] + + def time_full(self, shape, order, 
npdtypes): + np.full(shape, self.xarg[1], dtype=npdtypes, order=order) + + def time_full_like(self, shape, order, npdtypes): + np.full_like(self.xarg, self.xarg[0], order=order) + + def time_ones(self, shape, order, npdtypes): + np.ones(shape, dtype=npdtypes, order=order) + + def time_ones_like(self, shape, order, npdtypes): + np.ones_like(self.xarg, order=order) + + def time_zeros(self, shape, order, npdtypes): + np.zeros(shape, dtype=npdtypes, order=order) + + def time_zeros_like(self, shape, order, npdtypes): + np.zeros_like(self.xarg, order=order) + + def time_empty(self, shape, order, npdtypes): + np.empty(shape, dtype=npdtypes, order=order) + + def time_empty_like(self, shape, order, npdtypes): + np.empty_like(self.xarg, order=order) + + +class UfuncsFromDLP(Benchmark): + """ Benchmark for creation functions + """ + params = [[16, 32, (16, 16), + (32, 32), (64, 64)], + TYPES1] + param_names = ['shape', 'npdtypes'] + timeout = 10 + + def setup(self, shape, npdtypes): + if npdtypes in ['longdouble', 'clongdouble']: + raise NotImplementedError( + 'Only IEEE dtypes are supported') + values = get_squares_() + self.xarg = values.get(npdtypes)[0] + + def time_from_dlpack(self, shape, npdtypes): + np.from_dlpack(self.xarg) diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py index 2e44ff76b..cc37bef39 100644 --- a/benchmarks/benchmarks/bench_function_base.py +++ b/benchmarks/benchmarks/bench_function_base.py @@ -248,7 +248,7 @@ class Sort(Benchmark): # In NumPy 1.17 and newer, 'merge' can be one of several # stable sorts, it isn't necessarily merge sort. 
['quick', 'merge', 'heap'], - ['float64', 'int64', 'float32', 'uint32', 'int32', 'int16'], + ['float64', 'int64', 'float32', 'uint32', 'int32', 'int16', 'float16'], [ ('random',), ('ordered',), diff --git a/benchmarks/benchmarks/bench_io.py b/benchmarks/benchmarks/bench_io.py index 357adbb87..e316d07f3 100644 --- a/benchmarks/benchmarks/bench_io.py +++ b/benchmarks/benchmarks/bench_io.py @@ -1,7 +1,7 @@ -from .common import Benchmark, get_squares +from .common import Benchmark, get_squares, get_squares_ import numpy as np -from io import StringIO +from io import SEEK_SET, StringIO, BytesIO class Copy(Benchmark): @@ -67,6 +67,15 @@ class Savez(Benchmark): np.savez('tmp.npz', **self.squares) +class LoadNpyOverhead(Benchmark): + def setup(self): + self.buffer = BytesIO() + np.save(self.buffer, get_squares_()['float32']) + + def time_loadnpy_overhead(self): + self.buffer.seek(0, SEEK_SET) + np.load(self.buffer) + class LoadtxtCSVComments(Benchmark): # benchmarks for np.loadtxt comment handling # when reading in CSV files diff --git a/benchmarks/benchmarks/bench_itemselection.py b/benchmarks/benchmarks/bench_itemselection.py index 518258a8f..46a39372c 100644 --- a/benchmarks/benchmarks/bench_itemselection.py +++ b/benchmarks/benchmarks/bench_itemselection.py @@ -7,7 +7,7 @@ class Take(Benchmark): params = [ [(1000, 1), (1000, 2), (2, 1000, 1), (1000, 3)], ["raise", "wrap", "clip"], - TYPES1] + TYPES1 + ["O", "i,O"]] param_names = ["shape", "mode", "dtype"] def setup(self, shape, mode, dtype): @@ -21,7 +21,7 @@ class Take(Benchmark): class PutMask(Benchmark): params = [ [True, False], - TYPES1] + TYPES1 + ["O", "i,O"]] param_names = ["values_is_scalar", "dtype"] def setup(self, values_is_scalar, dtype): @@ -41,3 +41,21 @@ class PutMask(Benchmark): def time_sparse(self, values_is_scalar, dtype): np.putmask(self.arr, self.sparse_mask, self.vals) + +class Put(Benchmark): + params = [ + [True, False], + TYPES1 + ["O", "i,O"]] + param_names = ["values_is_scalar", "dtype"] + + 
def setup(self, values_is_scalar, dtype): + if values_is_scalar: + self.vals = np.array(1., dtype=dtype) + else: + self.vals = np.ones(1000, dtype=dtype) + + self.arr = np.ones(1000, dtype=dtype) + self.indx = np.arange(1000, dtype=np.intp) + + def time_ordered(self, values_is_scalar, dtype): + np.put(self.arr, self.indx, self.vals) diff --git a/benchmarks/benchmarks/bench_lib.py b/benchmarks/benchmarks/bench_lib.py index b64f8ab17..f792116a6 100644 --- a/benchmarks/benchmarks/bench_lib.py +++ b/benchmarks/benchmarks/bench_lib.py @@ -132,11 +132,26 @@ class Unique(Benchmark): # produce a randomly shuffled array with the # approximate desired percentage np.nan content base_array = np.random.uniform(size=array_size) - base_array[base_array < percent_nans / 100.] = np.nan + n_nan = int(percent_nans * array_size) + nan_indices = np.random.choice(np.arange(array_size), size=n_nan) + base_array[nan_indices] = np.nan self.arr = base_array - def time_unique(self, array_size, percent_nans): - np.unique(self.arr) + def time_unique_values(self, array_size, percent_nans): + np.unique(self.arr, return_index=False, + return_inverse=False, return_counts=False) + + def time_unique_counts(self, array_size, percent_nans): + np.unique(self.arr, return_index=False, + return_inverse=False, return_counts=True) + + def time_unique_inverse(self, array_size, percent_nans): + np.unique(self.arr, return_index=False, + return_inverse=True, return_counts=False) + + def time_unique_all(self, array_size, percent_nans): + np.unique(self.arr, return_index=True, + return_inverse=True, return_counts=True) class Isin(Benchmark): diff --git a/benchmarks/benchmarks/bench_linalg.py b/benchmarks/benchmarks/bench_linalg.py index a94ba1139..b4e39b084 100644 --- a/benchmarks/benchmarks/bench_linalg.py +++ b/benchmarks/benchmarks/bench_linalg.py @@ -190,3 +190,27 @@ class Einsum(Benchmark): # sum_of_products_contig_outstride0_one: non_contiguous arrays def time_einsum_noncon_contig_outstride0(self, dtype): 
np.einsum("i->", self.non_contiguous_dim1, optimize=True) + + +class LinAlgTransposeVdot(Benchmark): + # Smaller for speed + # , (128, 128), (256, 256), (512, 512), + # (1024, 1024) + params = [[(16, 16), (32, 32), + (64, 64)], TYPES1] + param_names = ['shape', 'npdtypes'] + + def setup(self, shape, npdtypes): + self.xarg = np.random.uniform(-1, 1, np.dot(*shape)).reshape(shape) + self.xarg = self.xarg.astype(npdtypes) + self.x2arg = np.random.uniform(-1, 1, np.dot(*shape)).reshape(shape) + self.x2arg = self.x2arg.astype(npdtypes) + if npdtypes.startswith('complex'): + self.xarg += self.xarg.T*1j + self.x2arg += self.x2arg.T*1j + + def time_transpose(self, shape, npdtypes): + np.transpose(self.xarg) + + def time_vdot(self, shape, npdtypes): + np.vdot(self.xarg, self.x2arg) diff --git a/benchmarks/benchmarks/bench_manipulate.py b/benchmarks/benchmarks/bench_manipulate.py new file mode 100644 index 000000000..0a312479c --- /dev/null +++ b/benchmarks/benchmarks/bench_manipulate.py @@ -0,0 +1,107 @@ +from .common import Benchmark, get_squares_, TYPES1, DLPACK_TYPES + +import numpy as np +from collections import deque + +class BroadcastArrays(Benchmark): + params = [[(16, 32), (32, 64), + (64, 128), (128, 256), + (256, 512), (512, 1024)], + TYPES1] + param_names = ['shape', 'ndtype'] + timeout = 10 + + def setup(self, shape, ndtype): + self.xarg = np.random.ranf(shape[0]*shape[1]).reshape(shape) + self.xarg = self.xarg.astype(ndtype) + if ndtype.startswith('complex'): + self.xarg += np.random.ranf(1)*1j + + def time_broadcast_arrays(self, shape, ndtype): + np.broadcast_arrays(self.xarg, np.ones(1)) + + +class BroadcastArraysTo(Benchmark): + params = [[16, 32, 64, 128, 256, 512], + TYPES1] + param_names = ['size', 'ndtype'] + timeout = 10 + + def setup(self, size, ndtype): + self.rng = np.random.default_rng() + self.xarg = self.rng.random(size) + self.xarg = self.xarg.astype(ndtype) + if ndtype.startswith('complex'): + self.xarg += self.rng.random(1)*1j + + def 
time_broadcast_to(self, size, ndtype): + np.broadcast_to(self.xarg, (size, size)) + + +class ConcatenateStackArrays(Benchmark): + # (64, 128), (128, 256), (256, 512) + params = [[(16, 32), (32, 64)], + [2, 3, 4, 5], + TYPES1] + param_names = ['shape', 'narrays', 'ndtype'] + timeout = 10 + + def setup(self, shape, narrays, ndtype): + self.xarg = [np.random.ranf(shape[0]*shape[1]).reshape(shape) + for x in range(narrays)] + self.xarg = [x.astype(ndtype) for x in self.xarg] + if ndtype.startswith('complex'): + [x + np.random.ranf(1)*1j for x in self.xarg] + + def time_concatenate_ax0(self, size, narrays, ndtype): + np.concatenate(self.xarg, axis=0) + + def time_concatenate_ax1(self, size, narrays, ndtype): + np.concatenate(self.xarg, axis=1) + + def time_stack_ax0(self, size, narrays, ndtype): + np.stack(self.xarg, axis=0) + + def time_stack_ax1(self, size, narrays, ndtype): + np.stack(self.xarg, axis=1) + + +class DimsManipulations(Benchmark): + params = [ + [(2, 1, 4), (2, 1), (5, 2, 3, 1)], + ] + param_names = ['shape'] + timeout = 10 + + def setup(self, shape): + self.xarg = np.ones(shape=shape) + self.reshaped = deque(shape) + self.reshaped.rotate(1) + self.reshaped = tuple(self.reshaped) + + def time_expand_dims(self, shape): + np.expand_dims(self.xarg, axis=1) + + def time_expand_dims_neg(self, shape): + np.expand_dims(self.xarg, axis=-1) + + def time_squeeze_dims(self, shape): + np.squeeze(self.xarg) + + def time_flip_all(self, shape): + np.flip(self.xarg, axis=None) + + def time_flip_one(self, shape): + np.flip(self.xarg, axis=1) + + def time_flip_neg(self, shape): + np.flip(self.xarg, axis=-1) + + def time_moveaxis(self, shape): + np.moveaxis(self.xarg, [0, 1], [-1, -2]) + + def time_roll(self, shape): + np.roll(self.xarg, 3) + + def time_reshape(self, shape): + np.reshape(self.xarg, self.reshaped) diff --git a/benchmarks/benchmarks/bench_reduce.py b/benchmarks/benchmarks/bench_reduce.py index ca07bd180..040b5ca73 100644 --- 
a/benchmarks/benchmarks/bench_reduce.py +++ b/benchmarks/benchmarks/bench_reduce.py @@ -45,19 +45,40 @@ class AnyAll(Benchmark): self.zeros.any() -class MinMax(Benchmark): - params = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32, - np.int64, np.uint64, np.float32, np.float64, np.intp] +class StatsReductions(Benchmark): + # Not testing, but in array_api (redundant) + # 8, 16, 32 bit variants, and 128 complexes + params = ['int64', 'uint64', 'float64', 'intp', + 'complex64', 'bool', 'float', 'int', + 'complex', 'complex256'], param_names = ['dtype'] def setup(self, dtype): - self.d = np.ones(20000, dtype=dtype) + try: + self.data = np.ones(200, dtype=getattr(np, dtype)) + except AttributeError: # builtins throw AttributeError after 1.20 + self.data = np.ones(200, dtype=dtype) + if dtype.startswith('complex'): + self.data = self.data * self.data.T*1j def time_min(self, dtype): - np.min(self.d) + np.min(self.data) def time_max(self, dtype): - np.max(self.d) + np.max(self.data) + + def time_mean(self, dtype): + np.mean(self.data) + + def time_std(self, dtype): + np.std(self.data) + + def time_prod(self, dtype): + np.prod(self.data) + + def time_var(self, dtype): + np.var(self.data) + class FMinMax(Benchmark): params = [np.float32, np.float64] @@ -72,6 +93,7 @@ class FMinMax(Benchmark): def time_max(self, dtype): np.fmax.reduce(self.d) + class ArgMax(Benchmark): params = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32, np.int64, np.uint64, np.float32, np.float64, bool] @@ -83,6 +105,7 @@ class ArgMax(Benchmark): def time_argmax(self, dtype): np.argmax(self.d) + class ArgMin(Benchmark): params = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32, np.int64, np.uint64, np.float32, np.float64, bool] @@ -94,6 +117,7 @@ class ArgMin(Benchmark): def time_argmin(self, dtype): np.argmin(self.d) + class SmallReduction(Benchmark): def setup(self): self.d = np.ones(100, dtype=np.float32) diff --git a/benchmarks/benchmarks/bench_scalar.py 
b/benchmarks/benchmarks/bench_scalar.py index 650daa89d..638f66df5 100644 --- a/benchmarks/benchmarks/bench_scalar.py +++ b/benchmarks/benchmarks/bench_scalar.py @@ -65,3 +65,15 @@ class ScalarMath(Benchmark): other + int32 other + int32 other + int32 + + +class ScalarStr(Benchmark): + # Test scalar to str conversion + params = [TYPES1] + param_names = ["type"] + + def setup(self, typename): + self.a = np.array([100] * 100, dtype=typename) + + def time_str_repr(self, typename): + res = [str(x) for x in self.a] diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py index 36d8621e8..f7c77d90c 100644 --- a/benchmarks/benchmarks/bench_ufunc.py +++ b/benchmarks/benchmarks/bench_ufunc.py @@ -1,6 +1,9 @@ -from .common import Benchmark, get_squares_ +from .common import Benchmark, get_squares_, TYPES1, DLPACK_TYPES import numpy as np +import itertools +from packaging import version +import operator ufuncs = ['abs', 'absolute', 'add', 'arccos', 'arccosh', 'arcsin', 'arcsinh', @@ -13,11 +16,13 @@ ufuncs = ['abs', 'absolute', 'add', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'isinf', 'isnan', 'isnat', 'lcm', 'ldexp', 'left_shift', 'less', 'less_equal', 'log', 'log10', 'log1p', 'log2', 'logaddexp', 'logaddexp2', 'logical_and', 'logical_not', 'logical_or', - 'logical_xor', 'matmul', 'maximum', 'minimum', 'mod', 'modf', 'multiply', - 'negative', 'nextafter', 'not_equal', 'positive', 'power', - 'rad2deg', 'radians', 'reciprocal', 'remainder', 'right_shift', - 'rint', 'sign', 'signbit', 'sin', 'sinh', 'spacing', 'sqrt', - 'square', 'subtract', 'tan', 'tanh', 'true_divide', 'trunc'] + 'logical_xor', 'matmul', 'maximum', 'minimum', 'mod', 'modf', + 'multiply', 'negative', 'nextafter', 'not_equal', 'positive', + 'power', 'rad2deg', 'radians', 'reciprocal', 'remainder', + 'right_shift', 'rint', 'sign', 'signbit', 'sin', + 'sinh', 'spacing', 'sqrt', 'square', 'subtract', 'tan', 'tanh', + 'true_divide', 'trunc'] +arrayfuncdisp = ['real', 'round'] for 
name in dir(np): @@ -25,6 +30,30 @@ for name in dir(np): print("Missing ufunc %r" % (name,)) +class ArrayFunctionDispatcher(Benchmark): + params = [arrayfuncdisp] + param_names = ['func'] + timeout = 10 + + def setup(self, ufuncname): + np.seterr(all='ignore') + try: + self.afdn = getattr(np, ufuncname) + except AttributeError: + raise NotImplementedError() + self.args = [] + for _, aarg in get_squares_().items(): + arg = (aarg,) * 1 # no nin + try: + self.afdn(*arg) + except TypeError: + continue + self.args.append(arg) + + def time_afdn_types(self, ufuncname): + [self.afdn(*arg) for arg in self.args] + + class Broadcast(Benchmark): def setup(self): self.d = np.ones((50000, 100), dtype=np.float64) @@ -34,6 +63,20 @@ class Broadcast(Benchmark): self.d - self.e +class At(Benchmark): + def setup(self): + rng = np.random.default_rng(1) + self.vals = rng.random(10_000_000, dtype=np.float64) + self.idx = rng.integers(1000, size=10_000_000).astype(np.intp) + self.res = np.zeros(1000, dtype=self.vals.dtype) + + def time_sum_at(self): + np.add.at(self.res, self.idx, self.vals) + + def time_maximum_at(self): + np.maximum.at(self.res, self.idx, self.vals) + + class UFunc(Benchmark): params = [ufuncs] param_names = ['ufunc'] @@ -42,23 +85,179 @@ class UFunc(Benchmark): def setup(self, ufuncname): np.seterr(all='ignore') try: - self.f = getattr(np, ufuncname) + self.ufn = getattr(np, ufuncname) except AttributeError: raise NotImplementedError() self.args = [] - for t, a in get_squares_().items(): - arg = (a,) * self.f.nin + for _, aarg in get_squares_().items(): + arg = (aarg,) * self.ufn.nin try: - self.f(*arg) + self.ufn(*arg) except TypeError: continue self.args.append(arg) def time_ufunc_types(self, ufuncname): - [self.f(*arg) for arg in self.args] + [self.ufn(*arg) for arg in self.args] + + +class MethodsV0(Benchmark): + """ Benchmark for the methods which do not take any arguments + """ + params = [['__abs__', '__neg__', '__pos__'], TYPES1] + param_names = ['methods', 
'npdtypes'] + timeout = 10 + + def setup(self, methname, npdtypes): + values = get_squares_() + self.xarg = values.get(npdtypes)[0] + + def time_ndarray_meth(self, methname, npdtypes): + getattr(operator, methname)(self.xarg) + + +class NDArrayLRShifts(Benchmark): + """ Benchmark for the shift methods + """ + params = [['__lshift__', '__rshift__'], + ['intp', 'int8', 'int16', + 'int32', 'int64', 'uint8', + 'uint16', 'uint32', 'uint64']] + param_names = ['methods', 'npdtypes'] + timeout = 10 + + def setup(self, methname, npdtypes): + self.vals = np.ones(1000, + dtype=getattr(np, npdtypes)) * \ + np.random.randint(9) + + def time_ndarray_meth(self, methname, npdtypes): + getattr(operator, methname)(*[self.vals, 2]) + + +class Methods0D(Benchmark): + """Zero dimension array methods + """ + params = [['__bool__', '__complex__', '__invert__', + '__float__', '__int__'], TYPES1] + param_names = ['methods', 'npdtypes'] + timeout = 10 + + def setup(self, methname, npdtypes): + self.xarg = np.array(3, dtype=npdtypes) + if (npdtypes.startswith('complex') and + methname in ['__float__', '__int__']) or \ + (npdtypes.startswith('int') and methname == '__invert__'): + # Skip + raise NotImplementedError + + def time_ndarray__0d__(self, methname, npdtypes): + meth = getattr(self.xarg, methname) + meth() + + +class MethodsV1(Benchmark): + """ Benchmark for the methods which take an argument + """ + params = [['__and__', '__add__', '__eq__', '__floordiv__', '__ge__', + '__gt__', '__le__', '__lt__', '__matmul__', + '__mod__', '__mul__', '__ne__', '__or__', + '__pow__', '__sub__', '__truediv__', '__xor__'], + TYPES1] + param_names = ['methods', 'npdtypes'] + timeout = 10 + + def setup(self, methname, npdtypes): + if ( + npdtypes.startswith("complex") + and methname in ["__floordiv__", "__mod__"] + ) or ( + not npdtypes.startswith("int") + and methname in ["__and__", "__or__", "__xor__"] + ): + raise NotImplementedError # skip + values = get_squares_().get(npdtypes) + self.xargs = 
[values[0], values[1]] + + def time_ndarray_meth(self, methname, npdtypes): + getattr(operator, methname)(*self.xargs) + + +class NDArrayGetItem(Benchmark): + param_names = ['margs', 'msize'] + params = [[0, (0, 0), (-1, 0), [0, -1]], + ['small', 'big']] + + def setup(self, margs, msize): + self.xs = np.random.uniform(-1, 1, 6).reshape(2, 3) + self.xl = np.random.uniform(-1, 1, 50*50).reshape(50, 50) + + def time_methods_getitem(self, margs, msize): + if msize == 'small': + mdat = self.xs + elif msize == 'big': + mdat = self.xl + getattr(mdat, '__getitem__')(margs) + + +class NDArraySetItem(Benchmark): + param_names = ['margs', 'msize'] + params = [[0, (0, 0), (-1, 0), [0, -1]], + ['small', 'big']] + + def setup(self, margs, msize): + self.xs = np.random.uniform(-1, 1, 6).reshape(2, 3) + self.xl = np.random.uniform(-1, 1, 100*100).reshape(100, 100) + + def time_methods_setitem(self, margs, msize): + if msize == 'small': + mdat = self.xs + elif msize == 'big': + mdat = self.xl + mdat[margs] = 17 + + +class DLPMethods(Benchmark): + """ Benchmark for DLPACK helpers + """ + params = [['__dlpack__', '__dlpack_device__'], DLPACK_TYPES] + param_names = ['methods', 'npdtypes'] + timeout = 10 + + def setup(self, methname, npdtypes): + values = get_squares_() + if npdtypes == 'bool': + if version.parse(np.__version__) > version.parse("1.25"): + self.xarg = values.get('int16')[0].astype('bool') + else: + raise NotImplementedError("Not supported before v1.25") + else: + self.xarg = values.get('int16')[0] + + def time_ndarray_dlp(self, methname, npdtypes): + meth = getattr(self.xarg, methname) + meth() + + +class NDArrayAsType(Benchmark): + """ Benchmark for type conversion + """ + params = [list(itertools.combinations(TYPES1, 2))] + param_names = ['typeconv'] + timeout = 10 + + def setup(self, typeconv): + if typeconv[0] == typeconv[1]: + raise NotImplementedError( + "Skipping test for converting to the same dtype") + self.xarg = get_squares_().get(typeconv[0]) + + def 
time_astype(self, typeconv): + self.xarg.astype(typeconv[1]) + class UFuncSmall(Benchmark): - """ Benchmark for a selection of ufuncs on a small arrays and scalars + """ Benchmark for a selection of ufuncs on a small arrays and scalars Since the arrays and scalars are small, we are benchmarking the overhead of the numpy ufunc functionality diff --git a/benchmarks/benchmarks/bench_ufunc_strides.py b/benchmarks/benchmarks/bench_ufunc_strides.py index f80bf90f9..898cc0818 100644 --- a/benchmarks/benchmarks/bench_ufunc_strides.py +++ b/benchmarks/benchmarks/bench_ufunc_strides.py @@ -1,156 +1,181 @@ -from .common import Benchmark +from .common import Benchmark, get_data import numpy as np -UNARY_UFUNCS = [obj for obj in np.core.umath.__dict__.values() if - isinstance(obj, np.ufunc)] -UNARY_OBJECT_UFUNCS = [uf for uf in UNARY_UFUNCS if "O->O" in uf.types] -UNARY_OBJECT_UFUNCS.remove(getattr(np, 'invert')) +UFUNCS = [obj for obj in np.core.umath.__dict__.values() if + isinstance(obj, np.ufunc)] +UFUNCS_UNARY = [uf for uf in UFUNCS if "O->O" in uf.types] -stride = [1, 2, 4] -stride_out = [1, 2, 4] -dtype = ['e', 'f', 'd'] - -class Unary(Benchmark): - params = [UNARY_OBJECT_UFUNCS, stride, stride_out, dtype] - param_names = ['ufunc', 'stride_in', 'stride_out', 'dtype'] - timeout = 10 - - def setup(self, ufuncname, stride, stride_out, dtype): - np.seterr(all='ignore') - try: - self.f = ufuncname - except AttributeError: - raise NotImplementedError(f"No ufunc {ufuncname} found") from None - N = 100000 - self.arr_out = np.empty(stride_out*N, dtype) - self.arr = np.random.rand(stride*N).astype(dtype) - if (ufuncname.__name__ == 'arccosh'): - self.arr = 1.0 + self.arr - - def time_ufunc(self, ufuncname, stride, stride_out, dtype): - self.f(self.arr[::stride], self.arr_out[::stride_out]) - -class AVX_UFunc_log(Benchmark): - params = [stride, dtype] - param_names = ['stride', 'dtype'] - timeout = 10 - - def setup(self, stride, dtype): - np.seterr(all='ignore') - N = 10000 - 
self.arr = np.array(np.random.random_sample(stride*N), dtype=dtype) - - def time_log(self, stride, dtype): - np.log(self.arr[::stride]) - - -binary_ufuncs = [ - 'maximum', 'minimum', 'fmax', 'fmin' -] -binary_dtype = ['f', 'd'] - -class Binary(Benchmark): - param_names = ['ufunc', 'stride_in0', 'stride_in1', 'stride_out', 'dtype'] - params = [binary_ufuncs, stride, stride, stride_out, binary_dtype] +class _AbstractBinary(Benchmark): + params = [] + param_names = ['ufunc', 'stride_in0', 'stride_in1' 'stride_out', 'dtype'] timeout = 10 + arrlen = 10000 + data_finite = True + data_denormal = False + data_zeros = False + + def setup(self, ufunc, stride_in0, stride_in1, stride_out, dtype): + ufunc_insig = f'{dtype}{dtype}->' + if ufunc_insig+dtype not in ufunc.types: + for st_sig in (ufunc_insig, dtype): + test = [sig for sig in ufunc.types if sig.startswith(st_sig)] + if test: + break + if not test: + raise NotImplementedError( + f"Ufunc {ufunc} doesn't support " + f"binary input of dtype {dtype}" + ) from None + tin, tout = test[0].split('->') + else: + tin = dtype + dtype + tout = dtype + + self.ufunc_args = [] + for i, (dt, stride) in enumerate(zip(tin, (stride_in0, stride_in1))): + self.ufunc_args += [get_data( + self.arrlen*stride, dt, i, + zeros=self.data_zeros, + finite=self.data_finite, + denormal=self.data_denormal, + )[::stride]] + for dt in tout: + self.ufunc_args += [ + np.empty(stride_out*self.arrlen, dt)[::stride_out] + ] - def setup(self, ufuncname, stride_in0, stride_in1, stride_out, dtype): np.seterr(all='ignore') - try: - self.f = getattr(np, ufuncname) - except AttributeError: - raise NotImplementedError(f"No ufunc {ufuncname} found") from None - N = 100000 - self.arr1 = np.array(np.random.rand(stride_in0*N), dtype=dtype) - self.arr2 = np.array(np.random.rand(stride_in1*N), dtype=dtype) - self.arr_out = np.empty(stride_out*N, dtype) - - def time_ufunc(self, ufuncname, stride_in0, stride_in1, stride_out, dtype): - self.f(self.arr1[::stride_in0], 
self.arr2[::stride_in1], - self.arr_out[::stride_out]) - -binary_int_ufuncs = ['maximum', 'minimum'] -binary_int_dtype = ['b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q'] + def time_binary(self, ufunc, stride_in0, stride_in1, stride_out, + dtype): + ufunc(*self.ufunc_args) -class BinaryInt(Binary): + def time_binary_scalar_in0(self, ufunc, stride_in0, stride_in1, + stride_out, dtype): + ufunc(self.ufunc_args[0][0], *self.ufunc_args[1:]) - param_names = ['ufunc', 'stride_in0', 'stride_in1', 'stride_out', 'dtype'] - params = [binary_int_ufuncs, stride, stride, stride_out, binary_int_dtype] - -class AVX_ldexp(Benchmark): - - params = [dtype, stride] - param_names = ['dtype', 'stride'] - timeout = 10 + def time_binary_scalar_in1(self, ufunc, stride_in0, stride_in1, + stride_out, dtype): + ufunc(self.ufunc_args[0], self.ufunc_args[1][0], *self.ufunc_args[2:]) - def setup(self, dtype, stride): - np.seterr(all='ignore') - self.f = getattr(np, 'ldexp') - N = 10000 - self.arr1 = np.array(np.random.rand(stride*N), dtype=dtype) - self.arr2 = np.array(np.random.rand(stride*N), dtype='i') - - def time_ufunc(self, dtype, stride): - self.f(self.arr1[::stride], self.arr2[::stride]) - -cmplx_bfuncs = ['add', - 'subtract', - 'multiply', - 'divide'] -cmplxstride = [1, 2, 4] -cmplxdtype = ['F', 'D'] - -class AVX_cmplx_arithmetic(Benchmark): - params = [cmplx_bfuncs, cmplxstride, cmplxdtype] - param_names = ['bfunc', 'stride', 'dtype'] - timeout = 10 - - def setup(self, bfuncname, stride, dtype): - np.seterr(all='ignore') - try: - self.f = getattr(np, bfuncname) - except AttributeError: - raise NotImplementedError(f"No bfunc {bfuncname} found") from None - N = 10000 - self.arr1 = np.ones(stride*N, dtype) - self.arr2 = np.ones(stride*N, dtype) - - def time_ufunc(self, bfuncname, stride, dtype): - self.f(self.arr1[::stride], self.arr2[::stride]) - -cmplx_ufuncs = ['reciprocal', - 'absolute', - 'square', - 'conjugate'] - -class AVX_cmplx_funcs(Benchmark): - params = [cmplx_ufuncs, 
class _AbstractUnary(Benchmark):
    """Base class for strided benchmarks of one-input ufuncs.

    Subclasses supply ``params`` in the order given by ``param_names``.
    ``setup`` builds ``self.ufunc_args`` (the strided input followed by one
    strided output array per output type) and ``time_unary`` calls the
    ufunc on those arguments.
    """
    params = []
    param_names = ['ufunc', 'stride_in', 'stride_out', 'dtype']
    timeout = 10
    # Number of elements each operand has after the stride is applied.
    arrlen = 10000
    # Forwarded to ``get_data`` as ``finite`` / ``denormal`` / ``zeros``.
    data_finite = True
    data_denormal = False
    data_zeros = False

    def setup(self, ufunc, stride_in, stride_out, dtype):
        """Prepare the strided operands for ``ufunc``.

        The output type is ``dtype`` when the ``<dtype>-><dtype>`` loop
        exists; otherwise it is taken from the first signature in
        ``ufunc.types`` that starts with ``<dtype>->``.

        Raises
        ------
        NotImplementedError
            If no signature of ``ufunc`` accepts ``dtype`` as input.
        """
        arr_in = get_data(
            stride_in*self.arrlen, dtype,
            zeros=self.data_zeros,
            finite=self.data_finite,
            denormal=self.data_denormal,
        )
        self.ufunc_args = [arr_in[::stride_in]]

        ufunc_insig = f'{dtype}->'
        if ufunc_insig+dtype not in ufunc.types:
            test = [sig for sig in ufunc.types if sig.startswith(ufunc_insig)]
            if not test:
                raise NotImplementedError(
                    f"Ufunc {ufunc} doesn't support "
                    f"unary input of dtype {dtype}"
                ) from None
            tout = test[0].split('->')[1]
        else:
            tout = dtype

        # One uninitialised strided output per output type character.
        for dt in tout:
            self.ufunc_args += [
                np.empty(stride_out*self.arrlen, dt)[::stride_out]
            ]

        np.seterr(all='ignore')

    def time_unary(self, ufunc, stride_in, stride_out, dtype):
        """Time ``ufunc`` on the prepared input and output arrays."""
        ufunc(*self.ufunc_args)


class UnaryFP(_AbstractUnary):
    """Unary ufuncs on half/single/double precision with strides 1, 2, 4."""
    # UFUNCS_UNARY is defined outside the part of the file shown here.
    params = [UFUNCS_UNARY, [1, 2, 4], [1, 2, 4], ['e', 'f', 'd']]

    def setup(self, ufunc, stride_in, stride_out, dtype):
        """Run the base setup, then shift the input by 1.0 for ``arccosh``."""
        _AbstractUnary.setup(self, ufunc, stride_in, stride_out, dtype)
        if ufunc.__name__ == 'arccosh':
            # The input is a view of the array that get_data hands out from
            # its lru_cache, so an in-place ``+= 1.0`` would write through
            # to the cached data and shift it again on every later setup.
            # Put the shifted values in a fresh buffer with the same stride.
            src = self.ufunc_args[0]
            shifted = np.empty(stride_in*self.arrlen, src.dtype)[::stride_in]
            shifted[...] = src + 1.0
            self.ufunc_args[0] = shifted


class UnaryFPSpecial(UnaryFP):
    """UnaryFP with nan/inf, subnormal and zero values in the input."""
    data_finite = False
    data_denormal = True
    data_zeros = True


class BinaryFP(_AbstractBinary):
    """Binary floating-point ufuncs with strides 1, 2, 4 on 'f' and 'd'."""
    params = [
        [np.maximum, np.minimum, np.fmax, np.fmin, np.ldexp],
        [1, 2, 4], [1, 2, 4], [1, 2, 4], ['f', 'd']
    ]


class BinaryFPSpecial(BinaryFP):
    """BinaryFP with nan/inf, subnormal and zero values in the inputs."""
    data_finite = False
    data_denormal = True
    data_zeros = True


class BinaryComplex(_AbstractBinary):
    """Complex arithmetic ufuncs with strides 1, 2, 4 on 'F' and 'D'."""
    params = [
        [np.add, np.subtract, np.multiply, np.divide],
        [1, 2, 4], [1, 2, 4], [1, 2, 4],
        ['F', 'D']
    ]


class UnaryComplex(_AbstractUnary):
    """Unary ufuncs on complex input with strides 1, 2, 4."""
    params = [
        [np.reciprocal, np.absolute, np.square, np.conjugate],
        [1, 2, 4], [1, 2, 4], ['F', 'D']
    ]


class BinaryInt(_AbstractBinary):
    """maximum/minimum on every integer type with strides 1 and 2."""
    arrlen = 100000
    params = [
        [np.maximum, np.minimum],
        [1, 2], [1, 2], [1, 2],
        ['b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q']
    ]


class BinaryIntContig(_AbstractBinary):
    """Binary integer ufuncs on contiguous (stride 1) operands only."""
    params = [
        [getattr(np, uf) for uf in (
            'add', 'subtract', 'multiply', 'bitwise_and', 'bitwise_or',
            'bitwise_xor', 'logical_and', 'logical_or', 'logical_xor',
            'right_shift', 'left_shift',
        )],
        [1], [1], [1],
        ['b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q']
    ]


class UnaryIntContig(_AbstractUnary):
    """Unary integer ufuncs on contiguous (stride 1) operands only."""
    arrlen = 100000
    params = [
        [getattr(np, uf) for uf in (
            'positive', 'square', 'reciprocal', 'conjugate', 'logical_not',
            'invert', 'isnan', 'isinf', 'isfinite',
            'absolute', 'sign'
        )],
        [1], [1],
        ['b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q']
    ]


class Mandelbrot(Benchmark):
    def f(self,z):
        return np.abs(z) < 4.0

    def g(self,z,c):
        return np.sum(np.multiply(z, z) + c)

    def mandelbrot_numpy(self, c, maxiter):
        output = np.zeros(c.shape, np.int32)
        z = np.empty(c.shape, np.complex64)
        for it in range(maxiter):
            notdone = self.f(z)
            # [diff hunk ends here: the rest of this method and of the file
            #  is not shown in this chunk]


# ---------------------------------------------------------------------------
# benchmarks/benchmarks/common.py  (next file in the diff)
# ---------------------------------------------------------------------------
import numpy as np
import random
import os
from functools import lru_cache
from pathlib import Path

# Various pre-crafted datasets/variables for testing
# !!! Must not be changed -- only appended !!!
# [diff hunk boundary: part of the preceding comment is not shown in this chunk]
# sequences
random.seed(1)
# but will seed it nevertheless
np.random.seed(1)

nx, ny = 1000, 1000
# reduced squares based on indexes_rand, primarily for testing more
# [diff hunk boundary: the lines up to ``TYPES1 = [`` are not shown in this
#  chunk.  NOTE(review): ``nxs`` and ``nys``, used below, are presumably
#  defined there -- confirm against the full file.]
TYPES1 = [
    'int16', 'float16',
    'int32', 'float32',
    'int64', 'float64', 'complex64',
    'longdouble', 'complex128',
]
if 'complex256' in np.sctypeDict:
    TYPES1.append('clongdouble')

DLPACK_TYPES = [
    'int16', 'float16',
    'int32', 'float32',
    'int64', 'float64', 'complex64',
    'complex128', 'bool',
]

# Path for caching
CACHE_ROOT = Path(__file__).resolve().parent.parent / 'env' / 'numpy_benchdata'

# values which will be used to construct our sample data matrices
# replicate 10 times to speed up initial imports of this helper
# and generate some redundancy

@lru_cache(typed=True)
def get_values():
    """Return ``nx*ny//10`` uniform samples from [0, 100), tiled 10 times."""
    rnd = np.random.RandomState(1)
    values = np.tile(rnd.uniform(0, 100, size=nx*ny//10), 10)
    return values


@lru_cache(typed=True)
def get_squares():
    """Return ``{type name: (nx, ny) array}`` for every entry of TYPES1."""
    values = get_values()
    squares = {t: np.array(values,
                           dtype=getattr(np, t)).reshape((nx, ny))
               for t in TYPES1}

    # adjust complex ones to have non-degenerated imagery part -- use
    # [diff hunk boundary: the lines between the comment above and the
    #  ``return`` below are not shown in this chunk]
    return squares


@lru_cache(typed=True)
def get_squares_():
    """Return the ``[:nxs, :nys]`` corner of every array from get_squares."""
    # smaller squares
    squares_ = {t: s[:nxs, :nys] for t, s in get_squares().items()}
    return squares_


@lru_cache(typed=True)
def get_vectors():
    """Return the first row of every array from get_squares."""
    # vectors
    vectors = {t: s[0] for t, s in get_squares().items()}
    return vectors


@lru_cache(typed=True)
def get_indexes():
    """Return ``range(nx)`` as an array with two entries removed."""
    indexes = list(range(nx))
    # so we do not have all items
    indexes.pop(5)
    indexes.pop(95)

    indexes = np.array(indexes)
    return indexes


@lru_cache(typed=True)
def get_indexes_rand():
    """Return the values of get_indexes shuffled with ``random.Random(1)``."""
    rnd = random.Random(1)

    indexes_rand = get_indexes().tolist()       # copy
    rnd.shuffle(indexes_rand)         # in-place shuffle
    indexes_rand = np.array(indexes_rand)
    return indexes_rand


@lru_cache(typed=True)
def get_indexes_():
    # smaller versions
    indexes = get_indexes()
    # [diff hunk boundary: the line(s) between here and the ``return`` are
    #  not shown in this chunk.  NOTE(review): they presumably bind
    #  ``indexes_`` (compare get_indexes_rand_) -- confirm against the full
    #  file.]
    return indexes_


@lru_cache(typed=True)
def get_indexes_rand_():
    """Return the entries of get_indexes_rand that are smaller than ``nxs``."""
    indexes_rand = get_indexes_rand()
    indexes_rand_ = indexes_rand[indexes_rand < nxs]
    return indexes_rand_


@lru_cache(typed=True)
def get_data(size, dtype, ip_num=0, zeros=False, finite=True, denormal=False):
    """
    Generates a cached random array that covers several scenarios that
    may affect the benchmark for fairness and to stabilize the benchmark.

    The array is cached twice: in-process by ``lru_cache`` and on disk as a
    file under ``CACHE_ROOT``.  Repeated calls with the same arguments return
    the same array object, so callers must not modify it in place.

    Parameters
    ----------
    size: int
        Array length.

    dtype: dtype or dtype specifier

    ip_num: int
        Input number, to avoid memory overload
        and to provide unique data for each operand.

    zeros: bool
        Spreading zeros along with generated data.

    finite: bool
        Avoid spreading fp special cases nan/inf.

    denormal: bool
        Spreading subnormal numbers along with generated data.

    Returns
    -------
    numpy.ndarray
        1-D array of ``size`` elements of ``dtype``.  Positions not covered
        by the interleaved groups described above keep the initial value 1.
    """
    dtype = np.dtype(dtype)
    dname = dtype.name
    cache_name = f'{dname}_{size}_{ip_num}_{int(zeros)}'
    if dtype.kind in 'fc':
        cache_name += f'{int(finite)}{int(denormal)}'
    cache_name += '.bin'
    cache_path = CACHE_ROOT / cache_name
    if cache_path.exists():
        return np.fromfile(cache_path, dtype)

    array = np.ones(size, dtype)
    rands = []
    if dtype.kind == 'i':
        dinfo = np.iinfo(dtype)
        scale = 8
        if zeros:
            scale += 1
        lsize = size // scale
        # Negative and positive ranges of each integer width, clamped to
        # what ``dtype`` can hold.
        for low, high in (
            (-0x80, -1),
            (1, 0x7f),
            (-0x8000, -1),
            (1, 0x7fff),
            (-0x80000000, -1),
            (1, 0x7fffffff),
            (-0x8000000000000000, -1),
            (1, 0x7fffffffffffffff),
        ):
            rands += [np.random.randint(
                max(low, dinfo.min),
                min(high, dinfo.max),
                lsize, dtype
            )]
    elif dtype.kind == 'u':
        dinfo = np.iinfo(dtype)
        scale = 4
        if zeros:
            scale += 1
        lsize = size // scale
        for high in (0xff, 0xffff, 0xffffffff, 0xffffffffffffffff):
            rands += [np.random.randint(1, min(high, dinfo.max), lsize, dtype)]
    elif dtype.kind in 'fc':
        scale = 1
        if zeros:
            scale += 1
        if not finite:
            scale += 2
        if denormal:
            scale += 1
        dinfo = np.finfo(dtype)
        lsize = size // scale
        rands = [np.random.rand(lsize).astype(dtype)]
        if not finite:
            rands += [
                np.empty(lsize, dtype=dtype), np.empty(lsize, dtype=dtype)
            ]
            rands[1].fill(float('nan'))
            rands[2].fill(float('inf'))
        if denormal:
            rands += [np.empty(lsize, dtype=dtype)]
            rands[-1].fill(dinfo.smallest_subnormal)

    if rands:
        if zeros:
            rands += [np.zeros(lsize, dtype)]
        # Interleave the groups: group ``k`` fills positions k, k+stride, ...
        stride = len(rands)
        for start, r in enumerate(rands):
            array[start:len(r)*stride:stride] = r

    # exist_ok avoids the check-then-create race when another process
    # creates the cache directory at the same time.
    CACHE_ROOT.mkdir(parents=True, exist_ok=True)
    array.tofile(cache_path)
    return array


class Benchmark:
    pass