Diffstat (limited to 'benchmarks'):

 benchmarks/README.rst                        |  37
 benchmarks/asv.conf.json                     |   3
 benchmarks/asv_compare.conf.json.tpl         |   3
 benchmarks/benchmarks/__init__.py            |   2
 benchmarks/benchmarks/bench_core.py          |  53
 benchmarks/benchmarks/bench_creation.py      |  81
 benchmarks/benchmarks/bench_function_base.py |   2
 benchmarks/benchmarks/bench_io.py            |  13
 benchmarks/benchmarks/bench_itemselection.py |  22
 benchmarks/benchmarks/bench_lib.py           |  21
 benchmarks/benchmarks/bench_linalg.py        |  24
 benchmarks/benchmarks/bench_manipulate.py    | 107
 benchmarks/benchmarks/bench_reduce.py        |  36
 benchmarks/benchmarks/bench_scalar.py        |  12
 benchmarks/benchmarks/bench_ufunc.py         | 223
 benchmarks/benchmarks/bench_ufunc_strides.py | 285
 benchmarks/benchmarks/common.py              | 157

 17 files changed, 884 insertions(+), 197 deletions(-)
diff --git a/benchmarks/README.rst b/benchmarks/README.rst
index 2700e95e7..ef841a818 100644
--- a/benchmarks/README.rst
+++ b/benchmarks/README.rst
@@ -22,8 +22,8 @@ By default, `asv` ships with support for anaconda and virtualenv::
pip install asv
pip install virtualenv
-After contributing new benchmarks, you should test them locally
-before submitting a pull request.
+After contributing new benchmarks, you should test them locally before
+submitting a pull request.
To run all benchmarks, navigate to the root NumPy directory at
the command line and execute::
@@ -31,11 +31,21 @@ the command line and execute::
python runtests.py --bench
where ``--bench`` activates the benchmark suite instead of the
-test suite. This builds NumPy and runs all available benchmarks
+test suite. This builds NumPy and runs all available benchmarks
defined in ``benchmarks/``. (Note: this could take a while. Each
benchmark is run multiple times to measure the distribution in
execution times.)
+For **testing** benchmarks locally, it may be better to run them without
+repetitions::
+
+ cd benchmarks/
+ export REGEXP="bench.*Ufunc"
+ asv run --dry-run --show-stderr --python=same --quick -b $REGEXP
+
+where the regular expression used to match benchmarks is stored in
+``$REGEXP``, and ``--quick`` is used to avoid repetitions.
+
To run benchmarks from a particular benchmark module, such as
``bench_core.py``, simply append the filename without the extension::
@@ -69,6 +79,27 @@ Command-line help is available as usual via ``asv --help`` and
.. _ASV documentation: https://asv.readthedocs.io/
+Benchmarking versions
+---------------------
+
+To benchmark or visualize only releases on different machines locally, first generate the list of tags with their commits, then run those commits with ``asv``, that is::
+
+ cd benchmarks
+ # Get commits for tags
+ # delete tag_commits.txt before re-runs
+ for gtag in $(git tag --list --sort taggerdate | grep "^v"); do
+ git log $gtag --oneline -n1 --decorate=no | awk '{print $1;}' >> tag_commits.txt
+ done
+ # Use the last 20
+ tail --lines=20 tag_commits.txt > 20_vers.txt
+ asv run HASHFILE:20_vers.txt
+ # Publish and view
+ asv publish
+ asv preview
+
+For details on contributing these, see the `benchmark results repository`_.
+
+.. _benchmark results repository: https://github.com/HaoZeke/asv-numpy
Writing benchmarks
------------------
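As a minimal, illustrative sketch (the class and names below are not part of this diff), an ``asv`` benchmark in ``benchmarks/benchmarks/`` is a plain class with ``time_*`` methods and optional parameters::

    import numpy as np

    class MySum:
        # each combination of params is passed to setup() and time_*()
        params = [100, 10000]
        param_names = ['size']

        def setup(self, size):
            # runs before timing, once per parameter combination
            self.data = np.ones(size)

        def time_sum(self, size):
            np.sum(self.data)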
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
index b60135524..267450448 100644
--- a/benchmarks/asv.conf.json
+++ b/benchmarks/asv.conf.json
@@ -43,7 +43,8 @@
// version.
"matrix": {
"Cython": [],
- "setuptools": ["59.2.0"]
+ "setuptools": ["59.2.0"],
+ "packaging": []
},
// The directory (relative to the current directory) that benchmarks are
diff --git a/benchmarks/asv_compare.conf.json.tpl b/benchmarks/asv_compare.conf.json.tpl
index 01f4e41de..f0ef0bf49 100644
--- a/benchmarks/asv_compare.conf.json.tpl
+++ b/benchmarks/asv_compare.conf.json.tpl
@@ -47,7 +47,8 @@
// version.
"matrix": {
"Cython": [],
- "setuptools": ["59.2.0"]
+ "setuptools": ["59.2.0"],
+ "packaging": []
},
// The directory (relative to the current directory) that benchmarks are
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
index 7b9f1d3e6..35fc87eac 100644
--- a/benchmarks/benchmarks/__init__.py
+++ b/benchmarks/benchmarks/__init__.py
@@ -26,7 +26,7 @@ def dirty_lock(lock_name, lock_on_count=1):
lock_path = os.path.abspath(os.path.join(
os.path.dirname(__file__), "..", "env", lock_name)
)
- # ASV load the 'benchmark_dir' to discovering the available benchmarks
+ # ASV loads the 'benchmark_dir' to discover the available benchmarks
# the issue here is ASV doesn't capture any strings from stdout or stderr
# during this stage so we escape it and lock on the second increment
try:
diff --git a/benchmarks/benchmarks/bench_core.py b/benchmarks/benchmarks/bench_core.py
index 4fcd7ace5..fe1cd37b6 100644
--- a/benchmarks/benchmarks/bench_core.py
+++ b/benchmarks/benchmarks/bench_core.py
@@ -45,6 +45,12 @@ class Core(Benchmark):
def time_array_l_view(self):
np.array(self.l_view)
+ def time_can_cast(self):
+ np.can_cast(self.l10x10, self.float64_dtype)
+
+ def time_can_cast_same_kind(self):
+ np.can_cast(self.l10x10, self.float64_dtype, casting="same_kind")
+
def time_vstack_l(self):
np.vstack(self.l)
@@ -66,6 +72,9 @@ class Core(Benchmark):
def time_empty_100(self):
np.empty(100)
+ def time_empty_like(self):
+ np.empty_like(self.l10x10)
+
def time_eye_100(self):
np.eye(100)
@@ -206,13 +215,41 @@ class Indices(Benchmark):
def time_indices(self):
np.indices((1000, 500))
-class VarComplex(Benchmark):
- params = [10**n for n in range(0, 9)]
- def setup(self, n):
- self.arr = np.random.randn(n) + 1j * np.random.randn(n)
- def teardown(self, n):
- del self.arr
+class StatsMethods(Benchmark):
+    # 8-, 16- and 32-bit variants and complex128 are not benchmarked here;
+    # they would be redundant with the array_api coverage
+ params = [['int64', 'uint64', 'float64', 'intp',
+ 'complex64', 'bool', 'float', 'int',
+ 'complex', 'complex256'],
+ [100**n for n in range(0, 2)]]
+ param_names = ['dtype', 'size']
+
+ def setup(self, dtype, size):
+ try:
+ self.data = np.ones(size, dtype=getattr(np, dtype))
+ except AttributeError: # builtins throw AttributeError after 1.20
+ self.data = np.ones(size, dtype=dtype)
+ if dtype.startswith('complex'):
+            self.data = (np.random.randn(size)
+                         + 1j * np.random.randn(size)).astype(dtype)
+
+ def time_min(self, dtype, size):
+ self.data.min()
+
+ def time_max(self, dtype, size):
+ self.data.max()
+
+ def time_mean(self, dtype, size):
+ self.data.mean()
+
+ def time_std(self, dtype, size):
+ self.data.std()
+
+ def time_prod(self, dtype, size):
+ self.data.prod()
+
+ def time_var(self, dtype, size):
+ self.data.var()
- def time_var(self, n):
- self.arr.var()
+ def time_sum(self, dtype, size):
+ self.data.sum()
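For reference, the new ``time_can_cast`` benchmarks exercise NumPy's casting rules; a small sketch of the underlying behaviour (standard NumPy semantics, not part of this diff)::

    import numpy as np

    np.can_cast(np.int32, np.float64)                         # True: safe cast
    np.can_cast(np.float64, np.int32)                         # False under 'safe'
    np.can_cast(np.float64, np.float32, casting='same_kind')  # True: same kind
    np.can_cast(np.float64, np.int32, casting='same_kind')    # False: kind changes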
diff --git a/benchmarks/benchmarks/bench_creation.py b/benchmarks/benchmarks/bench_creation.py
new file mode 100644
index 000000000..3a577df7a
--- /dev/null
+++ b/benchmarks/benchmarks/bench_creation.py
@@ -0,0 +1,81 @@
+from .common import Benchmark, TYPES1, get_squares_
+
+import numpy as np
+
+
+class MeshGrid(Benchmark):
+ """ Benchmark meshgrid generation
+ """
+ params = [[16, 32],
+ [2, 3, 4],
+ ['ij', 'xy'], TYPES1]
+ param_names = ['size', 'ndims', 'ind', 'ndtype']
+ timeout = 10
+
+ def setup(self, size, ndims, ind, ndtype):
+ self.grid_dims = [(np.random.ranf(size)).astype(ndtype) for
+ x in range(ndims)]
+
+ def time_meshgrid(self, size, ndims, ind, ndtype):
+ np.meshgrid(*self.grid_dims, indexing=ind)
+
+
+class Create(Benchmark):
+ """ Benchmark for creation functions
+ """
+ # (64, 64), (128, 128), (256, 256)
+ # , (512, 512), (1024, 1024)
+ params = [[16, 32, 128, 256, 512,
+ (16, 16), (32, 32)],
+ ['C', 'F'],
+ TYPES1]
+ param_names = ['shape', 'order', 'npdtypes']
+ timeout = 10
+
+ def setup(self, shape, order, npdtypes):
+ values = get_squares_()
+ self.xarg = values.get(npdtypes)[0]
+
+ def time_full(self, shape, order, npdtypes):
+ np.full(shape, self.xarg[1], dtype=npdtypes, order=order)
+
+ def time_full_like(self, shape, order, npdtypes):
+ np.full_like(self.xarg, self.xarg[0], order=order)
+
+ def time_ones(self, shape, order, npdtypes):
+ np.ones(shape, dtype=npdtypes, order=order)
+
+ def time_ones_like(self, shape, order, npdtypes):
+ np.ones_like(self.xarg, order=order)
+
+ def time_zeros(self, shape, order, npdtypes):
+ np.zeros(shape, dtype=npdtypes, order=order)
+
+ def time_zeros_like(self, shape, order, npdtypes):
+ np.zeros_like(self.xarg, order=order)
+
+ def time_empty(self, shape, order, npdtypes):
+ np.empty(shape, dtype=npdtypes, order=order)
+
+ def time_empty_like(self, shape, order, npdtypes):
+ np.empty_like(self.xarg, order=order)
+
+
+class UfuncsFromDLP(Benchmark):
+ """ Benchmark for creation functions
+ """
+ params = [[16, 32, (16, 16),
+ (32, 32), (64, 64)],
+ TYPES1]
+ param_names = ['shape', 'npdtypes']
+ timeout = 10
+
+ def setup(self, shape, npdtypes):
+ if npdtypes in ['longdouble', 'clongdouble']:
+ raise NotImplementedError(
+ 'Only IEEE dtypes are supported')
+ values = get_squares_()
+ self.xarg = values.get(npdtypes)[0]
+
+ def time_from_dlpack(self, shape, npdtypes):
+ np.from_dlpack(self.xarg)
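``np.from_dlpack`` (available since NumPy 1.22) consumes any producer of the DLPack protocol, including NumPy arrays themselves, without copying; a sketch::

    import numpy as np

    a = np.arange(6, dtype=np.float32)
    b = np.from_dlpack(a)            # consumes a.__dlpack__()
    print(np.shares_memory(a, b))    # True: zero-copy exchange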
diff --git a/benchmarks/benchmarks/bench_function_base.py b/benchmarks/benchmarks/bench_function_base.py
index 2e44ff76b..cc37bef39 100644
--- a/benchmarks/benchmarks/bench_function_base.py
+++ b/benchmarks/benchmarks/bench_function_base.py
@@ -248,7 +248,7 @@ class Sort(Benchmark):
# In NumPy 1.17 and newer, 'merge' can be one of several
# stable sorts, it isn't necessarily merge sort.
['quick', 'merge', 'heap'],
- ['float64', 'int64', 'float32', 'uint32', 'int32', 'int16'],
+ ['float64', 'int64', 'float32', 'uint32', 'int32', 'int16', 'float16'],
[
('random',),
('ordered',),
diff --git a/benchmarks/benchmarks/bench_io.py b/benchmarks/benchmarks/bench_io.py
index 357adbb87..e316d07f3 100644
--- a/benchmarks/benchmarks/bench_io.py
+++ b/benchmarks/benchmarks/bench_io.py
@@ -1,7 +1,7 @@
-from .common import Benchmark, get_squares
+from .common import Benchmark, get_squares, get_squares_
import numpy as np
-from io import StringIO
+from io import SEEK_SET, StringIO, BytesIO
class Copy(Benchmark):
@@ -67,6 +67,15 @@ class Savez(Benchmark):
np.savez('tmp.npz', **self.squares)
+class LoadNpyOverhead(Benchmark):
+ def setup(self):
+ self.buffer = BytesIO()
+ np.save(self.buffer, get_squares_()['float32'])
+
+ def time_loadnpy_overhead(self):
+ self.buffer.seek(0, SEEK_SET)
+ np.load(self.buffer)
+
class LoadtxtCSVComments(Benchmark):
# benchmarks for np.loadtxt comment handling
# when reading in CSV files
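The rewind via ``seek(0, SEEK_SET)`` matters because ``np.load`` reads from the current stream position; the pattern in isolation (a sketch)::

    import numpy as np
    from io import BytesIO, SEEK_SET

    buf = BytesIO()
    np.save(buf, np.arange(10))   # writes the .npy header plus the data
    buf.seek(0, SEEK_SET)         # rewind before reading back
    arr = np.load(buf)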
diff --git a/benchmarks/benchmarks/bench_itemselection.py b/benchmarks/benchmarks/bench_itemselection.py
index 518258a8f..46a39372c 100644
--- a/benchmarks/benchmarks/bench_itemselection.py
+++ b/benchmarks/benchmarks/bench_itemselection.py
@@ -7,7 +7,7 @@ class Take(Benchmark):
params = [
[(1000, 1), (1000, 2), (2, 1000, 1), (1000, 3)],
["raise", "wrap", "clip"],
- TYPES1]
+ TYPES1 + ["O", "i,O"]]
param_names = ["shape", "mode", "dtype"]
def setup(self, shape, mode, dtype):
@@ -21,7 +21,7 @@ class Take(Benchmark):
class PutMask(Benchmark):
params = [
[True, False],
- TYPES1]
+ TYPES1 + ["O", "i,O"]]
param_names = ["values_is_scalar", "dtype"]
def setup(self, values_is_scalar, dtype):
@@ -41,3 +41,21 @@ class PutMask(Benchmark):
def time_sparse(self, values_is_scalar, dtype):
np.putmask(self.arr, self.sparse_mask, self.vals)
+
+class Put(Benchmark):
+ params = [
+ [True, False],
+ TYPES1 + ["O", "i,O"]]
+ param_names = ["values_is_scalar", "dtype"]
+
+ def setup(self, values_is_scalar, dtype):
+ if values_is_scalar:
+ self.vals = np.array(1., dtype=dtype)
+ else:
+ self.vals = np.ones(1000, dtype=dtype)
+
+ self.arr = np.ones(1000, dtype=dtype)
+ self.indx = np.arange(1000, dtype=np.intp)
+
+ def time_ordered(self, values_is_scalar, dtype):
+ np.put(self.arr, self.indx, self.vals)
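``np.put`` writes values at flat indices; for the 1-D case benchmarked above it is equivalent to fancy-index assignment (sketch)::

    import numpy as np

    arr = np.zeros(5)
    np.put(arr, [0, 2, 4], [1.0, 2.0, 3.0])   # flat-index write
    arr2 = np.zeros(5)
    arr2[[0, 2, 4]] = [1.0, 2.0, 3.0]         # equivalent for 1-D input
    assert (arr == arr2).all()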
diff --git a/benchmarks/benchmarks/bench_lib.py b/benchmarks/benchmarks/bench_lib.py
index b64f8ab17..f792116a6 100644
--- a/benchmarks/benchmarks/bench_lib.py
+++ b/benchmarks/benchmarks/bench_lib.py
@@ -132,11 +132,26 @@ class Unique(Benchmark):
# produce a randomly shuffled array with the
# approximate desired percentage np.nan content
base_array = np.random.uniform(size=array_size)
- base_array[base_array < percent_nans / 100.] = np.nan
+ n_nan = int(percent_nans * array_size)
+ nan_indices = np.random.choice(np.arange(array_size), size=n_nan)
+ base_array[nan_indices] = np.nan
self.arr = base_array
- def time_unique(self, array_size, percent_nans):
- np.unique(self.arr)
+ def time_unique_values(self, array_size, percent_nans):
+ np.unique(self.arr, return_index=False,
+ return_inverse=False, return_counts=False)
+
+ def time_unique_counts(self, array_size, percent_nans):
+ np.unique(self.arr, return_index=False,
+ return_inverse=False, return_counts=True)
+
+ def time_unique_inverse(self, array_size, percent_nans):
+ np.unique(self.arr, return_index=False,
+ return_inverse=True, return_counts=False)
+
+ def time_unique_all(self, array_size, percent_nans):
+ np.unique(self.arr, return_index=True,
+ return_inverse=True, return_counts=True)
class Isin(Benchmark):
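Splitting the single ``time_unique`` into per-flag benchmarks isolates the extra work each optional return adds; the return values line up as follows (standard ``np.unique`` behaviour)::

    import numpy as np

    a = np.array([2, 1, 2, 3, 1])
    vals = np.unique(a)                              # values only
    vals, counts = np.unique(a, return_counts=True)  # plus counts
    vals, idx, inv, counts = np.unique(
        a, return_index=True, return_inverse=True, return_counts=True)
    assert (vals[inv] == a).all()   # the inverse reconstructs the input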
diff --git a/benchmarks/benchmarks/bench_linalg.py b/benchmarks/benchmarks/bench_linalg.py
index a94ba1139..b4e39b084 100644
--- a/benchmarks/benchmarks/bench_linalg.py
+++ b/benchmarks/benchmarks/bench_linalg.py
@@ -190,3 +190,27 @@ class Einsum(Benchmark):
# sum_of_products_contig_outstride0_one:non_contiguous arrays
def time_einsum_noncon_contig_outstride0(self, dtype):
np.einsum("i->", self.non_contiguous_dim1, optimize=True)
+
+
+class LinAlgTransposeVdot(Benchmark):
+ # Smaller for speed
+ # , (128, 128), (256, 256), (512, 512),
+ # (1024, 1024)
+ params = [[(16, 16), (32, 32),
+ (64, 64)], TYPES1]
+ param_names = ['shape', 'npdtypes']
+
+ def setup(self, shape, npdtypes):
+ self.xarg = np.random.uniform(-1, 1, np.dot(*shape)).reshape(shape)
+ self.xarg = self.xarg.astype(npdtypes)
+ self.x2arg = np.random.uniform(-1, 1, np.dot(*shape)).reshape(shape)
+ self.x2arg = self.x2arg.astype(npdtypes)
+ if npdtypes.startswith('complex'):
+ self.xarg += self.xarg.T*1j
+ self.x2arg += self.x2arg.T*1j
+
+ def time_transpose(self, shape, npdtypes):
+ np.transpose(self.xarg)
+
+ def time_vdot(self, shape, npdtypes):
+ np.vdot(self.xarg, self.x2arg)
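Unlike ``np.dot``, ``np.vdot`` flattens both arguments and conjugates the first, which is why the setup adds an imaginary component for complex dtypes; a sketch::

    import numpy as np

    a = np.array([[1 + 2j, 3 + 4j]])
    b = np.array([[5 + 6j, 7 + 8j]])
    assert np.vdot(a, b) == np.sum(np.conj(a.ravel()) * b.ravel())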
diff --git a/benchmarks/benchmarks/bench_manipulate.py b/benchmarks/benchmarks/bench_manipulate.py
new file mode 100644
index 000000000..0a312479c
--- /dev/null
+++ b/benchmarks/benchmarks/bench_manipulate.py
@@ -0,0 +1,107 @@
+from .common import Benchmark, get_squares_, TYPES1, DLPACK_TYPES
+
+import numpy as np
+from collections import deque
+
+class BroadcastArrays(Benchmark):
+ params = [[(16, 32), (32, 64),
+ (64, 128), (128, 256),
+ (256, 512), (512, 1024)],
+ TYPES1]
+ param_names = ['shape', 'ndtype']
+ timeout = 10
+
+ def setup(self, shape, ndtype):
+ self.xarg = np.random.ranf(shape[0]*shape[1]).reshape(shape)
+ self.xarg = self.xarg.astype(ndtype)
+ if ndtype.startswith('complex'):
+ self.xarg += np.random.ranf(1)*1j
+
+ def time_broadcast_arrays(self, shape, ndtype):
+ np.broadcast_arrays(self.xarg, np.ones(1))
+
+
+class BroadcastArraysTo(Benchmark):
+ params = [[16, 32, 64, 128, 256, 512],
+ TYPES1]
+ param_names = ['size', 'ndtype']
+ timeout = 10
+
+ def setup(self, size, ndtype):
+ self.rng = np.random.default_rng()
+ self.xarg = self.rng.random(size)
+ self.xarg = self.xarg.astype(ndtype)
+ if ndtype.startswith('complex'):
+ self.xarg += self.rng.random(1)*1j
+
+ def time_broadcast_to(self, size, ndtype):
+ np.broadcast_to(self.xarg, (size, size))
+
+
+class ConcatenateStackArrays(Benchmark):
+ # (64, 128), (128, 256), (256, 512)
+ params = [[(16, 32), (32, 64)],
+ [2, 3, 4, 5],
+ TYPES1]
+ param_names = ['shape', 'narrays', 'ndtype']
+ timeout = 10
+
+ def setup(self, shape, narrays, ndtype):
+ self.xarg = [np.random.ranf(shape[0]*shape[1]).reshape(shape)
+ for x in range(narrays)]
+ self.xarg = [x.astype(ndtype) for x in self.xarg]
+ if ndtype.startswith('complex'):
+            self.xarg = [x + np.random.ranf(1)*1j for x in self.xarg]
+
+    def time_concatenate_ax0(self, shape, narrays, ndtype):
+        np.concatenate(self.xarg, axis=0)
+
+    def time_concatenate_ax1(self, shape, narrays, ndtype):
+        np.concatenate(self.xarg, axis=1)
+
+    def time_stack_ax0(self, shape, narrays, ndtype):
+        np.stack(self.xarg, axis=0)
+
+    def time_stack_ax1(self, shape, narrays, ndtype):
+        np.stack(self.xarg, axis=1)
+
+
+class DimsManipulations(Benchmark):
+ params = [
+ [(2, 1, 4), (2, 1), (5, 2, 3, 1)],
+ ]
+ param_names = ['shape']
+ timeout = 10
+
+ def setup(self, shape):
+ self.xarg = np.ones(shape=shape)
+ self.reshaped = deque(shape)
+ self.reshaped.rotate(1)
+ self.reshaped = tuple(self.reshaped)
+
+ def time_expand_dims(self, shape):
+ np.expand_dims(self.xarg, axis=1)
+
+ def time_expand_dims_neg(self, shape):
+ np.expand_dims(self.xarg, axis=-1)
+
+ def time_squeeze_dims(self, shape):
+ np.squeeze(self.xarg)
+
+ def time_flip_all(self, shape):
+ np.flip(self.xarg, axis=None)
+
+ def time_flip_one(self, shape):
+ np.flip(self.xarg, axis=1)
+
+ def time_flip_neg(self, shape):
+ np.flip(self.xarg, axis=-1)
+
+ def time_moveaxis(self, shape):
+ np.moveaxis(self.xarg, [0, 1], [-1, -2])
+
+ def time_roll(self, shape):
+ np.roll(self.xarg, 3)
+
+ def time_reshape(self, shape):
+ np.reshape(self.xarg, self.reshaped)
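The ``deque`` rotation used in ``setup`` yields a cyclic permutation of the shape, so the reshape target always has the same element count; for example::

    from collections import deque

    shape = (5, 2, 3, 1)
    d = deque(shape)
    d.rotate(1)          # shift right: the last axis moves to the front
    print(tuple(d))      # (1, 5, 2, 3) -- same product as (5, 2, 3, 1)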
diff --git a/benchmarks/benchmarks/bench_reduce.py b/benchmarks/benchmarks/bench_reduce.py
index ca07bd180..040b5ca73 100644
--- a/benchmarks/benchmarks/bench_reduce.py
+++ b/benchmarks/benchmarks/bench_reduce.py
@@ -45,19 +45,40 @@ class AnyAll(Benchmark):
self.zeros.any()
-class MinMax(Benchmark):
- params = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32,
- np.int64, np.uint64, np.float32, np.float64, np.intp]
+class StatsReductions(Benchmark):
+    # 8-, 16- and 32-bit variants and complex128 are not benchmarked here;
+    # they would be redundant with the array_api coverage
+    params = ['int64', 'uint64', 'float64', 'intp',
+              'complex64', 'bool', 'float', 'int',
+              'complex', 'complex256']
param_names = ['dtype']
def setup(self, dtype):
- self.d = np.ones(20000, dtype=dtype)
+ try:
+ self.data = np.ones(200, dtype=getattr(np, dtype))
+ except AttributeError: # builtins throw AttributeError after 1.20
+ self.data = np.ones(200, dtype=dtype)
+ if dtype.startswith('complex'):
+ self.data = self.data * self.data.T*1j
def time_min(self, dtype):
- np.min(self.d)
+ np.min(self.data)
def time_max(self, dtype):
- np.max(self.d)
+ np.max(self.data)
+
+ def time_mean(self, dtype):
+ np.mean(self.data)
+
+ def time_std(self, dtype):
+ np.std(self.data)
+
+ def time_prod(self, dtype):
+ np.prod(self.data)
+
+ def time_var(self, dtype):
+ np.var(self.data)
+
class FMinMax(Benchmark):
params = [np.float32, np.float64]
@@ -72,6 +93,7 @@ class FMinMax(Benchmark):
def time_max(self, dtype):
np.fmax.reduce(self.d)
+
class ArgMax(Benchmark):
params = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32,
np.int64, np.uint64, np.float32, np.float64, bool]
@@ -83,6 +105,7 @@ class ArgMax(Benchmark):
def time_argmax(self, dtype):
np.argmax(self.d)
+
class ArgMin(Benchmark):
params = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32,
np.int64, np.uint64, np.float32, np.float64, bool]
@@ -94,6 +117,7 @@ class ArgMin(Benchmark):
def time_argmin(self, dtype):
np.argmin(self.d)
+
class SmallReduction(Benchmark):
def setup(self):
self.d = np.ones(100, dtype=np.float32)
diff --git a/benchmarks/benchmarks/bench_scalar.py b/benchmarks/benchmarks/bench_scalar.py
index 650daa89d..638f66df5 100644
--- a/benchmarks/benchmarks/bench_scalar.py
+++ b/benchmarks/benchmarks/bench_scalar.py
@@ -65,3 +65,15 @@ class ScalarMath(Benchmark):
other + int32
other + int32
other + int32
+
+
+class ScalarStr(Benchmark):
+ # Test scalar to str conversion
+ params = [TYPES1]
+ param_names = ["type"]
+
+ def setup(self, typename):
+ self.a = np.array([100] * 100, dtype=typename)
+
+ def time_str_repr(self, typename):
+ res = [str(x) for x in self.a]
diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py
index 36d8621e8..f7c77d90c 100644
--- a/benchmarks/benchmarks/bench_ufunc.py
+++ b/benchmarks/benchmarks/bench_ufunc.py
@@ -1,6 +1,9 @@
-from .common import Benchmark, get_squares_
+from .common import Benchmark, get_squares_, TYPES1, DLPACK_TYPES
import numpy as np
+import itertools
+from packaging import version
+import operator
ufuncs = ['abs', 'absolute', 'add', 'arccos', 'arccosh', 'arcsin', 'arcsinh',
@@ -13,11 +16,13 @@ ufuncs = ['abs', 'absolute', 'add', 'arccos', 'arccosh', 'arcsin', 'arcsinh',
'isinf', 'isnan', 'isnat', 'lcm', 'ldexp', 'left_shift', 'less',
'less_equal', 'log', 'log10', 'log1p', 'log2', 'logaddexp',
'logaddexp2', 'logical_and', 'logical_not', 'logical_or',
- 'logical_xor', 'matmul', 'maximum', 'minimum', 'mod', 'modf', 'multiply',
- 'negative', 'nextafter', 'not_equal', 'positive', 'power',
- 'rad2deg', 'radians', 'reciprocal', 'remainder', 'right_shift',
- 'rint', 'sign', 'signbit', 'sin', 'sinh', 'spacing', 'sqrt',
- 'square', 'subtract', 'tan', 'tanh', 'true_divide', 'trunc']
+ 'logical_xor', 'matmul', 'maximum', 'minimum', 'mod', 'modf',
+ 'multiply', 'negative', 'nextafter', 'not_equal', 'positive',
+ 'power', 'rad2deg', 'radians', 'reciprocal', 'remainder',
+ 'right_shift', 'rint', 'sign', 'signbit', 'sin',
+ 'sinh', 'spacing', 'sqrt', 'square', 'subtract', 'tan', 'tanh',
+ 'true_divide', 'trunc']
+arrayfuncdisp = ['real', 'round']
for name in dir(np):
@@ -25,6 +30,30 @@ for name in dir(np):
print("Missing ufunc %r" % (name,))
+class ArrayFunctionDispatcher(Benchmark):
+ params = [arrayfuncdisp]
+ param_names = ['func']
+ timeout = 10
+
+ def setup(self, ufuncname):
+ np.seterr(all='ignore')
+ try:
+ self.afdn = getattr(np, ufuncname)
+ except AttributeError:
+ raise NotImplementedError()
+ self.args = []
+ for _, aarg in get_squares_().items():
+            arg = (aarg,) * 1  # dispatched functions take one array (no .nin)
+ try:
+ self.afdn(*arg)
+ except TypeError:
+ continue
+ self.args.append(arg)
+
+ def time_afdn_types(self, ufuncname):
+ [self.afdn(*arg) for arg in self.args]
+
+
class Broadcast(Benchmark):
def setup(self):
self.d = np.ones((50000, 100), dtype=np.float64)
@@ -34,6 +63,20 @@ class Broadcast(Benchmark):
self.d - self.e
+class At(Benchmark):
+ def setup(self):
+ rng = np.random.default_rng(1)
+ self.vals = rng.random(10_000_000, dtype=np.float64)
+ self.idx = rng.integers(1000, size=10_000_000).astype(np.intp)
+ self.res = np.zeros(1000, dtype=self.vals.dtype)
+
+ def time_sum_at(self):
+ np.add.at(self.res, self.idx, self.vals)
+
+ def time_maximum_at(self):
+ np.maximum.at(self.res, self.idx, self.vals)
+
+
class UFunc(Benchmark):
params = [ufuncs]
param_names = ['ufunc']
@@ -42,23 +85,179 @@ class UFunc(Benchmark):
def setup(self, ufuncname):
np.seterr(all='ignore')
try:
- self.f = getattr(np, ufuncname)
+ self.ufn = getattr(np, ufuncname)
except AttributeError:
raise NotImplementedError()
self.args = []
- for t, a in get_squares_().items():
- arg = (a,) * self.f.nin
+ for _, aarg in get_squares_().items():
+ arg = (aarg,) * self.ufn.nin
try:
- self.f(*arg)
+ self.ufn(*arg)
except TypeError:
continue
self.args.append(arg)
def time_ufunc_types(self, ufuncname):
- [self.f(*arg) for arg in self.args]
+ [self.ufn(*arg) for arg in self.args]
+
+
+class MethodsV0(Benchmark):
+ """ Benchmark for the methods which do not take any arguments
+ """
+ params = [['__abs__', '__neg__', '__pos__'], TYPES1]
+ param_names = ['methods', 'npdtypes']
+ timeout = 10
+
+ def setup(self, methname, npdtypes):
+ values = get_squares_()
+ self.xarg = values.get(npdtypes)[0]
+
+ def time_ndarray_meth(self, methname, npdtypes):
+ getattr(operator, methname)(self.xarg)
+
+
+class NDArrayLRShifts(Benchmark):
+ """ Benchmark for the shift methods
+ """
+ params = [['__lshift__', '__rshift__'],
+ ['intp', 'int8', 'int16',
+ 'int32', 'int64', 'uint8',
+ 'uint16', 'uint32', 'uint64']]
+ param_names = ['methods', 'npdtypes']
+ timeout = 10
+
+ def setup(self, methname, npdtypes):
+ self.vals = np.ones(1000,
+ dtype=getattr(np, npdtypes)) * \
+ np.random.randint(9)
+
+ def time_ndarray_meth(self, methname, npdtypes):
+ getattr(operator, methname)(*[self.vals, 2])
+
+
+class Methods0D(Benchmark):
+ """Zero dimension array methods
+ """
+ params = [['__bool__', '__complex__', '__invert__',
+ '__float__', '__int__'], TYPES1]
+ param_names = ['methods', 'npdtypes']
+ timeout = 10
+
+ def setup(self, methname, npdtypes):
+ self.xarg = np.array(3, dtype=npdtypes)
+        if (npdtypes.startswith('complex') and
+                methname in ['__float__', '__int__']) or \
+                (not npdtypes.startswith('int') and methname == '__invert__'):
+            # Skip combinations for which the method is undefined
+            raise NotImplementedError
+
+ def time_ndarray__0d__(self, methname, npdtypes):
+ meth = getattr(self.xarg, methname)
+ meth()
+
+
+class MethodsV1(Benchmark):
+ """ Benchmark for the methods which take an argument
+ """
+ params = [['__and__', '__add__', '__eq__', '__floordiv__', '__ge__',
+ '__gt__', '__le__', '__lt__', '__matmul__',
+ '__mod__', '__mul__', '__ne__', '__or__',
+ '__pow__', '__sub__', '__truediv__', '__xor__'],
+ TYPES1]
+ param_names = ['methods', 'npdtypes']
+ timeout = 10
+
+ def setup(self, methname, npdtypes):
+ if (
+ npdtypes.startswith("complex")
+ and methname in ["__floordiv__", "__mod__"]
+ ) or (
+ not npdtypes.startswith("int")
+ and methname in ["__and__", "__or__", "__xor__"]
+ ):
+ raise NotImplementedError # skip
+ values = get_squares_().get(npdtypes)
+ self.xargs = [values[0], values[1]]
+
+ def time_ndarray_meth(self, methname, npdtypes):
+ getattr(operator, methname)(*self.xargs)
+
+
+class NDArrayGetItem(Benchmark):
+ param_names = ['margs', 'msize']
+ params = [[0, (0, 0), (-1, 0), [0, -1]],
+ ['small', 'big']]
+
+ def setup(self, margs, msize):
+ self.xs = np.random.uniform(-1, 1, 6).reshape(2, 3)
+ self.xl = np.random.uniform(-1, 1, 50*50).reshape(50, 50)
+
+ def time_methods_getitem(self, margs, msize):
+ if msize == 'small':
+ mdat = self.xs
+ elif msize == 'big':
+ mdat = self.xl
+ getattr(mdat, '__getitem__')(margs)
+
+
+class NDArraySetItem(Benchmark):
+ param_names = ['margs', 'msize']
+ params = [[0, (0, 0), (-1, 0), [0, -1]],
+ ['small', 'big']]
+
+ def setup(self, margs, msize):
+ self.xs = np.random.uniform(-1, 1, 6).reshape(2, 3)
+ self.xl = np.random.uniform(-1, 1, 100*100).reshape(100, 100)
+
+ def time_methods_setitem(self, margs, msize):
+ if msize == 'small':
+ mdat = self.xs
+ elif msize == 'big':
+ mdat = self.xl
+ mdat[margs] = 17
+
+
+class DLPMethods(Benchmark):
+ """ Benchmark for DLPACK helpers
+ """
+ params = [['__dlpack__', '__dlpack_device__'], DLPACK_TYPES]
+ param_names = ['methods', 'npdtypes']
+ timeout = 10
+
+ def setup(self, methname, npdtypes):
+ values = get_squares_()
+ if npdtypes == 'bool':
+ if version.parse(np.__version__) > version.parse("1.25"):
+ self.xarg = values.get('int16')[0].astype('bool')
+ else:
+ raise NotImplementedError("Not supported before v1.25")
+ else:
+            self.xarg = values.get(npdtypes)[0]
+
+ def time_ndarray_dlp(self, methname, npdtypes):
+ meth = getattr(self.xarg, methname)
+ meth()
+
+
+class NDArrayAsType(Benchmark):
+ """ Benchmark for type conversion
+ """
+ params = [list(itertools.combinations(TYPES1, 2))]
+ param_names = ['typeconv']
+ timeout = 10
+
+ def setup(self, typeconv):
+ if typeconv[0] == typeconv[1]:
+ raise NotImplementedError(
+ "Skipping test for converting to the same dtype")
+ self.xarg = get_squares_().get(typeconv[0])
+
+ def time_astype(self, typeconv):
+ self.xarg.astype(typeconv[1])
+
class UFuncSmall(Benchmark):
- """ Benchmark for a selection of ufuncs on a small arrays and scalars
+    """ Benchmark for a selection of ufuncs on small arrays and scalars
Since the arrays and scalars are small, we are benchmarking the overhead
of the numpy ufunc functionality
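The setup code above leans on two pieces of ufunc introspection: ``nin``, the number of inputs, and ``types``, the list of supported ``in->out`` type-code signatures (standard NumPy attributes)::

    import numpy as np

    print(np.add.nin)        # 2: binary ufunc
    print(np.negative.nin)   # 1: unary ufunc
    print(np.add.types[:3])  # e.g. ['??->?', 'bb->b', 'BB->B']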
diff --git a/benchmarks/benchmarks/bench_ufunc_strides.py b/benchmarks/benchmarks/bench_ufunc_strides.py
index f80bf90f9..898cc0818 100644
--- a/benchmarks/benchmarks/bench_ufunc_strides.py
+++ b/benchmarks/benchmarks/bench_ufunc_strides.py
@@ -1,156 +1,181 @@
-from .common import Benchmark
+from .common import Benchmark, get_data
import numpy as np
-UNARY_UFUNCS = [obj for obj in np.core.umath.__dict__.values() if
- isinstance(obj, np.ufunc)]
-UNARY_OBJECT_UFUNCS = [uf for uf in UNARY_UFUNCS if "O->O" in uf.types]
-UNARY_OBJECT_UFUNCS.remove(getattr(np, 'invert'))
+UFUNCS = [obj for obj in np.core.umath.__dict__.values() if
+ isinstance(obj, np.ufunc)]
+UFUNCS_UNARY = [uf for uf in UFUNCS if "O->O" in uf.types]
-stride = [1, 2, 4]
-stride_out = [1, 2, 4]
-dtype = ['e', 'f', 'd']
-
-class Unary(Benchmark):
- params = [UNARY_OBJECT_UFUNCS, stride, stride_out, dtype]
- param_names = ['ufunc', 'stride_in', 'stride_out', 'dtype']
- timeout = 10
-
- def setup(self, ufuncname, stride, stride_out, dtype):
- np.seterr(all='ignore')
- try:
- self.f = ufuncname
- except AttributeError:
- raise NotImplementedError(f"No ufunc {ufuncname} found") from None
- N = 100000
- self.arr_out = np.empty(stride_out*N, dtype)
- self.arr = np.random.rand(stride*N).astype(dtype)
- if (ufuncname.__name__ == 'arccosh'):
- self.arr = 1.0 + self.arr
-
- def time_ufunc(self, ufuncname, stride, stride_out, dtype):
- self.f(self.arr[::stride], self.arr_out[::stride_out])
-
-class AVX_UFunc_log(Benchmark):
- params = [stride, dtype]
- param_names = ['stride', 'dtype']
- timeout = 10
-
- def setup(self, stride, dtype):
- np.seterr(all='ignore')
- N = 10000
- self.arr = np.array(np.random.random_sample(stride*N), dtype=dtype)
-
- def time_log(self, stride, dtype):
- np.log(self.arr[::stride])
-
-
-binary_ufuncs = [
- 'maximum', 'minimum', 'fmax', 'fmin'
-]
-binary_dtype = ['f', 'd']
-
-class Binary(Benchmark):
- param_names = ['ufunc', 'stride_in0', 'stride_in1', 'stride_out', 'dtype']
- params = [binary_ufuncs, stride, stride, stride_out, binary_dtype]
+class _AbstractBinary(Benchmark):
+ params = []
+    param_names = ['ufunc', 'stride_in0', 'stride_in1', 'stride_out', 'dtype']
timeout = 10
+ arrlen = 10000
+ data_finite = True
+ data_denormal = False
+ data_zeros = False
+
+ def setup(self, ufunc, stride_in0, stride_in1, stride_out, dtype):
+ ufunc_insig = f'{dtype}{dtype}->'
+ if ufunc_insig+dtype not in ufunc.types:
+ for st_sig in (ufunc_insig, dtype):
+ test = [sig for sig in ufunc.types if sig.startswith(st_sig)]
+ if test:
+ break
+ if not test:
+ raise NotImplementedError(
+ f"Ufunc {ufunc} doesn't support "
+ f"binary input of dtype {dtype}"
+ ) from None
+ tin, tout = test[0].split('->')
+ else:
+ tin = dtype + dtype
+ tout = dtype
+
+ self.ufunc_args = []
+ for i, (dt, stride) in enumerate(zip(tin, (stride_in0, stride_in1))):
+ self.ufunc_args += [get_data(
+ self.arrlen*stride, dt, i,
+ zeros=self.data_zeros,
+ finite=self.data_finite,
+ denormal=self.data_denormal,
+ )[::stride]]
+ for dt in tout:
+ self.ufunc_args += [
+ np.empty(stride_out*self.arrlen, dt)[::stride_out]
+ ]
- def setup(self, ufuncname, stride_in0, stride_in1, stride_out, dtype):
np.seterr(all='ignore')
- try:
- self.f = getattr(np, ufuncname)
- except AttributeError:
- raise NotImplementedError(f"No ufunc {ufuncname} found") from None
- N = 100000
- self.arr1 = np.array(np.random.rand(stride_in0*N), dtype=dtype)
- self.arr2 = np.array(np.random.rand(stride_in1*N), dtype=dtype)
- self.arr_out = np.empty(stride_out*N, dtype)
-
- def time_ufunc(self, ufuncname, stride_in0, stride_in1, stride_out, dtype):
- self.f(self.arr1[::stride_in0], self.arr2[::stride_in1],
- self.arr_out[::stride_out])
-
-binary_int_ufuncs = ['maximum', 'minimum']
-binary_int_dtype = ['b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q']
+ def time_binary(self, ufunc, stride_in0, stride_in1, stride_out,
+ dtype):
+ ufunc(*self.ufunc_args)
-class BinaryInt(Binary):
+ def time_binary_scalar_in0(self, ufunc, stride_in0, stride_in1,
+ stride_out, dtype):
+ ufunc(self.ufunc_args[0][0], *self.ufunc_args[1:])
- param_names = ['ufunc', 'stride_in0', 'stride_in1', 'stride_out', 'dtype']
- params = [binary_int_ufuncs, stride, stride, stride_out, binary_int_dtype]
-
-class AVX_ldexp(Benchmark):
-
- params = [dtype, stride]
- param_names = ['dtype', 'stride']
- timeout = 10
+ def time_binary_scalar_in1(self, ufunc, stride_in0, stride_in1,
+ stride_out, dtype):
+ ufunc(self.ufunc_args[0], self.ufunc_args[1][0], *self.ufunc_args[2:])
- def setup(self, dtype, stride):
- np.seterr(all='ignore')
- self.f = getattr(np, 'ldexp')
- N = 10000
- self.arr1 = np.array(np.random.rand(stride*N), dtype=dtype)
- self.arr2 = np.array(np.random.rand(stride*N), dtype='i')
-
- def time_ufunc(self, dtype, stride):
- self.f(self.arr1[::stride], self.arr2[::stride])
-
-cmplx_bfuncs = ['add',
- 'subtract',
- 'multiply',
- 'divide']
-cmplxstride = [1, 2, 4]
-cmplxdtype = ['F', 'D']
-
-class AVX_cmplx_arithmetic(Benchmark):
- params = [cmplx_bfuncs, cmplxstride, cmplxdtype]
- param_names = ['bfunc', 'stride', 'dtype']
- timeout = 10
-
- def setup(self, bfuncname, stride, dtype):
- np.seterr(all='ignore')
- try:
- self.f = getattr(np, bfuncname)
- except AttributeError:
- raise NotImplementedError(f"No bfunc {bfuncname} found") from None
- N = 10000
- self.arr1 = np.ones(stride*N, dtype)
- self.arr2 = np.ones(stride*N, dtype)
-
- def time_ufunc(self, bfuncname, stride, dtype):
- self.f(self.arr1[::stride], self.arr2[::stride])
-
-cmplx_ufuncs = ['reciprocal',
- 'absolute',
- 'square',
- 'conjugate']
-
-class AVX_cmplx_funcs(Benchmark):
- params = [cmplx_ufuncs, cmplxstride, cmplxdtype]
- param_names = ['bfunc', 'stride', 'dtype']
+class _AbstractUnary(Benchmark):
+ params = []
+ param_names = ['ufunc', 'stride_in', 'stride_out', 'dtype']
timeout = 10
+ arrlen = 10000
+ data_finite = True
+ data_denormal = False
+ data_zeros = False
+
+ def setup(self, ufunc, stride_in, stride_out, dtype):
+ arr_in = get_data(
+ stride_in*self.arrlen, dtype,
+ zeros=self.data_zeros,
+ finite=self.data_finite,
+ denormal=self.data_denormal,
+ )
+ self.ufunc_args = [arr_in[::stride_in]]
+
+ ufunc_insig = f'{dtype}->'
+ if ufunc_insig+dtype not in ufunc.types:
+ test = [sig for sig in ufunc.types if sig.startswith(ufunc_insig)]
+ if not test:
+ raise NotImplementedError(
+ f"Ufunc {ufunc} doesn't support "
+ f"unary input of dtype {dtype}"
+ ) from None
+ tout = test[0].split('->')[1]
+ else:
+ tout = dtype
+
+ for dt in tout:
+ self.ufunc_args += [
+ np.empty(stride_out*self.arrlen, dt)[::stride_out]
+ ]
- def setup(self, bfuncname, stride, dtype):
np.seterr(all='ignore')
- try:
- self.f = getattr(np, bfuncname)
- except AttributeError:
- raise NotImplementedError(f"No bfunc {bfuncname} found") from None
- N = 10000
- self.arr1 = np.ones(stride*N, dtype)
- def time_ufunc(self, bfuncname, stride, dtype):
- self.f(self.arr1[::stride])
+ def time_unary(self, ufunc, stride_in, stride_out, dtype):
+ ufunc(*self.ufunc_args)
+
+class UnaryFP(_AbstractUnary):
+ params = [UFUNCS_UNARY, [1, 2, 4], [1, 2, 4], ['e', 'f', 'd']]
+
+ def setup(self, ufunc, stride_in, stride_out, dtype):
+ _AbstractUnary.setup(self, ufunc, stride_in, stride_out, dtype)
+ if (ufunc.__name__ == 'arccosh'):
+ self.ufunc_args[0] += 1.0
+
+class UnaryFPSpecial(UnaryFP):
+ data_finite = False
+ data_denormal = True
+ data_zeros = True
+
+class BinaryFP(_AbstractBinary):
+ params = [
+ [np.maximum, np.minimum, np.fmax, np.fmin, np.ldexp],
+ [1, 2, 4], [1, 2, 4], [1, 2, 4], ['f', 'd']
+ ]
+
+class BinaryFPSpecial(BinaryFP):
+ data_finite = False
+ data_denormal = True
+ data_zeros = True
+
+class BinaryComplex(_AbstractBinary):
+ params = [
+ [np.add, np.subtract, np.multiply, np.divide],
+ [1, 2, 4], [1, 2, 4], [1, 2, 4],
+ ['F', 'D']
+ ]
+
+class UnaryComplex(_AbstractUnary):
+ params = [
+ [np.reciprocal, np.absolute, np.square, np.conjugate],
+ [1, 2, 4], [1, 2, 4], ['F', 'D']
+ ]
+
+class BinaryInt(_AbstractBinary):
+ arrlen = 100000
+ params = [
+ [np.maximum, np.minimum],
+ [1, 2], [1, 2], [1, 2],
+ ['b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q']
+ ]
+
+class BinaryIntContig(_AbstractBinary):
+ params = [
+ [getattr(np, uf) for uf in (
+ 'add', 'subtract', 'multiply', 'bitwise_and', 'bitwise_or',
+ 'bitwise_xor', 'logical_and', 'logical_or', 'logical_xor',
+ 'right_shift', 'left_shift',
+ )],
+ [1], [1], [1],
+ ['b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q']
+ ]
+
+class UnaryIntContig(_AbstractUnary):
+ arrlen = 100000
+ params = [
+ [getattr(np, uf) for uf in (
+ 'positive', 'square', 'reciprocal', 'conjugate', 'logical_not',
+ 'invert', 'isnan', 'isinf', 'isfinite',
+ 'absolute', 'sign'
+ )],
+ [1], [1],
+ ['b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q']
+ ]
class Mandelbrot(Benchmark):
def f(self,z):
return np.abs(z) < 4.0
def g(self,z,c):
- return np.sum(np.multiply(z,z) + c)
+ return np.sum(np.multiply(z, z) + c)
def mandelbrot_numpy(self, c, maxiter):
- output = np.zeros(c.shape, np.int)
+ output = np.zeros(c.shape, np.int32)
z = np.empty(c.shape, np.complex64)
for it in range(maxiter):
notdone = self.f(z)
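Each benchmark above passes strided views straight to the ufunc, including the output operand, so the inner loop sees the requested strides without temporary copies; a sketch with input strides 2 and 4 and output stride 2::

    import numpy as np

    N = 8
    a = np.random.rand(2 * N)
    b = np.random.rand(4 * N)
    out = np.empty(2 * N)
    np.add(a[::2], b[::4], out[::2])   # all three views have length N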
diff --git a/benchmarks/benchmarks/common.py b/benchmarks/benchmarks/common.py
index 0c40e85b0..d10fe999d 100644
--- a/benchmarks/benchmarks/common.py
+++ b/benchmarks/benchmarks/common.py
@@ -1,5 +1,8 @@
-import numpy
+import numpy as np
import random
+import os
+from functools import lru_cache
+from pathlib import Path
# Various pre-crafted datasets/variables for testing
# !!! Must not be changed -- only appended !!!
@@ -7,7 +10,7 @@ import random
# sequences
random.seed(1)
# but will seed it nevertheless
-numpy.random.seed(1)
+np.random.seed(1)
nx, ny = 1000, 1000
# reduced squares based on indexes_rand, primarily for testing more
@@ -19,37 +22,37 @@ TYPES1 = [
'int16', 'float16',
'int32', 'float32',
'int64', 'float64', 'complex64',
- 'longfloat', 'complex128',
+ 'longdouble', 'complex128',
]
-if 'complex256' in numpy.sctypeDict:
- TYPES1.append('complex256')
+if 'complex256' in np.sctypeDict:
+ TYPES1.append('clongdouble')
+DLPACK_TYPES = [
+ 'int16', 'float16',
+ 'int32', 'float32',
+ 'int64', 'float64', 'complex64',
+ 'complex128', 'bool',
+]
-def memoize(func):
- result = []
- def wrapper():
- if not result:
- result.append(func())
- return result[0]
- return wrapper
-
+# Path for caching
+CACHE_ROOT = Path(__file__).resolve().parent.parent / 'env' / 'numpy_benchdata'
# values which will be used to construct our sample data matrices
# replicate 10 times to speed up initial imports of this helper
# and generate some redundancy
-@memoize
+@lru_cache(typed=True)
def get_values():
- rnd = numpy.random.RandomState(1)
- values = numpy.tile(rnd.uniform(0, 100, size=nx*ny//10), 10)
+ rnd = np.random.RandomState(1)
+ values = np.tile(rnd.uniform(0, 100, size=nx*ny//10), 10)
return values
-@memoize
+@lru_cache(typed=True)
def get_squares():
values = get_values()
- squares = {t: numpy.array(values,
- dtype=getattr(numpy, t)).reshape((nx, ny))
+ squares = {t: np.array(values,
+ dtype=getattr(np, t)).reshape((nx, ny))
for t in TYPES1}
# adjust complex ones to have non-degenerated imagery part -- use
@@ -60,42 +63,42 @@ def get_squares():
return squares
-@memoize
+@lru_cache(typed=True)
def get_squares_():
# smaller squares
squares_ = {t: s[:nxs, :nys] for t, s in get_squares().items()}
return squares_
-@memoize
+@lru_cache(typed=True)
def get_vectors():
# vectors
vectors = {t: s[0] for t, s in get_squares().items()}
return vectors
-@memoize
+@lru_cache(typed=True)
def get_indexes():
indexes = list(range(nx))
# so we do not have all items
indexes.pop(5)
indexes.pop(95)
- indexes = numpy.array(indexes)
+ indexes = np.array(indexes)
return indexes
-@memoize
+@lru_cache(typed=True)
def get_indexes_rand():
rnd = random.Random(1)
indexes_rand = get_indexes().tolist() # copy
rnd.shuffle(indexes_rand) # in-place shuffle
- indexes_rand = numpy.array(indexes_rand)
+ indexes_rand = np.array(indexes_rand)
return indexes_rand
-@memoize
+@lru_cache(typed=True)
def get_indexes_():
# smaller versions
indexes = get_indexes()
@@ -103,12 +106,112 @@ def get_indexes_():
return indexes_
-@memoize
+@lru_cache(typed=True)
def get_indexes_rand_():
indexes_rand = get_indexes_rand()
indexes_rand_ = indexes_rand[indexes_rand < nxs]
return indexes_rand_
+@lru_cache(typed=True)
+def get_data(size, dtype, ip_num=0, zeros=False, finite=True, denormal=False):
+ """
+    Generates a cached random array covering several scenarios that
+    may affect benchmark fairness and stability.
+
+ Parameters
+ ----------
+ size: int
+ Array length.
+
+ dtype: dtype or dtype specifier
+
+    ip_num: int
+        Input operand number; gives each operand its own cached data
+        while keeping memory use bounded.
+
+    zeros: bool
+        Whether to spread zeros through the generated data.
+
+    finite: bool
+        Whether to avoid the fp special values nan/inf.
+
+    denormal: bool
+        Whether to spread subnormal numbers through the generated data.
+ """
+ dtype = np.dtype(dtype)
+ dname = dtype.name
+ cache_name = f'{dname}_{size}_{ip_num}_{int(zeros)}'
+ if dtype.kind in 'fc':
+ cache_name += f'{int(finite)}{int(denormal)}'
+ cache_name += '.bin'
+ cache_path = CACHE_ROOT / cache_name
+ if cache_path.exists():
+ return np.fromfile(cache_path, dtype)
+
+ array = np.ones(size, dtype)
+ rands = []
+ if dtype.kind == 'i':
+ dinfo = np.iinfo(dtype)
+ scale = 8
+ if zeros:
+ scale += 1
+ lsize = size // scale
+ for low, high in (
+ (-0x80, -1),
+ (1, 0x7f),
+ (-0x8000, -1),
+ (1, 0x7fff),
+ (-0x80000000, -1),
+ (1, 0x7fffffff),
+ (-0x8000000000000000, -1),
+ (1, 0x7fffffffffffffff),
+ ):
+ rands += [np.random.randint(
+ max(low, dinfo.min),
+ min(high, dinfo.max),
+ lsize, dtype
+ )]
+ elif dtype.kind == 'u':
+ dinfo = np.iinfo(dtype)
+ scale = 4
+ if zeros:
+ scale += 1
+ lsize = size // scale
+ for high in (0xff, 0xffff, 0xffffffff, 0xffffffffffffffff):
+ rands += [np.random.randint(1, min(high, dinfo.max), lsize, dtype)]
+ elif dtype.kind in 'fc':
+ scale = 1
+ if zeros:
+ scale += 1
+ if not finite:
+ scale += 2
+ if denormal:
+ scale += 1
+ dinfo = np.finfo(dtype)
+ lsize = size // scale
+ rands = [np.random.rand(lsize).astype(dtype)]
+ if not finite:
+ rands += [
+ np.empty(lsize, dtype=dtype), np.empty(lsize, dtype=dtype)
+ ]
+ rands[1].fill(float('nan'))
+ rands[2].fill(float('inf'))
+ if denormal:
+ rands += [np.empty(lsize, dtype=dtype)]
+ rands[-1].fill(dinfo.smallest_subnormal)
+
+ if rands:
+ if zeros:
+ rands += [np.zeros(lsize, dtype)]
+ stride = len(rands)
+ for start, r in enumerate(rands):
+ array[start:len(r)*stride:stride] = r
+
+ if not CACHE_ROOT.exists():
+ CACHE_ROOT.mkdir(parents=True)
+ array.tofile(cache_path)
+ return array
+
class Benchmark:
pass
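A usage sketch for the helper above: within one process ``lru_cache`` returns the identical array for identical arguments, and across runs the ``.bin`` file under ``CACHE_ROOT`` is reused (the argument values here are illustrative)::

    arr1 = get_data(10000, 'd', ip_num=0, zeros=True, finite=False)
    arr2 = get_data(10000, 'd', ip_num=0, zeros=True, finite=False)
    assert arr2 is arr1   # served from the lru_cache memo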