"""Benchmarks for `numpy.lib`.""" from .common import Benchmark import numpy as np class Pad(Benchmark): """Benchmarks for `numpy.pad`. When benchmarking the pad function it is useful to cover scenarios where the ratio between the size of the input array and the output array differs significantly (original area vs. padded area). This allows to evaluate for which scenario a padding algorithm is optimized. Furthermore involving large range of array sizes ensures that the effects of CPU-bound caching is visible. The table below shows the sizes of the arrays involved in this benchmark: +-----------------+----------+-----------+-----------+-----------------+ | shape | original | padded: 1 | padded: 8 | padded: (0, 32) | +=================+==========+===========+===========+=================+ | (2 ** 22,) | 32 MiB | 32.0 MiB | 32.0 MiB | 32.0 MiB | +-----------------+----------+-----------+-----------+-----------------+ | (1024, 1024) | 8 MiB | 8.03 MiB | 8.25 MiB | 8.51 MiB | +-----------------+----------+-----------+-----------+-----------------+ | (256, 256, 1) | 256 KiB | 786 KiB | 5.08 MiB | 11.6 MiB | +-----------------+----------+-----------+-----------+-----------------+ | (4, 4, 4, 4) | 2 KiB | 10.1 KiB | 1.22 MiB | 12.8 MiB | +-----------------+----------+-----------+-----------+-----------------+ | (1, 1, 1, 1, 1) | 8 B | 1.90 MiB | 10.8 MiB | 299 MiB | +-----------------+----------+-----------+-----------+-----------------+ """ param_names = ["shape", "pad_width", "mode"] params = [ # Shape of the input arrays [(2 ** 22,), (1024, 1024), (256, 128, 1), (4, 4, 4, 4), (1, 1, 1, 1, 1)], # Tested pad widths [1, 8, (0, 32)], # Tested modes: mean, median, minimum & maximum use the same code path # reflect & symmetric share a lot of their code path ["constant", "edge", "linear_ramp", "mean", "reflect", "wrap"], ] def setup(self, shape, pad_width, mode): # Make sure to fill the array to make the OS page fault # in the setup phase and not the timed phase self.array = np.full(shape, fill_value=1, dtype=np.float64) def time_pad(self, shape, pad_width, mode): np.pad(self.array, pad_width, mode) class Nan(Benchmark): """Benchmarks for nan functions""" param_names = ["array_size", "percent_nans"] params = [ # sizes of the 1D arrays [200, int(2e5)], # percent of np.nan in arrays [0, 0.1, 2., 50., 90.], ] def setup(self, array_size, percent_nans): np.random.seed(123) # produce a randomly shuffled array with the # approximate desired percentage np.nan content base_array = np.random.uniform(size=array_size) base_array[base_array < percent_nans / 100.] = np.nan self.arr = base_array def time_nanmin(self, array_size, percent_nans): np.nanmin(self.arr) def time_nanmax(self, array_size, percent_nans): np.nanmax(self.arr) def time_nanargmin(self, array_size, percent_nans): np.nanargmin(self.arr) def time_nanargmax(self, array_size, percent_nans): np.nanargmax(self.arr) def time_nansum(self, array_size, percent_nans): np.nansum(self.arr) def time_nanprod(self, array_size, percent_nans): np.nanprod(self.arr) def time_nancumsum(self, array_size, percent_nans): np.nancumsum(self.arr) def time_nancumprod(self, array_size, percent_nans): np.nancumprod(self.arr) def time_nanmean(self, array_size, percent_nans): np.nanmean(self.arr) def time_nanvar(self, array_size, percent_nans): np.nanvar(self.arr) def time_nanstd(self, array_size, percent_nans): np.nanstd(self.arr) def time_nanmedian(self, array_size, percent_nans): np.nanmedian(self.arr) def time_nanquantile(self, array_size, percent_nans): np.nanquantile(self.arr, q=0.2) def time_nanpercentile(self, array_size, percent_nans): np.nanpercentile(self.arr, q=50) class Unique(Benchmark): """Benchmark for np.unique with np.nan values.""" param_names = ["array_size", "percent_nans"] params = [ # sizes of the 1D arrays [200, int(2e5)], # percent of np.nan in arrays [0, 0.1, 2., 50., 90.], ] def setup(self, array_size, percent_nans): np.random.seed(123) # produce a randomly shuffled array with the # approximate desired percentage np.nan content base_array = np.random.uniform(size=array_size) base_array[base_array < percent_nans / 100.] = np.nan self.arr = base_array def time_unique(self, array_size, percent_nans): np.unique(self.arr) class Isin(Benchmark): """Benchmarks for `numpy.isin`.""" param_names = ["size", "highest_element"] params = [ [10, 100000, 3000000], [10, 10000, int(1e8)] ] def setup(self, size, highest_element): self.array = np.random.randint( low=0, high=highest_element, size=size) self.in_array = np.random.randint( low=0, high=highest_element, size=size) def time_isin(self, size, highest_element): np.isin(self.array, self.in_array)