"""Benchmarks for `numpy.lib`."""
from .common import Benchmark
import numpy as np
class Pad(Benchmark):
    """Benchmarks for `numpy.pad`.

    When benchmarking the pad function it is useful to cover scenarios where
    the ratio between the size of the input array and the output array differs
    significantly (original area vs. padded area). This allows to evaluate for
    which scenario a padding algorithm is optimized. Furthermore involving
    large range of array sizes ensures that the effects of CPU-bound caching is
    visible.

    The table below shows the sizes of the arrays involved in this benchmark:

    +-----------------+----------+-----------+-----------+-----------------+
    | shape           | original | padded: 1 | padded: 8 | padded: (0, 32) |
    +=================+==========+===========+===========+=================+
    | (2 ** 22,)      | 32 MiB   | 32.0 MiB  | 32.0 MiB  | 32.0 MiB        |
    +-----------------+----------+-----------+-----------+-----------------+
    | (1024, 1024)    | 8 MiB    | 8.03 MiB  | 8.25 MiB  | 8.51 MiB        |
    +-----------------+----------+-----------+-----------+-----------------+
    | (256, 128, 1)   | 256 KiB  | 786 KiB   | 5.08 MiB  | 11.6 MiB        |
    +-----------------+----------+-----------+-----------+-----------------+
    | (4, 4, 4, 4)    | 2 KiB    | 10.1 KiB  | 1.22 MiB  | 12.8 MiB        |
    +-----------------+----------+-----------+-----------+-----------------+
    | (1, 1, 1, 1, 1) | 8 B      | 1.90 MiB  | 10.8 MiB  | 299 MiB         |
    +-----------------+----------+-----------+-----------+-----------------+
    """

    param_names = ["shape", "pad_width", "mode"]
    params = [
        # Shape of the input arrays
        # (docstring table row fixed to (256, 128, 1): 256*128*1*8 B = 256 KiB
        # matches the "original" column, so the params entry is the truth)
        [(2 ** 22,), (1024, 1024), (256, 128, 1),
         (4, 4, 4, 4), (1, 1, 1, 1, 1)],
        # Tested pad widths
        [1, 8, (0, 32)],
        # Tested modes: mean, median, minimum & maximum use the same code path
        #               reflect & symmetric share a lot of their code path
        ["constant", "edge", "linear_ramp", "mean", "reflect", "wrap"],
    ]

    def setup(self, shape, pad_width, mode):
        # Make sure to fill the array to make the OS page fault
        # in the setup phase and not the timed phase
        self.array = np.full(shape, fill_value=1, dtype=np.float64)

    def time_pad(self, shape, pad_width, mode):
        np.pad(self.array, pad_width, mode)
class Nan(Benchmark):
    """Benchmarks for the nan-aware reductions (`np.nanmin`, `np.nansum`, ...).

    Each benchmark runs one nan-function over a 1D array whose NaN density
    is controlled by ``percent_nans``.
    """

    param_names = ["array_size", "percent_nans"]
    params = [
        # lengths of the 1D arrays
        [200, int(2e5)],
        # percentage of np.nan entries in the arrays
        [0, 0.1, 2., 50., 90.],
    ]

    def setup(self, array_size, percent_nans):
        np.random.seed(123)
        # Uniform draws below the threshold are overwritten with NaN, so
        # roughly ``percent_nans`` percent of the entries become NaN and the
        # NaN positions are scattered at random through the array.
        threshold = percent_nans / 100.
        values = np.random.uniform(size=array_size)
        values[values < threshold] = np.nan
        self.arr = values

    def time_nanmin(self, array_size, percent_nans):
        np.nanmin(self.arr)

    def time_nanmax(self, array_size, percent_nans):
        np.nanmax(self.arr)

    def time_nanargmin(self, array_size, percent_nans):
        np.nanargmin(self.arr)

    def time_nanargmax(self, array_size, percent_nans):
        np.nanargmax(self.arr)

    def time_nansum(self, array_size, percent_nans):
        np.nansum(self.arr)

    def time_nanprod(self, array_size, percent_nans):
        np.nanprod(self.arr)

    def time_nancumsum(self, array_size, percent_nans):
        np.nancumsum(self.arr)

    def time_nancumprod(self, array_size, percent_nans):
        np.nancumprod(self.arr)

    def time_nanmean(self, array_size, percent_nans):
        np.nanmean(self.arr)

    def time_nanvar(self, array_size, percent_nans):
        np.nanvar(self.arr)

    def time_nanstd(self, array_size, percent_nans):
        np.nanstd(self.arr)

    def time_nanmedian(self, array_size, percent_nans):
        np.nanmedian(self.arr)

    def time_nanquantile(self, array_size, percent_nans):
        np.nanquantile(self.arr, q=0.2)

    def time_nanpercentile(self, array_size, percent_nans):
        np.nanpercentile(self.arr, q=50)
class Unique(Benchmark):
    """Benchmark `np.unique` on arrays that contain np.nan values."""

    param_names = ["array_size", "percent_nans"]
    params = [
        # lengths of the 1D arrays
        [200, int(2e5)],
        # percentage of np.nan entries in the arrays
        [0, 0.1, 2., 50., 90.],
    ]

    def setup(self, array_size, percent_nans):
        np.random.seed(123)
        # Overwrite uniform draws below the threshold with NaN: this yields
        # approximately the requested NaN percentage at random positions.
        data = np.random.uniform(size=array_size)
        data[data < percent_nans / 100.] = np.nan
        self.arr = data

    def time_unique(self, array_size, percent_nans):
        np.unique(self.arr)
class Isin(Benchmark):
    """Benchmarks for `numpy.isin`.

    Varies both the array length and the range of the random integers, since
    the element range influences which lookup strategy pays off.
    """

    param_names = ["size", "highest_element"]
    params = [
        [10, 100000, 3000000],
        [10, 10000, int(1e8)],
    ]

    def setup(self, size, highest_element):
        # Two independent random integer arrays drawn from the same range;
        # one is the haystack, the other the needle set.
        draw = np.random.randint
        self.array = draw(0, highest_element, size=size)
        self.in_array = draw(0, highest_element, size=size)

    def time_isin(self, size, highest_element):
        np.isin(self.array, self.in_array)