# benchmarks/benchmarks/bench_lib.py
"""Benchmarks for `numpy.lib`."""


from .common import Benchmark

import numpy as np


class Pad(Benchmark):
    """Benchmarks for `numpy.pad`.

    When benchmarking the pad function it is useful to cover scenarios where
    the ratio between the size of the input array and the output array differs
    significantly (original area vs. padded area). This makes it possible to
    evaluate for which scenario a padding algorithm is optimized. Furthermore,
    covering a large range of array sizes ensures that the effects of CPU
    caching become visible.

    The table below shows the sizes of the arrays involved in this benchmark:

    +-----------------+----------+-----------+-----------+-----------------+
    | shape           | original | padded: 1 | padded: 8 | padded: (0, 32) |
    +=================+==========+===========+===========+=================+
    | (2 ** 22,)      | 32 MiB   | 32.0 MiB  | 32.0 MiB  | 32.0 MiB        |
    +-----------------+----------+-----------+-----------+-----------------+
    | (1024, 1024)    | 8 MiB    | 8.03 MiB  | 8.25 MiB  | 8.51 MiB        |
    +-----------------+----------+-----------+-----------+-----------------+
    | (256, 128, 1)   | 256 KiB  | 786 KiB   | 5.08 MiB  | 11.6 MiB        |
    +-----------------+----------+-----------+-----------+-----------------+
    | (4, 4, 4, 4)    | 2 KiB    | 10.1 KiB  | 1.22 MiB  | 12.8 MiB        |
    +-----------------+----------+-----------+-----------+-----------------+
    | (1, 1, 1, 1, 1) | 8 B      | 1.90 KiB  | 10.8 MiB  | 299 MiB         |
    +-----------------+----------+-----------+-----------+-----------------+
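
    Each entry is the ``nbytes`` of the padded float64 array; for a given
    ``shape`` and ``pad_width`` from the parameters below, the table can be
    reproduced with a one-liner such as (illustrative only, not part of the
    timed benchmark)::

        np.pad(np.zeros(shape, dtype=np.float64), pad_width).nbytes / 2**20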
    """

    param_names = ["shape", "pad_width", "mode"]
    params = [
        # Shape of the input arrays
        [(2 ** 22,), (1024, 1024), (256, 128, 1),
         (4, 4, 4, 4), (1, 1, 1, 1, 1)],
        # Tested pad widths
        [1, 8, (0, 32)],
        # Tested modes: mean, median, minimum & maximum use the same code path
        #               reflect & symmetric share a lot of their code path
        ["constant", "edge", "linear_ramp", "mean", "reflect", "wrap"],
    ]

    def setup(self, shape, pad_width, mode):
        # Fill the array so that OS page faults happen during the setup
        # phase rather than during the timed phase
        self.array = np.full(shape, fill_value=1, dtype=np.float64)

    def time_pad(self, shape, pad_width, mode):
        np.pad(self.array, pad_width, mode)


class Nan(Benchmark):
    """Benchmarks for nan functions"""

    param_names = ["array_size", "percent_nans"]
    params = [
        # sizes of the 1D arrays
        [200, int(2e5)],
        # percent of np.nan in arrays
        [0, 0.1, 2., 50., 90.],
    ]
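
    # Note: many nan-functions first copy the array with NaNs replaced by a
    # neutral fill value (e.g. 0 for nansum, 1 for nanprod) before reducing,
    # so the NaN fraction exercises that replacement path as well as the
    # reduction itself. (Explanatory note based on NumPy's nanfunctions
    # implementation; details may vary between versions.)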

    def setup(self, array_size, percent_nans):
        np.random.seed(123)
        # produce a random array in which approximately percent_nans
        # percent of the entries are np.nan (uniform values below the
        # threshold are replaced)
        base_array = np.random.uniform(size=array_size)
        base_array[base_array < percent_nans / 100.] = np.nan
        self.arr = base_array

    def time_nanmin(self, array_size, percent_nans):
        np.nanmin(self.arr)

    def time_nanmax(self, array_size, percent_nans):
        np.nanmax(self.arr)

    def time_nanargmin(self, array_size, percent_nans):
        np.nanargmin(self.arr)

    def time_nanargmax(self, array_size, percent_nans):
        np.nanargmax(self.arr)

    def time_nansum(self, array_size, percent_nans):
        np.nansum(self.arr)

    def time_nanprod(self, array_size, percent_nans):
        np.nanprod(self.arr)

    def time_nancumsum(self, array_size, percent_nans):
        np.nancumsum(self.arr)

    def time_nancumprod(self, array_size, percent_nans):
        np.nancumprod(self.arr)

    def time_nanmean(self, array_size, percent_nans):
        np.nanmean(self.arr)

    def time_nanvar(self, array_size, percent_nans):
        np.nanvar(self.arr)

    def time_nanstd(self, array_size, percent_nans):
        np.nanstd(self.arr)

    def time_nanmedian(self, array_size, percent_nans):
        np.nanmedian(self.arr)

    def time_nanquantile(self, array_size, percent_nans):
        np.nanquantile(self.arr, q=0.2)

    def time_nanpercentile(self, array_size, percent_nans):
        np.nanpercentile(self.arr, q=50)


class Unique(Benchmark):
    """Benchmark for np.unique with np.nan values."""

    param_names = ["array_size", "percent_nans"]
    params = [
        # sizes of the 1D arrays
        [200, int(2e5)],
        # percent of np.nan in arrays
        [0, 0.1, 2., 50., 90.],
    ]
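
    # Note: np.unique sorts its input (NaNs sort to the end), and since
    # NumPy 1.21 the result contains at most one NaN, so the NaN fraction
    # affects both the sort and the de-duplication step.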

    def setup(self, array_size, percent_nans):
        np.random.seed(123)
        # produce a random array in which approximately percent_nans
        # percent of the entries are np.nan (uniform values below the
        # threshold are replaced)
        base_array = np.random.uniform(size=array_size)
        base_array[base_array < percent_nans / 100.] = np.nan
        self.arr = base_array

    def time_unique(self, array_size, percent_nans):
        np.unique(self.arr)


class Isin(Benchmark):
    """Benchmarks for `numpy.isin`."""

    param_names = ["size", "highest_element"]
    params = [
        [10, 100000, 3000000],
        [10, 10000, int(1e8)]
    ]
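
    # Note: ``highest_element`` sets how densely the values fill their range.
    # For integer inputs, recent NumPy versions can choose between a
    # sort-based and a table-based strategy in np.isin (see its ``kind``
    # parameter), so this density can matter for performance; the benchmark
    # uses the default selection.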

    def setup(self, size, highest_element):
        self.array = np.random.randint(
                low=0, high=highest_element, size=size)
        self.in_array = np.random.randint(
                low=0, high=highest_element, size=size)

    def time_isin(self, size, highest_element):
        np.isin(self.array, self.in_array)