numpy/core/src/umath/reduction.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404

/*
 * This file implements generic methods for computing reductions on arrays.
 *
 * Written by Mark Wiebe (mwwiebe@gmail.com)
 * Copyright (c) 2011 by Enthought, Inc.
 *
 * See LICENSE.txt for the license.
 */
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
#define _UMATHMODULE

#define PY_SSIZE_T_CLEAN
#include <Python.h>

#include "npy_config.h"
#include "numpy/arrayobject.h"

#include "npy_pycompat.h"
#include "ctors.h"

#include "numpy/ufuncobject.h"
#include "lowlevel_strided_loops.h"
#include "reduction.h"
#include "extobj.h"  /* for _check_ufunc_fperr */


/*
 * Count the number of dimensions selected in 'axis_flags'
 */
static int
count_axes(int ndim, const npy_bool *axis_flags)
{
    int idim;
    int naxes = 0;

    for (idim = 0; idim < ndim; ++idim) {
        if (axis_flags[idim]) {
            naxes++;
        }
    }
    return naxes;
}

/*
 * This function initializes a result array for a reduction operation
 * which has no identity. This means it needs to copy the first element
 * it sees along the reduction axes to result.
 *
 * If a reduction has an identity, such as 0 or 1, the result should be
 * fully initialized to the identity, because this function raises an
 * exception when there are no elements to reduce (which is appropriate if,
 * and only if, the reduction operation has no identity).
 *
 * This means it copies the subarray indexed at zero along each reduction axis
 * into 'result'.
 *
 * result  : The array into which the result is computed. This must have
 *           the same number of dimensions as 'operand', but for each
 *           axis i where 'axis_flags[i]' is True, it has a single element.
 * operand : The array being reduced.
 * axis_flags : An array of boolean flags, one for each axis of 'operand'.
 *              When a flag is True, it indicates to reduce along that axis.
 * funcname : The name of the reduction operation, for the purpose of
 *            better quality error messages. For example, "numpy.max"
 *            would be a good name for NumPy's max function.
 *
 * Returns -1 if an error occurred, and otherwise the reduce arrays size,
 * which is the number of elements already initialized.
 */
static npy_intp
PyArray_CopyInitialReduceValues(
                    PyArrayObject *result, PyArrayObject *operand,
                    const npy_bool *axis_flags, const char *funcname,
                    int keepdims)
{
    npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
    npy_intp *shape_orig = PyArray_SHAPE(operand);
    npy_intp *strides_orig = PyArray_STRIDES(operand);
    PyArrayObject *op_view = NULL;

    int ndim = PyArray_NDIM(operand);

    /*
     * Copy the subarray of the first element along each reduction axis.
     *
     * Adjust the shape to only look at the first element along
     * any of the reduction axes. If keepdims is False remove the axes
     * entirely.
     */
    int idim_out = 0;
    npy_intp size = 1;
    for (int idim = 0; idim < ndim; idim++) {
        if (axis_flags[idim]) {
            if (NPY_UNLIKELY(shape_orig[idim] == 0)) {
                PyErr_Format(PyExc_ValueError,
                        "zero-size array to reduction operation %s "
                        "which has no identity", funcname);
                return -1;
            }
            if (keepdims) {
                shape[idim_out] = 1;
                strides[idim_out] = 0;
                idim_out++;
            }
        }
        else {
            size *= shape_orig[idim];
            shape[idim_out] = shape_orig[idim];
            strides[idim_out] = strides_orig[idim];
            idim_out++;
        }
    }

    PyArray_Descr *descr = PyArray_DESCR(operand);
    Py_INCREF(descr);
    op_view = (PyArrayObject *)PyArray_NewFromDescr(
            &PyArray_Type, descr, idim_out, shape, strides,
            PyArray_DATA(operand), 0, NULL);
    if (op_view == NULL) {
        return -1;
    }

    /*
     * Copy the elements into the result to start.
     */
    int res = PyArray_CopyInto(result, op_view);
    Py_DECREF(op_view);
    if (res < 0) {
        return -1;
    }

    /*
     * If there were no reduction axes, we would already be done here.
     * Note that if there is only a single reduction axis, in principle the
     * iteration could be set up more efficiently here by removing that
     * axis before setting up the iterator (simplifying the iteration since
     * `skip_first_count` (the returned size) can be set to 0).
     */
    return size;
}

/*
 * This function executes all the standard NumPy reduction function
 * boilerplate code, just calling the appropriate inner loop function where
 * necessary.
 *
 * context     : The ArrayMethod context (with ufunc, method, and descriptors).
 * operand     : The array to be reduced.
 * out         : NULL, or the array into which to place the result.
 * wheremask   : NOT YET SUPPORTED, but this parameter is placed here
 *               so that support can be added in the future without breaking
 *               API compatibility. Pass in NULL.
 * axis_flags  : Flags indicating the reduction axes of 'operand'.
 * reorderable : If True, the reduction being done is reorderable, which
 *               means specifying multiple axes of reduction at once is ok,
 *               and the reduction code may calculate the reduction in an
 *               arbitrary order. The calculation may be reordered because
 *               of cache behavior or multithreading requirements.
 * keepdims    : If true, leaves the reduction dimensions in the result
 *               with size one.
 * subok       : If true, the result uses the subclass of operand, otherwise
 *               it is always a base class ndarray.
 * identity    : If Py_None, PyArray_CopyInitialReduceValues is used, otherwise
 *               this value is used to initialize the result to
 *               the reduction's unit.
 * loop        : `reduce_loop` from `ufunc_object.c`.  TODO: Refactor
 * data        : Data which is passed to the inner loop.
 * buffersize  : Buffer size for the iterator. For the default, pass in 0.
 * funcname    : The name of the reduction function, for error messages.
 * errormask   : forwarded from _get_bufsize_errmask
 *
 * TODO FIXME: if you squint, this is essentially an second independent
 * implementation of generalized ufuncs with signature (i)->(), plus a few
 * extra bells and whistles. (Indeed, as far as I can tell, it was originally
 * split out to support a fancy version of count_nonzero... which is not
 * actually a reduction function at all, it's just a (i)->() function!) So
 * probably these two implementation should be merged into one. (In fact it
 * would be quite nice to support axis= and keepdims etc. for arbitrary
 * generalized ufuncs!)
 */
NPY_NO_EXPORT PyArrayObject *
PyUFunc_ReduceWrapper(PyArrayMethod_Context *context,
        PyArrayObject *operand, PyArrayObject *out, PyArrayObject *wheremask,
        npy_bool *axis_flags, int reorderable, int keepdims,
        PyObject *identity, PyArray_ReduceLoopFunc *loop,
        void *data, npy_intp buffersize, const char *funcname, int errormask)
{
    assert(loop != NULL);
    PyArrayObject *result = NULL;
    npy_intp skip_first_count = 0;

    /* Iterator parameters */
    NpyIter *iter = NULL;
    PyArrayObject *op[3];
    PyArray_Descr *op_dtypes[3];
    npy_uint32 it_flags, op_flags[3];
    /* Loop auxdata (must be freed on error) */
    NpyAuxData *auxdata = NULL;

    /* More than one axis means multiple orders are possible */
    if (!reorderable && count_axes(PyArray_NDIM(operand), axis_flags) > 1) {
        PyErr_Format(PyExc_ValueError,
                     "reduction operation '%s' is not reorderable, "
                     "so at most one axis may be specified",
                     funcname);
        return NULL;
    }
    /* Can only use where with an initial ( from identity or argument) */
    if (wheremask != NULL && identity == Py_None) {
        PyErr_Format(PyExc_ValueError,
                     "reduction operation '%s' does not have an identity, "
                     "so to use a where mask one has to specify 'initial'",
                     funcname);
        return NULL;
    }


    /* Set up the iterator */
    op[0] = out;
    op[1] = operand;
    op_dtypes[0] = context->descriptors[0];
    op_dtypes[1] = context->descriptors[1];

    it_flags = NPY_ITER_BUFFERED |
            NPY_ITER_EXTERNAL_LOOP |
            NPY_ITER_GROWINNER |
            NPY_ITER_DONT_NEGATE_STRIDES |
            NPY_ITER_ZEROSIZE_OK |
            NPY_ITER_REFS_OK |
            NPY_ITER_DELAY_BUFALLOC |
            NPY_ITER_COPY_IF_OVERLAP;
    op_flags[0] = NPY_ITER_READWRITE |
                  NPY_ITER_ALIGNED |
                  NPY_ITER_ALLOCATE |
                  NPY_ITER_NO_SUBTYPE;
    op_flags[1] = NPY_ITER_READONLY |
                  NPY_ITER_ALIGNED |
                  NPY_ITER_NO_BROADCAST;

    if (wheremask != NULL) {
        op[2] = wheremask;
        /* wheremask is guaranteed to be NPY_BOOL, so borrow its reference */
        op_dtypes[2] = PyArray_DESCR(wheremask);
        assert(op_dtypes[2]->type_num == NPY_BOOL);
        if (op_dtypes[2] == NULL) {
            goto fail;
        }
        op_flags[2] = NPY_ITER_READONLY;
    }
    /* Set up result array axes mapping, operand and wheremask use default */
    int result_axes[NPY_MAXDIMS];
    int *op_axes[3] = {result_axes, NULL, NULL};

    int curr_axis = 0;
    for (int i = 0; i < PyArray_NDIM(operand); i++) {
        if (axis_flags[i]) {
            if (keepdims) {
                result_axes[i] = NPY_ITER_REDUCTION_AXIS(curr_axis);
                curr_axis++;
            }
            else {
                result_axes[i] = NPY_ITER_REDUCTION_AXIS(-1);
            }
        }
        else {
            result_axes[i] = curr_axis;
            curr_axis++;
        }
    }
    if (out != NULL) {
        /* NpyIter does not raise a good error message in this common case. */
        if (NPY_UNLIKELY(curr_axis != PyArray_NDIM(out))) {
            if (keepdims) {
                PyErr_Format(PyExc_ValueError,
                        "output parameter for reduction operation %s has the "
                        "wrong number of dimensions: Found %d but expected %d "
                        "(must match the operand's when keepdims=True)",
                        funcname, PyArray_NDIM(out), curr_axis);
            }
            else {
                PyErr_Format(PyExc_ValueError,
                        "output parameter for reduction operation %s has the "
                        "wrong number of dimensions: Found %d but expected %d",
                        funcname, PyArray_NDIM(out), curr_axis);
            }
            goto fail;
        }
    }

    iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, it_flags,
                               NPY_KEEPORDER, NPY_UNSAFE_CASTING,
                               op_flags,
                               op_dtypes,
                               PyArray_NDIM(operand), op_axes, NULL, buffersize);
    if (iter == NULL) {
        goto fail;
    }

    result = NpyIter_GetOperandArray(iter)[0];

    PyArrayMethod_StridedLoop *strided_loop;
    NPY_ARRAYMETHOD_FLAGS flags = 0;

    int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
    needs_api |= NpyIter_IterationNeedsAPI(iter);
    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
        /* Start with the floating-point exception flags cleared */
        npy_clear_floatstatus_barrier((char*)&iter);
    }

    /*
     * Initialize the result to the reduction unit if possible,
     * otherwise copy the initial values and get a view to the rest.
     */
    if (identity != Py_None) {
        if (PyArray_FillWithScalar(result, identity) < 0) {
            goto fail;
        }
    }
    else {
        /*
         * For 1-D skip_first_count could be optimized to 0, but no-identity
         * reductions are not super common.
         * (see also comment in CopyInitialReduceValues)
         */
        skip_first_count = PyArray_CopyInitialReduceValues(
                result, operand, axis_flags, funcname, keepdims);
        if (skip_first_count < 0) {
            goto fail;
        }
    }

    if (!NpyIter_Reset(iter, NULL)) {
        goto fail;
    }

    /*
     * Note that we need to ensure that the iterator is reset before getting
     * the fixed strides.  (The buffer information is uninitialized before.)
     */
    npy_intp fixed_strides[3];
    NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
    if (wheremask != NULL) {
        if (PyArrayMethod_GetMaskedStridedLoop(context,
                1, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
            goto fail;
        }
    }
    else {
        if (context->method->get_strided_loop(context,
                1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
            goto fail;
        }
    }

    if (NpyIter_GetIterSize(iter) != 0) {
        NpyIter_IterNextFunc *iternext;
        char **dataptr;
        npy_intp *strideptr;
        npy_intp *countptr;

        iternext = NpyIter_GetIterNext(iter, NULL);
        if (iternext == NULL) {
            goto fail;
        }
        dataptr = NpyIter_GetDataPtrArray(iter);
        strideptr = NpyIter_GetInnerStrideArray(iter);
        countptr = NpyIter_GetInnerLoopSizePtr(iter);

        if (loop(context, strided_loop, auxdata,
                iter, dataptr, strideptr, countptr, iternext,
                needs_api, skip_first_count) < 0) {
            goto fail;
        }
    }

    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
        /* NOTE: We could check float errors even on error */
        if (_check_ufunc_fperr(errormask, NULL, "reduce") < 0) {
            goto fail;
        }
    }

    if (out != NULL) {
        result = out;
    }
    Py_INCREF(result);

    NPY_AUXDATA_FREE(auxdata);
    if (!NpyIter_Deallocate(iter)) {
        Py_DECREF(result);
        return NULL;
    }
    return result;

fail:
    NPY_AUXDATA_FREE(auxdata);
    if (iter != NULL) {
        NpyIter_Deallocate(iter);
    }

    return NULL;
}