diff options
author | Matti Picus <matti.picus@gmail.com> | 2021-03-18 21:10:19 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-18 21:10:19 +0200 |
commit | bb7a31a6a852ecc64d3f7cffb70121fc8bef20eb (patch) | |
tree | ba0aef2f644d8a94aaf75fe101df976ca25d811b | |
parent | 666866936c1c731f9de0c66d66a58f14c975c871 (diff) | |
parent | b5de1ceb1f1707d0539446b3d8883f3f8f80cb69 (diff) | |
download | numpy-bb7a31a6a852ecc64d3f7cffb70121fc8bef20eb.tar.gz |
Merge pull request #18398 from seberg/casting-loop-signature
MAINT: Fix casting signatures to align with NEP 43 signature
23 files changed, 1765 insertions, 2255 deletions
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 63e8bf974..a731e7f15 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -1921,6 +1921,11 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size, common_dtype_function *common_dtype; common_instance_function *common_instance; /* + * The casting implementation (ArrayMethod) to convert between two + * instances of this DType, stored explicitly for fast access: + */ + PyObject *within_dtype_castingimpl; + /* * Dictionary of ArrayMethods representing most possible casts * (structured and object are exceptions). * This should potentially become a weak mapping in the future. diff --git a/numpy/core/src/common/lowlevel_strided_loops.h b/numpy/core/src/common/lowlevel_strided_loops.h index 014103f13..1255e51dd 100644 --- a/numpy/core/src/common/lowlevel_strided_loops.h +++ b/numpy/core/src/common/lowlevel_strided_loops.h @@ -2,6 +2,8 @@ #define __LOWLEVEL_STRIDED_LOOPS_H #include "common.h" #include <npy_config.h> +#include <array_method.h> +#include "dtype_transfer.h" #include "mem_overlap.h" /* For PyArray_ macros used below */ @@ -30,22 +32,26 @@ * Use NPY_AUXDATA_CLONE and NPY_AUXDATA_FREE to deal with this data. * */ -typedef int (PyArray_StridedUnaryOp)( - char *dst, npy_intp dst_stride, char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, NpyAuxData *transferdata); +// TODO: FIX! That comment belongs to something now in array-method /* * This is for pointers to functions which behave exactly as - * for PyArray_StridedUnaryOp, but with an additional mask controlling + * for PyArrayMethod_StridedLoop, but with an additional mask controlling * which values are transformed. * + * TODO: We should move this mask "capability" to the ArrayMethod itself + * probably. 
Although for NumPy internal things this works decently, + * and exposing it there should be well thought out to be useful beyond + * NumPy if possible. + * * In particular, the 'i'-th element is operated on if and only if * mask[i*mask_stride] is true. */ typedef int (PyArray_MaskedStridedUnaryOp)( - char *dst, npy_intp dst_stride, char *src, npy_intp src_stride, + PyArrayMethod_Context *context, char *const *args, + const npy_intp *dimensions, const npy_intp *strides, npy_bool *mask, npy_intp mask_stride, - npy_intp N, npy_intp src_itemsize, NpyAuxData *transferdata); + NpyAuxData *auxdata); /* * Gives back a function pointer to a specialized function for copying @@ -65,7 +71,7 @@ typedef int (PyArray_MaskedStridedUnaryOp)( * Should be the item size if it will always be the same, 0 otherwise. * */ -NPY_NO_EXPORT PyArray_StridedUnaryOp * +NPY_NO_EXPORT PyArrayMethod_StridedLoop * PyArray_GetStridedCopyFn(int aligned, npy_intp src_stride, npy_intp dst_stride, npy_intp itemsize); @@ -80,7 +86,7 @@ PyArray_GetStridedCopyFn(int aligned, * * Parameters are as for PyArray_GetStridedCopyFn. */ -NPY_NO_EXPORT PyArray_StridedUnaryOp * +NPY_NO_EXPORT PyArrayMethod_StridedLoop * PyArray_GetStridedCopySwapFn(int aligned, npy_intp src_stride, npy_intp dst_stride, npy_intp itemsize); @@ -95,7 +101,7 @@ PyArray_GetStridedCopySwapFn(int aligned, * * Parameters are as for PyArray_GetStridedCopyFn. 
*/ -NPY_NO_EXPORT PyArray_StridedUnaryOp * +NPY_NO_EXPORT PyArrayMethod_StridedLoop * PyArray_GetStridedCopySwapPairFn(int aligned, npy_intp src_stride, npy_intp dst_stride, npy_intp itemsize); @@ -114,7 +120,7 @@ NPY_NO_EXPORT int PyArray_GetStridedZeroPadCopyFn(int aligned, int unicode_swap, npy_intp src_stride, npy_intp dst_stride, npy_intp src_itemsize, npy_intp dst_itemsize, - PyArray_StridedUnaryOp **outstransfer, + PyArrayMethod_StridedLoop **outstransfer, NpyAuxData **outtransferdata); /* @@ -123,7 +129,7 @@ PyArray_GetStridedZeroPadCopyFn(int aligned, int unicode_swap, * to dst_type_num. If a conversion is unsupported, returns NULL * without setting a Python exception. */ -NPY_NO_EXPORT PyArray_StridedUnaryOp * +NPY_NO_EXPORT PyArrayMethod_StridedLoop * PyArray_GetStridedNumericCastFn(int aligned, npy_intp src_stride, npy_intp dst_stride, int src_type_num, int dst_type_num); @@ -138,7 +144,7 @@ NPY_NO_EXPORT int PyArray_GetDTypeCopySwapFn(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *dtype, - PyArray_StridedUnaryOp **outstransfer, + PyArrayMethod_StridedLoop **outstransfer, NpyAuxData **outtransferdata); /* @@ -168,12 +174,10 @@ PyArray_GetDTypeCopySwapFn(int aligned, * If 0, the destination data gets new reference ownership. * If 1, the references from the source data are moved to * the destination data. - * out_stransfer: - * The resulting transfer function is placed here. - * out_transferdata: - * The auxiliary data for the transfer function is placed here. - * When finished with the transfer function, the caller must call - * NPY_AUXDATA_FREE on this data. + * cast_info: + * A pointer to an (uninitialized) `NPY_cast_info` struct, the caller + * must call `NPY_cast_info_xfree` on it (except on error) and handle + * its memory livespan. * out_needs_api: * If this is non-NULL, and the transfer function produced needs * to call into the (Python) API, this gets set to 1. 
This @@ -191,60 +195,15 @@ PyArray_GetDTypeTransferFunction(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, + NPY_cast_info *cast_info, int *out_needs_api); - -/* Same as above, but only wraps copyswapn or legacy cast functions */ -NPY_NO_EXPORT int -PyArray_GetLegacyDTypeTransferFunction(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, - int *out_needs_api, int wrap_if_unaligned); - -/* Specialized dtype transfer functions */ -NPY_NO_EXPORT int -get_nbo_cast_datetime_transfer_function(int aligned, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata); - -NPY_NO_EXPORT int -get_nbo_datetime_to_string_transfer_function( - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata); - -NPY_NO_EXPORT int -get_datetime_to_unicode_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, - int *out_needs_api); - -NPY_NO_EXPORT int -get_nbo_string_to_datetime_transfer_function( - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata); - -NPY_NO_EXPORT int -get_unicode_to_datetime_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, - int *out_needs_api); - NPY_NO_EXPORT int get_fields_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, 
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, - PyArray_StridedUnaryOp **out_stransfer, + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata, int *out_needs_api); @@ -253,30 +212,10 @@ get_subarray_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, - PyArray_StridedUnaryOp **out_stransfer, + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata, int *out_needs_api); -NPY_NO_EXPORT int -_strided_to_strided_move_references(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data); - -NPY_NO_EXPORT int -_strided_to_strided_copy_references(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data); - -NPY_NO_EXPORT int -wrap_aligned_contig_transfer_function_with_copyswapn( - int aligned, npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata, - int *out_needs_api, - PyArray_StridedUnaryOp *caststransfer, NpyAuxData *castdata); - /* * This is identical to PyArray_GetDTypeTransferFunction, but returns a * transfer function which also takes a mask as a parameter. The mask is used @@ -301,8 +240,7 @@ PyArray_GetMaskedDTypeTransferFunction(int aligned, PyArray_Descr *dst_dtype, PyArray_Descr *mask_dtype, int move_references, - PyArray_MaskedStridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, + NPY_cast_info *cast_info, int *out_needs_api); /* @@ -357,11 +295,9 @@ PyArray_CastRawArrays(npy_intp count, * sizes, for example a casting operation, the 'stransfer' function * should be specialized for that, in which case 'stransfer' will use * this parameter as the source item size. - * stransfer: - * The strided transfer function. 
- * transferdata: - * An auxiliary data pointer passed to the strided transfer function. - * This follows the conventions of NpyAuxData objects. + * cast_info: + * Pointer to the NPY_cast_info struct which summarizes all information + * necessary to perform a cast. */ NPY_NO_EXPORT npy_intp PyArray_TransferNDimToStrided(npy_intp ndim, @@ -370,8 +306,7 @@ PyArray_TransferNDimToStrided(npy_intp ndim, npy_intp const *coords, npy_intp coords_inc, npy_intp const *shape, npy_intp shape_inc, npy_intp count, npy_intp src_itemsize, - PyArray_StridedUnaryOp *stransfer, - NpyAuxData *transferdata); + NPY_cast_info *cast_info); NPY_NO_EXPORT npy_intp PyArray_TransferStridedToNDim(npy_intp ndim, @@ -380,8 +315,7 @@ PyArray_TransferStridedToNDim(npy_intp ndim, npy_intp const *coords, npy_intp coords_inc, npy_intp const *shape, npy_intp shape_inc, npy_intp count, npy_intp src_itemsize, - PyArray_StridedUnaryOp *stransfer, - NpyAuxData *transferdata); + NPY_cast_info *cast_info); NPY_NO_EXPORT npy_intp PyArray_TransferMaskedStridedToNDim(npy_intp ndim, @@ -391,8 +325,7 @@ PyArray_TransferMaskedStridedToNDim(npy_intp ndim, npy_intp const *coords, npy_intp coords_inc, npy_intp const *shape, npy_intp shape_inc, npy_intp count, npy_intp src_itemsize, - PyArray_MaskedStridedUnaryOp *stransfer, - NpyAuxData *data); + NPY_cast_info *cast_info); NPY_NO_EXPORT int mapiter_trivial_get(PyArrayObject *self, PyArrayObject *ind, diff --git a/numpy/core/src/multiarray/array_assign_array.c b/numpy/core/src/multiarray/array_assign_array.c index 361964a5c..665dadfbf 100644 --- a/numpy/core/src/multiarray/array_assign_array.c +++ b/numpy/core/src/multiarray/array_assign_array.c @@ -23,6 +23,7 @@ #include "lowlevel_strided_loops.h" #include "array_assign.h" +#include "dtype_transfer.h" /* * Check that array data is both uint-aligned and true-aligned for all array @@ -82,10 +83,7 @@ raw_array_assign_array(int ndim, npy_intp const *shape, npy_intp src_strides_it[NPY_MAXDIMS]; npy_intp 
coord[NPY_MAXDIMS]; - PyArray_StridedUnaryOp *stransfer = NULL; - NpyAuxData *transferdata = NULL; int aligned, needs_api = 0; - npy_intp src_itemsize = src_dtype->elsize; NPY_BEGIN_THREADS_DEF; @@ -117,12 +115,12 @@ raw_array_assign_array(int ndim, npy_intp const *shape, } /* Get the function to do the casting */ + NPY_cast_info cast_info; if (PyArray_GetDTypeTransferFunction(aligned, src_strides_it[0], dst_strides_it[0], src_dtype, dst_dtype, 0, - &stransfer, &transferdata, - &needs_api) != NPY_SUCCEED) { + &cast_info, &needs_api) != NPY_SUCCEED) { return -1; } @@ -130,11 +128,13 @@ raw_array_assign_array(int ndim, npy_intp const *shape, NPY_BEGIN_THREADS; } + npy_intp strides[2] = {src_strides_it[0], dst_strides_it[0]}; + NPY_RAW_ITER_START(idim, ndim, coord, shape_it) { /* Process the innermost dimension */ - if (stransfer( - dst_data, dst_strides_it[0], src_data, src_strides_it[0], - shape_it[0], src_itemsize, transferdata) < 0) { + char *args[2] = {src_data, dst_data}; + if (cast_info.func(&cast_info.context, + args, &shape_it[0], strides, cast_info.auxdata) < 0) { goto fail; } } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it, @@ -142,11 +142,11 @@ raw_array_assign_array(int ndim, npy_intp const *shape, src_data, src_strides_it); NPY_END_THREADS; - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); return 0; fail: NPY_END_THREADS; - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); return -1; } @@ -170,10 +170,7 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape, npy_intp wheremask_strides_it[NPY_MAXDIMS]; npy_intp coord[NPY_MAXDIMS]; - PyArray_MaskedStridedUnaryOp *stransfer = NULL; - NpyAuxData *transferdata = NULL; int aligned, needs_api = 0; - npy_intp src_itemsize = src_dtype->elsize; NPY_BEGIN_THREADS_DEF; @@ -209,35 +206,41 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape, } /* Get the function to do the casting */ + NPY_cast_info cast_info; if 
(PyArray_GetMaskedDTypeTransferFunction(aligned, src_strides_it[0], dst_strides_it[0], wheremask_strides_it[0], src_dtype, dst_dtype, wheremask_dtype, 0, - &stransfer, &transferdata, - &needs_api) != NPY_SUCCEED) { + &cast_info, &needs_api) != NPY_SUCCEED) { return -1; } if (!needs_api) { NPY_BEGIN_THREADS; } + npy_intp strides[2] = {src_strides_it[0], dst_strides_it[0]}; NPY_RAW_ITER_START(idim, ndim, coord, shape_it) { + PyArray_MaskedStridedUnaryOp *stransfer; + stransfer = (PyArray_MaskedStridedUnaryOp *)cast_info.func; + /* Process the innermost dimension */ - stransfer(dst_data, dst_strides_it[0], src_data, src_strides_it[0], - (npy_bool *)wheremask_data, wheremask_strides_it[0], - shape_it[0], src_itemsize, transferdata); + char *args[2] = {src_data, dst_data}; + if (stransfer(&cast_info.context, + args, &shape_it[0], strides, + (npy_bool *)wheremask_data, wheremask_strides_it[0], + cast_info.auxdata) < 0) { + break; + } } NPY_RAW_ITER_THREE_NEXT(idim, ndim, coord, shape_it, dst_data, dst_strides_it, src_data, src_strides_it, wheremask_data, wheremask_strides_it); NPY_END_THREADS; - - NPY_AUXDATA_FREE(transferdata); - + NPY_cast_info_xfree(&cast_info); return (needs_api && PyErr_Occurred()) ? -1 : 0; } diff --git a/numpy/core/src/multiarray/array_assign_scalar.c b/numpy/core/src/multiarray/array_assign_scalar.c index 023772776..6cd5f4ad9 100644 --- a/numpy/core/src/multiarray/array_assign_scalar.c +++ b/numpy/core/src/multiarray/array_assign_scalar.c @@ -23,6 +23,7 @@ #include "lowlevel_strided_loops.h" #include "array_assign.h" +#include "dtype_transfer.h" /* * Assigns the scalar value to every element of the destination raw array. 
@@ -38,10 +39,7 @@ raw_array_assign_scalar(int ndim, npy_intp const *shape, npy_intp shape_it[NPY_MAXDIMS], dst_strides_it[NPY_MAXDIMS]; npy_intp coord[NPY_MAXDIMS]; - PyArray_StridedUnaryOp *stransfer = NULL; - NpyAuxData *transferdata = NULL; int aligned, needs_api = 0; - npy_intp src_itemsize = src_dtype->elsize; NPY_BEGIN_THREADS_DEF; @@ -63,12 +61,12 @@ raw_array_assign_scalar(int ndim, npy_intp const *shape, } /* Get the function to do the casting */ + NPY_cast_info cast_info; if (PyArray_GetDTypeTransferFunction(aligned, 0, dst_strides_it[0], src_dtype, dst_dtype, 0, - &stransfer, &transferdata, - &needs_api) != NPY_SUCCEED) { + &cast_info, &needs_api) != NPY_SUCCEED) { return -1; } @@ -80,22 +78,24 @@ raw_array_assign_scalar(int ndim, npy_intp const *shape, NPY_BEGIN_THREADS_THRESHOLDED(nitems); } + npy_intp strides[2] = {0, dst_strides_it[0]}; + NPY_RAW_ITER_START(idim, ndim, coord, shape_it) { /* Process the innermost dimension */ - if (stransfer( - dst_data, dst_strides_it[0], src_data, 0, - shape_it[0], src_itemsize, transferdata) < 0) { + char *args[2] = {src_data, dst_data}; + if (cast_info.func(&cast_info.context, + args, &shape_it[0], strides, cast_info.auxdata) < 0) { goto fail; } } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape_it, dst_data, dst_strides_it); NPY_END_THREADS; - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); return 0; fail: NPY_END_THREADS; - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); return -1; } @@ -117,10 +117,7 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape, npy_intp wheremask_strides_it[NPY_MAXDIMS]; npy_intp coord[NPY_MAXDIMS]; - PyArray_MaskedStridedUnaryOp *stransfer = NULL; - NpyAuxData *transferdata = NULL; int aligned, needs_api = 0; - npy_intp src_itemsize = src_dtype->elsize; NPY_BEGIN_THREADS_DEF; @@ -144,12 +141,12 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape, } /* Get the function to do the casting */ + NPY_cast_info 
cast_info; if (PyArray_GetMaskedDTypeTransferFunction(aligned, 0, dst_strides_it[0], wheremask_strides_it[0], src_dtype, dst_dtype, wheremask_dtype, 0, - &stransfer, &transferdata, - &needs_api) != NPY_SUCCEED) { + &cast_info, &needs_api) != NPY_SUCCEED) { return -1; } @@ -161,19 +158,26 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape, NPY_BEGIN_THREADS_THRESHOLDED(nitems); } + npy_intp strides[2] = {0, dst_strides_it[0]}; + NPY_RAW_ITER_START(idim, ndim, coord, shape_it) { /* Process the innermost dimension */ - stransfer(dst_data, dst_strides_it[0], src_data, 0, - (npy_bool *)wheremask_data, wheremask_strides_it[0], - shape_it[0], src_itemsize, transferdata); + PyArray_MaskedStridedUnaryOp *stransfer; + stransfer = (PyArray_MaskedStridedUnaryOp *)cast_info.func; + + char *args[2] = {src_data, dst_data}; + if (stransfer(&cast_info.context, + args, &shape_it[0], strides, + (npy_bool *)wheremask_data, wheremask_strides_it[0], + cast_info.auxdata) < 0) { + break; + } } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it, dst_data, dst_strides_it, wheremask_data, wheremask_strides_it); NPY_END_THREADS; - - NPY_AUXDATA_FREE(transferdata); - + NPY_cast_info_xfree(&cast_info); return (needs_api && PyErr_Occurred()) ? 
-1 : 0; } diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c index 0fd958282..63fa5f511 100644 --- a/numpy/core/src/multiarray/array_coercion.c +++ b/numpy/core/src/multiarray/array_coercion.c @@ -452,18 +452,21 @@ PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value) int res = 0; int needs_api = 0; - PyArray_StridedUnaryOp *stransfer; - NpyAuxData *transferdata; + NPY_cast_info cast_info; if (PyArray_GetDTypeTransferFunction( - 0, 0, 0, tmp_descr, descr, 0, &stransfer, &transferdata, + 0, 0, 0, tmp_descr, descr, 0, &cast_info, &needs_api) == NPY_FAIL) { res = -1; goto finish; } - if (stransfer(item, 0, data, 0, 1, tmp_descr->elsize, transferdata) < 0) { + char *args[2] = {data, item}; + const npy_intp strides[2] = {0, 0}; + const npy_intp length = 1; + if (cast_info.func(&cast_info.context, + args, &length, strides, cast_info.auxdata) < 0) { res = -1; } - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); finish: if (PyDataType_REFCHK(tmp_descr)) { diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c index 38284fac2..fc315da24 100644 --- a/numpy/core/src/multiarray/array_method.c +++ b/numpy/core/src/multiarray/array_method.c @@ -155,7 +155,7 @@ NPY_NO_EXPORT int npy_default_get_strided_loop( PyArrayMethod_Context *context, int aligned, int NPY_UNUSED(move_references), npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { PyArray_Descr **descrs = context->descriptors; @@ -627,6 +627,7 @@ boundarraymethod__simple_strided_call( PyArray_Descr *out_descrs[NPY_MAXARGS]; ssize_t length = -1; int aligned = 1; + char *args[NPY_MAXARGS]; npy_intp strides[NPY_MAXARGS]; int nin = self->method->nin; int nout = self->method->nout; @@ -679,6 +680,7 @@ boundarraymethod__simple_strided_call( } } + args[i] = 
PyArray_BYTES(arrays[i]); strides[i] = PyArray_STRIDES(arrays[i])[0]; /* TODO: We may need to distinguish aligned and itemsize-aligned */ aligned &= PyArray_ISALIGNED(arrays[i]); @@ -719,7 +721,7 @@ boundarraymethod__simple_strided_call( .method = self->method, .descriptors = descrs, }; - PyArray_StridedUnaryOp *strided_loop = NULL; + PyArrayMethod_StridedLoop *strided_loop = NULL; NpyAuxData *loop_data = NULL; NPY_ARRAYMETHOD_FLAGS flags = 0; @@ -733,11 +735,7 @@ boundarraymethod__simple_strided_call( * TODO: Add floating point error checks if requested and * possibly release GIL if allowed by the flags. */ - /* TODO: strided_loop is currently a cast loop, this will change. */ - int res = strided_loop( - PyArray_BYTES(arrays[1]), strides[1], - PyArray_BYTES(arrays[0]), strides[0], - length, descrs[0]->elsize, loop_data); + int res = strided_loop(&context, args, &length, strides, loop_data); if (loop_data != NULL) { loop_data->free(loop_data); } diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h index 1fa8a9ba0..88167a6bb 100644 --- a/numpy/core/src/multiarray/array_method.h +++ b/numpy/core/src/multiarray/array_method.h @@ -6,7 +6,6 @@ #include <Python.h> #include <numpy/ndarraytypes.h> -#include <lowlevel_strided_loops.h> typedef enum { @@ -50,6 +49,11 @@ typedef struct { } PyArrayMethod_Context; +typedef int (PyArrayMethod_StridedLoop)(PyArrayMethod_Context *context, + char *const *data, const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *transferdata); + + typedef NPY_CASTING (resolve_descriptors_function)( struct PyArrayMethodObject_tag *method, PyArray_DTypeMeta **dtypes, @@ -61,7 +65,7 @@ typedef int (get_loop_function)( PyArrayMethod_Context *context, int aligned, int move_references, npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags); @@ -104,10 +108,10 @@ typedef struct 
PyArrayMethodObject_tag { resolve_descriptors_function *resolve_descriptors; get_loop_function *get_strided_loop; /* Typical loop functions (contiguous ones are used in current casts) */ - PyArray_StridedUnaryOp *strided_loop; - PyArray_StridedUnaryOp *contiguous_loop; - PyArray_StridedUnaryOp *unaligned_strided_loop; - PyArray_StridedUnaryOp *unaligned_contiguous_loop; + PyArrayMethod_StridedLoop *strided_loop; + PyArrayMethod_StridedLoop *contiguous_loop; + PyArrayMethod_StridedLoop *unaligned_strided_loop; + PyArrayMethod_StridedLoop *unaligned_contiguous_loop; } PyArrayMethodObject; @@ -151,7 +155,7 @@ NPY_NO_EXPORT int npy_default_get_strided_loop( PyArrayMethod_Context *context, int aligned, int NPY_UNUSED(move_references), npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags); diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index 5ee5f0c16..d3c969034 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -8,6 +8,7 @@ #include "numpy/arrayscalars.h" #include "npy_config.h" +#include "lowlevel_strided_loops.h" #include "npy_pycompat.h" #include "numpy/npy_math.h" @@ -65,7 +66,13 @@ PyArray_GetObjectToGenericCastingImpl(void); NPY_NO_EXPORT PyObject * PyArray_GetCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) { - PyObject *res = PyDict_GetItem(from->castingimpls, (PyObject *)to); + PyObject *res; + if (from == to) { + res = from->within_dtype_castingimpl; + } + else { + res = PyDict_GetItemWithError(from->castingimpls, (PyObject *)to); + } if (res != NULL || PyErr_Occurred()) { Py_XINCREF(res); return res; @@ -132,6 +139,12 @@ PyArray_GetCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) if (res == NULL) { return NULL; } + if (from == to) { + PyErr_Format(PyExc_RuntimeError, + "Internal NumPy 
error, within-DType cast missing for %S!", from); + Py_DECREF(res); + return NULL; + } if (PyDict_SetItem(from->castingimpls, (PyObject *)to, res) < 0) { Py_DECREF(res); return NULL; @@ -1884,6 +1897,11 @@ PyArray_AddCastingImplmentation(PyBoundArrayMethodObject *meth) return -1; } if (meth->dtypes[0] == meth->dtypes[1]) { + /* + * The method casting between instances of the same dtype is special, + * since it is common, it is stored explicitly (currently) and must + * obey additional constraints to ensure convenient casting. + */ if (!(meth->method->flags & NPY_METH_SUPPORTS_UNALIGNED)) { PyErr_Format(PyExc_TypeError, "A cast where input and output DType (class) are identical " @@ -1898,6 +1916,16 @@ PyArray_AddCastingImplmentation(PyBoundArrayMethodObject *meth) meth->method->name); return -1; } + if (meth->dtypes[0]->within_dtype_castingimpl != NULL) { + PyErr_Format(PyExc_RuntimeError, + "A cast was already added for %S -> %S. (method: %s)", + meth->dtypes[0], meth->dtypes[1], meth->method->name); + return -1; + } + Py_INCREF(meth->method); + meth->dtypes[0]->within_dtype_castingimpl = (PyObject *)meth->method; + + return 0; } if (PyDict_Contains(meth->dtypes[0]->castingimpls, (PyObject *)meth->dtypes[1])) { @@ -1978,7 +2006,7 @@ NPY_NO_EXPORT int legacy_cast_get_strided_loop( PyArrayMethod_Context *context, int aligned, int move_references, npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { PyArray_Descr **descrs = context->descriptors; @@ -1986,7 +2014,7 @@ legacy_cast_get_strided_loop( *flags = context->method->flags & NPY_METH_RUNTIME_FLAGS; - if (PyArray_GetLegacyDTypeTransferFunction( + if (get_wrapped_legacy_cast_function( aligned, strides[0], strides[1], descrs[0], descrs[1], move_references, out_loop, out_transferdata, &out_needs_api, 0) < 0) { return -1; @@ -2041,7 +2069,7 @@ NPY_NO_EXPORT int get_byteswap_loop( 
PyArrayMethod_Context *context, int aligned, int NPY_UNUSED(move_references), npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { PyArray_Descr **descrs = context->descriptors; @@ -2083,7 +2111,7 @@ NPY_NO_EXPORT int complex_to_noncomplex_get_loop( PyArrayMethod_Context *context, int aligned, int move_references, npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { static PyObject *cls = NULL; @@ -2412,7 +2440,7 @@ NPY_NO_EXPORT int string_to_string_get_loop( PyArrayMethod_Context *context, int aligned, int NPY_UNUSED(move_references), npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { int unicode_swap = 0; @@ -2631,7 +2659,7 @@ nonstructured_to_structured_get_loop( PyArrayMethod_Context *context, int aligned, int move_references, npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { @@ -2664,11 +2692,7 @@ nonstructured_to_structured_get_loop( * (which is the behaviour at least up to 1.20). */ int needs_api = 0; - if (!aligned) { - /* We need to wrap if aligned is 0. 
Use a recursive call */ - - } - if (PyArray_GetLegacyDTypeTransferFunction( + if (get_wrapped_legacy_cast_function( 1, strides[0], strides[1], context->descriptors[0], context->descriptors[1], move_references, out_loop, out_transferdata, @@ -2775,7 +2799,7 @@ structured_to_nonstructured_get_loop( PyArrayMethod_Context *context, int aligned, int move_references, npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { @@ -2807,7 +2831,7 @@ structured_to_nonstructured_get_loop( * scalars, and should likely just not be allowed. */ int needs_api = 0; - if (PyArray_GetLegacyDTypeTransferFunction( + if (get_wrapped_legacy_cast_function( aligned, strides[0], strides[1], context->descriptors[0], context->descriptors[1], move_references, out_loop, out_transferdata, @@ -3008,7 +3032,7 @@ void_to_void_get_loop( PyArrayMethod_Context *context, int aligned, int move_references, npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { @@ -3222,7 +3246,7 @@ object_to_object_get_loop( PyArrayMethod_Context *NPY_UNUSED(context), int NPY_UNUSED(aligned), int move_references, npy_intp *NPY_UNUSED(strides), - PyArray_StridedUnaryOp **out_loop, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h index a147dec3c..14218edee 100644 --- a/numpy/core/src/multiarray/convert_datatype.h +++ b/numpy/core/src/multiarray/convert_datatype.h @@ -80,7 +80,7 @@ NPY_NO_EXPORT int legacy_cast_get_strided_loop( PyArrayMethod_Context *context, int aligned, int move_references, npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, 
NPY_ARRAYMETHOD_FLAGS *flags); NPY_NO_EXPORT NPY_CASTING diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 70ca96a2d..57cfa1e36 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -2545,8 +2545,6 @@ PyArray_EnsureAnyArray(PyObject *op) NPY_NO_EXPORT int PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) { - PyArray_StridedUnaryOp *stransfer = NULL; - NpyAuxData *transferdata = NULL; NpyIter *dst_iter, *src_iter; NpyIter_IterNextFunc *dst_iternext, *src_iternext; @@ -2555,9 +2553,7 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) npy_intp *dst_countptr, *src_countptr; npy_uint32 baseflags; - char *dst_data, *src_data; npy_intp dst_count, src_count, count; - npy_intp src_itemsize; npy_intp dst_size, src_size; int needs_api; @@ -2629,7 +2625,6 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) /* Since buffering is disabled, we can cache the stride */ src_stride = NpyIter_GetInnerStrideArray(src_iter)[0]; src_countptr = NpyIter_GetInnerLoopSizePtr(src_iter); - src_itemsize = PyArray_DESCR(src)->elsize; if (dst_iternext == NULL || src_iternext == NULL) { NpyIter_Deallocate(dst_iter); @@ -2646,14 +2641,14 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) * we can pass them to this function to take advantage of * contiguous strides, etc. 
*/ + NPY_cast_info cast_info; if (PyArray_GetDTypeTransferFunction( IsUintAligned(src) && IsAligned(src) && IsUintAligned(dst) && IsAligned(dst), src_stride, dst_stride, PyArray_DESCR(src), PyArray_DESCR(dst), 0, - &stransfer, &transferdata, - &needs_api) != NPY_SUCCEED) { + &cast_info, &needs_api) != NPY_SUCCEED) { NpyIter_Deallocate(dst_iter); NpyIter_Deallocate(src_iter); return -1; @@ -2665,15 +2660,15 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) dst_count = *dst_countptr; src_count = *src_countptr; - dst_data = dst_dataptr[0]; - src_data = src_dataptr[0]; + char *args[2] = {src_dataptr[0], dst_dataptr[0]}; + npy_intp strides[2] = {src_stride, dst_stride}; + int res = 0; for(;;) { /* Transfer the biggest amount that fits both */ count = (src_count < dst_count) ? src_count : dst_count; - if (stransfer( - dst_data, dst_stride, src_data, src_stride, - count, src_itemsize, transferdata) < 0) { + if (cast_info.func(&cast_info.context, + args, &count, strides, cast_info.auxdata) < 0) { res = -1; break; } @@ -2685,11 +2680,11 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) break; } dst_count = *dst_countptr; - dst_data = dst_dataptr[0]; + args[1] = dst_dataptr[0]; } else { dst_count -= count; - dst_data += count*dst_stride; + args[1] += count*dst_stride; } /* If we exhausted the src block, refresh it */ @@ -2699,17 +2694,17 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) break; } src_count = *src_countptr; - src_data = src_dataptr[0]; + args[0] = src_dataptr[0]; } else { src_count -= count; - src_data += count*src_stride; + args[0] += count*src_stride; } } NPY_END_THREADS; - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); NpyIter_Deallocate(dst_iter); NpyIter_Deallocate(src_iter); if (res > 0) { diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index 696215944..fdf4c0839 100644 --- 
a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -29,6 +29,9 @@ #include "dtypemeta.h" #include "usertypes.h" +#include "dtype_transfer.h" +#include <lowlevel_strided_loops.h> + /* * Computes the python `ret, d = divmod(d, unit)`. * @@ -3807,7 +3810,7 @@ static int time_to_time_get_loop( PyArrayMethod_Context *context, int aligned, int NPY_UNUSED(move_references), npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { int requires_wrap = 0; @@ -3815,6 +3818,27 @@ time_to_time_get_loop( PyArray_Descr **descrs = context->descriptors; *flags = NPY_METH_NO_FLOATINGPOINT_ERRORS; + PyArray_DatetimeMetaData *meta1 = get_datetime_metadata_from_dtype(descrs[0]); + PyArray_DatetimeMetaData *meta2 = get_datetime_metadata_from_dtype(descrs[1]); + + if (meta1->base == meta2->base && meta1->num == meta2->num) { + /* + * If the metadata matches, use the low-level copy or copy-swap + * functions. (If they do not match, but swapping is necessary this + * path is hit recursively.) 
+ */ + if (PyDataType_ISNOTSWAPPED(descrs[0]) == + PyDataType_ISNOTSWAPPED(descrs[1])) { + *out_loop = PyArray_GetStridedCopyFn( + aligned, strides[0], strides[1], NPY_SIZEOF_DATETIME); + } + else { + *out_loop = PyArray_GetStridedCopySwapFn( + aligned, strides[0], strides[1], NPY_SIZEOF_DATETIME); + } + return 0; + } + if (!PyDataType_ISNOTSWAPPED(descrs[0]) || !PyDataType_ISNOTSWAPPED(descrs[1])) { inner_aligned = 1; @@ -3830,17 +3854,21 @@ time_to_time_get_loop( return 0; } + PyArray_Descr *src_wrapped_dtype = ensure_dtype_nbo(descrs[0]); + PyArray_Descr *dst_wrapped_dtype = ensure_dtype_nbo(descrs[1]); + int needs_api = 0; - NpyAuxData *castdata = *out_transferdata; - if (wrap_aligned_contig_transfer_function_with_copyswapn( - aligned, strides[0], strides[1], descrs[0], descrs[1], - out_loop, out_transferdata, &needs_api, - *out_loop, castdata) == NPY_FAIL) { - NPY_AUXDATA_FREE(castdata); - return -1; - } + int res = wrap_aligned_transferfunction( + aligned, 0, + strides[0], strides[1], + descrs[0], descrs[1], + src_wrapped_dtype, dst_wrapped_dtype, + out_loop, out_transferdata, &needs_api); + Py_DECREF(src_wrapped_dtype); + Py_DECREF(dst_wrapped_dtype); + assert(needs_api == 0); - return 0; + return res; } @@ -3918,18 +3946,21 @@ time_to_string_resolve_descriptors( loop_descrs[1]->elsize = size; } - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; + loop_descrs[0] = ensure_dtype_nbo(given_descrs[0]); + if (loop_descrs[0] == NULL) { + Py_DECREF(loop_descrs[1]); + return -1; + } assert(self->casting == NPY_UNSAFE_CASTING); return NPY_UNSAFE_CASTING; } static int -time_to_string_get_loop( +datetime_to_string_get_loop( PyArrayMethod_Context *context, int aligned, int NPY_UNUSED(move_references), npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { PyArray_Descr **descrs = context->descriptors; @@ -3962,23 +3993,24 
@@ string_to_datetime_cast_resolve_descriptors( PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2]) { - /* We currently support byte-swapping, so any (unicode) string is OK */ - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; - if (given_descrs[1] == NULL) { /* NOTE: This doesn't actually work, and will error during the cast */ loop_descrs[1] = dtypes[1]->default_descr(dtypes[1]); if (loop_descrs[1] == NULL) { - Py_DECREF(loop_descrs[0]); return -1; } } else { - Py_INCREF(given_descrs[1]); - loop_descrs[1] = given_descrs[1]; + loop_descrs[1] = ensure_dtype_nbo(given_descrs[1]); + if (loop_descrs[1] == NULL) { + return -1; + } } + /* We currently support byte-swapping, so any (unicode) string is OK */ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + return NPY_UNSAFE_CASTING; } @@ -3987,7 +4019,7 @@ static int string_to_datetime_cast_get_loop( PyArrayMethod_Context *context, int aligned, int NPY_UNUSED(move_references), npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { PyArray_Descr **descrs = context->descriptors; @@ -4131,26 +4163,36 @@ PyArray_InitializeDatetimeCasts() /* * Casts can error and need API (unicodes needs it for string->unicode). * Unicode handling is currently implemented via a legacy cast. + * Datetime->string has its own fast cast while timedelta->string uses + * the legacy fallback. 
*/ - spec.flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; - slots[0].slot = NPY_METH_resolve_descriptors; slots[0].pfunc = &time_to_string_resolve_descriptors; + /* Strided loop differs for the two */ slots[1].slot = NPY_METH_get_loop; - slots[1].pfunc = &time_to_string_get_loop; slots[2].slot = 0; slots[2].pfunc = NULL; + dtypes[0] = datetime; for (int num = NPY_DATETIME; num <= NPY_TIMEDELTA; num++) { + if (num == NPY_DATETIME) { + dtypes[0] = datetime; + spec.flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + slots[1].pfunc = &datetime_to_string_get_loop; + } + else { + dtypes[0] = timedelta; + spec.flags = NPY_METH_REQUIRES_PYAPI; + slots[1].pfunc = &legacy_cast_get_strided_loop; + } + for (int str = NPY_STRING; str <= NPY_UNICODE; str++) { - dtypes[0] = PyArray_DTypeFromTypeNum(num); dtypes[1] = PyArray_DTypeFromTypeNum(str); int res = PyArray_AddCastingImplementation_FromSpec(&spec, 1); - Py_SETREF(dtypes[0], NULL); Py_SETREF(dtypes[1], NULL); if (res < 0) { - return -1; + goto fail; } } } diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index afa9c12a2..e76532ebe 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -18,6 +18,7 @@ #define _MULTIARRAYMODULE #include <numpy/arrayobject.h> +#include "lowlevel_strided_loops.h" #include "npy_pycompat.h" #include "convert_datatype.h" @@ -74,11 +75,10 @@ _safe_print(PyObject *obj) * Returns NPY_SUCCEED or NPY_FAIL. 
*/ static int -get_decsrcref_transfer_function(int aligned, +get_decref_transfer_function(int aligned, npy_intp src_stride, PyArray_Descr *src_dtype, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, + NPY_cast_info *cast_info, int *out_needs_api); @@ -86,11 +86,15 @@ get_decsrcref_transfer_function(int aligned, /* Moves references from src to dst */ NPY_NO_EXPORT int -_strided_to_strided_move_references(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_strided_to_strided_move_references( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(auxdata)) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + PyObject *src_ref = NULL, *dst_ref = NULL; while (N > 0) { memcpy(&src_ref, src, sizeof(src_ref)); @@ -115,11 +119,15 @@ _strided_to_strided_move_references(char *dst, npy_intp dst_stride, /* Copies references from src to dst */ NPY_NO_EXPORT int -_strided_to_strided_copy_references(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_strided_to_strided_copy_references( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(auxdata)) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + PyObject *src_ref = NULL, *dst_ref = NULL; while (N > 0) { memcpy(&src_ref, src, sizeof(src_ref)); @@ -147,8 +155,7 @@ typedef struct { NpyAuxData base; PyArray_GetItemFunc *getitem; PyArrayObject_fields arr_fields; - PyArray_StridedUnaryOp *decref_func; - NpyAuxData *decref_data; + NPY_cast_info decref_src; } _any_to_object_auxdata; @@ -158,7 +165,7 @@ 
_any_to_object_auxdata_free(NpyAuxData *auxdata) _any_to_object_auxdata *data = (_any_to_object_auxdata *)auxdata; Py_DECREF(data->arr_fields.descr); - NPY_AUXDATA_FREE(data->decref_data); + NPY_cast_info_xfree(&data->decref_src); PyMem_Free(data); } @@ -170,25 +177,34 @@ _any_to_object_auxdata_clone(NpyAuxData *auxdata) _any_to_object_auxdata *res = PyMem_Malloc(sizeof(_any_to_object_auxdata)); - memcpy(res, data, sizeof(*data)); + res->base = data->base; + res->getitem = data->getitem; + res->arr_fields = data->arr_fields; Py_INCREF(res->arr_fields.descr); - if (res->decref_data != NULL) { - res->decref_data = NPY_AUXDATA_CLONE(data->decref_data); - if (res->decref_data == NULL) { - NPY_AUXDATA_FREE((NpyAuxData *) res); + + if (data->decref_src.func != NULL) { + if (NPY_cast_info_copy(&res->decref_src, &data->decref_src) < 0) { + NPY_AUXDATA_FREE((NpyAuxData *)res); return NULL; } } + else { + res->decref_src.func = NULL; + } return (NpyAuxData *)res; } static int -_strided_to_strided_any_to_object(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, +_strided_to_strided_any_to_object( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, NpyAuxData *auxdata) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + _any_to_object_auxdata *data = (_any_to_object_auxdata *)auxdata; PyObject *dst_ref = NULL; @@ -206,10 +222,10 @@ _strided_to_strided_any_to_object(char *dst, npy_intp dst_stride, dst += dst_stride; --N; } - if (data->decref_func != NULL) { - /* If necessar, clear the input buffer (`move_references`) */ - if (data->decref_func(NULL, 0, orig_src, src_stride, N, - src_itemsize, data->decref_data) < 0) { + if (data->decref_src.func != NULL) { + /* If necessary, clear the input buffer (`move_references`) */ + if (data->decref_src.func(&data->decref_src.context, + 
&orig_src, &N, &src_stride, data->decref_src.auxdata) < 0) { return -1; } } @@ -222,7 +238,7 @@ any_to_object_get_loop( PyArrayMethod_Context *context, int aligned, int move_references, npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { @@ -244,14 +260,13 @@ any_to_object_get_loop( data->arr_fields.nd = 0; data->getitem = context->descriptors[0]->f->getitem; - data->decref_func = NULL; - data->decref_data = NULL; + NPY_cast_info_init(&data->decref_src); if (move_references && PyDataType_REFCHK(context->descriptors[0])) { int needs_api; - if (get_decsrcref_transfer_function( + if (get_decref_transfer_function( aligned, strides[0], context->descriptors[0], - &data->decref_func, &data->decref_data, + &data->decref_src, &needs_api) == NPY_FAIL) { NPY_AUXDATA_FREE(*out_transferdata); *out_transferdata = NULL; @@ -294,11 +309,13 @@ _object_to_any_auxdata_clone(NpyAuxData *data) static int strided_to_strided_object_to_any( - char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp NPY_UNUSED(src_itemsize), + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, NpyAuxData *auxdata) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; _object_to_any_auxdata *data = (_object_to_any_auxdata *)auxdata; PyObject *src_ref; @@ -327,16 +344,13 @@ object_to_any_get_loop( PyArrayMethod_Context *context, int NPY_UNUSED(aligned), int move_references, npy_intp *NPY_UNUSED(strides), - PyArray_StridedUnaryOp **out_loop, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags) { *flags = NPY_METH_REQUIRES_PYAPI; - /* - * TODO: After passing `context`, auxdata can be statically allocated - * since `descriptor` is always passed. 
- */ + /* NOTE: auxdata is only really necessary to flag `move_references` */ _object_to_any_auxdata *data = PyMem_Malloc(sizeof(*data)); if (data == NULL) { return -1; @@ -355,39 +369,22 @@ object_to_any_get_loop( /************************** ZERO-PADDED COPY ******************************/ -/* Does a zero-padded copy */ -typedef struct { - NpyAuxData base; - npy_intp dst_itemsize; -} _strided_zero_pad_data; - -/* zero-padded data copy function */ -static NpyAuxData *_strided_zero_pad_data_clone(NpyAuxData *data) -{ - _strided_zero_pad_data *newdata = - (_strided_zero_pad_data *)PyArray_malloc( - sizeof(_strided_zero_pad_data)); - if (newdata == NULL) { - return NULL; - } - - memcpy(newdata, data, sizeof(_strided_zero_pad_data)); - - return (NpyAuxData *)newdata; -} - /* * Does a strided to strided zero-padded copy for the case where * dst_itemsize > src_itemsize */ static int -_strided_to_strided_zero_pad_copy(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_strided_to_strided_zero_pad_copy( + PyArrayMethod_Context *context, char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(auxdata)) { - _strided_zero_pad_data *d = (_strided_zero_pad_data *)data; - npy_intp dst_itemsize = d->dst_itemsize; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + npy_intp src_itemsize = context->descriptors[0]->elsize; + npy_intp dst_itemsize = context->descriptors[1]->elsize; + npy_intp zero_size = dst_itemsize-src_itemsize; while (N > 0) { @@ -405,13 +402,15 @@ _strided_to_strided_zero_pad_copy(char *dst, npy_intp dst_stride, * dst_itemsize < src_itemsize */ static int -_strided_to_strided_truncate_copy(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_strided_to_strided_truncate_copy( + PyArrayMethod_Context 
*context, char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(auxdata)) { - _strided_zero_pad_data *d = (_strided_zero_pad_data *)data; - npy_intp dst_itemsize = d->dst_itemsize; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + npy_intp dst_itemsize = context->descriptors[1]->elsize; while (N > 0) { memcpy(dst, src, dst_itemsize); @@ -427,13 +426,17 @@ _strided_to_strided_truncate_copy(char *dst, npy_intp dst_stride, * unicode swapping is needed. */ static int -_strided_to_strided_unicode_copyswap(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_strided_to_strided_unicode_copyswap( + PyArrayMethod_Context *context, char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(auxdata)) { - _strided_zero_pad_data *d = (_strided_zero_pad_data *)data; - npy_intp dst_itemsize = d->dst_itemsize; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + npy_intp src_itemsize = context->descriptors[0]->elsize; + npy_intp dst_itemsize = context->descriptors[1]->elsize; + npy_intp zero_size = dst_itemsize - src_itemsize; npy_intp copy_size = zero_size > 0 ? src_itemsize : dst_itemsize; char *_dst; @@ -462,26 +465,16 @@ NPY_NO_EXPORT int PyArray_GetStridedZeroPadCopyFn(int aligned, int unicode_swap, npy_intp src_stride, npy_intp dst_stride, npy_intp src_itemsize, npy_intp dst_itemsize, - PyArray_StridedUnaryOp **out_stransfer, + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata) { + *out_transferdata = NULL; if ((src_itemsize == dst_itemsize) && !unicode_swap) { *out_stransfer = PyArray_GetStridedCopyFn(aligned, src_stride, dst_stride, src_itemsize); - *out_transferdata = NULL; return (*out_stransfer == NULL) ? 
NPY_FAIL : NPY_SUCCEED; } else { - _strided_zero_pad_data *d = PyArray_malloc( - sizeof(_strided_zero_pad_data)); - if (d == NULL) { - PyErr_NoMemory(); - return NPY_FAIL; - } - d->dst_itemsize = dst_itemsize; - d->base.free = (NpyAuxData_FreeFunc *)&PyArray_free; - d->base.clone = &_strided_zero_pad_data_clone; - if (unicode_swap) { *out_stransfer = &_strided_to_strided_unicode_copyswap; } @@ -491,218 +484,10 @@ PyArray_GetStridedZeroPadCopyFn(int aligned, int unicode_swap, else { *out_stransfer = &_strided_to_strided_truncate_copy; } - - *out_transferdata = (NpyAuxData *)d; return NPY_SUCCEED; } } -/***************** WRAP ALIGNED CONTIGUOUS TRANSFER FUNCTION **************/ - -/* Wraps a transfer function + data in alignment code */ -typedef struct { - NpyAuxData base; - PyArray_StridedUnaryOp *wrapped, - *tobuffer, *frombuffer; - NpyAuxData *wrappeddata, *todata, *fromdata; - npy_intp src_itemsize, dst_itemsize; - char *bufferin, *bufferout; - npy_bool init_dest, out_needs_api; -} _align_wrap_data; - -/* transfer data free function */ -static void _align_wrap_data_free(NpyAuxData *data) -{ - _align_wrap_data *d = (_align_wrap_data *)data; - NPY_AUXDATA_FREE(d->wrappeddata); - NPY_AUXDATA_FREE(d->todata); - NPY_AUXDATA_FREE(d->fromdata); - PyArray_free(data); -} - -/* transfer data copy function */ -static NpyAuxData *_align_wrap_data_clone(NpyAuxData *data) -{ - _align_wrap_data *d = (_align_wrap_data *)data; - _align_wrap_data *newdata; - npy_intp basedatasize, datasize; - - /* Round up the structure size to 16-byte boundary */ - basedatasize = (sizeof(_align_wrap_data)+15)&(-0x10); - /* Add space for two low level buffers */ - datasize = basedatasize + - NPY_LOWLEVEL_BUFFER_BLOCKSIZE*d->src_itemsize + - NPY_LOWLEVEL_BUFFER_BLOCKSIZE*d->dst_itemsize; - - /* Allocate the data, and populate it */ - newdata = (_align_wrap_data *)PyArray_malloc(datasize); - if (newdata == NULL) { - return NULL; - } - memcpy(newdata, data, basedatasize); - newdata->bufferin = (char 
*)newdata + basedatasize; - newdata->bufferout = newdata->bufferin + - NPY_LOWLEVEL_BUFFER_BLOCKSIZE*newdata->src_itemsize; - if (newdata->wrappeddata != NULL) { - newdata->wrappeddata = NPY_AUXDATA_CLONE(d->wrappeddata); - if (newdata->wrappeddata == NULL) { - PyArray_free(newdata); - return NULL; - } - } - if (newdata->todata != NULL) { - newdata->todata = NPY_AUXDATA_CLONE(d->todata); - if (newdata->todata == NULL) { - NPY_AUXDATA_FREE(newdata->wrappeddata); - PyArray_free(newdata); - return NULL; - } - } - if (newdata->fromdata != NULL) { - newdata->fromdata = NPY_AUXDATA_CLONE(d->fromdata); - if (newdata->fromdata == NULL) { - NPY_AUXDATA_FREE(newdata->wrappeddata); - NPY_AUXDATA_FREE(newdata->todata); - PyArray_free(newdata); - return NULL; - } - } - - newdata->init_dest = d->init_dest; - newdata->out_needs_api = d->out_needs_api; - - return (NpyAuxData *)newdata; -} - -static int -_strided_to_strided_contig_align_wrap(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) -{ - _align_wrap_data *d = (_align_wrap_data *)data; - PyArray_StridedUnaryOp *wrapped = d->wrapped, - *tobuffer = d->tobuffer, - *frombuffer = d->frombuffer; - npy_intp inner_src_itemsize = d->src_itemsize, - dst_itemsize = d->dst_itemsize; - NpyAuxData *wrappeddata = d->wrappeddata, - *todata = d->todata, - *fromdata = d->fromdata; - char *bufferin = d->bufferin, *bufferout = d->bufferout; - npy_bool init_dest = d->init_dest; - - for(;;) { - if (N > NPY_LOWLEVEL_BUFFER_BLOCKSIZE) { - if (tobuffer( - bufferin, inner_src_itemsize, src, src_stride, - NPY_LOWLEVEL_BUFFER_BLOCKSIZE, src_itemsize, todata) < 0) { - return -1; - } - if (init_dest) { - memset(bufferout, 0, - dst_itemsize*NPY_LOWLEVEL_BUFFER_BLOCKSIZE); - } - if (wrapped(bufferout, dst_itemsize, bufferin, inner_src_itemsize, - NPY_LOWLEVEL_BUFFER_BLOCKSIZE, - inner_src_itemsize, wrappeddata) < 0) { - return -1; - } - if (frombuffer(dst, dst_stride, bufferout, 
dst_itemsize, - NPY_LOWLEVEL_BUFFER_BLOCKSIZE, - dst_itemsize, fromdata) < 0) { - return -1; - } - N -= NPY_LOWLEVEL_BUFFER_BLOCKSIZE; - src += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*src_stride; - dst += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_stride; - } - else { - if (tobuffer(bufferin, inner_src_itemsize, src, src_stride, - N, src_itemsize, todata) < 0) { - return -1; - } - if (init_dest) { - memset(bufferout, 0, dst_itemsize*N); - } - if (wrapped(bufferout, dst_itemsize, bufferin, inner_src_itemsize, - N, inner_src_itemsize, wrappeddata) < 0) { - return -1; - } - if (frombuffer(dst, dst_stride, bufferout, dst_itemsize, - N, dst_itemsize, fromdata) < 0) { - return -1; - } - return 0; - } - } -} - -/* - * Wraps an aligned contig to contig transfer function between either - * copies or byte swaps to temporary buffers. - * - * src_itemsize/dst_itemsize - The sizes of the src and dst datatypes. - * tobuffer - copy/swap function from src to an aligned contiguous buffer. - * todata - data for tobuffer - * frombuffer - copy/swap function from an aligned contiguous buffer to dst. - * fromdata - data for frombuffer - * wrapped - contig to contig transfer function being wrapped - * wrappeddata - data for wrapped - * init_dest - 1 means to memset the dest buffer to 0 before calling wrapped. - * out_needs_api - if NPY_TRUE, check for (and break on) Python API errors. - * - * Returns NPY_SUCCEED or NPY_FAIL. 
- */ -NPY_NO_EXPORT int -wrap_aligned_contig_transfer_function( - npy_intp src_itemsize, npy_intp dst_itemsize, - PyArray_StridedUnaryOp *tobuffer, NpyAuxData *todata, - PyArray_StridedUnaryOp *frombuffer, NpyAuxData *fromdata, - PyArray_StridedUnaryOp *wrapped, NpyAuxData *wrappeddata, - int init_dest, - int out_needs_api, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata) -{ - _align_wrap_data *data; - npy_intp basedatasize, datasize; - - /* Round up the structure size to 16-byte boundary */ - basedatasize = (sizeof(_align_wrap_data)+15)&(-0x10); - /* Add space for two low level buffers */ - datasize = basedatasize + - NPY_LOWLEVEL_BUFFER_BLOCKSIZE*src_itemsize + - NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_itemsize; - - /* Allocate the data, and populate it */ - data = (_align_wrap_data *)PyArray_malloc(datasize); - if (data == NULL) { - PyErr_NoMemory(); - return NPY_FAIL; - } - data->base.free = &_align_wrap_data_free; - data->base.clone = &_align_wrap_data_clone; - data->tobuffer = tobuffer; - data->todata = todata; - data->frombuffer = frombuffer; - data->fromdata = fromdata; - data->wrapped = wrapped; - data->wrappeddata = wrappeddata; - data->src_itemsize = src_itemsize; - data->dst_itemsize = dst_itemsize; - data->bufferin = (char *)data + basedatasize; - data->bufferout = data->bufferin + - NPY_LOWLEVEL_BUFFER_BLOCKSIZE*src_itemsize; - data->init_dest = (npy_bool) init_dest; - data->out_needs_api = (npy_bool) out_needs_api; - - /* Set the function and data */ - *out_stransfer = &_strided_to_strided_contig_align_wrap; - *out_transferdata = (NpyAuxData *)data; - - return NPY_SUCCEED; -} /*************************** WRAP DTYPE COPY/SWAP *************************/ /* Wraps the dtype copy swap function */ @@ -718,14 +503,14 @@ static void _wrap_copy_swap_data_free(NpyAuxData *data) { _wrap_copy_swap_data *d = (_wrap_copy_swap_data *)data; Py_DECREF(d->arr); - PyArray_free(data); + PyMem_Free(data); } /* wrap copy swap data copy function */ 
static NpyAuxData *_wrap_copy_swap_data_clone(NpyAuxData *data) { _wrap_copy_swap_data *newdata = - (_wrap_copy_swap_data *)PyArray_malloc(sizeof(_wrap_copy_swap_data)); + (_wrap_copy_swap_data *)PyMem_Malloc(sizeof(_wrap_copy_swap_data)); if (newdata == NULL) { return NULL; } @@ -737,32 +522,35 @@ static NpyAuxData *_wrap_copy_swap_data_clone(NpyAuxData *data) } static int -_strided_to_strided_wrap_copy_swap(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp NPY_UNUSED(src_itemsize), - NpyAuxData *data) +_strided_to_strided_wrap_copy_swap( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _wrap_copy_swap_data *d = (_wrap_copy_swap_data *)data; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _wrap_copy_swap_data *d = (_wrap_copy_swap_data *)auxdata; /* We assume that d->copyswapn should not be able to error. */ d->copyswapn(dst, dst_stride, src, src_stride, N, d->swap, d->arr); return 0; } -/* This only gets used for custom data types and for Unicode when swapping */ +/* + * This function is used only via `get_wrapped_legacy_cast_function` + * when we wrap a legacy DType (or explicitly fall back to the legacy + * wrapping) for an internal cast. 
+ */ static int -wrap_copy_swap_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *dtype, - int should_swap, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata) +wrap_copy_swap_function( + PyArray_Descr *dtype, int should_swap, + PyArrayMethod_StridedLoop **out_stransfer, + NpyAuxData **out_transferdata) { - _wrap_copy_swap_data *data; - npy_intp shape = 1; - /* Allocate the data for the copy swap */ - data = (_wrap_copy_swap_data *)PyArray_malloc(sizeof(_wrap_copy_swap_data)); + _wrap_copy_swap_data *data = PyMem_Malloc(sizeof(_wrap_copy_swap_data)); if (data == NULL) { PyErr_NoMemory(); *out_stransfer = NULL; @@ -780,13 +568,14 @@ wrap_copy_swap_function(int aligned, * The copyswap functions shouldn't need that. */ Py_INCREF(dtype); + npy_intp shape = 1; data->arr = (PyArrayObject *)PyArray_NewFromDescr_int( &PyArray_Type, dtype, 1, &shape, NULL, NULL, 0, NULL, NULL, 0, 1); if (data->arr == NULL) { - PyArray_free(data); + PyMem_Free(data); return NPY_FAIL; } @@ -812,14 +601,14 @@ static void _strided_cast_data_free(NpyAuxData *data) _strided_cast_data *d = (_strided_cast_data *)data; Py_DECREF(d->aip); Py_DECREF(d->aop); - PyArray_free(data); + PyMem_Free(data); } /* strided cast data copy function */ static NpyAuxData *_strided_cast_data_clone(NpyAuxData *data) { _strided_cast_data *newdata = - (_strided_cast_data *)PyArray_malloc(sizeof(_strided_cast_data)); + (_strided_cast_data *)PyMem_Malloc(sizeof(_strided_cast_data)); if (newdata == NULL) { return NULL; } @@ -832,12 +621,16 @@ static NpyAuxData *_strided_cast_data_clone(NpyAuxData *data) } static int -_aligned_strided_to_strided_cast(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_aligned_strided_to_strided_cast( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _strided_cast_data 
*d = (_strided_cast_data *)data; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _strided_cast_data *d = (_strided_cast_data *)auxdata; PyArray_VectorUnaryFunc *castfunc = d->castfunc; PyArrayObject *aip = d->aip, *aop = d->aop; npy_bool needs_api = d->needs_api; @@ -863,11 +656,16 @@ _aligned_strided_to_strided_cast(char *dst, npy_intp dst_stride, /* This one requires src be of type NPY_OBJECT */ static int -_aligned_strided_to_strided_cast_decref_src(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_aligned_strided_to_strided_cast_decref_src( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _any_to_object_auxdata *data = (_any_to_object_auxdata *)auxdata; _strided_cast_data *d = (_strided_cast_data *)data; PyArray_VectorUnaryFunc *castfunc = d->castfunc; PyArrayObject *aip = d->aip, *aop = d->aop; @@ -897,12 +695,15 @@ _aligned_strided_to_strided_cast_decref_src(char *dst, npy_intp dst_stride, } static int -_aligned_contig_to_contig_cast(char *dst, npy_intp NPY_UNUSED(dst_stride), - char *src, npy_intp NPY_UNUSED(src_stride), - npy_intp N, npy_intp NPY_UNUSED(itemsize), - NpyAuxData *data) +_aligned_contig_to_contig_cast( + PyArrayMethod_Context *NPY_UNUSED(context), char * const*args, + const npy_intp *dimensions, const npy_intp *NPY_UNUSED(strides), + NpyAuxData *auxdata) { - _strided_cast_data *d = (_strided_cast_data *)data; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + + _strided_cast_data *d = (_strided_cast_data *)auxdata; npy_bool needs_api = d->needs_api; d->castfunc(src, dst, N, d->aip, d->aop); @@ -916,45 +717,6 @@ 
_aligned_contig_to_contig_cast(char *dst, npy_intp NPY_UNUSED(dst_stride), return 0; } -#if !NPY_USE_NEW_CASTINGIMPL -static int -get_nbo_cast_numeric_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - int src_type_num, int dst_type_num, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata) -{ - /* Emit a warning if complex imaginary is being cast away */ - if (PyTypeNum_ISCOMPLEX(src_type_num) && - !PyTypeNum_ISCOMPLEX(dst_type_num) && - !PyTypeNum_ISBOOL(dst_type_num)) { - static PyObject *cls = NULL; - int ret; - npy_cache_import("numpy.core", "ComplexWarning", &cls); - if (cls == NULL) { - return NPY_FAIL; - } - ret = PyErr_WarnEx(cls, - "Casting complex values to real discards " - "the imaginary part", 1); - if (ret < 0) { - return NPY_FAIL; - } - } - - *out_stransfer = PyArray_GetStridedNumericCastFn(aligned, - src_stride, dst_stride, - src_type_num, dst_type_num); - *out_transferdata = NULL; - if (*out_stransfer == NULL) { - PyErr_SetString(PyExc_ValueError, - "unexpected error in GetStridedNumericCastFn"); - return NPY_FAIL; - } - - return NPY_SUCCEED; -} -#endif /* * Does a datetime->datetime, timedelta->timedelta, @@ -984,15 +746,15 @@ typedef struct { static void _strided_datetime_cast_data_free(NpyAuxData *data) { _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)data; - PyArray_free(d->tmp_buffer); - PyArray_free(data); + PyMem_Free(d->tmp_buffer); + PyMem_Free(data); } /* strided datetime cast data copy function */ static NpyAuxData *_strided_datetime_cast_data_clone(NpyAuxData *data) { _strided_datetime_cast_data *newdata = - (_strided_datetime_cast_data *)PyArray_malloc( + (_strided_datetime_cast_data *)PyMem_Malloc( sizeof(_strided_datetime_cast_data)); if (newdata == NULL) { return NULL; @@ -1000,9 +762,9 @@ static NpyAuxData *_strided_datetime_cast_data_clone(NpyAuxData *data) memcpy(newdata, data, sizeof(_strided_datetime_cast_data)); if (newdata->tmp_buffer != NULL) { - 
newdata->tmp_buffer = PyArray_malloc(newdata->src_itemsize + 1); + newdata->tmp_buffer = PyMem_Malloc(newdata->src_itemsize + 1); if (newdata->tmp_buffer == NULL) { - PyArray_free(newdata); + PyMem_Free(newdata); return NULL; } } @@ -1011,12 +773,16 @@ static NpyAuxData *_strided_datetime_cast_data_clone(NpyAuxData *data) } static int -_strided_to_strided_datetime_general_cast(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_strided_to_strided_datetime_general_cast( + PyArrayMethod_Context *NPY_UNUSED(context), char * const*args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)data; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)auxdata; npy_int64 dt; npy_datetimestruct dts; @@ -1044,12 +810,16 @@ _strided_to_strided_datetime_general_cast(char *dst, npy_intp dst_stride, } static int -_strided_to_strided_datetime_cast(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_strided_to_strided_datetime_cast( + PyArrayMethod_Context *NPY_UNUSED(context), char * const*args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)data; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)auxdata; npy_int64 num = d->num, denom = d->denom; npy_int64 dt; @@ -1076,13 +846,16 @@ _strided_to_strided_datetime_cast(char *dst, npy_intp dst_stride, } static int -_aligned_strided_to_strided_datetime_cast(char *dst, - npy_intp dst_stride, - char *src, npy_intp 
src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_aligned_strided_to_strided_datetime_cast( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)data; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)auxdata; npy_int64 num = d->num, denom = d->denom; npy_int64 dt; @@ -1109,12 +882,16 @@ _aligned_strided_to_strided_datetime_cast(char *dst, } static int -_strided_to_strided_datetime_to_string(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp NPY_UNUSED(src_itemsize), - NpyAuxData *data) +_strided_to_strided_datetime_to_string( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)data; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)auxdata; npy_intp dst_itemsize = d->dst_itemsize; npy_int64 dt; npy_datetimestruct dts; @@ -1144,12 +921,17 @@ _strided_to_strided_datetime_to_string(char *dst, npy_intp dst_stride, } static int -_strided_to_strided_string_to_datetime(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_strided_to_strided_string_to_datetime( + PyArrayMethod_Context *context, char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)data; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp 
src_itemsize = context->descriptors[0]->elsize; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _strided_datetime_cast_data *d = (_strided_datetime_cast_data *)auxdata; npy_datetimestruct dts; char *tmp_buffer = d->tmp_buffer; char *tmp; @@ -1202,7 +984,7 @@ _strided_to_strided_string_to_datetime(char *dst, npy_intp dst_stride, NPY_NO_EXPORT int get_nbo_cast_datetime_transfer_function(int aligned, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata) { PyArray_DatetimeMetaData *src_meta, *dst_meta; @@ -1225,7 +1007,7 @@ get_nbo_cast_datetime_transfer_function(int aligned, } /* Allocate the data for the casting */ - data = (_strided_datetime_cast_data *)PyArray_malloc( + data = (_strided_datetime_cast_data *)PyMem_Malloc( sizeof(_strided_datetime_cast_data)); if (data == NULL) { PyErr_NoMemory(); @@ -1277,7 +1059,7 @@ get_nbo_cast_datetime_transfer_function(int aligned, NPY_NO_EXPORT int get_nbo_datetime_to_string_transfer_function( PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata) + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata) { PyArray_DatetimeMetaData *src_meta; _strided_datetime_cast_data *data; @@ -1288,7 +1070,7 @@ get_nbo_datetime_to_string_transfer_function( } /* Allocate the data for the casting */ - data = (_strided_datetime_cast_data *)PyArray_malloc( + data = (_strided_datetime_cast_data *)PyMem_Malloc( sizeof(_strided_datetime_cast_data)); if (data == NULL) { PyErr_NoMemory(); @@ -1317,16 +1099,15 @@ get_nbo_datetime_to_string_transfer_function( return NPY_SUCCEED; } + NPY_NO_EXPORT int get_datetime_to_unicode_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, + PyArrayMethod_StridedLoop **out_stransfer, 
NpyAuxData **out_transferdata, int *out_needs_api) { - NpyAuxData *castdata = NULL, *todata = NULL, *fromdata = NULL; - PyArray_StridedUnaryOp *caststransfer, *tobuffer, *frombuffer; PyArray_Descr *str_dtype; /* Get an ASCII string data type, adapted to match the UNICODE one */ @@ -1336,61 +1117,35 @@ get_datetime_to_unicode_transfer_function(int aligned, } str_dtype->elsize = dst_dtype->elsize / 4; - /* Get the copy/swap operation to dst */ - if (PyArray_GetDTypeCopySwapFn(aligned, - src_stride, src_dtype->elsize, - src_dtype, - &tobuffer, &todata) != NPY_SUCCEED) { - Py_DECREF(str_dtype); - return NPY_FAIL; - } + /* ensured in resolve_descriptors for simplicity */ + assert(PyDataType_ISNOTSWAPPED(src_dtype)); /* Get the NBO datetime to string aligned contig function */ if (get_nbo_datetime_to_string_transfer_function( src_dtype, str_dtype, - &caststransfer, &castdata) != NPY_SUCCEED) { - Py_DECREF(str_dtype); - NPY_AUXDATA_FREE(todata); - return NPY_FAIL; - } - - /* Get the cast operation to dst */ - if (PyArray_GetDTypeTransferFunction(aligned, - str_dtype->elsize, dst_stride, - str_dtype, dst_dtype, - 0, - &frombuffer, &fromdata, - out_needs_api) != NPY_SUCCEED) { + out_stransfer, out_transferdata) != NPY_SUCCEED) { Py_DECREF(str_dtype); - NPY_AUXDATA_FREE(todata); - NPY_AUXDATA_FREE(castdata); return NPY_FAIL; } - /* Wrap it all up in a new transfer function + data */ - if (wrap_aligned_contig_transfer_function( - src_dtype->elsize, str_dtype->elsize, - tobuffer, todata, - frombuffer, fromdata, - caststransfer, castdata, - PyDataType_FLAGCHK(str_dtype, NPY_NEEDS_INIT), - *out_needs_api, - out_stransfer, out_transferdata) != NPY_SUCCEED) { - NPY_AUXDATA_FREE(castdata); - NPY_AUXDATA_FREE(todata); - NPY_AUXDATA_FREE(fromdata); + int res = wrap_aligned_transferfunction( + aligned, 0, /* no need to ensure contiguous */ + src_stride, dst_stride, + src_dtype, dst_dtype, + src_dtype, str_dtype, + out_stransfer, out_transferdata, out_needs_api); + 
Py_DECREF(str_dtype); + if (res < 0) { return NPY_FAIL; } - Py_DECREF(str_dtype); - return NPY_SUCCEED; } NPY_NO_EXPORT int get_nbo_string_to_datetime_transfer_function( PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata) + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata) { PyArray_DatetimeMetaData *dst_meta; _strided_datetime_cast_data *data; @@ -1401,7 +1156,7 @@ get_nbo_string_to_datetime_transfer_function( } /* Allocate the data for the casting */ - data = (_strided_datetime_cast_data *)PyArray_malloc( + data = (_strided_datetime_cast_data *)PyMem_Malloc( sizeof(_strided_datetime_cast_data)); if (data == NULL) { PyErr_NoMemory(); @@ -1412,10 +1167,10 @@ get_nbo_string_to_datetime_transfer_function( data->base.free = &_strided_datetime_cast_data_free; data->base.clone = &_strided_datetime_cast_data_clone; data->src_itemsize = src_dtype->elsize; - data->tmp_buffer = PyArray_malloc(data->src_itemsize + 1); + data->tmp_buffer = PyMem_Malloc(data->src_itemsize + 1); if (data->tmp_buffer == NULL) { PyErr_NoMemory(); - PyArray_free(data); + PyMem_Free(data); *out_stransfer = NULL; *out_transferdata = NULL; return NPY_FAIL; @@ -1441,12 +1196,10 @@ NPY_NO_EXPORT int get_unicode_to_datetime_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata, int *out_needs_api) { - NpyAuxData *castdata = NULL, *todata = NULL, *fromdata = NULL; - PyArray_StridedUnaryOp *caststransfer, *tobuffer, *frombuffer; PyArray_Descr *str_dtype; /* Get an ASCII string data type, adapted to match the UNICODE one */ @@ -1457,55 +1210,25 @@ get_unicode_to_datetime_transfer_function(int aligned, assert(src_dtype->type_num == NPY_UNICODE); str_dtype->elsize = src_dtype->elsize / 4; - /* Get the cast operation from src */ 
- if (PyArray_GetDTypeTransferFunction(aligned, - src_stride, str_dtype->elsize, - src_dtype, str_dtype, - 0, - &tobuffer, &todata, - out_needs_api) != NPY_SUCCEED) { - Py_DECREF(str_dtype); - return NPY_FAIL; - } - - /* Get the string to NBO datetime aligned contig function */ + /* Get the string to NBO datetime aligned function */ if (get_nbo_string_to_datetime_transfer_function( str_dtype, dst_dtype, - &caststransfer, &castdata) != NPY_SUCCEED) { + out_stransfer, out_transferdata) != NPY_SUCCEED) { Py_DECREF(str_dtype); - NPY_AUXDATA_FREE(todata); return NPY_FAIL; } - /* Get the copy/swap operation to dst */ - if (PyArray_GetDTypeCopySwapFn(aligned, - dst_dtype->elsize, dst_stride, - dst_dtype, - &frombuffer, &fromdata) != NPY_SUCCEED) { - Py_DECREF(str_dtype); - NPY_AUXDATA_FREE(todata); - NPY_AUXDATA_FREE(castdata); - return NPY_FAIL; - } + int res = wrap_aligned_transferfunction( + aligned, 0, /* no need to ensure contiguous */ + src_stride, dst_stride, + src_dtype, dst_dtype, + str_dtype, dst_dtype, + out_stransfer, out_transferdata, out_needs_api); + Py_DECREF(str_dtype); - /* Wrap it all up in a new transfer function + data */ - if (wrap_aligned_contig_transfer_function( - str_dtype->elsize, dst_dtype->elsize, - tobuffer, todata, - frombuffer, fromdata, - caststransfer, castdata, - PyDataType_FLAGCHK(dst_dtype, NPY_NEEDS_INIT), - *out_needs_api, - out_stransfer, out_transferdata) != NPY_SUCCEED) { - Py_DECREF(str_dtype); - NPY_AUXDATA_FREE(castdata); - NPY_AUXDATA_FREE(todata); - NPY_AUXDATA_FREE(fromdata); + if (res < 0) { return NPY_FAIL; } - - Py_DECREF(str_dtype); - return NPY_SUCCEED; } @@ -1515,7 +1238,7 @@ get_legacy_dtype_cast_function( int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, - PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata, + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata, int *out_needs_api, int *out_needs_wrap) 
{ _strided_cast_data *data; @@ -1567,7 +1290,7 @@ get_legacy_dtype_cast_function( } /* Allocate the data for the casting */ - data = (_strided_cast_data *)PyArray_malloc(sizeof(_strided_cast_data)); + data = (_strided_cast_data *)PyMem_Malloc(sizeof(_strided_cast_data)); if (data == NULL) { PyErr_NoMemory(); *out_stransfer = NULL; @@ -1591,7 +1314,7 @@ get_legacy_dtype_cast_function( else { tmp_dtype = PyArray_DescrNewByteorder(src_dtype, NPY_NATIVE); if (tmp_dtype == NULL) { - PyArray_free(data); + PyMem_Free(data); return NPY_FAIL; } } @@ -1601,7 +1324,7 @@ get_legacy_dtype_cast_function( 0, NULL, NULL, 0, 1); if (data->aip == NULL) { - PyArray_free(data); + PyMem_Free(data); return NPY_FAIL; } /* @@ -1618,7 +1341,7 @@ get_legacy_dtype_cast_function( tmp_dtype = PyArray_DescrNewByteorder(dst_dtype, NPY_NATIVE); if (tmp_dtype == NULL) { Py_DECREF(data->aip); - PyArray_free(data); + PyMem_Free(data); return NPY_FAIL; } } @@ -1629,7 +1352,7 @@ get_legacy_dtype_cast_function( 0, 1); if (data->aop == NULL) { Py_DECREF(data->aip); - PyArray_free(data); + PyMem_Free(data); return NPY_FAIL; } @@ -1657,228 +1380,24 @@ get_legacy_dtype_cast_function( } -#if !NPY_USE_NEW_CASTINGIMPL -static int -get_nbo_cast_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, - int *out_needs_api, - int *out_needs_wrap) -{ - if (PyTypeNum_ISNUMBER(src_dtype->type_num) && - PyTypeNum_ISNUMBER(dst_dtype->type_num)) { - *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) || - !PyArray_ISNBO(dst_dtype->byteorder); - return get_nbo_cast_numeric_transfer_function(aligned, - src_stride, dst_stride, - src_dtype->type_num, dst_dtype->type_num, - out_stransfer, out_transferdata); - } - - if (src_dtype->type_num == NPY_DATETIME || - src_dtype->type_num == NPY_TIMEDELTA || - dst_dtype->type_num == NPY_DATETIME || - 
dst_dtype->type_num == NPY_TIMEDELTA) { - /* A parameterized type, datetime->datetime sometimes needs casting */ - if ((src_dtype->type_num == NPY_DATETIME && - dst_dtype->type_num == NPY_DATETIME) || - (src_dtype->type_num == NPY_TIMEDELTA && - dst_dtype->type_num == NPY_TIMEDELTA)) { - *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) || - !PyArray_ISNBO(dst_dtype->byteorder); - return get_nbo_cast_datetime_transfer_function(aligned, - src_dtype, dst_dtype, - out_stransfer, out_transferdata); - } - - /* - * Datetime <-> string conversions can be handled specially. - * The functions may raise an error if the strings have no - * space, or can't be parsed properly. - */ - if (src_dtype->type_num == NPY_DATETIME) { - switch (dst_dtype->type_num) { - case NPY_STRING: - *out_needs_api = 1; - *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder); - return get_nbo_datetime_to_string_transfer_function( - src_dtype, dst_dtype, - out_stransfer, out_transferdata); - - case NPY_UNICODE: - return get_datetime_to_unicode_transfer_function( - aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, out_transferdata, - out_needs_api); - } - } - else if (dst_dtype->type_num == NPY_DATETIME) { - switch (src_dtype->type_num) { - case NPY_STRING: - *out_needs_api = 1; - *out_needs_wrap = !PyArray_ISNBO(dst_dtype->byteorder); - return get_nbo_string_to_datetime_transfer_function( - src_dtype, dst_dtype, - out_stransfer, out_transferdata); - - case NPY_UNICODE: - return get_unicode_to_datetime_transfer_function( - aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, out_transferdata, - out_needs_api); - } - } - } - - return get_legacy_dtype_cast_function( - aligned, src_stride, dst_stride, src_dtype, dst_dtype, - move_references, out_stransfer, out_transferdata, - out_needs_api, out_needs_wrap); -} -#endif - - -NPY_NO_EXPORT int -wrap_aligned_contig_transfer_function_with_copyswapn( - int aligned, npy_intp src_stride, npy_intp dst_stride, 
- PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata, - int *out_needs_api, - PyArray_StridedUnaryOp *caststransfer, NpyAuxData *castdata) -{ - NpyAuxData *todata = NULL, *fromdata = NULL; - PyArray_StridedUnaryOp *tobuffer = NULL, *frombuffer = NULL; - npy_intp src_itemsize = src_dtype->elsize; - npy_intp dst_itemsize = dst_dtype->elsize; - - /* Get the copy/swap operation from src */ - PyArray_GetDTypeCopySwapFn( - aligned, src_stride, src_itemsize, src_dtype, &tobuffer, &todata); - - if (!PyDataType_REFCHK(dst_dtype)) { - /* Copying from buffer is a simple copy/swap operation */ - PyArray_GetDTypeCopySwapFn( - aligned, dst_itemsize, dst_stride, dst_dtype, - &frombuffer, &fromdata); - } - else { - /* - * Since the buffer is initialized to NULL, need to move the - * references in order to DECREF the existing data. - */ - /* Object types cannot be byte swapped */ - assert(PyDataType_ISNOTSWAPPED(dst_dtype)); - /* The loop already needs the python api if this is reached */ - assert(*out_needs_api); - - if (PyArray_GetDTypeTransferFunction( - aligned, dst_itemsize, dst_stride, - dst_dtype, dst_dtype, 1, - &frombuffer, &fromdata, out_needs_api) != NPY_SUCCEED) { - return NPY_FAIL; - } - } - - if (frombuffer == NULL || tobuffer == NULL) { - NPY_AUXDATA_FREE(castdata); - NPY_AUXDATA_FREE(todata); - NPY_AUXDATA_FREE(fromdata); - return NPY_FAIL; - } - - *out_stransfer = caststransfer; - - /* Wrap it all up in a new transfer function + data */ - if (wrap_aligned_contig_transfer_function( - src_itemsize, dst_itemsize, - tobuffer, todata, - frombuffer, fromdata, - caststransfer, castdata, - PyDataType_FLAGCHK(dst_dtype, NPY_NEEDS_INIT), - *out_needs_api, - out_stransfer, out_transferdata) != NPY_SUCCEED) { - NPY_AUXDATA_FREE(castdata); - NPY_AUXDATA_FREE(todata); - NPY_AUXDATA_FREE(fromdata); - return NPY_FAIL; - } - - return NPY_SUCCEED; -} - - -#if !NPY_USE_NEW_CASTINGIMPL -static int 
-get_cast_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, - int *out_needs_api) -{ - PyArray_StridedUnaryOp *caststransfer; - NpyAuxData *castdata; - int needs_wrap = 0; - - if (get_nbo_cast_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - move_references, - &caststransfer, - &castdata, - out_needs_api, - &needs_wrap) != NPY_SUCCEED) { - return NPY_FAIL; - } - - /* - * If all native byte order and doesn't need alignment wrapping, - * return the function - */ - if (!needs_wrap) { - *out_stransfer = caststransfer; - *out_transferdata = castdata; - - return NPY_SUCCEED; - } - /* Otherwise, we have to copy and/or swap to aligned temporaries */ - else { - return wrap_aligned_contig_transfer_function_with_copyswapn( - aligned, src_stride, dst_stride, src_dtype, dst_dtype, - out_stransfer, out_transferdata, out_needs_api, - caststransfer, castdata); - } -} -#endif - /**************************** COPY 1 TO N CONTIGUOUS ************************/ /* Copies 1 element to N contiguous elements */ typedef struct { NpyAuxData base; - PyArray_StridedUnaryOp *stransfer; - NpyAuxData *data; - npy_intp N, dst_itemsize; - /* If this is non-NULL the source type has references needing a decref */ - PyArray_StridedUnaryOp *stransfer_finish_src; - NpyAuxData *data_finish_src; + npy_intp N; + NPY_cast_info wrapped; + /* If finish->func is non-NULL the source needs a decref */ + NPY_cast_info decref_src; } _one_to_n_data; /* transfer data free function */ static void _one_to_n_data_free(NpyAuxData *data) { _one_to_n_data *d = (_one_to_n_data *)data; - NPY_AUXDATA_FREE(d->data); - NPY_AUXDATA_FREE(d->data_finish_src); - PyArray_free(data); + NPY_cast_info_xfree(&d->wrapped); + NPY_cast_info_xfree(&d->decref_src); + PyMem_Free(data); } /* transfer data copy function */ @@ -1888,44 
+1407,51 @@ static NpyAuxData *_one_to_n_data_clone(NpyAuxData *data) _one_to_n_data *newdata; /* Allocate the data, and populate it */ - newdata = (_one_to_n_data *)PyArray_malloc(sizeof(_one_to_n_data)); + newdata = (_one_to_n_data *)PyMem_Malloc(sizeof(_one_to_n_data)); if (newdata == NULL) { return NULL; } - memcpy(newdata, data, sizeof(_one_to_n_data)); - if (d->data != NULL) { - newdata->data = NPY_AUXDATA_CLONE(d->data); - if (newdata->data == NULL) { - PyArray_free(newdata); - return NULL; - } + newdata->base.free = &_one_to_n_data_free; + newdata->base.clone = &_one_to_n_data_clone; + newdata->N = d->N; + /* Initialize in case of error, or if it is unused */ + NPY_cast_info_init(&newdata->decref_src); + + if (NPY_cast_info_copy(&newdata->wrapped, &d->wrapped) < 0) { + _one_to_n_data_free((NpyAuxData *)newdata); + return NULL; } - if (d->data_finish_src != NULL) { - newdata->data_finish_src = NPY_AUXDATA_CLONE(d->data_finish_src); - if (newdata->data_finish_src == NULL) { - NPY_AUXDATA_FREE(newdata->data); - PyArray_free(newdata); - return NULL; - } + if (d->decref_src.func == NULL) { + return (NpyAuxData *)newdata; + } + + if (NPY_cast_info_copy(&newdata->decref_src, &d->decref_src) < 0) { + _one_to_n_data_free((NpyAuxData *)newdata); + return NULL; } return (NpyAuxData *)newdata; } static int -_strided_to_strided_one_to_n(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_strided_to_strided_one_to_n( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _one_to_n_data *d = (_one_to_n_data *)data; - PyArray_StridedUnaryOp *subtransfer = d->stransfer; - NpyAuxData *subdata = d->data; - npy_intp subN = d->N, dst_itemsize = d->dst_itemsize; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _one_to_n_data *d = 
(_one_to_n_data *)auxdata; + + const npy_intp subN = d->N; + npy_intp sub_strides[2] = {0, d->wrapped.descriptors[1]->elsize}; while (N > 0) { - if (subtransfer( - dst, dst_itemsize, src, 0, subN, src_itemsize, subdata) < 0) { + char *sub_args[2] = {src, dst}; + if (d->wrapped.func(&d->wrapped.context, + sub_args, &subN, sub_strides, d->wrapped.auxdata) < 0) { return -1; } @@ -1937,25 +1463,30 @@ _strided_to_strided_one_to_n(char *dst, npy_intp dst_stride, } static int -_strided_to_strided_one_to_n_with_finish(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_strided_to_strided_one_to_n_with_finish( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _one_to_n_data *d = (_one_to_n_data *)data; - PyArray_StridedUnaryOp *subtransfer = d->stransfer, - *stransfer_finish_src = d->stransfer_finish_src; - NpyAuxData *subdata = d->data, *data_finish_src = d->data_finish_src; - npy_intp subN = d->N, dst_itemsize = d->dst_itemsize; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _one_to_n_data *d = (_one_to_n_data *)auxdata; + + const npy_intp subN = d->N; + const npy_intp one_item = 1, zero_stride = 0; + npy_intp sub_strides[2] = {0, d->wrapped.descriptors[1]->elsize}; while (N > 0) { - if (subtransfer( - dst, dst_itemsize, src, 0, subN, src_itemsize, subdata) < 0) { + char *sub_args[2] = {src, dst}; + if (d->wrapped.func(&d->wrapped.context, + sub_args, &subN, sub_strides, d->wrapped.auxdata) < 0) { return -1; } - if (stransfer_finish_src( - NULL, 0, src, 0, 1, src_itemsize, data_finish_src) < 0) { + if (d->decref_src.func(&d->decref_src.context, + &src, &one_item, &zero_stride, d->decref_src.auxdata) < 0) { return -1; } @@ -1966,27 +1497,18 @@ _strided_to_strided_one_to_n_with_finish(char *dst, npy_intp dst_stride, 
return 0; } -/* - * Wraps a transfer function to produce one that copies one element - * of src to N contiguous elements of dst. If stransfer_finish_src is - * not NULL, it should be a transfer function which just affects - * src, for example to do a final DECREF operation for references. - */ + static int -wrap_transfer_function_one_to_n( - PyArray_StridedUnaryOp *stransfer_inner, - NpyAuxData *data_inner, - PyArray_StridedUnaryOp *stransfer_finish_src, - NpyAuxData *data_finish_src, - npy_intp dst_itemsize, +get_one_to_n_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, npy_intp N, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata) + PyArrayMethod_StridedLoop **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api) { - _one_to_n_data *data; - - - data = PyArray_malloc(sizeof(_one_to_n_data)); + _one_to_n_data *data = PyMem_Malloc(sizeof(_one_to_n_data)); if (data == NULL) { PyErr_NoMemory(); return NPY_FAIL; @@ -1994,36 +1516,8 @@ wrap_transfer_function_one_to_n( data->base.free = &_one_to_n_data_free; data->base.clone = &_one_to_n_data_clone; - data->stransfer = stransfer_inner; - data->data = data_inner; - data->stransfer_finish_src = stransfer_finish_src; - data->data_finish_src = data_finish_src; data->N = N; - data->dst_itemsize = dst_itemsize; - - if (stransfer_finish_src == NULL) { - *out_stransfer = &_strided_to_strided_one_to_n; - } - else { - *out_stransfer = &_strided_to_strided_one_to_n_with_finish; - } - *out_transferdata = (NpyAuxData *)data; - - return NPY_SUCCEED; -} - -static int -get_one_to_n_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - npy_intp N, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, - int *out_needs_api) -{ - PyArray_StridedUnaryOp *stransfer, 
*stransfer_finish_src = NULL; - NpyAuxData *data, *data_finish_src = NULL; + NPY_cast_info_init(&data->decref_src); /* In case of error */ /* * move_references is set to 0, handled in the wrapping transfer fn, @@ -2035,33 +1529,31 @@ get_one_to_n_transfer_function(int aligned, 0, dst_dtype->elsize, src_dtype, dst_dtype, 0, - &stransfer, &data, + &data->wrapped, out_needs_api) != NPY_SUCCEED) { + NPY_AUXDATA_FREE((NpyAuxData *)data); return NPY_FAIL; } /* If the src object will need a DECREF, set src_dtype */ if (move_references && PyDataType_REFCHK(src_dtype)) { - if (get_decsrcref_transfer_function(aligned, + if (get_decref_transfer_function(aligned, src_stride, src_dtype, - &stransfer_finish_src, - &data_finish_src, + &data->decref_src, out_needs_api) != NPY_SUCCEED) { - NPY_AUXDATA_FREE(data); + NPY_AUXDATA_FREE((NpyAuxData *)data); return NPY_FAIL; } } - if (wrap_transfer_function_one_to_n(stransfer, data, - stransfer_finish_src, data_finish_src, - dst_dtype->elsize, - N, - out_stransfer, out_transferdata) != NPY_SUCCEED) { - NPY_AUXDATA_FREE(data); - NPY_AUXDATA_FREE(data_finish_src); - return NPY_FAIL; + if (data->decref_src.func == NULL) { + *out_stransfer = &_strided_to_strided_one_to_n; + } + else { + *out_stransfer = &_strided_to_strided_one_to_n_with_finish; } + *out_transferdata = (NpyAuxData *)data; return NPY_SUCCEED; } @@ -2071,17 +1563,17 @@ get_one_to_n_transfer_function(int aligned, /* Copies N contiguous elements to N contiguous elements */ typedef struct { NpyAuxData base; - PyArray_StridedUnaryOp *stransfer; - NpyAuxData *data; - npy_intp N, src_itemsize, dst_itemsize; + NPY_cast_info wrapped; + npy_intp N; + npy_intp strides[2]; /* avoid look up on the dtype (dst can be NULL) */ } _n_to_n_data; /* transfer data free function */ static void _n_to_n_data_free(NpyAuxData *data) { _n_to_n_data *d = (_n_to_n_data *)data; - NPY_AUXDATA_FREE(d->data); - PyArray_free(data); + NPY_cast_info_xfree(&d->wrapped); + PyMem_Free(data); } /* transfer data 
copy function */ @@ -2091,38 +1583,47 @@ static NpyAuxData *_n_to_n_data_clone(NpyAuxData *data) _n_to_n_data *newdata; /* Allocate the data, and populate it */ - newdata = (_n_to_n_data *)PyArray_malloc(sizeof(_n_to_n_data)); + newdata = (_n_to_n_data *)PyMem_Malloc(sizeof(_n_to_n_data)); if (newdata == NULL) { return NULL; } - memcpy(newdata, data, sizeof(_n_to_n_data)); - if (newdata->data != NULL) { - newdata->data = NPY_AUXDATA_CLONE(d->data); - if (newdata->data == NULL) { - PyArray_free(newdata); - return NULL; - } + *newdata = *d; + + if (NPY_cast_info_copy(&newdata->wrapped, &d->wrapped) < 0) { + _n_to_n_data_free((NpyAuxData *)newdata); } return (NpyAuxData *)newdata; } static int -_strided_to_strided_n_to_n(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) +_strided_to_strided_1_to_1( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _n_to_n_data *d = (_n_to_n_data *)data; - PyArray_StridedUnaryOp *subtransfer = d->stransfer; - NpyAuxData *subdata = d->data; - npy_intp subN = d->N, src_subitemsize = d->src_itemsize, - dst_subitemsize = d->dst_itemsize; + _n_to_n_data *d = (_n_to_n_data *)auxdata; + return d->wrapped.func(&d->wrapped.context, + args, dimensions, strides, d->wrapped.auxdata); +} + +static int +_strided_to_strided_n_to_n( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) +{ + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _n_to_n_data *d = (_n_to_n_data *)auxdata; + npy_intp subN = d->N; while (N > 0) { - if (subtransfer( - dst, dst_subitemsize, src, src_subitemsize, - subN, src_subitemsize, subdata) < 0) { + char *sub_args[2] = {src, dst}; + if (d->wrapped.func(&d->wrapped.context, + sub_args, 
&subN, d->strides, d->wrapped.auxdata) < 0) { return -1; } src += src_stride; @@ -2133,106 +1634,93 @@ _strided_to_strided_n_to_n(char *dst, npy_intp dst_stride, } static int -_contig_to_contig_n_to_n(char *dst, npy_intp NPY_UNUSED(dst_stride), - char *src, npy_intp NPY_UNUSED(src_stride), - npy_intp N, npy_intp NPY_UNUSED(src_itemsize), - NpyAuxData *data) +_contig_to_contig_n_to_n( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *NPY_UNUSED(strides), + NpyAuxData *auxdata) { - _n_to_n_data *d = (_n_to_n_data *)data; - PyArray_StridedUnaryOp *subtransfer = d->stransfer; - NpyAuxData *subdata = d->data; - npy_intp subN = d->N, src_subitemsize = d->src_itemsize, - dst_subitemsize = d->dst_itemsize; - - if (subtransfer( - dst, dst_subitemsize, src, src_subitemsize, - subN*N, src_subitemsize, subdata) < 0) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + + _n_to_n_data *d = (_n_to_n_data *)auxdata; + /* Make one large transfer including both outer and inner iteration: */ + npy_intp subN = N * d->N; + + char *sub_args[2] = {src, dst}; + if (d->wrapped.func(&d->wrapped.context, + sub_args, &subN, d->strides, d->wrapped.auxdata) < 0) { return -1; } return 0; } + /* - * Wraps a transfer function to produce one that copies N contiguous elements - * of src to N contiguous elements of dst. 
+ * Note that this function is currently both used for structured dtype + * casting as well as a decref function (with `dst_dtype == NULL`) */ static int -wrap_transfer_function_n_to_n( - PyArray_StridedUnaryOp *stransfer_inner, - NpyAuxData *data_inner, +get_n_to_n_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, - npy_intp src_itemsize, npy_intp dst_itemsize, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, npy_intp N, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata) + PyArrayMethod_StridedLoop **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api) { - _n_to_n_data *data; - - data = PyArray_malloc(sizeof(_n_to_n_data)); + _n_to_n_data *data = PyMem_Malloc(sizeof(_n_to_n_data)); if (data == NULL) { PyErr_NoMemory(); return NPY_FAIL; } - data->base.free = &_n_to_n_data_free; data->base.clone = &_n_to_n_data_clone; - data->stransfer = stransfer_inner; - data->data = data_inner; data->N = N; - data->src_itemsize = src_itemsize; - data->dst_itemsize = dst_itemsize; - /* - * If the N subarray elements exactly fit in the strides, - * then can do a faster contiguous transfer. - */ - if (src_stride == N * src_itemsize && - dst_stride == N * dst_itemsize) { - *out_stransfer = &_contig_to_contig_n_to_n; - } - else { - *out_stransfer = &_strided_to_strided_n_to_n; + if (N != 1) { + /* + * If N == 1, we can use the original strides, + * otherwise fields are contiguous + */ + src_stride = src_dtype->elsize; + dst_stride = dst_dtype != NULL ? 
dst_dtype->elsize : 0; + /* Store the wrapped strides for easier access */ + data->strides[0] = src_stride; + data->strides[1] = dst_stride; } - *out_transferdata = (NpyAuxData *)data; - - return NPY_SUCCEED; -} - -static int -get_n_to_n_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - npy_intp N, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, - int *out_needs_api) -{ - PyArray_StridedUnaryOp *stransfer; - NpyAuxData *data; /* * src_stride and dst_stride are set to contiguous, because * subarrays are always contiguous. */ if (PyArray_GetDTypeTransferFunction(aligned, - src_dtype->elsize, dst_dtype->elsize, + src_stride, dst_stride, src_dtype, dst_dtype, move_references, - &stransfer, &data, + &data->wrapped, out_needs_api) != NPY_SUCCEED) { + NPY_AUXDATA_FREE((NpyAuxData *)data); return NPY_FAIL; } - if (wrap_transfer_function_n_to_n(stransfer, data, - src_stride, dst_stride, - src_dtype->elsize, dst_dtype->elsize, - N, - out_stransfer, - out_transferdata) != NPY_SUCCEED) { - NPY_AUXDATA_FREE(data); - return NPY_FAIL; + if (N == 1) { + /* + * No need for wrapping, we can just copy directly. In principle + * this step could be optimized away entirely, but it requires + * replacing the context (to have the unpacked dtypes). 
+ */ + *out_stransfer = &_strided_to_strided_1_to_1; + } + else if (src_stride == N * src_stride && + dst_stride == N * dst_stride) { + /* The subarrays can be coalesced (probably very rarely) */ + *out_stransfer = &_contig_to_contig_n_to_n; + } + else { + *out_stransfer = &_strided_to_strided_n_to_n; } + *out_transferdata = (NpyAuxData *)data; return NPY_SUCCEED; } @@ -2246,16 +1734,13 @@ typedef struct { /* Copies element with subarray broadcasting */ typedef struct { NpyAuxData base; - PyArray_StridedUnaryOp *stransfer; - NpyAuxData *data; - npy_intp src_N, dst_N, src_itemsize, dst_itemsize; - PyArray_StridedUnaryOp *stransfer_decsrcref; - NpyAuxData *data_decsrcref; - PyArray_StridedUnaryOp *stransfer_decdstref; - NpyAuxData *data_decdstref; + NPY_cast_info wrapped; + NPY_cast_info decref_src; + NPY_cast_info decref_dst; /* The use-case should probably be deprecated */ + npy_intp src_N, dst_N; /* This gets a run-length encoded representation of the transfer */ npy_intp run_count; - _subarray_broadcast_offsetrun offsetruns; + _subarray_broadcast_offsetrun offsetruns[]; } _subarray_broadcast_data; @@ -2263,49 +1748,48 @@ typedef struct { static void _subarray_broadcast_data_free(NpyAuxData *data) { _subarray_broadcast_data *d = (_subarray_broadcast_data *)data; - NPY_AUXDATA_FREE(d->data); - NPY_AUXDATA_FREE(d->data_decsrcref); - NPY_AUXDATA_FREE(d->data_decdstref); - PyArray_free(data); + NPY_cast_info_xfree(&d->wrapped); + NPY_cast_info_xfree(&d->decref_src); + NPY_cast_info_xfree(&d->decref_dst); + PyMem_Free(data); } /* transfer data copy function */ -static NpyAuxData *_subarray_broadcast_data_clone( NpyAuxData *data) +static NpyAuxData *_subarray_broadcast_data_clone(NpyAuxData *data) { _subarray_broadcast_data *d = (_subarray_broadcast_data *)data; - _subarray_broadcast_data *newdata; - npy_intp run_count = d->run_count, structsize; - structsize = sizeof(_subarray_broadcast_data) + - run_count*sizeof(_subarray_broadcast_offsetrun); + npy_intp 
offsetruns_size = d->run_count*sizeof(_subarray_broadcast_offsetrun); + npy_intp structsize = sizeof(_subarray_broadcast_data) + offsetruns_size; /* Allocate the data and populate it */ - newdata = (_subarray_broadcast_data *)PyArray_malloc(structsize); + _subarray_broadcast_data *newdata = PyMem_Malloc(structsize); if (newdata == NULL) { return NULL; } - memcpy(newdata, data, structsize); - if (d->data != NULL) { - newdata->data = NPY_AUXDATA_CLONE(d->data); - if (newdata->data == NULL) { - PyArray_free(newdata); - return NULL; - } + newdata->base.free = &_subarray_broadcast_data_free; + newdata->base.clone = &_subarray_broadcast_data_clone; + newdata->src_N = d->src_N; + newdata->dst_N = d->dst_N; + newdata->run_count = d->run_count; + memcpy(newdata->offsetruns, d->offsetruns, offsetruns_size); + + NPY_cast_info_init(&newdata->decref_src); + NPY_cast_info_init(&newdata->decref_dst); + + if (NPY_cast_info_copy(&newdata->wrapped, &d->wrapped) < 0) { + _subarray_broadcast_data_free((NpyAuxData *)newdata); + return NULL; } - if (d->data_decsrcref != NULL) { - newdata->data_decsrcref = NPY_AUXDATA_CLONE(d->data_decsrcref); - if (newdata->data_decsrcref == NULL) { - NPY_AUXDATA_FREE(newdata->data); - PyArray_free(newdata); + if (d->decref_src.func != NULL) { + if (NPY_cast_info_copy(&newdata->decref_src, &d->decref_src) < 0) { + _subarray_broadcast_data_free((NpyAuxData *) newdata); return NULL; } } - if (d->data_decdstref != NULL) { - newdata->data_decdstref = NPY_AUXDATA_CLONE(d->data_decdstref); - if (newdata->data_decdstref == NULL) { - NPY_AUXDATA_FREE(newdata->data); - NPY_AUXDATA_FREE(newdata->data_decsrcref); - PyArray_free(newdata); + if (d->decref_dst.func != NULL) { + if (NPY_cast_info_copy(&newdata->decref_dst, &d->decref_dst) < 0) { + _subarray_broadcast_data_free((NpyAuxData *) newdata); return NULL; } } @@ -2314,31 +1798,34 @@ static NpyAuxData *_subarray_broadcast_data_clone( NpyAuxData *data) } static int -_strided_to_strided_subarray_broadcast(char 
*dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp NPY_UNUSED(src_itemsize), - NpyAuxData *data) +_strided_to_strided_subarray_broadcast( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _subarray_broadcast_data *d = (_subarray_broadcast_data *)data; - PyArray_StridedUnaryOp *subtransfer = d->stransfer; - NpyAuxData *subdata = d->data; - npy_intp run, run_count = d->run_count, - src_subitemsize = d->src_itemsize, - dst_subitemsize = d->dst_itemsize; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _subarray_broadcast_data *d = (_subarray_broadcast_data *)auxdata; + npy_intp run, run_count = d->run_count; npy_intp loop_index, offset, count; - char *dst_ptr; - _subarray_broadcast_offsetrun *offsetruns = &d->offsetruns; + + npy_intp src_subitemsize = d->wrapped.descriptors[0]->elsize; + npy_intp dst_subitemsize = d->wrapped.descriptors[1]->elsize; + + npy_intp sub_strides[2] = {src_subitemsize, dst_subitemsize}; while (N > 0) { loop_index = 0; for (run = 0; run < run_count; ++run) { - offset = offsetruns[run].offset; - count = offsetruns[run].count; - dst_ptr = dst + loop_index*dst_subitemsize; + offset = d->offsetruns[run].offset; + count = d->offsetruns[run].count; + char *dst_ptr = dst + loop_index*dst_subitemsize; + char *sub_args[2] = {src + offset, dst_ptr}; if (offset != -1) { - if (subtransfer( - dst_ptr, dst_subitemsize, src + offset, src_subitemsize, - count, src_subitemsize, subdata) < 0) { + if (d->wrapped.func(&d->wrapped.context, + sub_args, &count, sub_strides, d->wrapped.auxdata) < 0) { return -1; } } @@ -2357,44 +1844,42 @@ _strided_to_strided_subarray_broadcast(char *dst, npy_intp dst_stride, static int -_strided_to_strided_subarray_broadcast_withrefs(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp 
NPY_UNUSED(src_itemsize), - NpyAuxData *data) +_strided_to_strided_subarray_broadcast_withrefs( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _subarray_broadcast_data *d = (_subarray_broadcast_data *)data; - PyArray_StridedUnaryOp *subtransfer = d->stransfer; - NpyAuxData *subdata = d->data; - PyArray_StridedUnaryOp *stransfer_decsrcref = d->stransfer_decsrcref; - NpyAuxData *data_decsrcref = d->data_decsrcref; - PyArray_StridedUnaryOp *stransfer_decdstref = d->stransfer_decdstref; - NpyAuxData *data_decdstref = d->data_decdstref; - npy_intp run, run_count = d->run_count, - src_subitemsize = d->src_itemsize, - dst_subitemsize = d->dst_itemsize, - src_subN = d->src_N; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _subarray_broadcast_data *d = (_subarray_broadcast_data *)auxdata; + npy_intp run, run_count = d->run_count; npy_intp loop_index, offset, count; - char *dst_ptr; - _subarray_broadcast_offsetrun *offsetruns = &d->offsetruns; + + npy_intp src_subitemsize = d->wrapped.descriptors[0]->elsize; + npy_intp dst_subitemsize = d->wrapped.descriptors[1]->elsize; + + npy_intp sub_strides[2] = {src_subitemsize, dst_subitemsize}; while (N > 0) { loop_index = 0; for (run = 0; run < run_count; ++run) { - offset = offsetruns[run].offset; - count = offsetruns[run].count; - dst_ptr = dst + loop_index*dst_subitemsize; + offset = d->offsetruns[run].offset; + count = d->offsetruns[run].count; + char *dst_ptr = dst + loop_index*dst_subitemsize; + char *sub_args[2] = {src + offset, dst_ptr}; if (offset != -1) { - if (subtransfer( - dst_ptr, dst_subitemsize, src + offset, src_subitemsize, - count, src_subitemsize, subdata) < 0) { + if (d->wrapped.func(&d->wrapped.context, + sub_args, &count, sub_strides, d->wrapped.auxdata) < 0) { return -1; } } else { - if (stransfer_decdstref != NULL) { - if 
(stransfer_decdstref( - NULL, 0, dst_ptr, dst_subitemsize, - count, dst_subitemsize, data_decdstref) < 0) { + if (d->decref_dst.func != NULL) { + if (d->decref_dst.func(&d->decref_dst.context, + &dst_ptr, &count, &dst_subitemsize, + d->decref_dst.auxdata) < 0) { return -1; } } @@ -2403,10 +1888,10 @@ _strided_to_strided_subarray_broadcast_withrefs(char *dst, npy_intp dst_stride, loop_index += count; } - if (stransfer_decsrcref != NULL) { - if (stransfer_decsrcref( - NULL, 0, src, src_subitemsize, - src_subN, src_subitemsize, data_decsrcref) < 0) { + if (d->decref_src.func != NULL) { + if (d->decref_src.func(&d->decref_src.context, + &src, &d->src_N, &src_subitemsize, + d->decref_src.auxdata) < 0) { return -1; } } @@ -2426,24 +1911,30 @@ get_subarray_broadcast_transfer_function(int aligned, npy_intp src_size, npy_intp dst_size, PyArray_Dims src_shape, PyArray_Dims dst_shape, int move_references, - PyArray_StridedUnaryOp **out_stransfer, + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata, int *out_needs_api) { _subarray_broadcast_data *data; npy_intp structsize, loop_index, run, run_size, src_index, dst_index, i, ndim; - _subarray_broadcast_offsetrun *offsetruns; structsize = sizeof(_subarray_broadcast_data) + dst_size*sizeof(_subarray_broadcast_offsetrun); /* Allocate the data and populate it */ - data = (_subarray_broadcast_data *)PyArray_malloc(structsize); + data = (_subarray_broadcast_data *)PyMem_Malloc(structsize); if (data == NULL) { PyErr_NoMemory(); return NPY_FAIL; } + data->base.free = &_subarray_broadcast_data_free; + data->base.clone = &_subarray_broadcast_data_clone; + data->src_N = src_size; + data->dst_N = dst_size; + + NPY_cast_info_init(&data->decref_src); + NPY_cast_info_init(&data->decref_dst); /* * move_references is set to 0, handled in the wrapping transfer fn, @@ -2454,17 +1945,11 @@ get_subarray_broadcast_transfer_function(int aligned, src_dtype->elsize, dst_dtype->elsize, src_dtype, dst_dtype, 0, - &data->stransfer, 
&data->data, + &data->wrapped, out_needs_api) != NPY_SUCCEED) { - PyArray_free(data); + NPY_AUXDATA_FREE((NpyAuxData *)data); return NPY_FAIL; } - data->base.free = &_subarray_broadcast_data_free; - data->base.clone = &_subarray_broadcast_data_clone; - data->src_N = src_size; - data->dst_N = dst_size; - data->src_itemsize = src_dtype->elsize; - data->dst_itemsize = dst_dtype->elsize; /* If the src object will need a DECREF */ if (move_references && PyDataType_REFCHK(src_dtype)) { @@ -2472,18 +1957,12 @@ get_subarray_broadcast_transfer_function(int aligned, src_dtype->elsize, 0, src_dtype, NULL, 1, - &data->stransfer_decsrcref, - &data->data_decsrcref, + &data->decref_src, out_needs_api) != NPY_SUCCEED) { - NPY_AUXDATA_FREE(data->data); - PyArray_free(data); + NPY_AUXDATA_FREE((NpyAuxData *)data); return NPY_FAIL; } } - else { - data->stransfer_decsrcref = NULL; - data->data_decsrcref = NULL; - } /* If the dst object needs a DECREF to set it to NULL */ if (PyDataType_REFCHK(dst_dtype)) { @@ -2491,22 +1970,15 @@ get_subarray_broadcast_transfer_function(int aligned, dst_dtype->elsize, 0, dst_dtype, NULL, 1, - &data->stransfer_decdstref, - &data->data_decdstref, + &data->decref_dst, out_needs_api) != NPY_SUCCEED) { - NPY_AUXDATA_FREE(data->data); - NPY_AUXDATA_FREE(data->data_decsrcref); - PyArray_free(data); + NPY_AUXDATA_FREE((NpyAuxData *)data); return NPY_FAIL; } } - else { - data->stransfer_decdstref = NULL; - data->data_decdstref = NULL; - } /* Calculate the broadcasting and set the offsets */ - offsetruns = &data->offsetruns; + _subarray_broadcast_offsetrun *offsetruns = data->offsetruns; ndim = (src_shape.len > dst_shape.len) ? 
src_shape.len : dst_shape.len; for (loop_index = 0; loop_index < dst_size; ++loop_index) { npy_intp src_factor = 1; @@ -2592,8 +2064,8 @@ get_subarray_broadcast_transfer_function(int aligned, } } - if (data->stransfer_decsrcref == NULL && - data->stransfer_decdstref == NULL) { + if (data->decref_src.func == NULL && + data->decref_dst.func == NULL) { *out_stransfer = &_strided_to_strided_subarray_broadcast; } else { @@ -2613,7 +2085,7 @@ get_subarray_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, - PyArray_StridedUnaryOp **out_stransfer, + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata, int *out_needs_api) { @@ -2644,46 +2116,36 @@ get_subarray_transfer_function(int aligned, } /* - * Just a straight one-element copy. + * Copy the src value to all the dst values, the size one can be + * special cased for speed. */ - if (dst_size == 1 && src_size == 1) { - npy_free_cache_dim_obj(src_shape); - npy_free_cache_dim_obj(dst_shape); + if ((dst_size == 1 && src_size == 1) || ( + src_shape.len == dst_shape.len && PyArray_CompareLists( + src_shape.ptr, dst_shape.ptr, src_shape.len))) { - return PyArray_GetDTypeTransferFunction(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - move_references, - out_stransfer, out_transferdata, - out_needs_api); - } - /* Copy the src value to all the dst values */ - else if (src_size == 1) { npy_free_cache_dim_obj(src_shape); npy_free_cache_dim_obj(dst_shape); - return get_one_to_n_transfer_function(aligned, + return get_n_to_n_transfer_function(aligned, src_stride, dst_stride, src_dtype, dst_dtype, move_references, - dst_size, + src_size, out_stransfer, out_transferdata, out_needs_api); } - /* If the shapes match exactly, do an n to n copy */ - else if (src_shape.len == dst_shape.len && - PyArray_CompareLists(src_shape.ptr, dst_shape.ptr, - src_shape.len)) { + /* Copy the src value to all the dst values */ + 
else if (src_size == 1) { npy_free_cache_dim_obj(src_shape); npy_free_cache_dim_obj(dst_shape); - return get_n_to_n_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - move_references, - src_size, - out_stransfer, out_transferdata, - out_needs_api); + return get_one_to_n_transfer_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + move_references, + dst_size, + out_stransfer, out_transferdata, + out_needs_api); } /* * Copy the subarray with broadcasting, truncating, and zero-padding @@ -2707,91 +2169,82 @@ get_subarray_transfer_function(int aligned, /**************************** COPY FIELDS *******************************/ typedef struct { - npy_intp src_offset, dst_offset, src_itemsize; - PyArray_StridedUnaryOp *stransfer; - NpyAuxData *data; + npy_intp src_offset, dst_offset; + NPY_cast_info info; } _single_field_transfer; typedef struct { NpyAuxData base; npy_intp field_count; - - _single_field_transfer fields; + _single_field_transfer fields[]; } _field_transfer_data; + /* transfer data free function */ static void _field_transfer_data_free(NpyAuxData *data) { _field_transfer_data *d = (_field_transfer_data *)data; - npy_intp i, field_count; - _single_field_transfer *fields; - - field_count = d->field_count; - fields = &d->fields; - for (i = 0; i < field_count; ++i) { - NPY_AUXDATA_FREE(fields[i].data); + for (npy_intp i = 0; i < d->field_count; ++i) { + NPY_cast_info_xfree(&d->fields[i].info); } - PyArray_free(d); + PyMem_Free(d); } /* transfer data copy function */ static NpyAuxData *_field_transfer_data_clone(NpyAuxData *data) { _field_transfer_data *d = (_field_transfer_data *)data; - _field_transfer_data *newdata; - npy_intp i, field_count = d->field_count, structsize; - _single_field_transfer *fields, *newfields; - structsize = sizeof(_field_transfer_data) + + npy_intp field_count = d->field_count; + npy_intp structsize = sizeof(_field_transfer_data) + field_count * sizeof(_single_field_transfer); /* Allocate 
the data and populate it */ - newdata = (_field_transfer_data *)PyArray_malloc(structsize); + _field_transfer_data *newdata = PyMem_Malloc(structsize); if (newdata == NULL) { return NULL; } - memcpy(newdata, d, structsize); + newdata->base = d->base; + newdata->field_count = 0; + /* Copy all the fields transfer data */ - fields = &d->fields; - newfields = &newdata->fields; - for (i = 0; i < field_count; ++i) { - if (fields[i].data != NULL) { - newfields[i].data = NPY_AUXDATA_CLONE(fields[i].data); - if (newfields[i].data == NULL) { - for (i = i-1; i >= 0; --i) { - NPY_AUXDATA_FREE(newfields[i].data); - } - PyArray_free(newdata); - return NULL; - } + for (npy_intp i = 0; i < field_count; ++i) { + if (NPY_cast_info_copy(&newdata->fields[i].info, &d->fields[i].info) < 0) { + NPY_AUXDATA_FREE((NpyAuxData *)newdata); + return NULL; } - + newdata->fields[i].src_offset = d->fields[i].src_offset; + newdata->fields[i].dst_offset = d->fields[i].dst_offset; + newdata->field_count++; } return (NpyAuxData *)newdata; } + static int -_strided_to_strided_field_transfer(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp NPY_UNUSED(src_itemsize), - NpyAuxData *data) +_strided_to_strided_field_transfer( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _field_transfer_data *d = (_field_transfer_data *)data; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + + _field_transfer_data *d = (_field_transfer_data *)auxdata; npy_intp i, field_count = d->field_count; - _single_field_transfer *field; + const npy_intp blocksize = NPY_LOWLEVEL_BUFFER_BLOCKSIZE; /* Do the transfer a block at a time */ for (;;) { - field = &d->fields; - if (N > NPY_LOWLEVEL_BUFFER_BLOCKSIZE) { - for (i = 0; i < field_count; ++i, ++field) { - if (field->stransfer( - dst + field->dst_offset, dst_stride, 
- src + field->src_offset, src_stride, - NPY_LOWLEVEL_BUFFER_BLOCKSIZE, - field->src_itemsize, field->data) < 0) { + if (N > blocksize) { + for (i = 0; i < field_count; ++i) { + _single_field_transfer field = d->fields[i]; + char *fargs[2] = {src + field.src_offset, dst + field.dst_offset}; + if (field.info.func(&field.info.context, + fargs, &blocksize, strides, field.info.auxdata) < 0) { return -1; } } @@ -2800,12 +2253,11 @@ _strided_to_strided_field_transfer(char *dst, npy_intp dst_stride, dst += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_stride; } else { - for (i = 0; i < field_count; ++i, ++field) { - if (field->stransfer( - dst + field->dst_offset, dst_stride, - src + field->src_offset, src_stride, - N, - field->src_itemsize, field->data) < 0) { + for (i = 0; i < field_count; ++i) { + _single_field_transfer field = d->fields[i]; + char *fargs[2] = {src + field.src_offset, dst + field.dst_offset}; + if (field.info.func(&field.info.context, + fargs, &N, strides, field.info.auxdata) < 0) { return -1; } } @@ -2819,11 +2271,11 @@ _strided_to_strided_field_transfer(char *dst, npy_intp dst_stride, * must have fields. Does not take care of object<->structure conversion */ NPY_NO_EXPORT int -get_fields_transfer_function(int aligned, +get_fields_transfer_function(int NPY_UNUSED(aligned), npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, - PyArray_StridedUnaryOp **out_stransfer, + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata, int *out_needs_api) { @@ -2832,8 +2284,6 @@ get_fields_transfer_function(int aligned, npy_int i, field_count, structsize; int src_offset, dst_offset; _field_transfer_data *data; - _single_field_transfer *fields; - int failed = 0; /* * There are three cases to take care of: 1. 
src is non-structured, @@ -2847,64 +2297,55 @@ get_fields_transfer_function(int aligned, /* Allocate the field-data structure and populate it */ structsize = sizeof(_field_transfer_data) + (field_count + 1) * sizeof(_single_field_transfer); - data = (_field_transfer_data *)PyArray_malloc(structsize); + data = PyMem_Malloc(structsize); if (data == NULL) { PyErr_NoMemory(); return NPY_FAIL; } data->base.free = &_field_transfer_data_free; data->base.clone = &_field_transfer_data_clone; - fields = &data->fields; + data->field_count = 0; for (i = 0; i < field_count; ++i) { key = PyTuple_GET_ITEM(dst_dtype->names, i); tup = PyDict_GetItem(dst_dtype->fields, key); if (!PyArg_ParseTuple(tup, "Oi|O", &dst_fld_dtype, &dst_offset, &title)) { - PyArray_free(data); + PyMem_Free(data); return NPY_FAIL; } if (PyArray_GetDTypeTransferFunction(0, src_stride, dst_stride, src_dtype, dst_fld_dtype, 0, - &fields[i].stransfer, - &fields[i].data, + &data->fields[i].info, out_needs_api) != NPY_SUCCEED) { - for (i = i-1; i >= 0; --i) { - NPY_AUXDATA_FREE(fields[i].data); - } - PyArray_free(data); + NPY_AUXDATA_FREE((NpyAuxData *)data); return NPY_FAIL; } - fields[i].src_offset = 0; - fields[i].dst_offset = dst_offset; - fields[i].src_itemsize = src_dtype->elsize; + data->fields[i].src_offset = 0; + data->fields[i].dst_offset = dst_offset; + data->field_count++; } /* - * If references should be decrefd in src, add - * another transfer function to do that. + * If references should be decrefd in src, add another transfer + * function to do that. Since a decref function only uses a single + * input, the second one (normally output) just does not matter here. 
*/ if (move_references && PyDataType_REFCHK(src_dtype)) { - if (get_decsrcref_transfer_function(0, + if (get_decref_transfer_function(0, src_stride, src_dtype, - &fields[field_count].stransfer, - &fields[field_count].data, + &data->fields[field_count].info, out_needs_api) != NPY_SUCCEED) { - for (i = 0; i < field_count; ++i) { - NPY_AUXDATA_FREE(fields[i].data); - } - PyArray_free(data); + NPY_AUXDATA_FREE((NpyAuxData *)data); return NPY_FAIL; } - fields[field_count].src_offset = 0; - fields[field_count].dst_offset = 0; - fields[field_count].src_itemsize = src_dtype->elsize; - field_count++; + data->fields[field_count].src_offset = 0; + data->fields[field_count].dst_offset = 0; + data->field_count = field_count; } - data->field_count = field_count; *out_stransfer = &_strided_to_strided_field_transfer; *out_transferdata = (NpyAuxData *)data; @@ -2924,19 +2365,19 @@ get_fields_transfer_function(int aligned, /* Allocate the field-data structure and populate it */ structsize = sizeof(_field_transfer_data) + 1 * sizeof(_single_field_transfer); - data = (_field_transfer_data *)PyArray_malloc(structsize); + data = PyMem_Malloc(structsize); if (data == NULL) { PyErr_NoMemory(); return NPY_FAIL; } data->base.free = &_field_transfer_data_free; data->base.clone = &_field_transfer_data_clone; - fields = &data->fields; key = PyTuple_GET_ITEM(src_dtype->names, 0); tup = PyDict_GetItem(src_dtype->fields, key); if (!PyArg_ParseTuple(tup, "Oi|O", &src_fld_dtype, &src_offset, &title)) { + PyMem_Free(data); return NPY_FAIL; } @@ -2944,16 +2385,13 @@ get_fields_transfer_function(int aligned, src_stride, dst_stride, src_fld_dtype, dst_dtype, move_references, - &fields[0].stransfer, - &fields[0].data, + &data->fields[0].info, out_needs_api) != NPY_SUCCEED) { - PyArray_free(data); + PyMem_Free(data); return NPY_FAIL; } - fields[0].src_offset = src_offset; - fields[0].dst_offset = 0; - fields[0].src_itemsize = src_fld_dtype->elsize; - + data->fields[0].src_offset = src_offset; + 
data->fields[0].dst_offset = 0; data->field_count = 1; *out_stransfer = &_strided_to_strided_field_transfer; @@ -2974,14 +2412,14 @@ get_fields_transfer_function(int aligned, /* Allocate the field-data structure and populate it */ structsize = sizeof(_field_transfer_data) + field_count * sizeof(_single_field_transfer); - data = (_field_transfer_data *)PyArray_malloc(structsize); + data = PyMem_Malloc(structsize); if (data == NULL) { PyErr_NoMemory(); return NPY_FAIL; } data->base.free = &_field_transfer_data_free; data->base.clone = &_field_transfer_data_clone; - fields = &data->fields; + data->field_count = 0; /* set up the transfer function for each field */ for (i = 0; i < field_count; ++i) { @@ -2989,42 +2427,31 @@ get_fields_transfer_function(int aligned, tup = PyDict_GetItem(dst_dtype->fields, key); if (!PyArg_ParseTuple(tup, "Oi|O", &dst_fld_dtype, &dst_offset, &title)) { - failed = 1; - break; + NPY_AUXDATA_FREE((NpyAuxData *)data); + return NPY_FAIL; } key = PyTuple_GET_ITEM(src_dtype->names, i); tup = PyDict_GetItem(src_dtype->fields, key); if (!PyArg_ParseTuple(tup, "Oi|O", &src_fld_dtype, &src_offset, &title)) { - failed = 1; - break; + NPY_AUXDATA_FREE((NpyAuxData *)data); + return NPY_FAIL; } if (PyArray_GetDTypeTransferFunction(0, src_stride, dst_stride, src_fld_dtype, dst_fld_dtype, move_references, - &fields[i].stransfer, - &fields[i].data, + &data->fields[i].info, out_needs_api) != NPY_SUCCEED) { - failed = 1; - break; - } - fields[i].src_offset = src_offset; - fields[i].dst_offset = dst_offset; - fields[i].src_itemsize = src_fld_dtype->elsize; - } - - if (failed) { - for (i = i-1; i >= 0; --i) { - NPY_AUXDATA_FREE(fields[i].data); + NPY_AUXDATA_FREE((NpyAuxData *)data); + return NPY_FAIL; } - PyArray_free(data); - return NPY_FAIL; + data->fields[i].src_offset = src_offset; + data->fields[i].dst_offset = dst_offset; + data->field_count++; } - data->field_count = field_count; - *out_stransfer = &_strided_to_strided_field_transfer; *out_transferdata 
= (NpyAuxData *)data; @@ -3032,70 +2459,61 @@ get_fields_transfer_function(int aligned, } static int -get_decsrcref_fields_transfer_function(int aligned, +get_decref_fields_transfer_function(int NPY_UNUSED(aligned), npy_intp src_stride, PyArray_Descr *src_dtype, - PyArray_StridedUnaryOp **out_stransfer, + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata, int *out_needs_api) { PyObject *names, *key, *tup, *title; PyArray_Descr *src_fld_dtype; - npy_int i, names_size, field_count, structsize; + npy_int i, field_count, structsize; int src_offset; - _field_transfer_data *data; - _single_field_transfer *fields; names = src_dtype->names; - names_size = PyTuple_GET_SIZE(src_dtype->names); + field_count = PyTuple_GET_SIZE(src_dtype->names); - field_count = names_size; + /* Over-allocating here: less fields may be used */ structsize = sizeof(_field_transfer_data) + field_count * sizeof(_single_field_transfer); /* Allocate the data and populate it */ - data = (_field_transfer_data *)PyArray_malloc(structsize); + _field_transfer_data *data = PyMem_Malloc(structsize); if (data == NULL) { PyErr_NoMemory(); return NPY_FAIL; } data->base.free = &_field_transfer_data_free; data->base.clone = &_field_transfer_data_clone; - fields = &data->fields; + data->field_count = 0; - field_count = 0; - for (i = 0; i < names_size; ++i) { + _single_field_transfer *field = data->fields; + for (i = 0; i < field_count; ++i) { key = PyTuple_GET_ITEM(names, i); tup = PyDict_GetItem(src_dtype->fields, key); if (!PyArg_ParseTuple(tup, "Oi|O", &src_fld_dtype, &src_offset, &title)) { - PyArray_free(data); + NPY_AUXDATA_FREE((NpyAuxData *)data); return NPY_FAIL; } if (PyDataType_REFCHK(src_fld_dtype)) { if (out_needs_api) { *out_needs_api = 1; } - if (get_decsrcref_transfer_function(0, + if (get_decref_transfer_function(0, src_stride, src_fld_dtype, - &fields[field_count].stransfer, - &fields[field_count].data, + &field->info, out_needs_api) != NPY_SUCCEED) { - for (i = 
field_count-1; i >= 0; --i) { - NPY_AUXDATA_FREE(fields[i].data); - } - PyArray_free(data); + NPY_AUXDATA_FREE((NpyAuxData *)data); return NPY_FAIL; } - fields[field_count].src_offset = src_offset; - fields[field_count].dst_offset = 0; - fields[field_count].src_itemsize = src_dtype->elsize; - field_count++; + field->src_offset = src_offset; + data->field_count++; + field++; } } - data->field_count = field_count; - *out_stransfer = &_strided_to_strided_field_transfer; *out_transferdata = (NpyAuxData *)data; @@ -3107,52 +2525,43 @@ get_decsrcref_fields_transfer_function(int aligned, typedef struct { NpyAuxData base; - /* The transfer function being wrapped */ - PyArray_StridedUnaryOp *stransfer; - NpyAuxData *transferdata; - + /* The transfer function being wrapped (could likely be stored directly) */ + NPY_cast_info wrapped; /* The src decref function if necessary */ - PyArray_StridedUnaryOp *decsrcref_stransfer; - NpyAuxData *decsrcref_transferdata; + NPY_cast_info decref_src; } _masked_wrapper_transfer_data; /* transfer data free function */ -static void _masked_wrapper_transfer_data_free(NpyAuxData *data) +static void +_masked_wrapper_transfer_data_free(NpyAuxData *data) { _masked_wrapper_transfer_data *d = (_masked_wrapper_transfer_data *)data; - NPY_AUXDATA_FREE(d->transferdata); - NPY_AUXDATA_FREE(d->decsrcref_transferdata); - PyArray_free(data); + NPY_cast_info_xfree(&d->wrapped); + NPY_cast_info_xfree(&d->decref_src); + PyMem_Free(data); } /* transfer data copy function */ -static NpyAuxData *_masked_wrapper_transfer_data_clone(NpyAuxData *data) +static NpyAuxData * +_masked_wrapper_transfer_data_clone(NpyAuxData *data) { _masked_wrapper_transfer_data *d = (_masked_wrapper_transfer_data *)data; _masked_wrapper_transfer_data *newdata; /* Allocate the data and populate it */ - newdata = (_masked_wrapper_transfer_data *)PyArray_malloc( - sizeof(_masked_wrapper_transfer_data)); + newdata = PyMem_Malloc(sizeof(*newdata)); if (newdata == NULL) { return NULL; } - 
memcpy(newdata, d, sizeof(_masked_wrapper_transfer_data)); + newdata->base = d->base; - /* Clone all the owned auxdata as well */ - if (newdata->transferdata != NULL) { - newdata->transferdata = NPY_AUXDATA_CLONE(newdata->transferdata); - if (newdata->transferdata == NULL) { - PyArray_free(newdata); - return NULL; - } + if (NPY_cast_info_copy(&newdata->wrapped, &d->wrapped) < 0) { + PyMem_Free(newdata); + return NULL; } - if (newdata->decsrcref_transferdata != NULL) { - newdata->decsrcref_transferdata = - NPY_AUXDATA_CLONE(newdata->decsrcref_transferdata); - if (newdata->decsrcref_transferdata == NULL) { - NPY_AUXDATA_FREE(newdata->transferdata); - PyArray_free(newdata); + if (d->decref_src.func != NULL) { + if (NPY_cast_info_copy(&newdata->decref_src, &d->decref_src) < 0) { + NPY_AUXDATA_FREE((NpyAuxData *)newdata); return NULL; } } @@ -3161,31 +2570,25 @@ static NpyAuxData *_masked_wrapper_transfer_data_clone(NpyAuxData *data) } static int -_strided_masked_wrapper_decsrcref_transfer_function( - char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_bool *mask, npy_intp mask_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *transferdata) +_strided_masked_wrapper_decref_transfer_function( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + npy_bool *mask, npy_intp mask_stride, + NpyAuxData *auxdata) { - _masked_wrapper_transfer_data *d = - (_masked_wrapper_transfer_data *)transferdata; - npy_intp subloopsize; - PyArray_StridedUnaryOp *unmasked_stransfer, *decsrcref_stransfer; - NpyAuxData *unmasked_transferdata, *decsrcref_transferdata; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; - unmasked_stransfer = d->stransfer; - unmasked_transferdata = d->transferdata; - decsrcref_stransfer = d->decsrcref_stransfer; - decsrcref_transferdata = d->decsrcref_transferdata; + 
_masked_wrapper_transfer_data *d = (_masked_wrapper_transfer_data *)auxdata; + npy_intp subloopsize; while (N > 0) { - /* Skip masked values, still calling decsrcref for move_references */ + /* Skip masked values, still calling decref for move_references */ mask = (npy_bool*)npy_memchr((char *)mask, 0, mask_stride, N, &subloopsize, 1); - if (decsrcref_stransfer( - NULL, 0, src, src_stride, - subloopsize, src_itemsize, decsrcref_transferdata) < 0) { + if (d->decref_src.func(&d->decref_src.context, + &src, &subloopsize, &src_stride, d->decref_src.auxdata) < 0) { return -1; } dst += subloopsize * dst_stride; @@ -3198,9 +2601,9 @@ _strided_masked_wrapper_decsrcref_transfer_function( /* Process unmasked values */ mask = (npy_bool*)npy_memchr((char *)mask, 0, mask_stride, N, &subloopsize, 0); - if (unmasked_stransfer( - dst, dst_stride, src, src_stride, - subloopsize, src_itemsize, unmasked_transferdata) < 0) { + char *wrapped_args[2] = {src, dst}; + if (d->wrapped.func(&d->wrapped.context, + wrapped_args, &subloopsize, strides, d->wrapped.auxdata) < 0) { return -1; } dst += subloopsize * dst_stride; @@ -3212,21 +2615,17 @@ _strided_masked_wrapper_decsrcref_transfer_function( static int _strided_masked_wrapper_transfer_function( - char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_bool *mask, npy_intp mask_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *transferdata) + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + npy_bool *mask, npy_intp mask_stride, + NpyAuxData *auxdata) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; - _masked_wrapper_transfer_data *d = - (_masked_wrapper_transfer_data *)transferdata; + _masked_wrapper_transfer_data *d = (_masked_wrapper_transfer_data *)auxdata; npy_intp subloopsize; - PyArray_StridedUnaryOp *unmasked_stransfer; - NpyAuxData 
*unmasked_transferdata; - - unmasked_stransfer = d->stransfer; - unmasked_transferdata = d->transferdata; while (N > 0) { /* Skip masked values */ @@ -3242,9 +2641,9 @@ _strided_masked_wrapper_transfer_function( /* Process unmasked values */ mask = (npy_bool*)npy_memchr((char *)mask, 0, mask_stride, N, &subloopsize, 0); - if (unmasked_stransfer( - dst, dst_stride, src, src_stride, - subloopsize, src_itemsize, unmasked_transferdata) < 0) { + char *wrapped_args[2] = {src, dst}; + if (d->wrapped.func(&d->wrapped.context, + wrapped_args, &subloopsize, strides, d->wrapped.auxdata) < 0) { return -1; } dst += subloopsize * dst_stride; @@ -3258,25 +2657,25 @@ _strided_masked_wrapper_transfer_function( /*************************** CLEAR SRC *******************************/ static int -_dec_src_ref_nop(char *NPY_UNUSED(dst), - npy_intp NPY_UNUSED(dst_stride), - char *NPY_UNUSED(src), npy_intp NPY_UNUSED(src_stride), - npy_intp NPY_UNUSED(N), - npy_intp NPY_UNUSED(src_itemsize), - NpyAuxData *NPY_UNUSED(data)) +_dec_src_ref_nop( + PyArrayMethod_Context *NPY_UNUSED(context), + char *const *NPY_UNUSED(args), const npy_intp *NPY_UNUSED(dimensions), + const npy_intp *NPY_UNUSED(strides), NpyAuxData *NPY_UNUSED(auxdata)) { /* NOP */ return 0; } static int -_strided_to_null_dec_src_ref_reference(char *NPY_UNUSED(dst), - npy_intp NPY_UNUSED(dst_stride), - char *src, npy_intp src_stride, - npy_intp N, - npy_intp NPY_UNUSED(src_itemsize), - NpyAuxData *NPY_UNUSED(data)) +_strided_to_null_dec_src_ref_reference( + PyArrayMethod_Context *NPY_UNUSED(context), + char *const *args, const npy_intp *dimensions, + const npy_intp *strides, NpyAuxData *NPY_UNUSED(auxdata)) { + char *src = args[0]; + npy_intp N = dimensions[0]; + npy_intp stride = strides[0]; + PyObject *src_ref = NULL; while (N > 0) { /* Release the reference in src and set it to NULL */ @@ -3285,27 +2684,38 @@ _strided_to_null_dec_src_ref_reference(char *NPY_UNUSED(dst), Py_XDECREF(src_ref); memset(src, 0, sizeof(PyObject *)); 
- src += src_stride; + src += stride; --N; } return 0; } -NPY_NO_EXPORT int -get_decsrcref_transfer_function(int aligned, +/* + * Get a function to decref. Currently, this uses a cast info slot, which + * means that the second (destination) descriptor is always set to NULL + * and generally does not have to be passed. + * Since we do not currently have an `ArrayMethod` representing this, the + * method is also set to NULL. + * + * TODO: this function should probably be moved onto the DType eventually, + * which would allow for user DTypes to include dynamically allocated + * memory or Python objects. + */ +static int +get_decref_transfer_function(int aligned, npy_intp src_stride, PyArray_Descr *src_dtype, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, + NPY_cast_info *cast_info, int *out_needs_api) { + NPY_cast_info_init(cast_info); + /* If there are no references, it's a nop */ if (!PyDataType_REFCHK(src_dtype)) { - *out_stransfer = &_dec_src_ref_nop; - *out_transferdata = NULL; - - return NPY_SUCCEED; + cast_info->func = &_dec_src_ref_nop; + cast_info->auxdata = NULL; + goto finalize; } /* If it's a single reference, it's one decref */ else if (src_dtype->type_num == NPY_OBJECT) { @@ -3313,17 +2723,14 @@ get_decsrcref_transfer_function(int aligned, *out_needs_api = 1; } - *out_stransfer = &_strided_to_null_dec_src_ref_reference; - *out_transferdata = NULL; - - return NPY_SUCCEED; + cast_info->func = &_strided_to_null_dec_src_ref_reference; + cast_info->auxdata = NULL; + goto finalize; } /* If there are subarrays, need to wrap it */ else if (PyDataType_HASSUBARRAY(src_dtype)) { PyArray_Dims src_shape = {NULL, -1}; npy_intp src_size; - PyArray_StridedUnaryOp *stransfer; - NpyAuxData *data; if (out_needs_api) { *out_needs_api = 1; @@ -3338,332 +2745,47 @@ get_decsrcref_transfer_function(int aligned, src_size = PyArray_MultiplyList(src_shape.ptr, src_shape.len); npy_free_cache_dim_obj(src_shape); - /* Get a function for contiguous src of the 
subarray type */ - if (get_decsrcref_transfer_function(aligned, - src_dtype->subarray->base->elsize, - src_dtype->subarray->base, - &stransfer, &data, - out_needs_api) != NPY_SUCCEED) { - return NPY_FAIL; - } - - if (wrap_transfer_function_n_to_n(stransfer, data, - src_stride, 0, - src_dtype->subarray->base->elsize, 0, - src_size, - out_stransfer, out_transferdata) != NPY_SUCCEED) { - NPY_AUXDATA_FREE(data); + if (get_n_to_n_transfer_function(aligned, + src_stride, 0, + src_dtype->subarray->base, NULL, 1, src_size, + &cast_info->func, &cast_info->auxdata, + out_needs_api) != NPY_SUCCEED) { return NPY_FAIL; } - return NPY_SUCCEED; + goto finalize; } /* If there are fields, need to do each field */ - else { + else if (PyDataType_HASFIELDS(src_dtype)) { if (out_needs_api) { *out_needs_api = 1; } - return get_decsrcref_fields_transfer_function(aligned, + if (get_decref_fields_transfer_function(aligned, src_stride, src_dtype, - out_stransfer, - out_transferdata, - out_needs_api); - } -} - -/********************* DTYPE COPY SWAP FUNCTION ***********************/ - -NPY_NO_EXPORT int -PyArray_GetDTypeCopySwapFn(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *dtype, - PyArray_StridedUnaryOp **outstransfer, - NpyAuxData **outtransferdata) -{ - npy_intp itemsize = dtype->elsize; - - /* If it's a custom data type, wrap its copy swap function */ - if (dtype->type_num >= NPY_NTYPES) { - *outstransfer = NULL; - wrap_copy_swap_function(aligned, - src_stride, dst_stride, - dtype, - !PyArray_ISNBO(dtype->byteorder), - outstransfer, outtransferdata); - } - /* A straight copy */ - else if (itemsize == 1 || PyArray_ISNBO(dtype->byteorder)) { - *outstransfer = PyArray_GetStridedCopyFn(aligned, - src_stride, dst_stride, - itemsize); - *outtransferdata = NULL; - } - else if (dtype->kind == 'U') { - return wrap_copy_swap_function(aligned, - src_stride, dst_stride, dtype, 1, - outstransfer, outtransferdata); - } - /* If it's not complex, one swap */ - else if 
(dtype->kind != 'c') { - *outstransfer = PyArray_GetStridedCopySwapFn(aligned, - src_stride, dst_stride, - itemsize); - *outtransferdata = NULL; + &cast_info->func, &cast_info->auxdata, + out_needs_api) < 0) { + return NPY_FAIL; + } + goto finalize; } - /* If complex, a paired swap */ else { - *outstransfer = PyArray_GetStridedCopySwapPairFn(aligned, - src_stride, dst_stride, - itemsize); - *outtransferdata = NULL; - } - - return (*outstransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; -} - -/********************* MAIN DTYPE TRANSFER FUNCTION ***********************/ - -#if !NPY_USE_NEW_CASTINGIMPL -static int -PyArray_LegacyGetDTypeTransferFunction(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - int move_references, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, - int *out_needs_api) -{ - npy_intp src_itemsize, dst_itemsize; - int src_type_num, dst_type_num; - int is_builtin; - -#if NPY_DT_DBG_TRACING - printf("Calculating dtype transfer from "); - if (PyObject_Print((PyObject *)src_dtype, stdout, 0) < 0) { - return NPY_FAIL; - } - printf(" to "); - if (PyObject_Print((PyObject *)dst_dtype, stdout, 0) < 0) { + PyErr_Format(PyExc_RuntimeError, + "Internal error, tried to fetch decref function for the " + "unsupported DType '%S'.", src_dtype); return NPY_FAIL; } - printf("\n"); -#endif - - /* - * If one of the dtypes is NULL, we give back either a src decref - * function or a dst setzero function - */ - if (dst_dtype == NULL) { - if (move_references) { - return get_decsrcref_transfer_function(aligned, - src_dtype->elsize, - src_dtype, - out_stransfer, out_transferdata, - out_needs_api); - } - else { - *out_stransfer = &_dec_src_ref_nop; - *out_transferdata = NULL; - return NPY_SUCCEED; - } - } - - src_itemsize = src_dtype->elsize; - dst_itemsize = dst_dtype->elsize; - src_type_num = src_dtype->type_num; - dst_type_num = dst_dtype->type_num; - is_builtin = src_type_num < NPY_NTYPES 
&& dst_type_num < NPY_NTYPES; - - /* Common special case - number -> number NBO cast */ - if (PyTypeNum_ISNUMBER(src_type_num) && - PyTypeNum_ISNUMBER(dst_type_num) && - PyArray_ISNBO(src_dtype->byteorder) && - PyArray_ISNBO(dst_dtype->byteorder)) { - - if (PyArray_EquivTypenums(src_type_num, dst_type_num)) { - *out_stransfer = PyArray_GetStridedCopyFn(aligned, - src_stride, dst_stride, - src_itemsize); - *out_transferdata = NULL; - return (*out_stransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; - } - else { - return get_nbo_cast_numeric_transfer_function (aligned, - src_stride, dst_stride, - src_type_num, dst_type_num, - out_stransfer, out_transferdata); - } - } - - /* - * If there are no references and the data types are equivalent and builtin, - * return a simple copy - */ - if (PyArray_EquivTypes(src_dtype, dst_dtype) && - !PyDataType_REFCHK(src_dtype) && !PyDataType_REFCHK(dst_dtype) && - ( !PyDataType_HASFIELDS(dst_dtype) || - is_dtype_struct_simple_unaligned_layout(dst_dtype)) && - is_builtin) { - /* - * We can't pass through the aligned flag because it's not - * appropriate. Consider a size-8 string, it will say it's - * aligned because strings only need alignment 1, but the - * copy function wants to know if it's alignment 8. - * - * TODO: Change align from a flag to a "best power of 2 alignment" - * which holds the strongest alignment value for all - * the data which will be used. 
- */ - *out_stransfer = PyArray_GetStridedCopyFn(0, - src_stride, dst_stride, - src_dtype->elsize); - *out_transferdata = NULL; - return NPY_SUCCEED; - } - - /* First look at the possibilities of just a copy or swap */ - if (src_itemsize == dst_itemsize && src_dtype->kind == dst_dtype->kind && - !PyDataType_HASFIELDS(src_dtype) && - !PyDataType_HASFIELDS(dst_dtype) && - !PyDataType_HASSUBARRAY(src_dtype) && - !PyDataType_HASSUBARRAY(dst_dtype) && - src_type_num != NPY_DATETIME && src_type_num != NPY_TIMEDELTA) { - /* A custom data type requires that we use its copy/swap */ - if (!is_builtin) { - /* - * If the sizes and kinds are identical, but they're different - * custom types, then get a cast function - */ - if (src_type_num != dst_type_num) { - return get_cast_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - move_references, - out_stransfer, out_transferdata, - out_needs_api); - } - else { - return wrap_copy_swap_function(aligned, - src_stride, dst_stride, - src_dtype, - PyArray_ISNBO(src_dtype->byteorder) != - PyArray_ISNBO(dst_dtype->byteorder), - out_stransfer, out_transferdata); - } - } - - /* The special types, which have no or subelement byte-order */ - switch (src_type_num) { - case NPY_UNICODE: - /* Wrap the copy swap function when swapping is necessary */ - if (PyArray_ISNBO(src_dtype->byteorder) != - PyArray_ISNBO(dst_dtype->byteorder)) { - return wrap_copy_swap_function(aligned, - src_stride, dst_stride, - src_dtype, 1, - out_stransfer, out_transferdata); - } - case NPY_VOID: - case NPY_STRING: - *out_stransfer = PyArray_GetStridedCopyFn(0, - src_stride, dst_stride, - src_itemsize); - *out_transferdata = NULL; - return NPY_SUCCEED; - case NPY_OBJECT: - if (out_needs_api) { - *out_needs_api = 1; - } - if (move_references) { - *out_stransfer = &_strided_to_strided_move_references; - *out_transferdata = NULL; - } - else { - *out_stransfer = &_strided_to_strided_copy_references; - *out_transferdata = NULL; - } - return 
NPY_SUCCEED; - } - - /* This is a straight copy */ - if (src_itemsize == 1 || PyArray_ISNBO(src_dtype->byteorder) == - PyArray_ISNBO(dst_dtype->byteorder)) { - *out_stransfer = PyArray_GetStridedCopyFn(aligned, - src_stride, dst_stride, - src_itemsize); - *out_transferdata = NULL; - return (*out_stransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; - } - /* This is a straight copy + byte swap */ - else if (!PyTypeNum_ISCOMPLEX(src_type_num)) { - *out_stransfer = PyArray_GetStridedCopySwapFn(aligned, - src_stride, dst_stride, - src_itemsize); - *out_transferdata = NULL; - return (*out_stransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; - } - /* This is a straight copy + element pair byte swap */ - else { - *out_stransfer = PyArray_GetStridedCopySwapPairFn(aligned, - src_stride, dst_stride, - src_itemsize); - *out_transferdata = NULL; - return (*out_stransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; - } - } - - /* Handle subarrays */ - if (PyDataType_HASSUBARRAY(src_dtype) || - PyDataType_HASSUBARRAY(dst_dtype)) { - return get_subarray_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - move_references, - out_stransfer, out_transferdata, - out_needs_api); - } - - /* Handle fields */ - if ((PyDataType_HASFIELDS(src_dtype) || PyDataType_HASFIELDS(dst_dtype)) && - src_type_num != NPY_OBJECT && dst_type_num != NPY_OBJECT) { - return get_fields_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - move_references, - out_stransfer, out_transferdata, - out_needs_api); - } - /* Check for different-sized strings, unicodes, or voids */ - if (src_type_num == dst_type_num) { - switch (src_type_num) { - case NPY_UNICODE: - if (PyArray_ISNBO(src_dtype->byteorder) != - PyArray_ISNBO(dst_dtype->byteorder)) { - return PyArray_GetStridedZeroPadCopyFn(0, 1, - src_stride, dst_stride, - src_dtype->elsize, dst_dtype->elsize, - out_stransfer, out_transferdata); - } - case NPY_STRING: - case NPY_VOID: - return PyArray_GetStridedZeroPadCopyFn(0, 0, - src_stride, 
dst_stride, - src_dtype->elsize, dst_dtype->elsize, - out_stransfer, out_transferdata); - } - } - - /* Otherwise a cast is necessary */ - return get_cast_transfer_function(aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - move_references, - out_stransfer, out_transferdata, - out_needs_api); + finalize: + /* Make sure all important fields are either set or cleared */ + Py_INCREF(src_dtype); + cast_info->descriptors[0] = src_dtype; + cast_info->descriptors[1] = NULL; + cast_info->context.method = NULL; + cast_info->context.caller = NULL; + return NPY_SUCCEED; } -#endif + /* * ********************* Generalized Multistep Cast ************************ @@ -3671,54 +2793,31 @@ PyArray_LegacyGetDTypeTransferFunction(int aligned, * New general purpose multiple step cast function when resolve descriptors * implies that multiple cast steps are necessary. */ -#if NPY_USE_NEW_CASTINGIMPL - -/* - * The full context passed in is never the correct context for each - * individual cast, so we have to store each of these casts information. - * Certain fields may be undefined (currently, the `caller`). 
- */ -typedef struct { - PyArray_StridedUnaryOp *stransfer; - NpyAuxData *auxdata; - PyArrayMethod_Context context; - PyArray_Descr *descriptors[2]; -} _cast_info; typedef struct { NpyAuxData base; /* Information for main cast */ - _cast_info main; + NPY_cast_info main; /* Information for input preparation cast */ - _cast_info from; + NPY_cast_info from; /* Information for output finalization cast */ - _cast_info to; + NPY_cast_info to; char *from_buffer; char *to_buffer; } _multistep_castdata; -static NPY_INLINE void -_cast_info_free(_cast_info *cast_info) -{ - NPY_AUXDATA_FREE(cast_info->auxdata); - Py_DECREF(cast_info->descriptors[0]); - Py_DECREF(cast_info->descriptors[1]); - Py_DECREF(cast_info->context.method); -} - - /* zero-padded data copy function */ static void _multistep_cast_auxdata_free(NpyAuxData *auxdata) { _multistep_castdata *data = (_multistep_castdata *)auxdata; - _cast_info_free(&data->main); - if (data->from.stransfer != NULL) { - _cast_info_free(&data->from); + NPY_cast_info_xfree(&data->main); + if (data->from.func != NULL) { + NPY_cast_info_xfree(&data->from); } - if (data->to.stransfer != NULL) { - _cast_info_free(&data->to); + if (data->to.func != NULL) { + NPY_cast_info_xfree(&data->to); } PyMem_Free(data); } @@ -3727,22 +2826,21 @@ _multistep_cast_auxdata_free(NpyAuxData *auxdata) static NpyAuxData * _multistep_cast_auxdata_clone(NpyAuxData *auxdata_old); + static NpyAuxData * -_multistep_cast_auxdata_clone_int(NpyAuxData *auxdata_old, int move_auxdata) +_multistep_cast_auxdata_clone_int(_multistep_castdata *castdata, int move_info) { - _multistep_castdata *castdata = (_multistep_castdata *)auxdata_old; - /* Round up the structure size to 16-byte boundary for the buffers */ ssize_t datasize = (sizeof(_multistep_castdata) + 15) & ~0xf; ssize_t from_buffer_offset = datasize; - if (castdata->from.stransfer != NULL) { + if (castdata->from.func != NULL) { ssize_t src_itemsize = castdata->main.context.descriptors[0]->elsize; datasize += 
NPY_LOWLEVEL_BUFFER_BLOCKSIZE * src_itemsize; datasize = (datasize + 15) & ~0xf; } ssize_t to_buffer_offset = datasize; - if (castdata->to.stransfer != NULL) { + if (castdata->to.func != NULL) { ssize_t dst_itemsize = castdata->main.context.descriptors[1]->elsize; datasize += NPY_LOWLEVEL_BUFFER_BLOCKSIZE * dst_itemsize; } @@ -3752,98 +2850,81 @@ _multistep_cast_auxdata_clone_int(NpyAuxData *auxdata_old, int move_auxdata) return NULL; } - _multistep_castdata *auxdata = (_multistep_castdata *)char_data; + _multistep_castdata *newdata = (_multistep_castdata *)char_data; - /* Copy the prepared old and fix it up internal pointers */ - memcpy(char_data, castdata, sizeof(*castdata)); + /* Fix up the basic information: */ + newdata->base.free = &_multistep_cast_auxdata_free; + newdata->base.clone = &_multistep_cast_auxdata_clone; + /* And buffer information: */ + newdata->from_buffer = char_data + from_buffer_offset; + newdata->to_buffer = char_data + to_buffer_offset; - auxdata->from_buffer = char_data + from_buffer_offset; - auxdata->to_buffer = char_data + to_buffer_offset; + /* Initialize funcs to NULL to signal no-cleanup in case of an error. */ + newdata->from.func = NULL; + newdata->to.func = NULL; - auxdata->main.context.descriptors = auxdata->main.descriptors; - auxdata->from.context.descriptors = auxdata->from.descriptors; - auxdata->to.context.descriptors = auxdata->to.descriptors; - - auxdata->base.free = &_multistep_cast_auxdata_free; - auxdata->base.clone = &_multistep_cast_auxdata_clone; - - /* Hold on to references and initialize buffers if necessary. 
*/ - Py_INCREF(auxdata->main.descriptors[0]); - Py_INCREF(auxdata->main.descriptors[1]); - Py_INCREF(auxdata->main.context.method); - - if (!move_auxdata) { - /* Ensure we don't free twice on error: */ - auxdata->from.auxdata = NULL; - auxdata->to.auxdata = NULL; + if (move_info) { + NPY_cast_info_move(&newdata->main, &castdata->main); + } + else if (NPY_cast_info_copy(&newdata->main, &castdata->main) < 0) { + goto fail; + } - if (castdata->main.auxdata != NULL) { - auxdata->main.auxdata = NPY_AUXDATA_CLONE(castdata->main.auxdata); - if (auxdata->main.auxdata == NULL) { - NPY_AUXDATA_FREE((NpyAuxData *)auxdata); - return NULL; - } + if (castdata->from.func != NULL) { + if (move_info) { + NPY_cast_info_move(&newdata->from, &castdata->from); } - } - else { - /* Clear the original, to avoid double free. */ - castdata->main.auxdata = NULL; - castdata->from.auxdata = NULL; - castdata->to.auxdata = NULL; - } - - if (castdata->from.stransfer != NULL) { - Py_INCREF(auxdata->from.descriptors[0]); - Py_INCREF(auxdata->from.descriptors[1]); - Py_INCREF(auxdata->from.context.method); - if (PyDataType_FLAGCHK(auxdata->main.descriptors[0], NPY_NEEDS_INIT)) { - memset(auxdata->from_buffer, 0, to_buffer_offset - from_buffer_offset); + else if (NPY_cast_info_copy(&newdata->from, &castdata->from) < 0) { + goto fail; } - if (!move_auxdata && castdata->from.auxdata != NULL) { - auxdata->from.auxdata = NPY_AUXDATA_CLONE(castdata->from.auxdata); - if (auxdata->from.auxdata == NULL) { - NPY_AUXDATA_FREE((NpyAuxData *)auxdata); - return NULL; - } + + if (PyDataType_FLAGCHK(newdata->main.descriptors[0], NPY_NEEDS_INIT)) { + memset(newdata->from_buffer, 0, to_buffer_offset - from_buffer_offset); } } - if (castdata->to.stransfer != NULL) { - Py_INCREF(auxdata->to.descriptors[0]); - Py_INCREF(auxdata->to.descriptors[1]); - Py_INCREF(auxdata->to.context.method); - if (PyDataType_FLAGCHK(auxdata->main.descriptors[1], NPY_NEEDS_INIT)) { - memset(auxdata->to_buffer, 0, datasize - 
to_buffer_offset); + if (castdata->to.func != NULL) { + if (move_info) { + NPY_cast_info_move(&newdata->to, &castdata->to); } - if (!move_auxdata && castdata->to.auxdata != NULL) { - auxdata->to.auxdata = NPY_AUXDATA_CLONE(castdata->to.auxdata); - if (auxdata->to.auxdata == NULL) { - NPY_AUXDATA_FREE((NpyAuxData *)auxdata); - return NULL; - } + else if (NPY_cast_info_copy(&newdata->to, &castdata->to) < 0) { + goto fail; + } + + if (PyDataType_FLAGCHK(newdata->main.descriptors[1], NPY_NEEDS_INIT)) { + memset(newdata->to_buffer, 0, datasize - to_buffer_offset); } } - return (NpyAuxData *)auxdata; + return (NpyAuxData *)newdata; + + fail: + NPY_AUXDATA_FREE((NpyAuxData *)newdata); + return NULL; } + static NpyAuxData * _multistep_cast_auxdata_clone(NpyAuxData *auxdata_old) { - return _multistep_cast_auxdata_clone_int(auxdata_old, 0); + return _multistep_cast_auxdata_clone_int( + (_multistep_castdata *)auxdata_old, 0); } static int _strided_to_strided_multistep_cast( - char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *data) + /* The context is always stored explicitly in auxdata */ + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *auxdata) { - _multistep_castdata *castdata = (_multistep_castdata *)data; + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + _multistep_castdata *castdata = (_multistep_castdata *)auxdata; + npy_intp src_stride = strides[0], dst_stride = strides[1]; char *main_src, *main_dst; - npy_intp main_src_stride, main_dst_stride, main_src_itemsize; + npy_intp main_src_stride, main_dst_stride; npy_intp block_size = NPY_LOWLEVEL_BUFFER_BLOCKSIZE; while (N > 0) { @@ -3851,25 +2932,24 @@ _strided_to_strided_multistep_cast( block_size = N; } - if (castdata->from.stransfer != NULL) { + if (castdata->from.func != NULL) { npy_intp out_stride = castdata->from.descriptors[1]->elsize; - if 
(castdata->from.stransfer( - castdata->from_buffer, out_stride, src, src_stride, - block_size, src_itemsize, castdata->from.auxdata)) { + if (castdata->from.func(&castdata->from.context, + (char *[2]){src, castdata->from_buffer}, &block_size, + (npy_intp [2]){src_stride, out_stride}, + castdata->from.auxdata) != 0) { /* TODO: Internal buffer may require cleanup on error. */ return -1; } main_src = castdata->from_buffer; main_src_stride = out_stride; - main_src_itemsize = out_stride; } else { main_src = src; main_src_stride = src_stride; - main_src_itemsize = src_itemsize; } - if (castdata->to.stransfer != NULL) { + if (castdata->to.func != NULL) { main_dst = castdata->to_buffer; main_dst_stride = castdata->main.descriptors[1]->elsize; } @@ -3878,17 +2958,19 @@ _strided_to_strided_multistep_cast( main_dst_stride = dst_stride; } - if (castdata->main.stransfer( - main_dst, main_dst_stride, main_src, main_src_stride, - block_size, main_src_itemsize, castdata->main.auxdata)) { + if (castdata->main.func(&castdata->main.context, + (char *[2]){main_src, main_dst}, &block_size, + (npy_intp [2]){main_src_stride, main_dst_stride}, + castdata->main.auxdata) != 0) { /* TODO: Internal buffer may require cleanup on error. */ return -1; } - if (castdata->to.stransfer != NULL) { - if (castdata->to.stransfer( - dst, dst_stride, main_dst, main_dst_stride, - block_size, main_dst_stride, castdata->to.auxdata)) { + if (castdata->to.func != NULL) { + if (castdata->to.func(&castdata->to.context, + (char *[2]){main_dst, dst}, &block_size, + (npy_intp [2]){main_dst_stride, dst_stride}, + castdata->to.auxdata) != 0) { return -1; } } @@ -3906,7 +2988,7 @@ _strided_to_strided_multistep_cast( * transferfunction and transferdata. 
*/ static NPY_INLINE int -init_cast_info(_cast_info *cast_info, NPY_CASTING *casting, +init_cast_info(NPY_cast_info *cast_info, NPY_CASTING *casting, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int main_step) { PyObject *meth = PyArray_GetCastingImpl( @@ -3921,12 +3003,11 @@ init_cast_info(_cast_info *cast_info, NPY_CASTING *casting, return -1; } /* Initialize the context and related data */ - cast_info->context.caller = NULL; - cast_info->stransfer = NULL; + NPY_cast_info_init(cast_info); cast_info->auxdata = NULL; + cast_info->context.caller = NULL; cast_info->context.method = (PyArrayMethodObject *)meth; - cast_info->context.descriptors = cast_info->descriptors; PyArray_DTypeMeta *dtypes[2] = {NPY_DTYPE(src_dtype), NPY_DTYPE(dst_dtype)}; PyArray_Descr *in_descr[2] = {src_dtype, dst_dtype}; @@ -3936,11 +3017,13 @@ init_cast_info(_cast_info *cast_info, NPY_CASTING *casting, if (NPY_UNLIKELY(*casting < 0)) { if (!PyErr_Occurred()) { PyErr_Format(PyExc_TypeError, - "Cannot cast data from %S to %S.", src_dtype, dst_dtype); + "Cannot cast array data from %R to %R.", src_dtype, dst_dtype); Py_DECREF(meth); return -1; } } + assert(PyArray_DescrCheck(cast_info->descriptors[0])); + assert(PyArray_DescrCheck(cast_info->descriptors[1])); if (!main_step && NPY_UNLIKELY(src_dtype != cast_info->descriptors[0] || dst_dtype != cast_info->descriptors[1])) { @@ -3955,7 +3038,7 @@ init_cast_info(_cast_info *cast_info, NPY_CASTING *casting, "the same DType class and such a cast must currently return " "the input descriptors unmodified).", src_dtype, dst_dtype); - _cast_info_free(cast_info); + NPY_cast_info_xfree(cast_info); return -1; } @@ -3964,6 +3047,23 @@ init_cast_info(_cast_info *cast_info, NPY_CASTING *casting, /* + * When there is a failure in ArrayMethod.get_loop(...) we still have + * to clean up references, but assume that `auxdata` and `func` + * have undefined values. 
+ * NOTE: This should possibly be moved, but is only necessary here + */ +static void +_clear_cast_info_after_get_loop_failure(NPY_cast_info *cast_info) +{ + /* As public API we could choose to clear auxdata != NULL */ + assert(cast_info->auxdata == NULL); + /* Set func to be non-null so that `NPY_cast_info_xfree` does not skip */ + cast_info->func = &_dec_src_ref_nop; + NPY_cast_info_xfree(cast_info); +} + + +/* * Helper for PyArray_GetDTypeTransferFunction, which fetches a single * transfer function from the each casting implementation (ArrayMethod). * May set the transfer function to NULL when the cast can be achieved using @@ -3985,25 +3085,22 @@ init_cast_info(_cast_info *cast_info, NPY_CASTING *casting, * Returns -1 on failure, 0 on success */ static int -get_transferfunction_for_descrs( +define_cast_for_descrs( int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, - int *out_needs_api) + NPY_cast_info *cast_info, int *out_needs_api) { - *out_transferdata = NULL; /* ensure NULL on error */ /* Storage for all cast info in case multi-step casting is necessary */ _multistep_castdata castdata; - /* Initialize secondary `stransfer` to indicate whether they are used: */ - castdata.to.stransfer = NULL; - castdata.from.stransfer = NULL; + /* Initialize funcs to NULL to simplify cleanup on error. */ + castdata.main.func = NULL; + castdata.to.func = NULL; + castdata.from.func = NULL; NPY_CASTING casting = -1; - int res = -1; - if (init_cast_info(&castdata.main, &casting, src_dtype, dst_dtype, 1) < 0) { + if (init_cast_info(cast_info, &casting, src_dtype, dst_dtype, 1) < 0) { return -1; } @@ -4016,17 +3113,17 @@ get_transferfunction_for_descrs( * deleted in any case. 
*/ int must_wrap = (!aligned && - (castdata.main.context.method->flags & NPY_METH_SUPPORTS_UNALIGNED) == 0); + (cast_info->context.method->flags & NPY_METH_SUPPORTS_UNALIGNED) == 0); /* * Wrap the input with an additional cast if necessary. */ - if (NPY_UNLIKELY(src_dtype != castdata.main.descriptors[0] || must_wrap)) { + if (NPY_UNLIKELY(src_dtype != cast_info->descriptors[0] || must_wrap)) { NPY_CASTING from_casting = -1; /* Cast function may not support the input, wrap if necessary */ if (init_cast_info( &castdata.from, &from_casting, - src_dtype, castdata.main.descriptors[0], 0) < 0) { + src_dtype, cast_info->descriptors[0], 0) < 0) { goto fail; } casting = PyArray_MinCastSafety(casting, from_casting); @@ -4034,22 +3131,21 @@ get_transferfunction_for_descrs( /* Prepare the actual cast (if necessary): */ if (from_casting & _NPY_CAST_IS_VIEW && !must_wrap) { /* This step is not necessary and can be skipped. */ - _cast_info_free(&castdata.from); + castdata.from.func = &_dec_src_ref_nop; /* avoid NULL */ + NPY_cast_info_xfree(&castdata.from); } else { /* Fetch the cast function and set up */ PyArrayMethod_Context *context = &castdata.from.context; - npy_intp strides[2] = {src_stride, castdata.main.descriptors[0]->elsize}; + npy_intp strides[2] = {src_stride, cast_info->descriptors[0]->elsize}; NPY_ARRAYMETHOD_FLAGS flags; if (context->method->get_strided_loop( context, aligned, move_references, strides, - &castdata.from.stransfer, &castdata.from.auxdata, &flags) < 0) { - assert(castdata.from.auxdata != NULL); - _cast_info_free(&castdata.from); - castdata.from.stransfer = NULL; /* ensure we cleanup once */ + &castdata.from.func, &castdata.from.auxdata, &flags) < 0) { + _clear_cast_info_after_get_loop_failure(&castdata.from); goto fail; } - assert(castdata.from.stransfer != NULL); + assert(castdata.from.func != NULL); *out_needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0; /* The main cast now uses a buffered input: */ @@ -4060,12 +3156,12 @@ 
get_transferfunction_for_descrs( /* * Wrap the output with an additional cast if necessary. */ - if (NPY_UNLIKELY(dst_dtype != castdata.main.descriptors[1] || must_wrap)) { + if (NPY_UNLIKELY(dst_dtype != cast_info->descriptors[1] || must_wrap)) { NPY_CASTING to_casting = -1; /* Cast function may not support the output, wrap if necessary */ if (init_cast_info( &castdata.to, &to_casting, - castdata.main.descriptors[1], dst_dtype, 0) < 0) { + cast_info->descriptors[1], dst_dtype, 0) < 0) { goto fail; } casting = PyArray_MinCastSafety(casting, to_casting); @@ -4073,27 +3169,26 @@ get_transferfunction_for_descrs( /* Prepare the actual cast (if necessary): */ if (to_casting & _NPY_CAST_IS_VIEW && !must_wrap) { /* This step is not necessary and can be skipped. */ - _cast_info_free(&castdata.to); + castdata.to.func = &_dec_src_ref_nop; /* avoid NULL */ + NPY_cast_info_xfree(&castdata.to); } else { /* Fetch the cast function and set up */ PyArrayMethod_Context *context = &castdata.to.context; - npy_intp strides[2] = {castdata.main.descriptors[1]->elsize, dst_stride}; + npy_intp strides[2] = {cast_info->descriptors[1]->elsize, dst_stride}; NPY_ARRAYMETHOD_FLAGS flags; if (context->method->get_strided_loop( context, aligned, 1 /* clear buffer */, strides, - &castdata.to.stransfer, &castdata.to.auxdata, &flags) < 0) { - assert(castdata.to.auxdata != NULL); - _cast_info_free(&castdata.to); - castdata.to.stransfer = NULL; /* ensure we cleanup once */ + &castdata.to.func, &castdata.to.auxdata, &flags) < 0) { + _clear_cast_info_after_get_loop_failure(&castdata.to); goto fail; } - assert(castdata.to.stransfer != NULL); + assert(castdata.to.func != NULL); *out_needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0; /* The main cast now uses a buffered input: */ dst_stride = strides[0]; - if (castdata.from.stransfer != NULL) { + if (castdata.from.func != NULL) { /* Both input and output are wrapped, now always aligned */ aligned = 1; } @@ -4101,48 +3196,45 @@ 
get_transferfunction_for_descrs( } /* Fetch the main cast function (with updated values) */ - PyArrayMethod_Context *context = &castdata.main.context; + PyArrayMethod_Context *context = &cast_info->context; npy_intp strides[2] = {src_stride, dst_stride}; NPY_ARRAYMETHOD_FLAGS flags; if (context->method->get_strided_loop( context, aligned, move_references, strides, - &castdata.main.stransfer, &castdata.main.auxdata, &flags) < 0) { + &cast_info->func, &cast_info->auxdata, &flags) < 0) { + _clear_cast_info_after_get_loop_failure(cast_info); goto fail; } *out_needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0; - if (castdata.from.stransfer == NULL && castdata.to.stransfer == NULL) { - /* The main step is sufficient to do the cast */ - *out_stransfer = castdata.main.stransfer; - *out_transferdata = castdata.main.auxdata; - castdata.main.auxdata = NULL; /* do not free the auxdata */ - _cast_info_free(&castdata.main); + if (castdata.from.func == NULL && castdata.to.func == NULL) { + /* Most of the time, there will be only one step required. */ return 0; } - - /* Clone the castdata as it is currently not persistently stored. 
*/ - *out_transferdata = _multistep_cast_auxdata_clone_int( - (NpyAuxData *)&castdata, 1); - if (*out_transferdata == NULL) { + /* The full cast passed in is only the "main" step, copy cast_info there */ + NPY_cast_info_move(&castdata.main, cast_info); + Py_INCREF(src_dtype); + cast_info->descriptors[0] = src_dtype; + Py_INCREF(dst_dtype); + cast_info->descriptors[1] = dst_dtype; + cast_info->context.method = NULL; + + cast_info->func = &_strided_to_strided_multistep_cast; + cast_info->auxdata = _multistep_cast_auxdata_clone_int(&castdata, 1); + if (cast_info->auxdata == NULL) { PyErr_NoMemory(); goto fail; } - *out_stransfer = &_strided_to_strided_multistep_cast; - res = 0; /* success */ + return 0; fail: - _cast_info_free(&castdata.main); - if (castdata.from.stransfer != NULL) { - _cast_info_free(&castdata.from); - } - if (castdata.to.stransfer != NULL) { - _cast_info_free(&castdata.to); - } - return res; + NPY_cast_info_xfree(&castdata.main); + NPY_cast_info_xfree(&castdata.from); + NPY_cast_info_xfree(&castdata.to); + return -1; } -#endif NPY_NO_EXPORT int @@ -4150,13 +3242,11 @@ PyArray_GetDTypeTransferFunction(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, + NPY_cast_info *cast_info, int *out_needs_api) { assert(src_dtype != NULL); -#if NPY_USE_NEW_CASTINGIMPL /* * If one of the dtypes is NULL, we give back either a src decref * function or a dst setzero function @@ -4167,54 +3257,146 @@ PyArray_GetDTypeTransferFunction(int aligned, * from this function.) 
*/ if (dst_dtype == NULL) { - if (move_references) { - return get_decsrcref_transfer_function(aligned, + assert(move_references); + return get_decref_transfer_function(aligned, src_dtype->elsize, src_dtype, - out_stransfer, out_transferdata, + cast_info, out_needs_api); - } - else { - *out_stransfer = &_dec_src_ref_nop; - *out_transferdata = NULL; - return NPY_SUCCEED; - } } - if (get_transferfunction_for_descrs(aligned, + if (define_cast_for_descrs(aligned, src_stride, dst_stride, src_dtype, dst_dtype, move_references, - out_stransfer, out_transferdata, out_needs_api) < 0) { + cast_info, out_needs_api) < 0) { return NPY_FAIL; } return NPY_SUCCEED; +} -#else - return PyArray_LegacyGetDTypeTransferFunction( - aligned, src_stride, dst_stride, src_dtype, dst_dtype, - move_references, out_stransfer, out_transferdata, out_needs_api); -#endif + +/* + * Internal wrapping of casts that have to be performed in a "single" + * function (i.e. not by the generic multi-step-cast), but rely on it + * internally. There are only three occasions where this is used: + * + * 1. Void advertises that it handles unaligned casts, but has to wrap the + * legacy cast which (probably) does not. + * 2. Datetime to unicode casts are implemented via bytes "U" vs. "S". If + * we relax the chaining rules to allow "recursive" cast chaining where + * `resolve_descriptors` can return a descriptor with a different type, + * this would become unnecessary. + * 3. Time <-> Time casts, which currently must support byte swapping, but + * have a non-trivial inner-loop (due to units) which does not support + * it. + * + * When wrapping is performed (guaranteed for `aligned == 0` and if the + * wrapped dtype is not identical to the input dtype), the wrapped transfer + * function can assume a contiguous input. + * Otherwise use `must_wrap` to ensure that wrapping occurs, which guarantees + * a contiguous, aligned, call of the wrapped function. 
+ */ +NPY_NO_EXPORT int +wrap_aligned_transferfunction( + int aligned, int must_wrap, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_Descr *src_wrapped_dtype, PyArray_Descr *dst_wrapped_dtype, + PyArrayMethod_StridedLoop **out_stransfer, + NpyAuxData **out_transferdata, int *out_needs_api) +{ + must_wrap = must_wrap | !aligned; + + _multistep_castdata castdata; + NPY_cast_info_init(&castdata.main); + NPY_cast_info_init(&castdata.from); + NPY_cast_info_init(&castdata.to); + + /* Finalize the existing cast information: */ + castdata.main.func = *out_stransfer; + *out_stransfer = NULL; + castdata.main.auxdata = *out_transferdata; + *out_transferdata = NULL; + castdata.main.context.method = NULL; + /* These are always legacy casts that only support native-byte-order: */ + Py_INCREF(src_wrapped_dtype); + castdata.main.descriptors[0] = src_wrapped_dtype; + if (castdata.main.descriptors[0] == NULL) { + castdata.main.descriptors[1] = NULL; + goto fail; + } + Py_INCREF(dst_wrapped_dtype); + castdata.main.descriptors[1] = dst_wrapped_dtype; + if (castdata.main.descriptors[1] == NULL) { + goto fail; + } + + /* + * Similar to the normal multi-step cast, but we always have to wrap + * it all up, but we can simply do this via a "recursive" call. + * TODO: This is slightly wasteful, since it unnecessarily checks casting, + * but this whole function is about corner cases, which should rather + * have an explicit implementation instead if we want performance. 
+ */ + if (must_wrap || src_wrapped_dtype != src_dtype) { + if (PyArray_GetDTypeTransferFunction(aligned, + src_stride, castdata.main.descriptors[0]->elsize, + src_dtype, castdata.main.descriptors[0], 0, + &castdata.from, out_needs_api) != NPY_SUCCEED) { + goto fail; + } + } + if (must_wrap || dst_wrapped_dtype != dst_dtype) { + if (PyArray_GetDTypeTransferFunction(aligned, + castdata.main.descriptors[1]->elsize, dst_stride, + castdata.main.descriptors[1], dst_dtype, + 1, /* clear buffer if it includes references */ + &castdata.to, out_needs_api) != NPY_SUCCEED) { + goto fail; + } + } + + *out_transferdata = _multistep_cast_auxdata_clone_int(&castdata, 1); + if (*out_transferdata == NULL) { + PyErr_NoMemory(); + goto fail; + } + *out_stransfer = &_strided_to_strided_multistep_cast; + return 0; + + fail: + NPY_cast_info_xfree(&castdata.main); + NPY_cast_info_xfree(&castdata.from); + NPY_cast_info_xfree(&castdata.to); + + return -1; } /* - * Basic version of PyArray_GetDTypeTransferFunction for legacy dtype - * support. - * It supports only wrapping the copyswapn functions and the legacy - * cast functions registered with `PyArray_RegisterCastFunc`. - * This function takes the easy way out: It does not wrap, so if wrapping - * might be necessary due to unaligned data, the user has to ensure that - * this is done and aligned is passed in as True (this is asserted only). + * This function wraps the legacy casts stored on the `dtype->f->cast` + * or registered with `PyArray_RegisterCastFunc`. + * For casts between two dtypes with the same type (within DType casts) + * it also wraps the `copyswapn` function. + * + * This function is called from `ArrayMethod.get_loop()` when a + * specialized cast function is missing. + * + * In general, the legacy cast functions do not support unaligned access, + * so an ArrayMethod using this must signal that. In a few places we do + * signal support for unaligned access (or byte swapping). 
+ * In this case `allow_wrapped=1` will wrap it into an additional multi-step + * cast as necessary. */ NPY_NO_EXPORT int -PyArray_GetLegacyDTypeTransferFunction(int aligned, +get_wrapped_legacy_cast_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, - PyArray_StridedUnaryOp **out_stransfer, + PyArrayMethod_StridedLoop **out_stransfer, NpyAuxData **out_transferdata, - int *out_needs_api, int wrap_if_unaligned) + int *out_needs_api, int allow_wrapped) { /* Note: We ignore `needs_wrap`; needs-wrap is handled by another cast */ int needs_wrap = 0; @@ -4224,11 +3406,9 @@ PyArray_GetLegacyDTypeTransferFunction(int aligned, * This is a cast within the same dtype. For legacy user-dtypes, * it is always valid to handle this using the copy swap function. */ - return wrap_copy_swap_function(aligned, - src_stride, dst_stride, - src_dtype, - PyArray_ISNBO(src_dtype->byteorder) != - PyArray_ISNBO(dst_dtype->byteorder), + return wrap_copy_swap_function(src_dtype, + PyDataType_ISNOTSWAPPED(src_dtype) != + PyDataType_ISNOTSWAPPED(dst_dtype), out_stransfer, out_transferdata); } @@ -4246,7 +3426,7 @@ PyArray_GetLegacyDTypeTransferFunction(int aligned, if (!needs_wrap) { return 0; } - if (NPY_UNLIKELY(!wrap_if_unaligned)) { + if (NPY_UNLIKELY(!allow_wrapped)) { /* * Legacy casts do not support unaligned which requires wrapping. * However, normally we ensure that wrapping happens before calling @@ -4257,23 +3437,35 @@ PyArray_GetLegacyDTypeTransferFunction(int aligned, "probably it incorrectly flagged support for unaligned data. " "(aligned passed to discovery is %d)", src_dtype, dst_dtype, aligned); - return -1; + goto fail; } /* * If we are here, use the legacy code to wrap the above cast (which * does not support unaligned data) into copyswapn. 
*/ - NpyAuxData *castdata = *out_transferdata; - *out_transferdata = NULL; - if (wrap_aligned_contig_transfer_function_with_copyswapn( - aligned, src_stride, dst_stride, src_dtype, dst_dtype, - out_stransfer, out_transferdata, out_needs_api, - *out_stransfer, castdata) == NPY_FAIL) { - NPY_AUXDATA_FREE(castdata); - return -1; + PyArray_Descr *src_wrapped_dtype = ensure_dtype_nbo(src_dtype); + if (src_wrapped_dtype == NULL) { + goto fail; } - return 0; + PyArray_Descr *dst_wrapped_dtype = ensure_dtype_nbo(dst_dtype); + if (dst_wrapped_dtype == NULL) { + goto fail; + } + int res = wrap_aligned_transferfunction( + aligned, 1, /* We assume wrapped is contiguous here */ + src_stride, dst_stride, + src_dtype, dst_dtype, + src_wrapped_dtype, dst_wrapped_dtype, + out_stransfer, out_transferdata, out_needs_api); + Py_DECREF(src_wrapped_dtype); + Py_DECREF(dst_wrapped_dtype); + return res; + + fail: + NPY_AUXDATA_FREE(*out_transferdata); + *out_transferdata = NULL; + return -1; } @@ -4286,24 +3478,27 @@ PyArray_GetMaskedDTypeTransferFunction(int aligned, PyArray_Descr *dst_dtype, PyArray_Descr *mask_dtype, int move_references, - PyArray_MaskedStridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata, + NPY_cast_info *cast_info, int *out_needs_api) { - PyArray_StridedUnaryOp *stransfer = NULL; - NpyAuxData *transferdata = NULL; - _masked_wrapper_transfer_data *data; + NPY_cast_info_init(cast_info); - /* TODO: Add struct-based mask_dtype support later */ if (mask_dtype->type_num != NPY_BOOL && mask_dtype->type_num != NPY_UINT8) { PyErr_SetString(PyExc_TypeError, - "Only bool and uint8 masks are supported at the moment, " - "structs of bool/uint8 is planned for the future"); + "Only bool and uint8 masks are supported."); return NPY_FAIL; } - /* TODO: Special case some important cases so they're fast */ + /* Create the wrapper function's auxdata */ + _masked_wrapper_transfer_data *data; + data = PyMem_Malloc(sizeof(_masked_wrapper_transfer_data)); + if (data == NULL) { + 
PyErr_NoMemory(); + return NPY_FAIL; + } + data->base.free = &_masked_wrapper_transfer_data_free; + data->base.clone = &_masked_wrapper_transfer_data_clone; /* Fall back to wrapping a non-masked transfer function */ assert(dst_dtype != NULL); @@ -4311,47 +3506,38 @@ PyArray_GetMaskedDTypeTransferFunction(int aligned, src_stride, dst_stride, src_dtype, dst_dtype, move_references, - &stransfer, &transferdata, + &data->wrapped, out_needs_api) != NPY_SUCCEED) { + PyMem_Free(data); return NPY_FAIL; } - /* Create the wrapper function's auxdata */ - data = (_masked_wrapper_transfer_data *)PyArray_malloc( - sizeof(_masked_wrapper_transfer_data)); - if (data == NULL) { - PyErr_NoMemory(); - NPY_AUXDATA_FREE(transferdata); - return NPY_FAIL; - } - - /* Fill in the auxdata object */ - memset(data, 0, sizeof(_masked_wrapper_transfer_data)); - data->base.free = &_masked_wrapper_transfer_data_free; - data->base.clone = &_masked_wrapper_transfer_data_clone; - - data->stransfer = stransfer; - data->transferdata = transferdata; - /* If the src object will need a DECREF, get a function to handle that */ if (move_references && PyDataType_REFCHK(src_dtype)) { - if (get_decsrcref_transfer_function(aligned, + if (get_decref_transfer_function(aligned, src_stride, src_dtype, - &data->decsrcref_stransfer, - &data->decsrcref_transferdata, + &data->decref_src, out_needs_api) != NPY_SUCCEED) { NPY_AUXDATA_FREE((NpyAuxData *)data); return NPY_FAIL; } - - *out_stransfer = &_strided_masked_wrapper_decsrcref_transfer_function; + cast_info->func = (PyArrayMethod_StridedLoop *) + &_strided_masked_wrapper_decref_transfer_function; } else { - *out_stransfer = &_strided_masked_wrapper_transfer_function; - } - - *out_transferdata = (NpyAuxData *)data; + NPY_cast_info_init(&data->decref_src); + cast_info->func = (PyArrayMethod_StridedLoop *) + &_strided_masked_wrapper_transfer_function; + } + cast_info->auxdata = (NpyAuxData *)data; + /* The context is almost unused, but clear it for cleanup. 
*/ + Py_INCREF(src_dtype); + cast_info->descriptors[0] = src_dtype; + Py_INCREF(dst_dtype); + cast_info->descriptors[1] = dst_dtype; + cast_info->context.caller = NULL; + cast_info->context.method = NULL; return NPY_SUCCEED; } @@ -4363,8 +3549,6 @@ PyArray_CastRawArrays(npy_intp count, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references) { - PyArray_StridedUnaryOp *stransfer = NULL; - NpyAuxData *transferdata = NULL; int aligned = 1, needs_api = 0; /* Make sure the copy is reasonable */ @@ -4388,21 +3572,23 @@ PyArray_CastRawArrays(npy_intp count, src_dtype->alignment); /* Get the function to do the casting */ + NPY_cast_info cast_info; if (PyArray_GetDTypeTransferFunction(aligned, src_stride, dst_stride, src_dtype, dst_dtype, move_references, - &stransfer, &transferdata, + &cast_info, &needs_api) != NPY_SUCCEED) { return NPY_FAIL; } /* Cast */ - stransfer(dst, dst_stride, src, src_stride, count, - src_dtype->elsize, transferdata); + char *args[2] = {src, dst}; + npy_intp strides[2] = {src_stride, dst_stride}; + cast_info.func(&cast_info.context, args, &count, strides, cast_info.auxdata); /* Cleanup */ - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); /* If needs_api was set to 1, it may have raised a Python exception */ return (needs_api && PyErr_Occurred()) ? NPY_FAIL : NPY_SUCCEED; diff --git a/numpy/core/src/multiarray/dtype_transfer.h b/numpy/core/src/multiarray/dtype_transfer.h index c61119bfa..e29ac40b8 100644 --- a/numpy/core/src/multiarray/dtype_transfer.h +++ b/numpy/core/src/multiarray/dtype_transfer.h @@ -1,16 +1,139 @@ #ifndef _NPY_DTYPE_TRANSFER_H #define _NPY_DTYPE_TRANSFER_H -#include "lowlevel_strided_loops.h" #include "array_method.h" +/* + * More than for most functions, cast information needs to be stored in + * a few places. Most importantly, in many cases we need to chain or wrap + * casts (e.g. structured dtypes). 
+ * + * This struct provides a place to store all necessary information as + * compactly as possible. It must be used with the inline functions below + * to ensure correct setup and teardown. + * + * In general, the casting machinery currently handles the correct set up + * of the struct. + */ +typedef struct { + PyArrayMethod_StridedLoop *func; + NpyAuxData *auxdata; + PyArrayMethod_Context context; + /* Storage to be linked from "context" */ + PyArray_Descr *descriptors[2]; +} NPY_cast_info; + + +/* + * Create a new cast-info struct with cast_info->context.descriptors linked. + * Compilers should inline this to ensure the whole struct is not actually + * copied. + * If set up otherwise, func must be NULL'ed to indicate no-cleanup necessary. + */ +static NPY_INLINE void +NPY_cast_info_init(NPY_cast_info *cast_info) +{ + cast_info->func = NULL; /* mark as uninitialized. */ + /* + * Support for auxdata being unchanged, in the future, we might add + * a scratch space to `NPY_cast_info` and link to that instead. + */ + cast_info->auxdata = NULL; + cast_info->context.descriptors = cast_info->descriptors; + + // TODO: Probably delete this again; maybe create a new minimal init macro + cast_info->context.caller = NULL; +} + + +/* + * Frees all references and data held inside the struct (not the struct). + * First checks whether `cast_info.func == NULL`, and assumes it is + * uninitialized in that case. + */ +static NPY_INLINE void +NPY_cast_info_xfree(NPY_cast_info *cast_info) +{ + if (cast_info->func == NULL) { + return; + } + assert(cast_info->context.descriptors == cast_info->descriptors); + NPY_AUXDATA_FREE(cast_info->auxdata); + Py_DECREF(cast_info->descriptors[0]); + Py_XDECREF(cast_info->descriptors[1]); + Py_XDECREF(cast_info->context.method); + cast_info->func = NULL; +} + + +/* + * Move the data from `original` to `cast_info`. Original is cleared + * (its func set to NULL). 
+ */ +static NPY_INLINE void +NPY_cast_info_move(NPY_cast_info *cast_info, NPY_cast_info *original) +{ + *cast_info = *original; + /* Fix internal pointer: */ + cast_info->context.descriptors = cast_info->descriptors; + /* Mark original to not be cleaned up: */ + original->func = NULL; +} + +/* + * Finalize a copy (INCREF+auxdata clone). This assumes a previous `memcpy` + * of the struct. + * NOTE: It is acceptable to call this with the same struct if the struct + * has been filled by a valid memcpy from an initialized one. + */ +static NPY_INLINE int +NPY_cast_info_copy(NPY_cast_info *cast_info, NPY_cast_info *original) +{ + cast_info->context.descriptors = cast_info->descriptors; + + assert(original->func != NULL); + cast_info->func = original->func; + cast_info->descriptors[0] = original->descriptors[0]; + Py_XINCREF(cast_info->descriptors[0]); + cast_info->descriptors[1] = original->descriptors[1]; + Py_XINCREF(cast_info->descriptors[1]); + cast_info->context.caller = original->context.caller; + Py_XINCREF(cast_info->context.caller); + cast_info->context.method = original->context.method; + Py_XINCREF(cast_info->context.method); + if (original->auxdata == NULL) { + cast_info->auxdata = NULL; + return 0; + } + cast_info->auxdata = NPY_AUXDATA_CLONE(original->auxdata); + if (NPY_UNLIKELY(cast_info->auxdata == NULL)) { + /* No need for cleanup, everything but auxdata is initialized fine. 
*/ + return -1; + } + return 0; +} + + +NPY_NO_EXPORT int +_strided_to_strided_move_references( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(auxdata)); + +NPY_NO_EXPORT int +_strided_to_strided_copy_references( + PyArrayMethod_Context *NPY_UNUSED(context), char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(auxdata)); + + NPY_NO_EXPORT int any_to_object_get_loop( PyArrayMethod_Context *context, int aligned, int move_references, npy_intp *strides, - PyArray_StridedUnaryOp **out_loop, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags); @@ -19,9 +142,64 @@ object_to_any_get_loop( PyArrayMethod_Context *context, int NPY_UNUSED(aligned), int move_references, npy_intp *NPY_UNUSED(strides), - PyArray_StridedUnaryOp **out_loop, + PyArrayMethod_StridedLoop **out_loop, NpyAuxData **out_transferdata, NPY_ARRAYMETHOD_FLAGS *flags); +NPY_NO_EXPORT int +wrap_aligned_transferfunction( + int aligned, int must_wrap, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_Descr *src_wrapped_dtype, PyArray_Descr *dst_wrapped_dtype, + PyArrayMethod_StridedLoop **out_stransfer, + NpyAuxData **out_transferdata, int *out_needs_api); + + +NPY_NO_EXPORT int +get_nbo_cast_datetime_transfer_function(int aligned, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArrayMethod_StridedLoop **out_stransfer, + NpyAuxData **out_transferdata); + +NPY_NO_EXPORT int +get_nbo_datetime_to_string_transfer_function( + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArrayMethod_StridedLoop **out_stransfer, + NpyAuxData **out_transferdata); + +NPY_NO_EXPORT int +get_nbo_string_to_datetime_transfer_function( + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArrayMethod_StridedLoop **out_stransfer, + NpyAuxData **out_transferdata); + 
+NPY_NO_EXPORT int +get_datetime_to_unicode_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArrayMethod_StridedLoop **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api); + +NPY_NO_EXPORT int +get_unicode_to_datetime_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArrayMethod_StridedLoop **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api); + +/* Creates a wrapper around copyswapn or legacy cast functions */ +NPY_NO_EXPORT int +get_wrapped_legacy_cast_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArrayMethod_StridedLoop **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api, int allow_wrapped); + + #endif /* _NPY_DTYPE_TRANSFER_H */ diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src index 04682d1ed..631042dae 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src @@ -116,11 +116,20 @@ static int #if @is_aligned@ && @is_swap@ == 0 && @elsize@ <= NPY_SIZEOF_INTP NPY_GCC_UNROLL_LOOPS #endif -@prefix@_@oper@_size@elsize@(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp NPY_UNUSED(src_itemsize), - NpyAuxData *NPY_UNUSED(data)) +@prefix@_@oper@_size@elsize@( + PyArrayMethod_Context *context, char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(auxdata)) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; +#if !@src_contig@ + npy_intp src_stride = strides[0]; +#endif +#if !@dst_contig@ + npy_intp dst_stride = strides[1]; +#endif + #if @is_aligned@ /* sanity check */ assert(N == 0 || npy_is_aligned(dst, 
_UINT_ALIGN(@type@))); @@ -186,12 +195,17 @@ static int */ #if (@src_contig@ == 0) && @is_aligned@ static NPY_GCC_OPT_3 int -@prefix@_@oper@_size@elsize@_srcstride0(char *dst, - npy_intp dst_stride, - char *src, npy_intp NPY_UNUSED(src_stride), - npy_intp N, npy_intp NPY_UNUSED(src_itemsize), - NpyAuxData *NPY_UNUSED(data)) +@prefix@_@oper@_size@elsize@_srcstride0( + PyArrayMethod_Context *context, char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(auxdata)) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; +#if !@dst_contig@ + npy_intp dst_stride = strides[1]; +#endif + #if @elsize@ != 16 # if !(@elsize@ == 1 && @dst_contig@) @type@ temp; @@ -252,11 +266,16 @@ static NPY_GCC_OPT_3 int /**end repeat**/ static int -_strided_to_strided(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *NPY_UNUSED(data)) +_strided_to_strided( + PyArrayMethod_Context *context, char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(data)) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + npy_intp src_itemsize = context->descriptors[0]->elsize; + while (N > 0) { memmove(dst, src, src_itemsize); dst += dst_stride; @@ -266,12 +285,22 @@ _strided_to_strided(char *dst, npy_intp dst_stride, return 0; } +/* + * NOTE: This function is currently unused. It would currently be used for + * builtin dtypes that have an elsize other than 2, 4, 8, or 16 bytes. + * Since unicode and complex swap differently, no such dtype exists. 
+ */ static int -_swap_strided_to_strided(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *NPY_UNUSED(data)) +_swap_strided_to_strided( + PyArrayMethod_Context *context, char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(data)) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + npy_intp src_itemsize = context->descriptors[0]->elsize; + char *a, *b, c; while (N > 0) { @@ -293,11 +322,16 @@ _swap_strided_to_strided(char *dst, npy_intp dst_stride, } static int -_swap_pair_strided_to_strided(char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp src_itemsize, - NpyAuxData *NPY_UNUSED(data)) +_swap_pair_strided_to_strided( + PyArrayMethod_Context *context, char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(data)) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_stride = strides[0], dst_stride = strides[1]; + npy_intp src_itemsize = context->descriptors[0]->elsize; + char *a, *b, c; npy_intp itemsize_half = src_itemsize / 2; @@ -329,17 +363,21 @@ _swap_pair_strided_to_strided(char *dst, npy_intp dst_stride, } static int -_contig_to_contig(char *dst, npy_intp NPY_UNUSED(dst_stride), - char *src, npy_intp NPY_UNUSED(src_stride), - npy_intp N, npy_intp src_itemsize, - NpyAuxData *NPY_UNUSED(data)) +_contig_to_contig( + PyArrayMethod_Context *context, char *const *args, + const npy_intp *dimensions, const npy_intp *NPY_UNUSED(strides), + NpyAuxData *NPY_UNUSED(data)) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; + npy_intp src_itemsize = context->descriptors[0]->elsize; + memmove(dst, src, src_itemsize*N); return 0; } -NPY_NO_EXPORT PyArray_StridedUnaryOp * +NPY_NO_EXPORT PyArrayMethod_StridedLoop * PyArray_GetStridedCopyFn(int aligned, 
npy_intp src_stride, npy_intp dst_stride, npy_intp itemsize) { @@ -493,7 +531,7 @@ PyArray_GetStridedCopyFn(int aligned, npy_intp src_stride, * #not_pair = 1, 0# */ -NPY_NO_EXPORT PyArray_StridedUnaryOp * +NPY_NO_EXPORT PyArrayMethod_StridedLoop * @function@(int aligned, npy_intp src_stride, npy_intp dst_stride, npy_intp itemsize) { @@ -797,11 +835,16 @@ NPY_NO_EXPORT PyArray_StridedUnaryOp * static NPY_GCC_OPT_3 int @prefix@_cast_@name1@_to_@name2@( - char *dst, npy_intp dst_stride, - char *src, npy_intp src_stride, - npy_intp N, npy_intp NPY_UNUSED(src_itemsize), - NpyAuxData *NPY_UNUSED(data)) + PyArrayMethod_Context *context, char *const *args, + const npy_intp *dimensions, const npy_intp *strides, + NpyAuxData *NPY_UNUSED(data)) { + npy_intp N = dimensions[0]; + char *src = args[0], *dst = args[1]; +#if !@contig@ + npy_intp src_stride = strides[0], dst_stride = strides[1]; +#endif + #if @is_complex1@ _TYPE1 src_value[2]; #elif !@aligned@ @@ -896,7 +939,7 @@ static NPY_GCC_OPT_3 int /**end repeat**/ -NPY_NO_EXPORT PyArray_StridedUnaryOp * +NPY_NO_EXPORT PyArrayMethod_StridedLoop * PyArray_GetStridedNumericCastFn(int aligned, npy_intp src_stride, npy_intp dst_stride, int src_type_num, int dst_type_num) @@ -987,8 +1030,7 @@ PyArray_TransferNDimToStrided(npy_intp ndim, npy_intp const *coords, npy_intp coords_inc, npy_intp const *shape, npy_intp shape_inc, npy_intp count, npy_intp src_itemsize, - PyArray_StridedUnaryOp *stransfer, - NpyAuxData *data) + NPY_cast_info *cast_info) { npy_intp i, M, N, coord0, shape0, src_stride0, coord1, shape1, src_stride1; @@ -997,12 +1039,17 @@ PyArray_TransferNDimToStrided(npy_intp ndim, shape0 = shape[0]; src_stride0 = src_strides[0]; N = shape0 - coord0; + + npy_intp strides[2] = {src_stride0, dst_stride}; + + char *args[2] = {src, dst}; if (N >= count) { - return stransfer(dst, dst_stride, src, src_stride0, - count, src_itemsize, data); + return cast_info->func(&cast_info->context, + args, &count, strides, cast_info->auxdata); } 
- int res = stransfer(dst, dst_stride, src, src_stride0, - N, src_itemsize, data); + int res = cast_info->func(&cast_info->context, + args, &N, strides, cast_info->auxdata); + if (res < 0) { return -1; } @@ -1024,13 +1071,14 @@ PyArray_TransferNDimToStrided(npy_intp ndim, M = (shape1 - coord1 - 1); N = shape0*M; for (i = 0; i < M; ++i) { + args[0] = src; args[1] = dst; if (shape0 >= count) { - return stransfer(dst, dst_stride, src, src_stride0, - count, src_itemsize, data); + return cast_info->func(&cast_info->context, + args, &count, strides, cast_info->auxdata); } else { - res = stransfer(dst, dst_stride, src, src_stride0, - shape0, src_itemsize, data); + res = cast_info->func(&cast_info->context, + args, &shape0, strides, cast_info->auxdata); if (res < 0) { return -1; } @@ -1087,13 +1135,14 @@ PyArray_TransferNDimToStrided(npy_intp ndim, /* A loop for dimensions 0 and 1 */ for (i = 0; i < shape1; ++i) { + args[0] = src; args[1] = dst; if (shape0 >= count) { - return stransfer(dst, dst_stride, src, src_stride0, - count, src_itemsize, data); + return cast_info->func(&cast_info->context, + args, &count, strides, cast_info->auxdata); } else { - res = stransfer(dst, dst_stride, src, src_stride0, - shape0, src_itemsize, data); + res = cast_info->func(&cast_info->context, + args, &shape0, strides, cast_info->auxdata); if (res < 0) { return -1; } @@ -1114,8 +1163,7 @@ PyArray_TransferStridedToNDim(npy_intp ndim, npy_intp const *coords, npy_intp coords_inc, npy_intp const *shape, npy_intp shape_inc, npy_intp count, npy_intp src_itemsize, - PyArray_StridedUnaryOp *stransfer, - NpyAuxData *data) + NPY_cast_info *cast_info) { npy_intp i, M, N, coord0, shape0, dst_stride0, coord1, shape1, dst_stride1; @@ -1124,12 +1172,16 @@ PyArray_TransferStridedToNDim(npy_intp ndim, shape0 = shape[0]; dst_stride0 = dst_strides[0]; N = shape0 - coord0; + + npy_intp strides[2] = {src_stride, dst_stride0}; + + char *args[2] = {src, dst}; if (N >= count) { - return stransfer(dst, dst_stride0, 
src, src_stride, - count, src_itemsize, data); + return cast_info->func(&cast_info->context, + args, &count, strides, cast_info->auxdata); } - int res = stransfer(dst, dst_stride0, src, src_stride, - N, src_itemsize, data); + int res = cast_info->func(&cast_info->context, + args, &N, strides, cast_info->auxdata); if (res < 0) { return -1; } @@ -1151,13 +1203,14 @@ PyArray_TransferStridedToNDim(npy_intp ndim, M = (shape1 - coord1 - 1); N = shape0*M; for (i = 0; i < M; ++i) { + args[0] = src; args[1] = dst; if (shape0 >= count) { - return stransfer(dst, dst_stride0, src, src_stride, - count, src_itemsize, data); + return cast_info->func(&cast_info->context, + args, &count, strides, cast_info->auxdata); } else { - res = stransfer(dst, dst_stride0, src, src_stride, - shape0, src_itemsize, data); + res = cast_info->func(&cast_info->context, + args, &shape0, strides, cast_info->auxdata); if (res < 0) { return -1; } @@ -1214,13 +1267,14 @@ PyArray_TransferStridedToNDim(npy_intp ndim, /* A loop for dimensions 0 and 1 */ for (i = 0; i < shape1; ++i) { + args[0] = src; args[1] = dst; if (shape0 >= count) { - return stransfer(dst, dst_stride0, src, src_stride, - count, src_itemsize, data); + return cast_info->func(&cast_info->context, + args, &count, strides, cast_info->auxdata); } else { - res = stransfer(dst, dst_stride0, src, src_stride, - shape0, src_itemsize, data); + res = cast_info->func(&cast_info->context, + args, &shape0, strides, cast_info->auxdata); if (res < 0) { return -1; } @@ -1242,26 +1296,27 @@ PyArray_TransferMaskedStridedToNDim(npy_intp ndim, npy_intp const *coords, npy_intp coords_inc, npy_intp const *shape, npy_intp shape_inc, npy_intp count, npy_intp src_itemsize, - PyArray_MaskedStridedUnaryOp *stransfer, - NpyAuxData *data) + NPY_cast_info *cast_info) { npy_intp i, M, N, coord0, shape0, dst_stride0, coord1, shape1, dst_stride1; + PyArray_MaskedStridedUnaryOp *stransfer = + (PyArray_MaskedStridedUnaryOp*)cast_info->func; /* Finish off dimension 0 */ 
coord0 = coords[0]; shape0 = shape[0]; dst_stride0 = dst_strides[0]; N = shape0 - coord0; + + npy_intp strides[2] = {src_stride, dst_stride0}; + + char *args[2] = {src, dst}; if (N >= count) { - return stransfer( - dst, dst_stride0, src, src_stride, - mask, mask_stride, - count, src_itemsize, data); + return stransfer(&cast_info->context, + args, &count, strides, mask, mask_stride, cast_info->auxdata); } - int res = stransfer( - dst, dst_stride0, src, src_stride, - mask, mask_stride, - N, src_itemsize, data); + int res = stransfer(&cast_info->context, + args, &count, strides, mask, mask_stride, cast_info->auxdata); if (res < 0) { return -1; } @@ -1284,17 +1339,16 @@ PyArray_TransferMaskedStridedToNDim(npy_intp ndim, M = (shape1 - coord1 - 1); N = shape0*M; for (i = 0; i < M; ++i) { + args[0] = src; args[1] = dst; if (shape0 >= count) { - return stransfer( - dst, dst_stride0, src, src_stride, - mask, mask_stride, - count, src_itemsize, data); + return stransfer(&cast_info->context, + args, &count, strides, + mask, mask_stride, cast_info->auxdata); } else { - int res = stransfer( - dst, dst_stride0, src, src_stride, - mask, mask_stride, - shape0, src_itemsize, data); + int res = stransfer(&cast_info->context, + args, &shape0, strides, + mask, mask_stride, cast_info->auxdata); if (res < 0) { return -1; } @@ -1352,17 +1406,16 @@ PyArray_TransferMaskedStridedToNDim(npy_intp ndim, /* A loop for dimensions 0 and 1 */ for (i = 0; i < shape1; ++i) { + args[0] = src; args[1] = dst; if (shape0 >= count) { - return stransfer( - dst, dst_stride0, src, src_stride, - mask, mask_stride, - count, src_itemsize, data); + return stransfer(&cast_info->context, + args, &count, strides, mask, + mask_stride, cast_info->auxdata); } else { - res = stransfer( - dst, dst_stride0, src, src_stride, - mask, mask_stride, - shape0, src_itemsize, data); + int res = stransfer(&cast_info->context, + args, &shape0, strides, + mask, mask_stride, cast_info->auxdata); if (res < 0) { return -1; } @@ 
-1659,22 +1712,15 @@ mapiter_@name@(PyArrayMapIterObject *mit) npy_intp reset_offsets[2] = {0, 0}; /* Use strided transfer functions for the inner loop */ - PyArray_StridedUnaryOp *stransfer = NULL; - NpyAuxData *transferdata = NULL; npy_intp fixed_strides[2]; -#if @isget@ - npy_intp src_itemsize = PyArray_ITEMSIZE(array); -#else - npy_intp src_itemsize = PyArray_ITEMSIZE(mit->extra_op); -#endif - /* * Get a dtype transfer function, since there are no * buffers, this is safe. */ NpyIter_GetInnerFixedStrideArray(mit->subspace_iter, fixed_strides); + NPY_cast_info cast_info; if (PyArray_GetDTypeTransferFunction(is_aligned, #if @isget@ fixed_strides[0], fixed_strides[1], @@ -1684,7 +1730,7 @@ mapiter_@name@(PyArrayMapIterObject *mit) PyArray_DESCR(mit->extra_op), PyArray_DESCR(array), #endif 0, - &stransfer, &transferdata, + &cast_info, &needs_api) != NPY_SUCCEED) { return -1; } @@ -1721,7 +1767,7 @@ mapiter_@name@(PyArrayMapIterObject *mit) #if @isget@ && @one_iter@ if (check_and_adjust_index(&indval, fancy_dims[i], iteraxis, _save) < 0 ) { - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); return -1; } #else @@ -1753,7 +1799,7 @@ mapiter_@name@(PyArrayMapIterObject *mit) &errmsg)) { NPY_END_THREADS; PyErr_SetString(PyExc_ValueError, errmsg); - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); return -1; } if (is_subiter_trivial != 0) { @@ -1783,7 +1829,7 @@ mapiter_@name@(PyArrayMapIterObject *mit) * not at all... 
*/ if (needs_api && PyErr_Occurred()) { - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); return -1; } #endif @@ -1791,21 +1837,21 @@ mapiter_@name@(PyArrayMapIterObject *mit) do { #if @isget@ - if (NPY_UNLIKELY(stransfer( - subspace_ptrs[1], subspace_strides[1], - subspace_ptrs[0], subspace_strides[0], - *counter, src_itemsize, transferdata) < 0)) { + if (NPY_UNLIKELY(cast_info.func(&cast_info.context, + subspace_ptrs, counter, subspace_strides, + cast_info.auxdata) < 0)) { NPY_END_THREADS; - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); return -1; } #else - if (NPY_UNLIKELY(stransfer( - subspace_ptrs[0], subspace_strides[0], - subspace_ptrs[1], subspace_strides[1], - *counter, src_itemsize, transferdata) < 0)) { + /* The operand order is reversed here */ + char *args[2] = {subspace_ptrs[1], subspace_ptrs[0]}; + npy_intp strides[2] = {subspace_strides[1], subspace_strides[0]}; + if (NPY_UNLIKELY(cast_info.func(&cast_info.context, + args, counter, strides, cast_info.auxdata) < 0)) { NPY_END_THREADS; - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); return -1; } #endif @@ -1817,7 +1863,7 @@ mapiter_@name@(PyArrayMapIterObject *mit) } /**end repeat1**/ - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); } return 0; } diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index dd6d6630a..41311b03f 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -1047,8 +1047,6 @@ array_boolean_subscript(PyArrayObject *self, PyArrayObject *op[2] = {self, bmask}; npy_uint32 flags, op_flags[2]; npy_intp fixed_strides[3]; - PyArray_StridedUnaryOp *stransfer = NULL; - NpyAuxData *transferdata = NULL; NpyIter_IterNextFunc *iternext; npy_intp innersize, *innerstrides; @@ -1073,12 +1071,13 @@ array_boolean_subscript(PyArrayObject *self, /* Get a dtype transfer function */ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); + 
NPY_cast_info cast_info; if (PyArray_GetDTypeTransferFunction( IsUintAligned(self) && IsAligned(self), fixed_strides[0], itemsize, dtype, dtype, 0, - &stransfer, &transferdata, + &cast_info, &needs_api) != NPY_SUCCEED) { Py_DECREF(ret); NpyIter_Deallocate(iter); @@ -1090,7 +1089,7 @@ array_boolean_subscript(PyArrayObject *self, if (iternext == NULL) { Py_DECREF(ret); NpyIter_Deallocate(iter); - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); return NULL; } @@ -1101,6 +1100,8 @@ array_boolean_subscript(PyArrayObject *self, self_stride = innerstrides[0]; bmask_stride = innerstrides[1]; + npy_intp strides[2] = {self_stride, itemsize}; + int res = 0; do { innersize = *NpyIter_GetInnerLoopSizePtr(iter); @@ -1116,8 +1117,9 @@ array_boolean_subscript(PyArrayObject *self, /* Process unmasked values */ bmask_data = npy_memchr(bmask_data, 0, bmask_stride, innersize, &subloopsize, 0); - res = stransfer(ret_data, itemsize, self_data, self_stride, - subloopsize, itemsize, transferdata); + char *args[2] = {self_data, ret_data}; + res = cast_info.func(&cast_info.context, + args, &subloopsize, strides, cast_info.auxdata); if (res < 0) { break; } @@ -1132,7 +1134,7 @@ array_boolean_subscript(PyArrayObject *self, if (!NpyIter_Deallocate(iter)) { res = -1; } - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); if (res < 0) { /* Should be practically impossible, since there is no cast */ Py_DECREF(ret); @@ -1174,7 +1176,7 @@ NPY_NO_EXPORT int array_assign_boolean_subscript(PyArrayObject *self, PyArrayObject *bmask, PyArrayObject *v, NPY_ORDER order) { - npy_intp size, src_itemsize, v_stride; + npy_intp size, v_stride; char *v_data; int needs_api = 0; npy_intp bmask_size; @@ -1226,7 +1228,6 @@ array_assign_boolean_subscript(PyArrayObject *self, v_stride = 0; } - src_itemsize = PyArray_DESCR(v)->elsize; v_data = PyArray_DATA(v); /* Create an iterator for the data */ @@ -1241,8 +1242,6 @@ array_assign_boolean_subscript(PyArrayObject *self, npy_intp 
innersize, *innerstrides; char **dataptrs; - PyArray_StridedUnaryOp *stransfer = NULL; - NpyAuxData *transferdata = NULL; npy_intp self_stride, bmask_stride, subloopsize; char *self_data; char *bmask_data; @@ -1274,13 +1273,14 @@ array_assign_boolean_subscript(PyArrayObject *self, /* Get a dtype transfer function */ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); + NPY_cast_info cast_info; if (PyArray_GetDTypeTransferFunction( IsUintAligned(self) && IsAligned(self) && IsUintAligned(v) && IsAligned(v), v_stride, fixed_strides[0], PyArray_DESCR(v), PyArray_DESCR(self), 0, - &stransfer, &transferdata, + &cast_info, &needs_api) != NPY_SUCCEED) { NpyIter_Deallocate(iter); return -1; @@ -1290,6 +1290,8 @@ array_assign_boolean_subscript(PyArrayObject *self, NPY_BEGIN_THREADS_NDITER(iter); } + npy_intp strides[2] = {v_stride, self_stride}; + do { innersize = *NpyIter_GetInnerLoopSizePtr(iter); self_data = dataptrs[0]; @@ -1304,8 +1306,10 @@ array_assign_boolean_subscript(PyArrayObject *self, /* Process unmasked values */ bmask_data = npy_memchr(bmask_data, 0, bmask_stride, innersize, &subloopsize, 0); - res = stransfer(self_data, self_stride, v_data, v_stride, - subloopsize, src_itemsize, transferdata); + + char *args[2] = {v_data, self_data}; + res = cast_info.func(&cast_info.context, + args, &subloopsize, strides, cast_info.auxdata); if (res < 0) { break; } @@ -1319,7 +1323,7 @@ array_assign_boolean_subscript(PyArrayObject *self, NPY_END_THREADS; } - NPY_AUXDATA_FREE(transferdata); + NPY_cast_info_xfree(&cast_info); if (!NpyIter_Deallocate(iter)) { res = -1; } diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c index 3403ce98b..81209651b 100644 --- a/numpy/core/src/multiarray/nditer_api.c +++ b/numpy/core/src/multiarray/nditer_api.c @@ -2009,8 +2009,7 @@ npyiter_copy_from_buffers(NpyIter *iter) dst_coords, axisdata_incr, dst_shape, axisdata_incr, op_transfersize, dtypes[iop]->elsize, - (PyArray_MaskedStridedUnaryOp 
*)transferinfo[iop].write.func, - transferinfo[iop].write.auxdata) < 0) { + &transferinfo[iop].write) < 0) { return -1; } } @@ -2022,15 +2021,14 @@ npyiter_copy_from_buffers(NpyIter *iter) dst_coords, axisdata_incr, dst_shape, axisdata_incr, op_transfersize, dtypes[iop]->elsize, - transferinfo[iop].write.func, - transferinfo[iop].write.auxdata) < 0) { + &transferinfo[iop].write) < 0) { return -1; } } } /* If there's no copy back, we may have to decrement refs. In - * this case, the transfer function has a 'decsrcref' transfer - * function, so we can use it to do the decrement. + * this case, the transfer is instead a function which clears + * (DECREFs) the single input. * * The flag USINGBUFFER is set when the buffer was used, so * only decrement refs when this flag is on. @@ -2040,9 +2038,10 @@ npyiter_copy_from_buffers(NpyIter *iter) NPY_IT_DBG_PRINT1("Iterator: Freeing refs and zeroing buffer " "of operand %d\n", (int)iop); /* Decrement refs */ + npy_intp buf_stride = dtypes[iop]->elsize; if (transferinfo[iop].write.func( - NULL, 0, buffer, dtypes[iop]->elsize, - transfersize, dtypes[iop]->elsize, + &transferinfo[iop].write.context, + &buffer, &transfersize, &buf_stride, transferinfo[iop].write.auxdata) < 0) { /* Since this should only decrement, it should never error */ assert(0); @@ -2550,8 +2549,7 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs) src_coords, axisdata_incr, src_shape, axisdata_incr, op_transfersize, src_itemsize, - transferinfo[iop].read.func, - transferinfo[iop].read.auxdata) < 0) { + &transferinfo[iop].read) < 0) { return -1; } } diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index dcf8f412f..982dca849 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -597,27 +597,25 @@ NpyIter_Copy(NpyIter *iter) } } - if (transferinfo[iop].read.auxdata != NULL) { + if (transferinfo[iop].read.func != NULL) { if (out_of_memory) { - 
transferinfo[iop].read.auxdata = NULL; + transferinfo[iop].read.func = NULL; /* No cleanup */ } else { - transferinfo[iop].read.auxdata = - NPY_AUXDATA_CLONE(transferinfo[iop].read.auxdata); - if (transferinfo[iop].read.auxdata == NULL) { + if (NPY_cast_info_copy(&transferinfo[iop].read, + &transferinfo[iop].read) < 0) { out_of_memory = 1; } } } - if (transferinfo[iop].write.auxdata != NULL) { + if (transferinfo[iop].write.func != NULL) { if (out_of_memory) { - transferinfo[iop].write.auxdata = NULL; + transferinfo[iop].write.func = NULL; /* No cleanup */ } else { - transferinfo[iop].write.auxdata = - NPY_AUXDATA_CLONE(transferinfo[iop].write.auxdata); - if (transferinfo[iop].write.auxdata == NULL) { + if (NPY_cast_info_copy(&transferinfo[iop].write, + &transferinfo[iop].write) < 0) { out_of_memory = 1; } } @@ -696,12 +694,8 @@ NpyIter_Deallocate(NpyIter *iter) NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata); /* read bufferdata */ for (iop = 0; iop < nop; ++iop, ++transferinfo) { - if (transferinfo->read.auxdata) { - NPY_AUXDATA_FREE(transferinfo->read.auxdata); - } - if (transferinfo->write.auxdata) { - NPY_AUXDATA_FREE(transferinfo->write.auxdata); - } + NPY_cast_info_xfree(&transferinfo->read); + NPY_cast_info_xfree(&transferinfo->write); } } @@ -3163,8 +3157,7 @@ npyiter_allocate_transfer_functions(NpyIter *iter) PyArray_DESCR(op[iop]), op_dtype[iop], move_references, - &transferinfo[iop].read.func, - &transferinfo[iop].read.auxdata, + &transferinfo[iop].read, &needs_api) != NPY_SUCCEED) { iop -= 1; /* This one cannot be cleaned up yet. 
*/ goto fail; @@ -3196,8 +3189,7 @@ npyiter_allocate_transfer_functions(NpyIter *iter) PyArray_DESCR(op[iop]), mask_dtype, move_references, - (PyArray_MaskedStridedUnaryOp **)&transferinfo[iop].write.func, - &transferinfo[iop].write.auxdata, + &transferinfo[iop].write, &needs_api) != NPY_SUCCEED) { goto fail; } @@ -3210,8 +3202,7 @@ npyiter_allocate_transfer_functions(NpyIter *iter) op_dtype[iop], PyArray_DESCR(op[iop]), move_references, - &transferinfo[iop].write.func, - &transferinfo[iop].write.auxdata, + &transferinfo[iop].write, &needs_api) != NPY_SUCCEED) { goto fail; } @@ -3229,8 +3220,7 @@ npyiter_allocate_transfer_functions(NpyIter *iter) op_dtype[iop]->elsize, 0, op_dtype[iop], NULL, 1, - &transferinfo[iop].write.func, - &transferinfo[iop].write.auxdata, + &transferinfo[iop].write, &needs_api) != NPY_SUCCEED) { goto fail; } @@ -3254,14 +3244,8 @@ npyiter_allocate_transfer_functions(NpyIter *iter) fail: for (i = 0; i < iop+1; ++i) { - if (transferinfo[iop].read.auxdata != NULL) { - NPY_AUXDATA_FREE(transferinfo[iop].read.auxdata); - transferinfo[iop].read.auxdata = NULL; - } - if (transferinfo[iop].write.auxdata != NULL) { - NPY_AUXDATA_FREE(transferinfo[iop].write.auxdata); - transferinfo[iop].write.auxdata = NULL; - } + NPY_cast_info_xfree(&transferinfo[iop].read); + NPY_cast_info_xfree(&transferinfo[iop].write); } return 0; } diff --git a/numpy/core/src/multiarray/nditer_impl.h b/numpy/core/src/multiarray/nditer_impl.h index bcc3ab7b9..a5a9177e5 100644 --- a/numpy/core/src/multiarray/nditer_impl.h +++ b/numpy/core/src/multiarray/nditer_impl.h @@ -21,6 +21,7 @@ #include "convert_datatype.h" #include "lowlevel_strided_loops.h" +#include "dtype_transfer.h" /********** ITERATOR CONSTRUCTION TIMING **************/ #define NPY_IT_CONSTRUCTION_TIMING 0 @@ -231,14 +232,10 @@ typedef npy_int16 npyiter_opitflags; &(iter)->iter_flexdata + NIT_AXISDATA_OFFSET(itflags, ndim, nop))) /* Internal-only BUFFERDATA MEMBER ACCESS */ -struct _transferdata { - 
PyArray_StridedUnaryOp *func; - NpyAuxData *auxdata; -}; struct NpyIter_TransferInfo_tag { - struct _transferdata read; - struct _transferdata write; + NPY_cast_info read; + NPY_cast_info write; /* Probably unnecessary, but make sure what follows is intp aligned: */ npy_intp _unused_ensure_alignment[]; }; diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py index 0f42f7076..9e99e0bc3 100644 --- a/numpy/core/tests/test_api.py +++ b/numpy/core/tests/test_api.py @@ -149,7 +149,7 @@ def test_array_impossible_casts(array): rt = rational(1, 2) if array: rt = np.array(rt) - with assert_raises(ValueError): + with assert_raises(TypeError): np.array(rt, dtype="M8") diff --git a/numpy/core/tests/test_casting_unittests.py b/numpy/core/tests/test_casting_unittests.py index 674583d47..c8fcd4b42 100644 --- a/numpy/core/tests/test_casting_unittests.py +++ b/numpy/core/tests/test_casting_unittests.py @@ -619,6 +619,28 @@ class TestCasting: elif change_length > 0: assert safety == Casting.safe + @pytest.mark.parametrize("order1", [">", "<"]) + @pytest.mark.parametrize("order2", [">", "<"]) + def test_unicode_byteswapped_cast(self, order1, order2): + # Very specific tests (not using the castingimpl directly) + # that test unicode byteswaps, including for unaligned array data. + dtype1 = np.dtype(f"{order1}U30") + dtype2 = np.dtype(f"{order2}U30") + data1 = np.empty(30 * 4 + 1, dtype=np.uint8)[1:].view(dtype1) + data2 = np.empty(30 * 4 + 1, dtype=np.uint8)[1:].view(dtype2) + if dtype1.alignment != 1: + # alignment should always be >1, but skip the check if not + assert not data1.flags.aligned + assert not data2.flags.aligned + + element = "this is a ünicode string‽" + data1[()] = element + # Test both `data1` and `data1.copy()` (which should be aligned) + for data in [data1, data1.copy()]: + data2[...] 
= data1 + assert data2[()] == element + assert data2.copy()[()] == element + def test_void_to_string_special_case(self): # Cover a small special case in void to string casting that could # probably just as well be turned into an error (compare diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index 62f6381d5..b4146eadf 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -5,7 +5,7 @@ import datetime import pytest from numpy.testing import ( assert_, assert_equal, assert_raises, assert_warns, suppress_warnings, - assert_raises_regex, + assert_raises_regex, assert_array_equal, ) from numpy.compat import pickle @@ -686,6 +686,63 @@ class TestDateTime: str_b[...] = dt_a assert_equal(str_a, str_b) + @pytest.mark.parametrize("time_dtype", ["m8[D]", "M8[Y]"]) + def test_time_byteswapping(self, time_dtype): + times = np.array(["2017", "NaT"], dtype=time_dtype) + times_swapped = times.astype(times.dtype.newbyteorder()) + assert_array_equal(times, times_swapped) + + unswapped = times_swapped.view(np.int64).newbyteorder() + assert_array_equal(unswapped, times.view(np.int64)) + + @pytest.mark.parametrize(["time1", "time2"], + [("M8[s]", "M8[D]"), ("m8[s]", "m8[ns]")]) + def test_time_byteswapped_cast(self, time1, time2): + dtype1 = np.dtype(time1) + dtype2 = np.dtype(time2) + times = np.array(["2017", "NaT"], dtype=dtype1) + expected = times.astype(dtype2) + + # Test that every byte-swapping combination also returns the same + # results (previous tests check that this comparison works fine). 
+ res = times.astype(dtype1.newbyteorder()).astype(dtype2) + assert_array_equal(res, expected) + res = times.astype(dtype2.newbyteorder()) + assert_array_equal(res, expected) + res = times.astype(dtype1.newbyteorder()).astype(dtype2.newbyteorder()) + assert_array_equal(res, expected) + + @pytest.mark.parametrize("time_dtype", ["m8[D]", "M8[Y]"]) + @pytest.mark.parametrize("str_dtype", ["U", "S"]) + def test_datetime_conversions_byteorders(self, str_dtype, time_dtype): + times = np.array(["2017", "NaT"], dtype=time_dtype) + # Unfortunately, timedelta does not roundtrip: + from_strings = np.array(["2017", "NaT"], dtype=str_dtype) + to_strings = times.astype(str_dtype) # assume this is correct + + # Check that conversion from times to string works if src is swapped: + times_swapped = times.astype(times.dtype.newbyteorder()) + res = times_swapped.astype(str_dtype) + assert_array_equal(res, to_strings) + # And also if both are swapped: + res = times_swapped.astype(to_strings.dtype.newbyteorder()) + assert_array_equal(res, to_strings) + # only destination is swapped: + res = times.astype(to_strings.dtype.newbyteorder()) + assert_array_equal(res, to_strings) + + # Check that conversion from string to times works if src is swapped: + from_strings_swapped = from_strings.astype( + from_strings.dtype.newbyteorder()) + res = from_strings_swapped.astype(time_dtype) + assert_array_equal(res, times) + # And if both are swapped: + res = from_strings_swapped.astype(times.dtype.newbyteorder()) + assert_array_equal(res, times) + # Only destination is swapped: + res = from_strings.astype(times.dtype.newbyteorder()) + assert_array_equal(res, times) + def test_datetime_array_str(self): a = np.array(['2011-03-16', '1920-01-01', '2013-05-19'], dtype='M') assert_equal(str(a), "['2011-03-16' '1920-01-01' '2013-05-19']") diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index 528486a05..53e4821ae 100644 --- a/numpy/core/tests/test_dtype.py +++ 
b/numpy/core/tests/test_dtype.py @@ -751,8 +751,6 @@ class TestStructuredDtypeSparseFields: sparse_dtype = np.dtype([('a', {'names':['ab'], 'formats':['f'], 'offsets':[4]}, (2, 3))]) - @pytest.mark.xfail(reason="inaccessible data is changed see gh-12686.") - @pytest.mark.valgrind_error(reason="reads from uninitialized buffers.") def test_sparse_field_assignment(self): arr = np.zeros(3, self.dtype) sparse_arr = arr.view(self.sparse_dtype) diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py index b2341fe4e..82ba5a01b 100644 --- a/numpy/core/tests/test_nditer.py +++ b/numpy/core/tests/test_nditer.py @@ -2705,9 +2705,25 @@ def test_iter_writemasked_badinput(): op_dtypes=['f4', None], casting='same_kind') -def test_iter_writemasked(): - a = np.zeros((3,), dtype='f8') - msk = np.array([True, True, False]) +def _is_buffered(iterator): + try: + iterator.itviews + except ValueError: + return True + return False + +@pytest.mark.parametrize("a", + [np.zeros((3,), dtype='f8'), + np.zeros((9876, 3*5), dtype='f8')[::2, :], + np.zeros((4, 312, 124, 3), dtype='f8')[::2, :, ::2, :]]) +def test_iter_writemasked(a): + # Note, the slicing above is to ensure that nditer cannot combine multiple + # axes into one. The repetition is just to make things a bit more + # interesting. + shape = a.shape + reps = shape[-1] // 3 + msk = np.empty(shape, dtype=bool) + msk[...] = [True, True, False] * reps # When buffering is unused, 'writemasked' effectively does nothing. # It's up to the user of the iterator to obey the requested semantics. @@ -2718,18 +2734,31 @@ def test_iter_writemasked(): for x, m in it: x[...] = 1 # Because we violated the semantics, all the values became 1 - assert_equal(a, [1, 1, 1]) + assert_equal(a, np.broadcast_to([1, 1, 1] * reps, shape)) # Even if buffering is enabled, we still may be accessing the array # directly. 
it = np.nditer([a, msk], ['buffered'], [['readwrite', 'writemasked'], ['readonly', 'arraymask']]) + # @seberg: I honestly don't currently understand why a "buffered" iterator + # would end up not using a buffer for the small array here at least when + # "writemasked" is used, that seems confusing... Check by testing for + # actual memory overlap! + is_buffered = True with it: for x, m in it: x[...] = 2.5 - # Because we violated the semantics, all the values became 2.5 - assert_equal(a, [2.5, 2.5, 2.5]) + if np.may_share_memory(x, a): + is_buffered = False + + if not is_buffered: + # Because we violated the semantics, all the values became 2.5 + assert_equal(a, np.broadcast_to([2.5, 2.5, 2.5] * reps, shape)) + else: + # For large sizes, the iterator may be buffered: + assert_equal(a, np.broadcast_to([2.5, 2.5, 1] * reps, shape)) + a[...] = 2.5 # If buffering will definitely happen, for instance because of # a cast, only the items selected by the mask will be copied back from @@ -2744,7 +2773,7 @@ def test_iter_writemasked(): x[...] = 3 # Even though we violated the semantics, only the selected values # were copied back - assert_equal(a, [3, 3, 2.5]) + assert_equal(a, np.broadcast_to([3, 3, 2.5] * reps, shape)) def test_iter_writemasked_decref(): # force casting (to make it interesting) by using a structured dtype. |