diff options
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/src/common/dlpack/dlpack.h | 188 | ||||
-rw-r--r-- | numpy/core/src/multiarray/methods.c | 204 |
2 files changed, 392 insertions, 0 deletions
diff --git a/numpy/core/src/common/dlpack/dlpack.h b/numpy/core/src/common/dlpack/dlpack.h
new file mode 100644
index 000000000..84afca248
--- /dev/null
+++ b/numpy/core/src/common/dlpack/dlpack.h
@@ -0,0 +1,188 @@
+/*!
+ * Copyright (c) 2017 by Contributors
+ * \file dlpack.h
+ * \brief The common header of DLPack.
+ */
+#ifndef DLPACK_DLPACK_H_
+#define DLPACK_DLPACK_H_
+
+#ifdef __cplusplus
+#define DLPACK_EXTERN_C extern "C"
+#else
+#define DLPACK_EXTERN_C
+#endif
+
+/*! \brief The current version of dlpack */
+#define DLPACK_VERSION 050
+
+/*! \brief DLPACK_DLL prefix for windows */
+#ifdef _WIN32
+#ifdef DLPACK_EXPORTS
+#define DLPACK_DLL __declspec(dllexport)
+#else
+#define DLPACK_DLL __declspec(dllimport)
+#endif
+#else
+#define DLPACK_DLL
+#endif
+
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*!
+ * \brief The device type in DLDevice.
+ */
+typedef enum {
+  /*! \brief CPU device */
+  kDLCPU = 1,
+  /*! \brief CUDA GPU device */
+  kDLCUDA = 2,
+  /*!
+   * \brief Pinned CUDA CPU memory by cudaMallocHost
+   */
+  kDLCUDAHost = 3,
+  /*! \brief OpenCL devices. */
+  kDLOpenCL = 4,
+  /*! \brief Vulkan buffer for next generation graphics. */
+  kDLVulkan = 7,
+  /*! \brief Metal for Apple GPU. */
+  kDLMetal = 8,
+  /*! \brief Verilog simulator buffer */
+  kDLVPI = 9,
+  /*! \brief ROCm GPUs for AMD GPUs */
+  kDLROCM = 10,
+  /*!
+   * \brief Reserved extension device type,
+   * used for quickly test extension device
+   * The semantics can differ depending on the implementation.
+   */
+  kDLExtDev = 12,
+} DLDeviceType;
+
+/*!
+ * \brief A Device for Tensor and operator.
+ */
+typedef struct {
+  /*! \brief The device type used in the device. */
+  DLDeviceType device_type;
+  /*! \brief The device index */
+  int device_id;
+} DLDevice;
+
+/*!
+ * \brief The type code options DLDataType.
+ */
+typedef enum {
+  /*! \brief signed integer */
+  kDLInt = 0U,
+  /*! \brief unsigned integer */
+  kDLUInt = 1U,
+  /*! \brief IEEE floating point */
+  kDLFloat = 2U,
+  /*!
+   * \brief Opaque handle type, reserved for testing purposes.
+   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
+   */
+  kDLOpaqueHandle = 3U,
+  /*! \brief bfloat16 */
+  kDLBfloat = 4U,
+  /*!
+   * \brief complex number
+   * (C/C++/Python layout: compact struct per complex number)
+   */
+  kDLComplex = 5U,
+} DLDataTypeCode;
+
+/*!
+ * \brief The data type the tensor can hold.
+ *
+ * Examples
+ * - float: type_code = 2, bits = 32, lanes=1
+ * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
+ * - int8: type_code = 0, bits = 8, lanes=1
+ * - std::complex<float>: type_code = 5, bits = 64, lanes = 1
+ */
+typedef struct {
+  /*!
+   * \brief Type code of base types.
+   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
+   * footprint, but the value should be one of DLDataTypeCode enum values.
+   * */
+  uint8_t code;
+  /*!
+   * \brief Number of bits, common choices are 8, 16, 32.
+   */
+  uint8_t bits;
+  /*! \brief Number of lanes in the type, used for vector types. */
+  uint16_t lanes;
+} DLDataType;
+
+/*!
+ * \brief Plain C Tensor object, does not manage memory.
+ */
+typedef struct {
+  /*!
+   * \brief The opaque data pointer points to the allocated data. This will be
+   * CUDA device pointer or cl_mem handle in OpenCL. This pointer is always
+   * aligned to 256 bytes as in CUDA.
+   *
+   * For given DLTensor, the size of memory required to store the contents of
+   * data is calculated as follows:
+   *
+   * \code{.c}
+   * static inline size_t GetDataSize(const DLTensor* t) {
+   *   size_t size = 1;
+   *   for (tvm_index_t i = 0; i < t->ndim; ++i) {
+   *     size *= t->shape[i];
+   *   }
+   *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
+   *   return size;
+   * }
+   * \endcode
+   */
+  void* data;
+  /*! \brief The device of the tensor */
+  DLDevice device;
+  /*! \brief Number of dimensions */
+  int ndim;
+  /*! \brief The data type of the pointer*/
+  DLDataType dtype;
+  /*! \brief The shape of the tensor */
+  int64_t* shape;
+  /*!
+   * \brief strides of the tensor (in number of elements, not bytes)
+   * can be NULL, indicating tensor is compact and row-majored.
+   */
+  int64_t* strides;
+  /*! \brief The offset in bytes to the beginning pointer to data */
+  uint64_t byte_offset;
+} DLTensor;
+
+/*!
+ * \brief C Tensor object, manage memory of DLTensor. This data structure is
+ * intended to facilitate the borrowing of DLTensor by another framework. It is
+ * not meant to transfer the tensor. When the borrowing framework doesn't need
+ * the tensor, it should call the deleter to notify the host that the resource
+ * is no longer needed.
+ */
+typedef struct DLManagedTensor {
+  /*! \brief DLTensor which is being memory managed */
+  DLTensor dl_tensor;
+  /*! \brief the context of the original host framework of DLManagedTensor in
+   * which DLManagedTensor is used in the framework. It can also be NULL.
+   */
+  void * manager_ctx;
+  /*! \brief Destructor signature void (*)(void*) - this should be called
+   * to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
+   * if there is no way for the caller to provide a reasonable destructor.
+   * The destructors deletes the argument self as well.
+   */
+  void (*deleter)(struct DLManagedTensor * self);
+} DLManagedTensor;
+#ifdef __cplusplus
+} // DLPACK_EXTERN_C
+#endif
+#endif // DLPACK_DLPACK_H_
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 2d66c77dc..2251d4b69 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -31,6 +31,7 @@
 #include "alloc.h"
 
 #include <stdarg.h>
+#include "common/dlpack/dlpack.h"
 
 
 /* NpyArg_ParseKeywords
@@ -2762,6 +2763,199 @@ array_class_getitem(PyObject *cls, PyObject *args)
     generic_alias = NULL;
 #endif
     return generic_alias;
 }
+
+#define NPY_DLPACK_CAPSULE_NAME "dltensor"
+#define NPY_DLPACK_USED_CAPSULE_NAME "used_dltensor"
+
+/*
+ * Destructor installed on the PyCapsule returned by ndarray.__dlpack__.
+ *
+ * If the capsule still carries the name "dltensor", the wrapped
+ * DLManagedTensor is released through its own deleter. If it has been
+ * renamed to "used_dltensor" nothing is done here. Any other name sets a
+ * RuntimeError.
+ */
+static void array_dlpack_capsule_deleter(PyObject *self)
+{
+    if (!PyCapsule_IsValid(self, NPY_DLPACK_CAPSULE_NAME)) {
+        if (!PyCapsule_IsValid(self, NPY_DLPACK_USED_CAPSULE_NAME)) {
+            PyErr_SetString(PyExc_RuntimeError, "Invalid capsule name.");
+        }
+        return;
+    }
+
+    DLManagedTensor *managed =
+        (DLManagedTensor *)PyCapsule_GetPointer(self, NPY_DLPACK_CAPSULE_NAME);
+    managed->deleter(managed);
+}
+
+/*
+ * DLManagedTensor deleter for tensors exported by array_dlpack: frees the
+ * shape/strides buffer and the struct itself, then drops the reference to
+ * the exporting array that array_dlpack stored in manager_ctx.
+ */
+static void array_dlpack_deleter(DLManagedTensor *self)
+{
+    PyArrayObject *array = (PyArrayObject *)self->manager_ctx;
+    // This will also free the strides as it's one allocation.
+    PyMem_Free(self->dl_tensor.shape);
+    PyMem_Free(self);
+
+    /*
+     * Release the array object itself. PyArray_XDECREF would only touch the
+     * objects stored inside an object array, never the array's own refcount.
+     */
+    Py_XDECREF(array);
+}
+
+/*
+ * ndarray.__dlpack__(*, stream=None)
+ *
+ * Export the array's data as a DLPack "dltensor" capsule without copying.
+ * Only stream=None, native byte order, signed/unsigned integers, floats of
+ * at most 8 bytes and complex numbers of at most 16 bytes are accepted, and
+ * every stride has to be a multiple of the itemsize. The capsule owns a
+ * reference to the array until array_dlpack_deleter runs.
+ *
+ * Returns a new capsule, or NULL with an exception set.
+ */
+static PyObject *
+array_dlpack(PyArrayObject *self,
+        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
+{
+    PyObject *stream = Py_None;
+    NPY_PREPARE_ARGPARSER;
+    if (npy_parse_arguments("__dlpack__", args, len_args, kwnames,
+            "$stream", NULL, &stream, NULL, NULL, NULL)) {
+        return NULL;
+    }
+
+    if (stream != Py_None) {
+        PyErr_SetString(PyExc_RuntimeError, "NumPy only supports "
+                "stream=None.");
+        return NULL;
+    }
+
+    npy_intp itemsize = PyArray_ITEMSIZE(self);
+    int ndim = PyArray_NDIM(self);
+    npy_intp *strides = PyArray_STRIDES(self);
+    npy_intp *shape = PyArray_SHAPE(self);
+
+    DLDataType managed_dtype;
+    PyArray_Descr *dtype = PyArray_DESCR(self);
+
+    if (PyDataType_ISBYTESWAPPED(dtype)) {
+        PyErr_SetString(PyExc_TypeError, "DLPack only supports native "
+                "byte swapping.");
+        return NULL;
+    }
+
+    /*
+     * Validate the dtype before looking at the strides: every accepted dtype
+     * has a non-zero itemsize, so the modulo/division further down cannot
+     * divide by zero (zero-sized dtypes end up in the final else branch).
+     */
+    if (PyDataType_ISSIGNED(dtype)) {
+        managed_dtype.code = kDLInt;
+    } else if (PyDataType_ISUNSIGNED(dtype)) {
+        managed_dtype.code = kDLUInt;
+    } else if (PyDataType_ISFLOAT(dtype)) {
+        // We can't be sure that the dtype is
+        // IEEE or padded.
+        if (itemsize > 8) {
+            PyErr_SetString(PyExc_TypeError, "DLPack only supports IEEE "
+                    "floating point types without padding.");
+            return NULL;
+        }
+        managed_dtype.code = kDLFloat;
+    } else if (PyDataType_ISCOMPLEX(dtype)) {
+        // We can't be sure that the dtype is
+        // IEEE or padded.
+        if (itemsize > 16) {
+            PyErr_SetString(PyExc_TypeError, "DLPack only supports IEEE "
+                    "complex point types without padding.");
+            return NULL;
+        }
+        managed_dtype.code = kDLComplex;
+    } else {
+        PyErr_SetString(PyExc_TypeError,
+                "DLPack only supports signed/unsigned integers, float "
+                "and complex dtypes.");
+        return NULL;
+    }
+
+    // itemsize is at most 16 bytes here, so the bit count fits in uint8_t.
+    managed_dtype.bits = 8 * itemsize;
+    managed_dtype.lanes = 1;
+
+    for (int i = 0; i < ndim; ++i) {
+        if (strides[i] % itemsize != 0) {
+            PyErr_SetString(PyExc_RuntimeError,
+                    "DLPack only supports strides which are a multiple of "
+                    "itemsize.");
+            return NULL;
+        }
+    }
+
+    DLManagedTensor *managed = PyMem_Malloc(sizeof(DLManagedTensor));
+    if (managed == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    managed->dl_tensor.data = PyArray_DATA(self);
+    managed->dl_tensor.device.device_type = kDLCPU;
+    managed->dl_tensor.device.device_id = 0;
+    managed->dl_tensor.dtype = managed_dtype;
+
+    // Shape and strides share one allocation: shape first, strides after.
+    int64_t *managed_shape_strides = PyMem_Malloc(sizeof(int64_t) * ndim * 2);
+    if (managed_shape_strides == NULL) {
+        PyErr_NoMemory();
+        PyMem_Free(managed);
+        return NULL;
+    }
+
+    int64_t *managed_shape = managed_shape_strides;
+    int64_t *managed_strides = managed_shape_strides + ndim;
+    for (int i = 0; i < ndim; ++i) {
+        managed_shape[i] = shape[i];
+        // Strides in DLPack are items; in NumPy are bytes.
+        managed_strides[i] = strides[i] / itemsize;
+    }
+
+    managed->dl_tensor.ndim = ndim;
+    managed->dl_tensor.shape = managed_shape;
+    managed->dl_tensor.strides = managed_strides;
+    managed->dl_tensor.byte_offset = 0;
+    managed->manager_ctx = self;
+    managed->deleter = array_dlpack_deleter;
+
+    PyObject *capsule = PyCapsule_New(managed, NPY_DLPACK_CAPSULE_NAME,
+            array_dlpack_capsule_deleter);
+    if (capsule == NULL) {
+        PyMem_Free(managed);
+        PyMem_Free(managed_shape_strides);
+        return NULL;
+    }
+
+    /*
+     * The capsule holds a reference to the array itself, keeping the
+     * exported buffer alive. PyArray_INCREF would only touch the objects
+     * stored inside an object array, never the array's own refcount.
+     */
+    Py_INCREF(self);
+    return capsule;
+}
+
+/*
+ * ndarray.__dlpack_device__(): returns the tuple (kDLCPU, 0).
+ */
+static PyObject *
+array_dlpack_device(PyArrayObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args))
+{
+    return Py_BuildValue("ii", kDLCPU, 0);
+}
 
 NPY_NO_EXPORT PyMethodDef array_methods[] = {
@@ -2989,5 +3183,15 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
     {"view",
         (PyCFunction)array_view,
         METH_FASTCALL | METH_KEYWORDS, NULL},
+
+    // For data interchange between libraries
+    {"__dlpack__",
+        (PyCFunction)array_dlpack,
+        METH_FASTCALL | METH_KEYWORDS, NULL},
+
+    {"__dlpack_device__",
+        (PyCFunction)array_dlpack_device,
+        METH_NOARGS, NULL},
+
     {NULL, NULL, 0, NULL}           /* sentinel */
 };