summary | refs | log | tree | commit | diff
path: root/numpy/core
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/core')
-rw-r--r-- numpy/core/src/common/dlpack/dlpack.h 188
-rw-r--r-- numpy/core/src/multiarray/methods.c 163
2 files changed, 351 insertions, 0 deletions
diff --git a/numpy/core/src/common/dlpack/dlpack.h b/numpy/core/src/common/dlpack/dlpack.h
new file mode 100644
index 000000000..84afca248
--- /dev/null
+++ b/numpy/core/src/common/dlpack/dlpack.h
@@ -0,0 +1,188 @@
+/*!
+ * Copyright (c) 2017 by Contributors
+ * \file dlpack.h
+ * \brief The common header of DLPack.
+ */
+#ifndef DLPACK_DLPACK_H_
+#define DLPACK_DLPACK_H_
+
+/*!
+ * \brief Linkage specifier for declarations: expands to extern "C" when
+ * compiled as C++ and to nothing when compiled as C.
+ */
+#ifdef __cplusplus
+#define DLPACK_EXTERN_C extern "C"
+#else
+#define DLPACK_EXTERN_C
+#endif
+
+/*!
+ * \brief The current version of dlpack.
+ *
+ * NOTE(review): the leading zero makes 050 an octal literal in C and C++, so
+ * this macro evaluates to decimal 40, not 50 -- confirm this is intended
+ * before comparing it numerically.
+ */
+#define DLPACK_VERSION 050
+
+/*!
+ * \brief DLPACK_DLL prefix for windows.
+ *
+ * On _WIN32 this is __declspec(dllexport) when DLPACK_EXPORTS is defined and
+ * __declspec(dllimport) otherwise; on every other platform it expands to
+ * nothing.
+ */
+#ifdef _WIN32
+#ifdef DLPACK_EXPORTS
+#define DLPACK_DLL __declspec(dllexport)
+#else
+#define DLPACK_DLL __declspec(dllimport)
+#endif
+#else
+#define DLPACK_DLL
+#endif
+
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*!
+ * \brief The device type in DLDevice.
+ *
+ * The enumerators carry explicit values; 5, 6 and 11 are not assigned in
+ * this version of the header.
+ */
+typedef enum {
+ /*! \brief CPU device */
+ kDLCPU = 1,
+ /*! \brief CUDA GPU device */
+ kDLCUDA = 2,
+ /*!
+ * \brief Pinned CUDA CPU memory allocated by cudaMallocHost
+ */
+ kDLCUDAHost = 3,
+ /*! \brief OpenCL devices. */
+ kDLOpenCL = 4,
+ /*! \brief Vulkan buffer for next generation graphics. */
+ kDLVulkan = 7,
+ /*! \brief Metal for Apple GPU. */
+ kDLMetal = 8,
+ /*! \brief Verilog simulator buffer */
+ kDLVPI = 9,
+ /*! \brief ROCm GPUs for AMD GPUs */
+ kDLROCM = 10,
+ /*!
+ * \brief Reserved extension device type,
+ * used to quickly test an extension device.
+ * The semantics can differ depending on the implementation.
+ */
+ kDLExtDev = 12,
+} DLDeviceType;
+
+/*!
+ * \brief A Device for Tensor and operator: a device type paired with an
+ * integer index.
+ */
+typedef struct {
+ /*! \brief The device type used in the device (one of DLDeviceType). */
+ DLDeviceType device_type;
+ /*! \brief The device index */
+ int device_id;
+} DLDevice;
+
+/*!
+ * \brief The type code options for DLDataType; the chosen value is stored in
+ * the uint8_t field DLDataType::code.
+ */
+typedef enum {
+ /*! \brief signed integer */
+ kDLInt = 0U,
+ /*! \brief unsigned integer */
+ kDLUInt = 1U,
+ /*! \brief IEEE floating point */
+ kDLFloat = 2U,
+ /*!
+ * \brief Opaque handle type, reserved for testing purposes.
+ * Frameworks need to agree on the handle data type for the exchange to be well-defined.
+ */
+ kDLOpaqueHandle = 3U,
+ /*! \brief bfloat16 */
+ kDLBfloat = 4U,
+ /*!
+ * \brief complex number
+ * (C/C++/Python layout: compact struct per complex number)
+ */
+ kDLComplex = 5U,
+} DLDataTypeCode;
+
+/*!
+ * \brief The data type the tensor can hold.
+ *
+ * Examples
+ * - float: type_code = 2, bits = 32, lanes=1
+ * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
+ * - int8: type_code = 0, bits = 8, lanes=1
+ * - std::complex<float>: type_code = 5, bits = 64, lanes = 1
+ *
+ * ("type_code" in the examples refers to the field named code below.)
+ */
+typedef struct {
+ /*!
+ * \brief Type code of base types.
+ * We keep it uint8_t instead of DLDataTypeCode for minimal memory
+ * footprint, but the value should be one of DLDataTypeCode enum values.
+ */
+ uint8_t code;
+ /*!
+ * \brief Number of bits, common choices are 8, 16, 32.
+ * Being a uint8_t, the field cannot represent more than 255 bits.
+ */
+ uint8_t bits;
+ /*! \brief Number of lanes in the type, used for vector types. */
+ uint16_t lanes;
+} DLDataType;
+
+/*!
+ * \brief Plain C Tensor object, does not manage memory.
+ */
+typedef struct {
+ /*!
+ * \brief The opaque data pointer points to the allocated data. This will be
+ * CUDA device pointer or cl_mem handle in OpenCL. This pointer is always
+ * aligned to 256 bytes as in CUDA.
+ *
+ * For given DLTensor, the size of memory required to store the contents of
+ * data is calculated as follows (tvm_index_t in the snippet is not declared
+ * by this header; read it as an integer index type):
+ *
+ * \code{.c}
+ * static inline size_t GetDataSize(const DLTensor* t) {
+ * size_t size = 1;
+ * for (tvm_index_t i = 0; i < t->ndim; ++i) {
+ * size *= t->shape[i];
+ * }
+ * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
+ * return size;
+ * }
+ * \endcode
+ */
+ void* data;
+ /*! \brief The device of the tensor */
+ DLDevice device;
+ /*! \brief Number of dimensions */
+ int ndim;
+ /*! \brief The data type of the pointer */
+ DLDataType dtype;
+ /*! \brief The shape of the tensor (ndim entries, per the snippet above) */
+ int64_t* shape;
+ /*!
+ * \brief Strides of the tensor (in number of elements, not bytes).
+ * Can be NULL, indicating the tensor is compact and row-major.
+ */
+ int64_t* strides;
+ /*! \brief The offset in bytes to the beginning pointer to data */
+ uint64_t byte_offset;
+} DLTensor;
+
+/*!
+ * \brief C Tensor object, manage memory of DLTensor. This data structure is
+ * intended to facilitate the borrowing of DLTensor by another framework. It is
+ * not meant to transfer the tensor. When the borrowing framework doesn't need
+ * the tensor, it should call the deleter to notify the host that the resource
+ * is no longer needed.
+ */
+typedef struct DLManagedTensor {
+ /*! \brief DLTensor which is being memory managed */
+ DLTensor dl_tensor;
+ /*! \brief The context of the original host framework in which this
+ * DLManagedTensor is used. It can also be NULL.
+ */
+ void * manager_ctx;
+ /*! \brief Destructor, with signature void (*)(struct DLManagedTensor *).
+ * It should be called to destruct the manager_ctx which holds the
+ * DLManagedTensor. It can be NULL if there is no way for the caller to
+ * provide a reasonable destructor, so check for NULL before calling it.
+ * The destructor deletes the argument self as well.
+ */
+ void (*deleter)(struct DLManagedTensor * self);
+} DLManagedTensor;
+#ifdef __cplusplus
+} // DLPACK_EXTERN_C
+#endif
+#endif // DLPACK_DLPACK_H_
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 2d66c77dc..2251d4b69 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -31,6 +31,7 @@
#include "alloc.h"
#include <stdarg.h>
+#include "common/dlpack/dlpack.h"
/* NpyArg_ParseKeywords
@@ -2762,6 +2763,158 @@ array_class_getitem(PyObject *cls, PyObject *args)
generic_alias = NULL;
#endif
return generic_alias;
+}
+
+/*
+ * PyCapsule names used for DLPack exchange.  array_dlpack() creates its
+ * capsule under NPY_DLPACK_CAPSULE_NAME; the capsule destructor treats a
+ * capsule named NPY_DLPACK_USED_CAPSULE_NAME as one it must not free.
+ *
+ * The closing brace above ends array_class_getitem(): without it every
+ * definition that follows would be nested inside that function's body.
+ */
+#define NPY_DLPACK_CAPSULE_NAME "dltensor"
+#define NPY_DLPACK_USED_CAPSULE_NAME "used_dltensor"
+
+/*
+ * PyCapsule destructor for the capsule created by array_dlpack().
+ *
+ * - A capsule named NPY_DLPACK_USED_CAPSULE_NAME is left untouched (per the
+ *   DLPack Python protocol the consumer that renamed it is expected to call
+ *   the DLManagedTensor deleter itself).
+ * - A capsule named NPY_DLPACK_CAPSULE_NAME was never consumed, so its
+ *   DLManagedTensor is released here through its own deleter.
+ * - Any other name is reported with PyErr_WriteUnraisable(): a destructor has
+ *   no caller that could handle an exception left set.
+ *
+ * An exception that is already in flight when the capsule is destroyed is
+ * saved and restored so that this function never clobbers it.
+ */
+static void array_dlpack_capsule_deleter(PyObject *self)
+{
+    if (PyCapsule_IsValid(self, NPY_DLPACK_USED_CAPSULE_NAME)) {
+        return;
+    }
+
+    /* Save a possibly pending exception; the calls below may set another. */
+    PyObject *type, *value, *traceback;
+    PyErr_Fetch(&type, &value, &traceback);
+
+    DLManagedTensor *managed =
+        (DLManagedTensor *)PyCapsule_GetPointer(self, NPY_DLPACK_CAPSULE_NAME);
+    if (managed == NULL) {
+        /* Wrong capsule name: PyCapsule_GetPointer set the error. */
+        PyErr_WriteUnraisable(self);
+    }
+    else if (managed->deleter != NULL) {
+        /* dlpack.h allows a NULL deleter, so only call it when present. */
+        managed->deleter(managed);
+    }
+
+    PyErr_Restore(type, value, traceback);
+}
+
+/*
+ * DLManagedTensor deleter installed by array_dlpack().
+ *
+ * Frees the shape/strides buffer and the DLManagedTensor itself, then drops
+ * the reference to the exporting array that array_dlpack() took and stored
+ * in manager_ctx.
+ *
+ * NOTE(review): Py_XDECREF requires the GIL; this assumes the consumer calls
+ * the deleter with the GIL held -- confirm against the consumers in use.
+ */
+static void array_dlpack_deleter(DLManagedTensor *self)
+{
+    PyArrayObject *array = (PyArrayObject *)self->manager_ctx;
+    /* shape and strides share one allocation that begins at shape. */
+    PyMem_Free(self->dl_tensor.shape);
+    PyMem_Free(self);
+
+    /*
+     * Release the array object itself.  PyArray_XDECREF would instead touch
+     * the references held by the *elements* of an object array.
+     */
+    Py_XDECREF(array);
+}
+
+/*
+ * ndarray.__dlpack__(*, stream=None)
+ *
+ * Exports the array as a PyCapsule named NPY_DLPACK_CAPSULE_NAME wrapping a
+ * newly allocated DLManagedTensor that points at the array's data (no copy).
+ * The managed tensor owns a strong reference to the array, released by
+ * array_dlpack_deleter().
+ *
+ * Returns a new reference to the capsule, or NULL with an exception set:
+ *   - RuntimeError if stream is not None, or if a stride is not a multiple
+ *     of the item size;
+ *   - TypeError for byte-swapped data, for floats wider than 8 bytes, for
+ *     complex wider than 16 bytes, and for any dtype that is not a
+ *     signed/unsigned integer, float or complex;
+ *   - MemoryError if an allocation fails.
+ */
+static PyObject *
+array_dlpack(PyArrayObject *self,
+        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
+{
+    PyObject *stream = Py_None;
+    NPY_PREPARE_ARGPARSER;
+    if (npy_parse_arguments("__dlpack__", args, len_args, kwnames,
+            "$stream", NULL, &stream, NULL, NULL, NULL)) {
+        return NULL;
+    }
+
+    if (stream != Py_None) {
+        PyErr_SetString(PyExc_RuntimeError, "NumPy only supports "
+                "stream=None.");
+        return NULL;
+    }
+
+    npy_intp itemsize = PyArray_ITEMSIZE(self);
+    int ndim = PyArray_NDIM(self);
+    npy_intp *strides = PyArray_STRIDES(self);
+    npy_intp *shape = PyArray_SHAPE(self);
+
+    /*
+     * A zero item size would make the modulo and division below undefined.
+     * No integer, float or complex dtype has one, so reject it with the
+     * same error the dtype dispatch further down would produce.
+     */
+    if (itemsize == 0) {
+        PyErr_SetString(PyExc_TypeError,
+                "DLPack only supports signed/unsigned integers, float "
+                "and complex dtypes.");
+        return NULL;
+    }
+
+    for (int i = 0; i < ndim; ++i) {
+        if (strides[i] % itemsize != 0) {
+            PyErr_SetString(PyExc_RuntimeError,
+                    "DLPack only supports strides which are a multiple of "
+                    "itemsize.");
+            return NULL;
+        }
+    }
+
+    DLDataType managed_dtype;
+    PyArray_Descr *dtype = PyArray_DESCR(self);
+
+    if (PyDataType_ISBYTESWAPPED(dtype)) {
+        PyErr_SetString(PyExc_TypeError, "DLPack only supports native "
+                "byte swapping.");
+        return NULL;
+    }
+
+    if (PyDataType_ISSIGNED(dtype)) {
+        managed_dtype.code = kDLInt;
+    }
+    else if (PyDataType_ISUNSIGNED(dtype)) {
+        managed_dtype.code = kDLUInt;
+    }
+    else if (PyDataType_ISFLOAT(dtype)) {
+        /* We can't be sure that the dtype is IEEE or padded. */
+        if (itemsize > 8) {
+            PyErr_SetString(PyExc_TypeError, "DLPack only supports IEEE "
+                    "floating point types without padding.");
+            return NULL;
+        }
+        managed_dtype.code = kDLFloat;
+    }
+    else if (PyDataType_ISCOMPLEX(dtype)) {
+        /* We can't be sure that the dtype is IEEE or padded. */
+        if (itemsize > 16) {
+            PyErr_SetString(PyExc_TypeError, "DLPack only supports IEEE "
+                    "complex point types without padding.");
+            return NULL;
+        }
+        managed_dtype.code = kDLComplex;
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError,
+                "DLPack only supports signed/unsigned integers, float "
+                "and complex dtypes.");
+        return NULL;
+    }
+
+    /*
+     * Set after validation: the float/complex branches cap itemsize at 16,
+     * so 8 * itemsize fits the uint8_t field there.
+     * NOTE(review): assumes integer dtypes are at most 8 bytes -- confirm.
+     */
+    managed_dtype.bits = (uint8_t)(8 * itemsize);
+    managed_dtype.lanes = 1;
+
+    DLManagedTensor *managed = PyMem_Malloc(sizeof(DLManagedTensor));
+    if (managed == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    managed->dl_tensor.data = PyArray_DATA(self);
+    managed->dl_tensor.device.device_type = kDLCPU;
+    managed->dl_tensor.device.device_id = 0;
+    managed->dl_tensor.dtype = managed_dtype;
+
+    /* One buffer holds both arrays: shape first, strides right after. */
+    int64_t *managed_shape_strides = PyMem_Malloc(sizeof(int64_t) * ndim * 2);
+    if (managed_shape_strides == NULL) {
+        PyErr_NoMemory();
+        PyMem_Free(managed);
+        return NULL;
+    }
+
+    int64_t *managed_shape = managed_shape_strides;
+    int64_t *managed_strides = managed_shape_strides + ndim;
+    for (int i = 0; i < ndim; ++i) {
+        managed_shape[i] = shape[i];
+        /* Strides in DLPack are items; in NumPy are bytes. */
+        managed_strides[i] = strides[i] / itemsize;
+    }
+
+    managed->dl_tensor.ndim = ndim;
+    managed->dl_tensor.shape = managed_shape;
+    managed->dl_tensor.strides = managed_strides;
+    managed->dl_tensor.byte_offset = 0;
+    managed->manager_ctx = self;
+    managed->deleter = array_dlpack_deleter;
+
+    PyObject *capsule = PyCapsule_New(managed, NPY_DLPACK_CAPSULE_NAME,
+            array_dlpack_capsule_deleter);
+    if (capsule == NULL) {
+        PyMem_Free(managed);
+        PyMem_Free(managed_shape_strides);
+        return NULL;
+    }
+
+    /*
+     * The managed tensor keeps the array alive until its deleter runs.  This
+     * must be a reference to the array object itself (Py_INCREF), not to the
+     * elements of an object array (PyArray_INCREF).
+     */
+    Py_INCREF(self);
+    return capsule;
+}
+
+/*
+ * ndarray.__dlpack_device__()
+ *
+ * Returns a new 2-tuple (device_type, device_id); this implementation always
+ * reports (kDLCPU, 0), matching the device array_dlpack() writes into the
+ * exported tensor.
+ */
+static PyObject *
+array_dlpack_device(PyArrayObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args))
+{
+    return Py_BuildValue("ii", kDLCPU, 0);
}
NPY_NO_EXPORT PyMethodDef array_methods[] = {
@@ -2989,5 +3142,15 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
{"view",
(PyCFunction)array_view,
METH_FASTCALL | METH_KEYWORDS, NULL},
+
+ // For data interchange between libraries
+ {"__dlpack__",
+ (PyCFunction)array_dlpack,
+ METH_FASTCALL | METH_KEYWORDS, NULL},
+
+ {"__dlpack_device__",
+ (PyCFunction)array_dlpack_device,
+ METH_NOARGS, NULL},
+
{NULL, NULL, 0, NULL} /* sentinel */
};