diff options
author | Junyan He <junyan.he@intel.com> | 2017-06-11 13:50:08 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-08-02 17:16:29 +0800 |
commit | 16af56f366f3f673fe97adf866f926a842e15cd1 (patch) | |
tree | d32203b5e002786dfaf96bddae1880b4e82f3bf5 | |
parent | 5efeda566794550bc24334192999efd0a848458f (diff) | |
download | beignet-16af56f366f3f673fe97adf866f926a842e15cd1.tar.gz |
Add cl_kernel define to runtime.
The cl_kernel is one binary kernel function of cl_program.
Just like cl_program, it may contain multi binaries for each
kind of device. The clEnqueueNDRangeKernel will decide which
binary to use.
Signed-off-by: Junyan He <junyan.he@intel.com>
-rw-r--r-- | runtime/cl_kernel.c | 654 | ||||
-rw-r--r-- | runtime/cl_kernel.h | 130 |
2 files changed, 784 insertions, 0 deletions
diff --git a/runtime/cl_kernel.c b/runtime/cl_kernel.c new file mode 100644 index 00000000..d37a20d6 --- /dev/null +++ b/runtime/cl_kernel.c @@ -0,0 +1,654 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Benjamin Segovia <benjamin.segovia@intel.com> + */ + +#include "cl_kernel.h" +#include "cl_program.h" +#include "cl_device_id.h" +#include "cl_event.h" +#include "cl_alloc.h" +#include "cl_sampler.h" +#include "cl_mem.h" +#include "cl_command_queue.h" +#include <string.h> + +static void +cl_kernel_arg_delete(cl_argument arg) +{ + assert(arg); + + if (arg->is_set == CL_FALSE) { + return; + } + + if (arg->arg_type != ArgTypeValue && arg->arg_type != ArgTypeStruct) { + return; + } + + if (arg->arg_type == ArgTypeValue && arg->arg_size > sizeof(cl_double)) { + CL_FREE(arg->val.val_ptr); + } else if (arg->arg_type == ArgTypeStruct) { + CL_FREE(arg->val.val_ptr); + } + + arg->is_set = CL_FALSE; + arg->use_svm = CL_FALSE; + return; +} + +LOCAL cl_int +cl_kernel_set_arg_svm_pointer(cl_kernel k, cl_uint index, const void *value) +{ + cl_int i; + cl_argument arg = NULL; + cl_mem svm = cl_context_get_svm_by_ptr(k->program->ctx, value, CL_FALSE); + if (svm == NULL) + return CL_INVALID_ARG_VALUE; + + if (index >= k->arg_n) + return CL_INVALID_ARG_INDEX; + + for (i = 0; i < k->arg_n; i++) { + if (k->args[i].arg_no == index) { + arg = &k->args[i]; + break; + } + } + assert(arg); + + if (arg->is_set) { + cl_kernel_arg_delete(arg); + } + + if (arg->arg_type != ArgTypePointer) + return CL_INVALID_ARG_VALUE; + + if (arg->arg_addrspace != AddressSpaceGlobal && + arg->arg_addrspace != AddressSpaceConstant) + return CL_INVALID_ARG_VALUE; + + arg->val.val_svm.svm = svm; + arg->val.val_svm.ptr = (void *)value; + arg->val_size = sizeof(cl_mem); + arg->is_set = CL_TRUE; + arg->use_svm = CL_TRUE; + return CL_SUCCESS; +} + +LOCAL cl_int +cl_kernel_set_arg(cl_kernel kernel, cl_uint index, size_t sz, const void *value) +{ + cl_int i; + cl_argument arg = NULL; + + if (index >= kernel->arg_n) + return CL_INVALID_ARG_INDEX; + + for (i = 0; i < kernel->arg_n; i++) { + if (kernel->args[i].arg_no == index) { + arg = &kernel->args[i]; + break; + } + } + assert(arg); + + if (arg->is_set) { + cl_kernel_arg_delete(arg); + } + + /* Local mem is special, the size is the local mem's size to be allocated. */ + if (arg->arg_type == ArgTypePointer && arg->arg_addrspace == AddressSpaceLocal) { + if (sz == 0) + return CL_INVALID_ARG_SIZE; + + if (value != NULL) + return CL_INVALID_ARG_VALUE; + + arg->val_size = sz; + arg->is_set = CL_TRUE; + return CL_SUCCESS; + } + + if (sz != arg->arg_size) + return CL_INVALID_ARG_SIZE; + + /* For constant and global mem, we should have a cl_mem object, and it is a buffer. */ + if (arg->arg_type == ArgTypePointer) { + assert(arg->arg_addrspace != AddressSpaceLocal); + if (value == NULL || *((cl_mem *)value) == NULL) { + arg->val.val_mem = NULL; + } else { + if (!CL_OBJECT_IS_MEM(*(cl_mem *)value)) + return CL_INVALID_ARG_VALUE; + + arg->val.val_mem = *(cl_mem *)value; + } + + arg->val_size = sizeof(cl_mem); + arg->is_set = CL_TRUE; + return CL_SUCCESS; + } + + /* For image, we should have a cl_mem object, and it is a image. */ + if (arg->arg_type == ArgTypeImage) { + if (!CL_OBJECT_IS_MEM(*(cl_mem *)value)) + return CL_INVALID_ARG_VALUE; + + arg->val.val_mem = *(cl_mem *)value; + arg->val_size = sizeof(cl_mem); + arg->is_set = CL_TRUE; + return CL_SUCCESS; + } + + if (arg->arg_type == ArgTypePipe) { + if (!CL_OBJECT_IS_MEM(*(cl_mem *)value)) + return CL_INVALID_ARG_VALUE; + + arg->val.val_mem = *(cl_mem *)value; + arg->val_size = sizeof(cl_mem); + arg->is_set = CL_TRUE; + return CL_SUCCESS; + } + + /* For image, we should have a cl_mem object, and it is a image. */ + if (arg->arg_type == ArgTypeSampler) { + if (!CL_OBJECT_IS_SAMPLER(*(cl_sampler *)value)) + return CL_INVALID_ARG_VALUE; + + arg->val.val_sampler = *(cl_sampler *)value; + arg->val_size = sizeof(cl_sampler); + arg->is_set = CL_TRUE; + return CL_SUCCESS; + } + + if (arg->arg_type == ArgTypeValue && arg->arg_size <= sizeof(cl_double)) { + memcpy(&arg->val, value, sz); + arg->is_set = CL_TRUE; + arg->val_size = arg->arg_size; + return CL_SUCCESS; + } + + arg->val.val_ptr = CL_MALLOC(sz); + if (arg->val.val_ptr == NULL) + return CL_OUT_OF_HOST_MEMORY; + + memset(arg->val.val_ptr, 0, sz); + memcpy(arg->val.val_ptr, value, sz); + arg->val_size = arg->arg_size; + arg->is_set = CL_TRUE; + return CL_SUCCESS; +} + +LOCAL int +cl_kernel_get_argument_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_name, + size_t param_value_size, void *param_value, size_t *param_value_size_ret) +{ + assert(k != NULL); + int str_len = 0; + + switch (param_name) { + case CL_KERNEL_ARG_ADDRESS_QUALIFIER: + if (param_value_size_ret) + *param_value_size_ret = sizeof(cl_kernel_arg_address_qualifier); + if (!param_value) + return CL_SUCCESS; + + if (param_value_size < sizeof(cl_kernel_arg_address_qualifier)) + return CL_INVALID_VALUE; + + if (k->args[arg_index].arg_addrspace == AddressSpaceGlobal) { + *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_GLOBAL; + } else if (k->args[arg_index].arg_addrspace == AddressSpaceConstant) { + *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_CONSTANT; + } else if (k->args[arg_index].arg_addrspace == AddressSpaceLocal) { + *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_LOCAL; + } else { + /* If no address qualifier is specified, the default address qualifier + which is CL_KERNEL_ARG_ADDRESS_PRIVATE is returned. */ + *(cl_kernel_arg_address_qualifier *)param_value = CL_KERNEL_ARG_ADDRESS_PRIVATE; + } + return CL_SUCCESS; + + case CL_KERNEL_ARG_ACCESS_QUALIFIER: + if (k->args[arg_index].arg_access_qualifier == 0) + return CL_KERNEL_ARG_INFO_NOT_AVAILABLE; + if (param_value_size_ret) + *param_value_size_ret = sizeof(cl_kernel_arg_access_qualifier); + if (!param_value) + return CL_SUCCESS; + if (param_value_size < sizeof(cl_kernel_arg_access_qualifier)) + return CL_INVALID_VALUE; + *(cl_kernel_arg_address_qualifier *)param_value = k->args[arg_index].arg_access_qualifier; + return CL_SUCCESS; + + case CL_KERNEL_ARG_TYPE_NAME: + if (k->args[arg_index].arg_type_name == NULL) + return CL_KERNEL_ARG_INFO_NOT_AVAILABLE; + str_len = strlen(k->args[arg_index].arg_type_name); + if (param_value_size_ret) + *param_value_size_ret = str_len + 1; + if (!param_value) + return CL_SUCCESS; + if (param_value_size < str_len + 1) + return CL_INVALID_VALUE; + + memcpy(param_value, k->args[arg_index].arg_type_name, str_len); + ((char *)param_value)[str_len] = 0; + return CL_SUCCESS; + + case CL_KERNEL_ARG_NAME: + if (k->args[arg_index].arg_name == NULL) + return CL_KERNEL_ARG_INFO_NOT_AVAILABLE; + str_len = strlen(k->args[arg_index].arg_name); + if (param_value_size_ret) + *param_value_size_ret = str_len + 1; + if (!param_value) + return CL_SUCCESS; + if (param_value_size < str_len + 1) + return CL_INVALID_VALUE; + + memcpy(param_value, k->args[arg_index].arg_name, str_len); + ((char *)param_value)[str_len] = 0; + return CL_SUCCESS; + + case CL_KERNEL_ARG_TYPE_QUALIFIER: + if ((k->args[arg_index].arg_type_qualifier & + (~(CL_KERNEL_ARG_TYPE_NONE | CL_KERNEL_ARG_TYPE_CONST | + CL_KERNEL_ARG_TYPE_RESTRICT | CL_KERNEL_ARG_TYPE_VOLATILE | + CL_KERNEL_ARG_TYPE_PIPE))) != 0) + return CL_KERNEL_ARG_INFO_NOT_AVAILABLE; + if (param_value_size_ret) + *param_value_size_ret = sizeof(cl_kernel_arg_type_qualifier); + if (!param_value) + return CL_SUCCESS; + if (param_value_size < sizeof(cl_kernel_arg_type_qualifier)) + return CL_INVALID_VALUE; + + *(cl_kernel_arg_type_qualifier *)param_value = k->args[arg_index].arg_type_qualifier; + return CL_SUCCESS; + + default: + assert(0); + } + + return CL_SUCCESS; +} + +LOCAL cl_kernel +cl_kernel_new(cl_program p, const char *name) +{ + cl_kernel k = NULL; + + k = CL_CALLOC(1, sizeof(struct _cl_kernel)); + if (k == NULL) + return NULL; + + CL_OBJECT_INIT_BASE(k, CL_OBJECT_KERNEL_MAGIC); + k->program = p; + + k->name = CL_CALLOC(1, strlen(name) + 1); + if (k->name == NULL) { + CL_FREE(k); + return NULL; + } + memcpy(k->name, name, strlen(name) + 1); + + k->each_device = CL_CALLOC(p->each_device_num, sizeof(cl_kernel_for_device)); + if (k->each_device == NULL) { + CL_FREE(k->name); + CL_FREE(k); + return NULL; + } + k->each_device_num = p->each_device_num; + + /* Add it to program's user kernels list. */ + cl_program_add_ref(p); + CL_OBJECT_LOCK(p); + list_add_tail(&p->kernels, &k->base.node); + p->ker_n++; + CL_OBJECT_UNLOCK(p); + return k; +} + +LOCAL void +cl_kernel_delete(cl_kernel k) +{ + cl_uint i; + if (k == NULL) + return; + + /* We are not done with the kernel */ + if (CL_OBJECT_DEC_REF(k) > 1) + return; + + CL_OBJECT_LOCK(k->program); + list_node_del(&k->base.node); + k->program->ker_n--; + CL_OBJECT_UNLOCK(k->program); + cl_program_delete(k->program); + + if (k->name) + CL_FREE(k->name); + k->name = NULL; + + if (k->kernel_attr) + CL_FREE(k->kernel_attr); + k->kernel_attr = NULL; + + if (k->exec_info) { + assert(k->exec_info_n > 0); + CL_FREE(k->exec_info); + k->exec_info = NULL; + k->exec_info_n = 0; + } + + for (i = 0; i < k->each_device_num; i++) { + if (k->each_device[i]) + (k->each_device[i]->device->api.kernel_delete)(k->each_device[i]->device, k); + } + CL_FREE(k->each_device); + + if (k->args) { + for (i = 0; i < k->arg_n; i++) { + if (k->args[i].arg_name) + CL_FREE(k->args[i].arg_name); + if (k->args[i].arg_type_name) + CL_FREE(k->args[i].arg_type_name); + cl_kernel_arg_delete(&k->args[i]); + } + + CL_FREE(k->args); + k->args = NULL; + } + + CL_OBJECT_DESTROY_BASE(k); + CL_FREE(k); +} + +LOCAL void +cl_kernel_add_ref(cl_kernel k) +{ + CL_OBJECT_INC_REF(k); +} + +LOCAL cl_kernel +cl_kernel_create(cl_program p, const char *name, cl_int *errcode_ret) +{ + cl_kernel kernel = NULL; + cl_uint i, j; + cl_int err = CL_SUCCESS; + int someone_created; + cl_bool find; + + assert(p->each_device); + assert(name); + + if (CL_OBJECT_TAKE_OWNERSHIP(p, CL_FALSE) == CL_FALSE) { + *errcode_ret = CL_INVALID_OPERATION; + return NULL; + } + + if (p->build_status != CL_BUILD_SUCCESS) { + *errcode_ret = CL_INVALID_PROGRAM_EXECUTABLE; + return NULL; + } + + /* Need to find it in at least one device's program */ + find = CL_FALSE; + for (i = 0; i < p->each_device_num; i++) { + for (j = 0; j < p->each_device[i]->kernel_num; j++) { + if (strcmp(p->each_device[i]->kernel_names[j], name) == 0) + find = CL_TRUE; + } + } + if (find == CL_FALSE) { + *errcode_ret = CL_INVALID_KERNEL_NAME; + CL_OBJECT_RELEASE_OWNERSHIP(p); + return NULL; + } + + kernel = cl_kernel_new(p, name); + CL_OBJECT_RELEASE_OWNERSHIP(p); + if (kernel == NULL) { + *errcode_ret = CL_OUT_OF_HOST_MEMORY; + return NULL; + } + + someone_created = 0; + for (i = 0; i < p->each_device_num; i++) { + err = (p->each_device[i]->device->api.kernel_create)(p->each_device[i]->device, kernel); + if (err == CL_INVALID_KERNEL_DEFINITION) { // Conflict kernel define, can not go on + *errcode_ret = CL_INVALID_KERNEL_DEFINITION; + break; + } + + if (err == CL_SUCCESS) { // Once success, this kernel can be created + someone_created = 1; + } + } + + if (*errcode_ret != CL_SUCCESS) { + cl_kernel_delete(kernel); + return NULL; + } + if (someone_created == 0) { + assert(err != CL_SUCCESS); + *errcode_ret = err; + cl_kernel_delete(kernel); + return NULL; + } + + *errcode_ret = CL_SUCCESS; + return kernel; +} + +LOCAL cl_int +cl_kernel_get_workgroup_info(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, + size_t param_value_size, void *param_value, size_t *param_value_size_ret) +{ + const void *src_ptr = NULL; + size_t src_size = 0; + size_t parameter_data = 0; + size_t wk_size[3] = {0, 0, 0}; + cl_int err = CL_SUCCESS; + + if (device == NULL) { + assert(kernel->each_device_num == 1); + device = kernel->each_device[0]->device; + } + + switch (param_name) { + case CL_KERNEL_WORK_GROUP_SIZE: + case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: + case CL_KERNEL_LOCAL_MEM_SIZE: + case CL_KERNEL_PRIVATE_MEM_SIZE: { + err = device->api.kernel_get_info(device, kernel, param_name, ¶meter_data); + src_ptr = ¶meter_data; + src_size = sizeof(size_t); + break; + } + case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: { + src_ptr = kernel->compile_wg_sz; + src_size = sizeof(size_t) * 3; + break; + } + case CL_KERNEL_GLOBAL_WORK_SIZE: { + err = device->api.kernel_get_info(device, kernel, param_name, wk_size); + src_ptr = wk_size; + src_size = sizeof(size_t) * 3; + break; + } + default: + return CL_INVALID_VALUE; + } + + if (err != CL_SUCCESS) + return err; + + return cl_get_info_helper(src_ptr, src_size, + param_value, param_value_size, param_value_size_ret); +} + +LOCAL cl_int +cl_kernel_get_subgroup_info(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, + size_t input_value_size, const void *input_value, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) +{ + const void *src_ptr = NULL; + size_t src_size = 0; + size_t parameter_data = 0; + cl_int err = CL_SUCCESS; + + switch (param_name) { + case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR: { + int i, dim = 0; + size_t local_sz = 1; + size_t prefer_sz = 0; + + switch (input_value_size) { + case sizeof(size_t) * 1: + case sizeof(size_t) * 2: + case sizeof(size_t) * 3: + dim = input_value_size / sizeof(size_t); + break; + default: + return CL_INVALID_VALUE; + } + + if (input_value == NULL) + return CL_INVALID_VALUE; + + for (i = 0; i < dim; i++) + local_sz *= ((size_t *)input_value)[i]; + + err = device->api.kernel_get_info(device, kernel, param_name, &prefer_sz); + if (err != CL_SUCCESS) + return err; + + parameter_data = local_sz >= prefer_sz ? prefer_sz : local_sz; + src_ptr = ¶meter_data; + src_size = sizeof(size_t); + break; + } + + case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR: { + int i, dim = 0; + size_t local_sz = 1; + size_t prefer_sz = 0; + + switch (input_value_size) { + case sizeof(size_t) * 1: + case sizeof(size_t) * 2: + case sizeof(size_t) * 3: + dim = input_value_size / sizeof(size_t); + break; + default: + return CL_INVALID_VALUE; + } + + if (input_value == NULL) + return CL_INVALID_VALUE; + + for (i = 0; i < dim; i++) + local_sz *= ((size_t *)input_value)[i]; + + err = device->api.kernel_get_info(device, kernel, + CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, &prefer_sz); + if (err != CL_SUCCESS) + return err; + parameter_data = (local_sz + prefer_sz - 1) / prefer_sz; + src_ptr = ¶meter_data; + src_size = sizeof(size_t); + break; + } + default: + return CL_INVALID_VALUE; + }; + + return cl_get_info_helper(src_ptr, src_size, + param_value, param_value_size, param_value_size_ret); +} + +LOCAL cl_int +cl_kernel_set_exec_info(cl_kernel k, size_t n, const void *value) +{ + size_t i; + cl_mem svm; + + assert(k != NULL); + + if (n == 0) + return CL_SUCCESS; + + if (k->exec_info) { // Already set + assert(k->exec_info_n > 0); + CL_FREE(k->exec_info); + k->exec_info_n = 0; + } + + k->exec_info = CL_CALLOC(n, sizeof(_cl_kernel_exec_svm_info)); + if (k->exec_info == NULL) + return CL_OUT_OF_HOST_MEMORY; + + for (i = 0; i < n / sizeof(void *); i++) { // Assure all the ptr are svm allocated */ + svm = cl_context_get_svm_by_ptr(k->program->ctx, ((void **)value)[i], CL_FALSE); + if (svm == NULL) { + CL_FREE(k->exec_info); + k->exec_info = NULL; + return CL_INVALID_OPERATION; + } + + k->exec_info[i].svm = svm; + assert(svm->host_ptr); + k->exec_info[i].offset = svm->host_ptr - ((void **)value)[i]; + } + + k->exec_info_n = n / sizeof(void *); + return CL_SUCCESS; +} + +LOCAL cl_int +cl_enqueue_handle_kernel_ndrange(cl_event e, cl_int status) +{ + cl_int err = CL_SUCCESS; + + if (status == CL_QUEUED) { + cl_uint i; + cl_kernel k = e->exec_data.nd_range.kernel; + assert(k); + + /* Check that the user did not forget any argument */ + for (i = 0; i < k->arg_n; ++i) { + if (k->args[i].is_set == CL_FALSE) + return CL_INVALID_KERNEL_ARGS; + + if ((k->args[i].arg_type == ArgTypePointer && k->args[i].arg_addrspace != AddressSpaceLocal) || + k->args[i].arg_type == ArgTypeImage || + k->args[i].arg_type == ArgTypePipe) { + if (k->args[i].val.val_mem) { + err = cl_mem_assure_allocated(e->queue->device, k->args[i].val.val_mem); + if (err != CL_SUCCESS) + return err; + } + } + } + } + + err = e->queue->device->api.nd_range_kernel(e, status); + return err; +} diff --git a/runtime/cl_kernel.h b/runtime/cl_kernel.h new file mode 100644 index 00000000..c227df74 --- /dev/null +++ b/runtime/cl_kernel.h @@ -0,0 +1,130 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Benjamin Segovia <benjamin.segovia@intel.com> + */ + +#ifndef __CL_KERNEL_H__ +#define __CL_KERNEL_H__ + +#include "cl_base_object.h" +#include "CL/cl.h" + +typedef enum cl_address_space_type { + AddressSpacePrivate = 0, + AddressSpaceGlobal = 1, + AddressSpaceConstant = 2, + AddressSpaceLocal = 3, +} cl_address_space_type; + +typedef enum cl_arg_type { + ArgTypeInvalid = 0, + ArgTypeValue, // int, float, double, long, etc + ArgTypeStruct, + ArgTypePointer, + ArgTypeImage, + ArgTypeSampler, + ArgTypePipe, +} cl_arg_type; + +typedef struct _cl_argument { + cl_arg_type arg_type; + cl_uint arg_no; + cl_uint arg_size; // size in bytes + cl_address_space_type arg_addrspace; + cl_uint arg_type_qualifier; + cl_uint arg_access_qualifier; + char *arg_name; + char *arg_type_name; + cl_bool use_svm; + + union { + cl_char val_char; + cl_short val_short; + cl_int val_int; + cl_long val_long; + cl_half val_half; + cl_float val_float; + cl_double val_double; + cl_sampler val_sampler; + cl_mem val_mem; + struct { + cl_mem svm; + void *ptr; + } val_svm; + void *val_ptr; + } val; + cl_uint val_size; + cl_bool is_set; /* All args must be set before NDRange */ +} _cl_argument; +typedef _cl_argument *cl_argument; + +typedef struct _cl_kernel_for_device { + cl_device_id device; + void *exec_code; /* The binary for exec */ + cl_uint exec_code_sz; /* The binary for exec size */ +} _cl_kernel_for_device; +typedef _cl_kernel_for_device *cl_kernel_for_device; + +typedef struct _cl_kernel_exec_svm_info { + cl_mem svm; + size_t offset; +} _cl_kernel_exec_svm_info; +typedef _cl_kernel_exec_svm_info *cl_kernel_exec_svm_info; + +/* One OCL function */ +typedef struct _cl_kernel { + _cl_base_object base; + cl_program program; /* Point back to program */ + char *name; /* The kernel name */ + cl_argument args; /* All the arguments */ + cl_uint arg_n; /* Number of arguments */ + size_t compile_wg_sz[3]; /* Required workgroup size by + __attribute__((reqd_work_group_size(X, Y, Z))) qualifier */ + char *kernel_attr; /* The kernel attribute */ + cl_uint each_device_num; /* Each device number */ + cl_kernel_for_device *each_device; /* Program content interpreted by device */ + cl_kernel_exec_svm_info exec_info; /* The kernel's exec info */ + cl_uint exec_info_n; /* The kernel's exec info count */ +} _cl_kernel; + +#define CL_OBJECT_KERNEL_MAGIC 0x1234567890abedefLL +#define CL_OBJECT_IS_KERNEL(obj) ((obj && \ + ((cl_base_object)obj)->magic == CL_OBJECT_KERNEL_MAGIC && \ + CL_OBJECT_GET_REF(obj) >= 1)) + +extern cl_kernel cl_kernel_create(cl_program p, const char *name, cl_int *errcode_ret); +extern cl_kernel cl_kernel_new(cl_program, const char *name); +extern void cl_kernel_delete(cl_kernel); +extern void cl_kernel_add_ref(cl_kernel); +extern int cl_kernel_set_arg(cl_kernel, uint32_t arg_index, size_t arg_size, const void *arg_value); +extern int cl_kernel_get_argument_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_name, + size_t param_value_size, void *param_value, size_t *param_value_size_ret); +extern cl_int cl_kernel_get_workgroup_info(cl_kernel kernel, cl_device_id device, + cl_kernel_work_group_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret); +extern cl_int cl_kernel_get_subgroup_info(cl_kernel kernel, cl_device_id device, + cl_kernel_work_group_info param_name, size_t input_value_size, + const void *input_value, size_t param_value_size, + void *param_value, size_t *param_value_size_ret); + +/* Set the argument before kernel execution */ +extern int cl_kernel_set_arg_svm_pointer(cl_kernel, uint32_t arg_index, const void *arg_value); +extern cl_int cl_kernel_set_exec_info(cl_kernel k, size_t n, const void *value); +extern cl_int cl_kernel_work_group_sz(cl_kernel ker, const size_t *local_wk_sz, + cl_uint wk_dim, size_t *wk_grp_sz); +extern cl_int cl_enqueue_handle_kernel_ndrange(cl_event e, cl_int status); +#endif /* __CL_KERNEL_H__ */ |