summary | refs | log | tree | commit | diff
path: root/src/cl_mem.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cl_mem.c')
-rw-r--r-- src/cl_mem.c 78
1 files changed, 50 insertions, 28 deletions
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 0c49c3d7..a8543c9e 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -2146,6 +2146,36 @@ fail:
return ret;
}
+#define ALIGN16 16
+#define ALIGN4 4
+#define ALIGN1 1
+/* get_align_size_for_copy_kernel: pick the widest of ALIGN16 / ALIGN4 / ALIGN1 that the copy satisfies, store the matching channel order and data type in *fmt, and return the chosen size. */
+static size_t
+get_align_size_for_copy_kernel(struct _cl_mem_image* image, const size_t origin0, const size_t region0,
+ const size_t offset, cl_image_format *fmt) { /* origin0 is scaled by image->bpp here; region0 and offset are tested as given (callers in this patch pass region[0] * bpp and the buffer offset) */
+ size_t align_size = 0; /* placeholder only: every branch below assigns a value */
+ /* Widest case: a 2D image whose row size (w * bpp), scaled origin, region0 and offset are all multiples of ALIGN16. NOTE(review): presumably bpp is bytes per pixel - confirm. */
+ if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % ALIGN16 == 0) &&
+ ((origin0 * image->bpp) % ALIGN16 == 0) && (region0 % ALIGN16 == 0) && (offset % ALIGN16 == 0)){
+ fmt->image_channel_order = CL_RGBA;
+ fmt->image_channel_data_type = CL_UNSIGNED_INT32;
+ align_size = ALIGN16;
+ } /* otherwise repeat the same four divisibility checks against ALIGN4 */
+ else if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % ALIGN4 == 0) &&
+ ((origin0 * image->bpp) % ALIGN4 == 0) && (region0 % ALIGN4 == 0) && (offset % ALIGN4 == 0)){
+ fmt->image_channel_order = CL_R;
+ fmt->image_channel_data_type = CL_UNSIGNED_INT32;
+ align_size = ALIGN4;
+ }
+ else{ /* fallback: image is not 2D, or some quantity is not a multiple of ALIGN4 */
+ fmt->image_channel_order = CL_R;
+ fmt->image_channel_data_type = CL_UNSIGNED_INT8;
+ align_size = ALIGN1;
+ }
+ /* The result is always one of ALIGN16, ALIGN4 or ALIGN1. */
+ return align_size;
+}
+
LOCAL cl_int
cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_mem_image* image, cl_mem buffer,
const size_t *src_origin, const size_t dst_offset, const size_t *region) {
@@ -2158,7 +2188,6 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m
cl_image_format fmt;
size_t origin0, region0;
size_t kn_dst_offset;
- int align16 = 0;
size_t align_size = 1;
size_t w_saved;
@@ -2176,18 +2205,7 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m
w_saved = image->w;
region0 = region[0] * bpp;
kn_dst_offset = dst_offset;
- if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % 16 == 0) &&
- ((src_origin[0] * bpp) % 16 == 0) && (region0 % 16 == 0) && (dst_offset % 16 == 0)){
- fmt.image_channel_order = CL_RGBA;
- fmt.image_channel_data_type = CL_UNSIGNED_INT32;
- align16 = 1;
- align_size = 16;
- }
- else{
- fmt.image_channel_order = CL_R;
- fmt.image_channel_data_type = CL_UNSIGNED_INT8;
- align_size = 1;
- }
+ align_size = get_align_size_for_copy_kernel(image, src_origin[0], region0, dst_offset, &fmt);
image->intel_fmt = cl_image_get_intel_format(&fmt);
image->w = (image->w * image->bpp) / align_size;
image->bpp = align_size;
@@ -2198,7 +2216,7 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m
/* setup the kernel and run. */
if(image->image_type == CL_MEM_OBJECT_IMAGE2D) {
- if(align16){
+ if(align_size == ALIGN16){
extern char cl_internal_copy_image_2d_to_buffer_align16_str[];
extern size_t cl_internal_copy_image_2d_to_buffer_align16_str_size;
@@ -2206,6 +2224,14 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event event, struct _cl_m
cl_internal_copy_image_2d_to_buffer_align16_str,
(size_t)cl_internal_copy_image_2d_to_buffer_align16_str_size, NULL);
}
+ else if(align_size == ALIGN4){
+ extern char cl_internal_copy_image_2d_to_buffer_align4_str[];
+ extern size_t cl_internal_copy_image_2d_to_buffer_align4_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4,
+ cl_internal_copy_image_2d_to_buffer_align4_str,
+ (size_t)cl_internal_copy_image_2d_to_buffer_align4_str_size, NULL);
+ }
else{
extern char cl_internal_copy_image_2d_to_buffer_str[];
extern size_t cl_internal_copy_image_2d_to_buffer_str_size;
@@ -2262,7 +2288,6 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe
cl_image_format fmt;
size_t origin0, region0;
size_t kn_src_offset;
- int align16 = 0;
size_t align_size = 1;
size_t w_saved = 0;
@@ -2280,18 +2305,7 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe
w_saved = image->w;
region0 = region[0] * bpp;
kn_src_offset = src_offset;
- if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * image->bpp) % 16 == 0) &&
- ((dst_origin[0] * bpp) % 16 == 0) && (region0 % 16 == 0) && (src_offset % 16 == 0)){
- fmt.image_channel_order = CL_RGBA;
- fmt.image_channel_data_type = CL_UNSIGNED_INT32;
- align16 = 1;
- align_size = 16;
- }
- else{
- fmt.image_channel_order = CL_R;
- fmt.image_channel_data_type = CL_UNSIGNED_INT8;
- align_size = 1;
- }
+ align_size = get_align_size_for_copy_kernel(image, dst_origin[0], region0, src_offset, &fmt);
image->intel_fmt = cl_image_get_intel_format(&fmt);
image->w = (image->w * image->bpp) / align_size;
image->bpp = align_size;
@@ -2302,7 +2316,7 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe
/* setup the kernel and run. */
if(image->image_type == CL_MEM_OBJECT_IMAGE2D) {
- if(align16){
+ if(align_size == ALIGN16){
extern char cl_internal_copy_buffer_to_image_2d_align16_str[];
extern size_t cl_internal_copy_buffer_to_image_2d_align16_str_size;
@@ -2310,6 +2324,14 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event event, cl_mem buffe
cl_internal_copy_buffer_to_image_2d_align16_str,
(size_t)cl_internal_copy_buffer_to_image_2d_align16_str_size, NULL);
}
+ else if(align_size == ALIGN4){
+ extern char cl_internal_copy_buffer_to_image_2d_align4_str[];
+ extern size_t cl_internal_copy_buffer_to_image_2d_align4_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4,
+ cl_internal_copy_buffer_to_image_2d_align4_str,
+ (size_t)cl_internal_copy_buffer_to_image_2d_align4_str_size, NULL);
+ }
else{
extern char cl_internal_copy_buffer_to_image_2d_str[];
extern size_t cl_internal_copy_buffer_to_image_2d_str_size;