From 3d21fa56f5f5afbbf16b35b199480af71e1189a3 Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Mon, 17 Aug 2020 13:44:49 -0700 Subject: libclc: Make all built-ins overloadable The SPIR spec states that all OpenCL built-in functions should be overloadable and mangled, to ensure consistency. Add the overload attribute to functions which were missing them: work dimensions, memory barriers and fences, and events. Reviewed By: tstellar, jenatali Differential Revision: https://reviews.llvm.org/D82078 --- .../amdgcn-amdhsa/lib/workitem/get_global_size.cl | 11 +++++----- .../amdgcn-amdhsa/lib/workitem/get_local_size.cl | 23 ++++++++++---------- .../amdgcn-amdhsa/lib/workitem/get_num_groups.cl | 2 +- libclc/amdgcn/lib/mem_fence/fence.cl | 25 ++++++++++------------ libclc/amdgcn/lib/synchronization/barrier.cl | 7 +++--- libclc/amdgcn/lib/workitem/get_global_offset.cl | 12 +++++------ libclc/amdgcn/lib/workitem/get_global_size.cl | 19 +++++++++------- libclc/amdgcn/lib/workitem/get_group_id.cl | 19 +++++++++------- libclc/amdgcn/lib/workitem/get_local_id.cl | 19 +++++++++------- libclc/amdgcn/lib/workitem/get_local_size.cl | 19 +++++++++------- libclc/amdgcn/lib/workitem/get_num_groups.cl | 19 +++++++++------- libclc/amdgcn/lib/workitem/get_work_dim.cl | 8 +++---- .../generic/include/clc/async/wait_group_events.h | 3 ++- .../clc/explicit_fence/explicit_memory_fence.h | 6 +++--- .../generic/include/clc/synchronization/barrier.h | 2 +- .../generic/include/clc/workitem/get_global_id.h | 2 +- .../include/clc/workitem/get_global_offset.h | 2 +- .../generic/include/clc/workitem/get_global_size.h | 2 +- libclc/generic/include/clc/workitem/get_group_id.h | 2 +- libclc/generic/include/clc/workitem/get_local_id.h | 2 +- .../generic/include/clc/workitem/get_local_size.h | 2 +- .../generic/include/clc/workitem/get_num_groups.h | 2 +- libclc/generic/include/clc/workitem/get_work_dim.h | 2 +- libclc/generic/lib/async/wait_group_events.cl | 3 ++- libclc/generic/lib/workitem/get_global_id.cl | 2 +- libclc/generic/lib/workitem/get_global_size.cl | 2 +- libclc/ptx-nvidiacl/lib/mem_fence/fence.cl | 10 ++++----- libclc/ptx-nvidiacl/lib/synchronization/barrier.cl | 3 +-- libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl | 2 +- libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl | 2 +- libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl | 2 +- libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl | 2 +- libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl | 2 +- libclc/r600/lib/synchronization/barrier.cl | 3 +-- libclc/r600/lib/workitem/get_global_offset.cl | 15 ++++++------- libclc/r600/lib/workitem/get_global_size.cl | 19 +++++++++------- libclc/r600/lib/workitem/get_group_id.cl | 19 +++++++++------- libclc/r600/lib/workitem/get_local_id.cl | 19 +++++++++------- libclc/r600/lib/workitem/get_local_size.cl | 19 +++++++++------- libclc/r600/lib/workitem/get_num_groups.cl | 19 +++++++++------- libclc/r600/lib/workitem/get_work_dim.cl | 11 +++++----- 41 files changed, 191 insertions(+), 173 deletions(-) (limited to 'libclc') diff --git a/libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl b/libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl index 2f95f9916b2c..62bd2ba28352 100644 --- a/libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl +++ b/libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl @@ -15,10 +15,9 @@ CONST_AS uchar * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr"); #endif -_CLC_DEF size_t get_global_size(uint dim) -{ - CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr(); - if (dim < 3) - return ptr[3 + dim]; - return 1; +_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) { + CONST_AS uint *ptr = (CONST_AS uint *)__dispatch_ptr(); + if (dim < 3) + return ptr[3 + dim]; + return 1; } diff --git a/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl b/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl index 9f208d8aea77..9f09fd5a16ec 100644 --- a/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl +++ b/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl @@ -15,16 +15,15 @@ CONST_AS char * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr"); #endif -_CLC_DEF size_t get_local_size(uint dim) -{ - CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr(); - switch (dim) { - case 0: - return ptr[1] & 0xffffu; - case 1: - return ptr[1] >> 16; - case 2: - return ptr[2] & 0xffffu; - } - return 1; +_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) { + CONST_AS uint *ptr = (CONST_AS uint *)__dispatch_ptr(); + switch (dim) { + case 0: + return ptr[1] & 0xffffu; + case 1: + return ptr[1] >> 16; + case 2: + return ptr[2] & 0xffffu; + } + return 1; } diff --git a/libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl b/libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl index 946b526fdb68..35dc22188521 100644 --- a/libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl +++ b/libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl @@ -1,7 +1,7 @@ #include -_CLC_DEF size_t get_num_groups(uint dim) { +_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) { size_t global_size = get_global_size(dim); size_t local_size = get_local_size(dim); size_t num_groups = global_size / local_size; diff --git a/libclc/amdgcn/lib/mem_fence/fence.cl b/libclc/amdgcn/lib/mem_fence/fence.cl index b85baf755b85..c7a10bb0238a 100644 --- a/libclc/amdgcn/lib/mem_fence/fence.cl +++ b/libclc/amdgcn/lib/mem_fence/fence.cl @@ -17,24 +17,21 @@ void __clc_amdgcn_s_waitcnt(unsigned flags); _CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt"); #endif -_CLC_DEF void mem_fence(cl_mem_fence_flags flags) -{ - if (flags & CLK_GLOBAL_MEM_FENCE) { - // scalar loads are counted with LGKM but we don't know whether - // the compiler turned any loads to scalar - __waitcnt(0); - } else if (flags & CLK_LOCAL_MEM_FENCE) - __waitcnt(0xff); // LGKM is [12:8] +_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) { + if (flags & CLK_GLOBAL_MEM_FENCE) { + // scalar loads are counted with LGKM but we don't know whether + // the compiler turned any loads to scalar + __waitcnt(0); + } else if (flags & CLK_LOCAL_MEM_FENCE) + __waitcnt(0xff); // LGKM is [12:8] } #undef __waitcnt // We don't have separate mechanism for read and write fences -_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags) -{ - mem_fence(flags); +_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) { + mem_fence(flags); } -_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags) -{ - mem_fence(flags); +_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) { + mem_fence(flags); } diff --git a/libclc/amdgcn/lib/synchronization/barrier.cl b/libclc/amdgcn/lib/synchronization/barrier.cl index e2f3c1369bbe..82bbd4b530f3 100644 --- a/libclc/amdgcn/lib/synchronization/barrier.cl +++ b/libclc/amdgcn/lib/synchronization/barrier.cl @@ -1,7 +1,6 @@ #include -_CLC_DEF void barrier(cl_mem_fence_flags flags) -{ - mem_fence(flags); - __builtin_amdgcn_s_barrier(); +_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) { + mem_fence(flags); + __builtin_amdgcn_s_barrier(); } diff --git a/libclc/amdgcn/lib/workitem/get_global_offset.cl b/libclc/amdgcn/lib/workitem/get_global_offset.cl index 0a87cd23f1f8..73d5694523ac 100644 --- a/libclc/amdgcn/lib/workitem/get_global_offset.cl +++ b/libclc/amdgcn/lib/workitem/get_global_offset.cl @@ -8,11 +8,9 @@ #define CONST_AS __attribute__((address_space(2))) #endif -_CLC_DEF size_t get_global_offset(uint dim) -{ - CONST_AS uint * ptr = - (CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr(); - if (dim < 3) - return ptr[dim + 1]; - return 0; +_CLC_DEF _CLC_OVERLOAD size_t get_global_offset(uint dim) { + CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr(); + if (dim < 3) + return ptr[dim + 1]; + return 0; } diff --git a/libclc/amdgcn/lib/workitem/get_global_size.cl b/libclc/amdgcn/lib/workitem/get_global_size.cl index c1e3894e4c87..2f28ca606665 100644 --- a/libclc/amdgcn/lib/workitem/get_global_size.cl +++ b/libclc/amdgcn/lib/workitem/get_global_size.cl @@ -4,12 +4,15 @@ uint __clc_amdgcn_get_global_size_x(void) __asm("llvm.r600.read.global.size.x"); uint __clc_amdgcn_get_global_size_y(void) __asm("llvm.r600.read.global.size.y"); uint __clc_amdgcn_get_global_size_z(void) __asm("llvm.r600.read.global.size.z"); -_CLC_DEF size_t get_global_size(uint dim) -{ - switch (dim) { - case 0: return __clc_amdgcn_get_global_size_x(); - case 1: return __clc_amdgcn_get_global_size_y(); - case 2: return __clc_amdgcn_get_global_size_z(); - default: return 1; - } +_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) { + switch (dim) { + case 0: + return __clc_amdgcn_get_global_size_x(); + case 1: + return __clc_amdgcn_get_global_size_y(); + case 2: + return __clc_amdgcn_get_global_size_z(); + default: + return 1; + } } diff --git a/libclc/amdgcn/lib/workitem/get_group_id.cl b/libclc/amdgcn/lib/workitem/get_group_id.cl index eb57b3e2584a..211c86eea10d 100644 --- a/libclc/amdgcn/lib/workitem/get_group_id.cl +++ b/libclc/amdgcn/lib/workitem/get_group_id.cl @@ -1,11 +1,14 @@ #include -_CLC_DEF size_t get_group_id(uint dim) -{ - switch(dim) { - case 0: return __builtin_amdgcn_workgroup_id_x(); - case 1: return __builtin_amdgcn_workgroup_id_y(); - case 2: return __builtin_amdgcn_workgroup_id_z(); - default: return 1; - } +_CLC_DEF _CLC_OVERLOAD size_t get_group_id(uint dim) { + switch (dim) { + case 0: + return __builtin_amdgcn_workgroup_id_x(); + case 1: + return __builtin_amdgcn_workgroup_id_y(); + case 2: + return __builtin_amdgcn_workgroup_id_z(); + default: + return 1; + } } diff --git a/libclc/amdgcn/lib/workitem/get_local_id.cl b/libclc/amdgcn/lib/workitem/get_local_id.cl index 9f666dea3400..073ecfa40ab4 100644 --- a/libclc/amdgcn/lib/workitem/get_local_id.cl +++ b/libclc/amdgcn/lib/workitem/get_local_id.cl @@ -1,11 +1,14 @@ #include -_CLC_DEF size_t get_local_id(uint dim) -{ - switch(dim) { - case 0: return __builtin_amdgcn_workitem_id_x(); - case 1: return __builtin_amdgcn_workitem_id_y(); - case 2: return __builtin_amdgcn_workitem_id_z(); - default: return 1; - } +_CLC_DEF _CLC_OVERLOAD size_t get_local_id(uint dim) { + switch (dim) { + case 0: + return __builtin_amdgcn_workitem_id_x(); + case 1: + return __builtin_amdgcn_workitem_id_y(); + case 2: + return __builtin_amdgcn_workitem_id_z(); + default: + return 1; + } } diff --git a/libclc/amdgcn/lib/workitem/get_local_size.cl b/libclc/amdgcn/lib/workitem/get_local_size.cl index 9b19f6b35412..c398b7eb5a9d 100644 --- a/libclc/amdgcn/lib/workitem/get_local_size.cl +++ b/libclc/amdgcn/lib/workitem/get_local_size.cl @@ -4,12 +4,15 @@ uint __clc_amdgcn_get_local_size_x(void) __asm("llvm.r600.read.local.size.x"); uint __clc_amdgcn_get_local_size_y(void) __asm("llvm.r600.read.local.size.y"); uint __clc_amdgcn_get_local_size_z(void) __asm("llvm.r600.read.local.size.z"); -_CLC_DEF size_t get_local_size(uint dim) -{ - switch (dim) { - case 0: return __clc_amdgcn_get_local_size_x(); - case 1: return __clc_amdgcn_get_local_size_y(); - case 2: return __clc_amdgcn_get_local_size_z(); - default: return 1; - } +_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) { + switch (dim) { + case 0: + return __clc_amdgcn_get_local_size_x(); + case 1: + return __clc_amdgcn_get_local_size_y(); + case 2: + return __clc_amdgcn_get_local_size_z(); + default: + return 1; + } } diff --git a/libclc/amdgcn/lib/workitem/get_num_groups.cl b/libclc/amdgcn/lib/workitem/get_num_groups.cl index f921414acc2c..020741e49cb7 100644 --- a/libclc/amdgcn/lib/workitem/get_num_groups.cl +++ b/libclc/amdgcn/lib/workitem/get_num_groups.cl @@ -4,12 +4,15 @@ uint __clc_amdgcn_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x"); uint __clc_amdgcn_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y"); uint __clc_amdgcn_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z"); -_CLC_DEF size_t get_num_groups(uint dim) -{ - switch (dim) { - case 0: return __clc_amdgcn_get_num_groups_x(); - case 1: return __clc_amdgcn_get_num_groups_y(); - case 2: return __clc_amdgcn_get_num_groups_z(); - default: return 1; - } +_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) { + switch (dim) { + case 0: + return __clc_amdgcn_get_num_groups_x(); + case 1: + return __clc_amdgcn_get_num_groups_y(); + case 2: + return __clc_amdgcn_get_num_groups_z(); + default: + return 1; + } } diff --git a/libclc/amdgcn/lib/workitem/get_work_dim.cl b/libclc/amdgcn/lib/workitem/get_work_dim.cl index 3add9b64f057..cb8cf83a220c 100644 --- a/libclc/amdgcn/lib/workitem/get_work_dim.cl +++ b/libclc/amdgcn/lib/workitem/get_work_dim.cl @@ -8,9 +8,7 @@ #define CONST_AS __attribute__((address_space(2))) #endif -_CLC_DEF uint get_work_dim(void) -{ - CONST_AS uint * ptr = - (CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr(); - return ptr[0]; +_CLC_DEF _CLC_OVERLOAD uint get_work_dim(void) { + CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr(); + return ptr[0]; } diff --git a/libclc/generic/include/clc/async/wait_group_events.h b/libclc/generic/include/clc/async/wait_group_events.h index 799efa0a791c..d707f4c68a20 100644 --- a/libclc/generic/include/clc/async/wait_group_events.h +++ b/libclc/generic/include/clc/async/wait_group_events.h @@ -1 +1,2 @@ -void wait_group_events(int num_events, event_t *event_list); +_CLC_DECL _CLC_OVERLOAD void wait_group_events(int num_events, + event_t *event_list); diff --git a/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h b/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h index 8e046b1225de..05c6d7939549 100644 --- a/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h +++ b/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h @@ -1,3 +1,3 @@ -_CLC_DECL void mem_fence(cl_mem_fence_flags flags); -_CLC_DECL void read_mem_fence(cl_mem_fence_flags flags); -_CLC_DECL void write_mem_fence(cl_mem_fence_flags flags); +_CLC_DECL _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags); +_CLC_DECL _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags); +_CLC_DECL _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags); diff --git a/libclc/generic/include/clc/synchronization/barrier.h b/libclc/generic/include/clc/synchronization/barrier.h index 7167a3d3f093..63e3ac58e900 100644 --- a/libclc/generic/include/clc/synchronization/barrier.h +++ b/libclc/generic/include/clc/synchronization/barrier.h @@ -1 +1 @@ -_CLC_DECL void barrier(cl_mem_fence_flags flags); +_CLC_DECL _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags); diff --git a/libclc/generic/include/clc/workitem/get_global_id.h b/libclc/generic/include/clc/workitem/get_global_id.h index 92759f146894..3bbace022951 100644 --- a/libclc/generic/include/clc/workitem/get_global_id.h +++ b/libclc/generic/include/clc/workitem/get_global_id.h @@ -1 +1 @@ -_CLC_DECL size_t get_global_id(uint dim); +_CLC_DECL _CLC_OVERLOAD size_t get_global_id(uint dim); diff --git a/libclc/generic/include/clc/workitem/get_global_offset.h b/libclc/generic/include/clc/workitem/get_global_offset.h index 7f4f6032abe6..ad7b441cf716 100644 --- a/libclc/generic/include/clc/workitem/get_global_offset.h +++ b/libclc/generic/include/clc/workitem/get_global_offset.h @@ -1 +1 @@ -_CLC_DECL size_t get_global_offset(uint dim); +_CLC_DECL _CLC_OVERLOAD size_t get_global_offset(uint dim); diff --git a/libclc/generic/include/clc/workitem/get_global_size.h b/libclc/generic/include/clc/workitem/get_global_size.h index 2f8370585397..1b7ccf75643d 100644 --- a/libclc/generic/include/clc/workitem/get_global_size.h +++ b/libclc/generic/include/clc/workitem/get_global_size.h @@ -1 +1 @@ -_CLC_DECL size_t get_global_size(uint dim); +_CLC_DECL _CLC_OVERLOAD size_t get_global_size(uint dim); diff --git a/libclc/generic/include/clc/workitem/get_group_id.h b/libclc/generic/include/clc/workitem/get_group_id.h index 346c82c6c316..b71fbc1990bc 100644 --- a/libclc/generic/include/clc/workitem/get_group_id.h +++ b/libclc/generic/include/clc/workitem/get_group_id.h @@ -1 +1 @@ -_CLC_DECL size_t get_group_id(uint dim); +_CLC_DECL _CLC_OVERLOAD size_t get_group_id(uint dim); diff --git a/libclc/generic/include/clc/workitem/get_local_id.h b/libclc/generic/include/clc/workitem/get_local_id.h index 169aeed86786..60aa1ec68427 100644 --- a/libclc/generic/include/clc/workitem/get_local_id.h +++ b/libclc/generic/include/clc/workitem/get_local_id.h @@ -1 +1 @@ -_CLC_DECL size_t get_local_id(uint dim); +_CLC_DECL _CLC_OVERLOAD size_t get_local_id(uint dim); diff --git a/libclc/generic/include/clc/workitem/get_local_size.h b/libclc/generic/include/clc/workitem/get_local_size.h index 040ec58a3d8b..808730fbc38e 100644 --- a/libclc/generic/include/clc/workitem/get_local_size.h +++ b/libclc/generic/include/clc/workitem/get_local_size.h @@ -1 +1 @@ -_CLC_DECL size_t get_local_size(uint dim); +_CLC_DECL _CLC_OVERLOAD size_t get_local_size(uint dim); diff --git a/libclc/generic/include/clc/workitem/get_num_groups.h b/libclc/generic/include/clc/workitem/get_num_groups.h index e555c7efc2d2..8657eb7f6eaf 100644 --- a/libclc/generic/include/clc/workitem/get_num_groups.h +++ b/libclc/generic/include/clc/workitem/get_num_groups.h @@ -1 +1 @@ -_CLC_DECL size_t get_num_groups(uint dim); +_CLC_DECL _CLC_OVERLOAD size_t get_num_groups(uint dim); diff --git a/libclc/generic/include/clc/workitem/get_work_dim.h b/libclc/generic/include/clc/workitem/get_work_dim.h index ae08ba9a5150..8781b2a974d4 100644 --- a/libclc/generic/include/clc/workitem/get_work_dim.h +++ b/libclc/generic/include/clc/workitem/get_work_dim.h @@ -1 +1 @@ -_CLC_DECL uint get_work_dim(void); +_CLC_DECL _CLC_OVERLOAD uint get_work_dim(void); diff --git a/libclc/generic/lib/async/wait_group_events.cl b/libclc/generic/lib/async/wait_group_events.cl index 05c9d58db45e..5f4eec325a04 100644 --- a/libclc/generic/lib/async/wait_group_events.cl +++ b/libclc/generic/lib/async/wait_group_events.cl @@ -1,5 +1,6 @@ #include -_CLC_DEF void wait_group_events(int num_events, event_t *event_list) { +_CLC_DEF _CLC_OVERLOAD void wait_group_events(int num_events, + event_t *event_list) { barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); } diff --git a/libclc/generic/lib/workitem/get_global_id.cl b/libclc/generic/lib/workitem/get_global_id.cl index b6c2ea1d2cca..ccd84d9d8330 100644 --- a/libclc/generic/lib/workitem/get_global_id.cl +++ b/libclc/generic/lib/workitem/get_global_id.cl @@ -1,5 +1,5 @@ #include -_CLC_DEF size_t get_global_id(uint dim) { +_CLC_DEF _CLC_OVERLOAD size_t get_global_id(uint dim) { return get_group_id(dim) * get_local_size(dim) + get_local_id(dim) + get_global_offset(dim); } diff --git a/libclc/generic/lib/workitem/get_global_size.cl b/libclc/generic/lib/workitem/get_global_size.cl index 5ae649e10d51..9bc260782530 100644 --- a/libclc/generic/lib/workitem/get_global_size.cl +++ b/libclc/generic/lib/workitem/get_global_size.cl @@ -1,5 +1,5 @@ #include -_CLC_DEF size_t get_global_size(uint dim) { +_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) { return get_num_groups(dim)*get_local_size(dim); } diff --git a/libclc/ptx-nvidiacl/lib/mem_fence/fence.cl b/libclc/ptx-nvidiacl/lib/mem_fence/fence.cl index 16b039176ece..de078b5e8c19 100644 --- a/libclc/ptx-nvidiacl/lib/mem_fence/fence.cl +++ b/libclc/ptx-nvidiacl/lib/mem_fence/fence.cl @@ -1,15 +1,15 @@ #include -_CLC_DEF void mem_fence(cl_mem_fence_flags flags) { - if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE)) - __nvvm_membar_cta(); +_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) { + if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE)) + __nvvm_membar_cta(); } // We do not have separate mechanism for read and write fences. -_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags) { +_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) { mem_fence(flags); } -_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags) { +_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) { mem_fence(flags); } diff --git a/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl b/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl index 930e36a2853e..b3d99d797edf 100644 --- a/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl +++ b/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl @@ -1,6 +1,5 @@ #include -_CLC_DEF void barrier(cl_mem_fence_flags flags) { +_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) { __syncthreads(); } - diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl b/libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl index 19bc195312cf..a7f5f59a3702 100644 --- a/libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl +++ b/libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl @@ -1,5 +1,5 @@ #include -_CLC_DEF size_t get_global_id(uint dim) { +_CLC_DEF _CLC_OVERLOAD size_t get_global_id(uint dim) { return get_group_id(dim) * get_local_size(dim) + get_local_id(dim); } diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl b/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl index dbc47847f9e3..bbbf1068e69e 100644 --- a/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl +++ b/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl @@ -1,6 +1,6 @@ #include -_CLC_DEF size_t get_group_id(uint dim) { +_CLC_DEF _CLC_OVERLOAD size_t get_group_id(uint dim) { switch (dim) { case 0: return __nvvm_read_ptx_sreg_ctaid_x(); case 1: return __nvvm_read_ptx_sreg_ctaid_y(); diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl b/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl index f31581a19a3c..a6770f2b9155 100644 --- a/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl +++ b/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl @@ -1,6 +1,6 @@ #include -_CLC_DEF size_t get_local_id(uint dim) { +_CLC_DEF _CLC_OVERLOAD size_t get_local_id(uint dim) { switch (dim) { case 0: return __nvvm_read_ptx_sreg_tid_x(); case 1: return __nvvm_read_ptx_sreg_tid_y(); diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl b/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl index d00b0d6c9fba..5960d5d79932 100644 --- a/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl +++ b/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl @@ -1,6 +1,6 @@ #include -_CLC_DEF size_t get_local_size(uint dim) { +_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) { switch (dim) { case 0: return __nvvm_read_ptx_sreg_ntid_x(); case 1: return __nvvm_read_ptx_sreg_ntid_y(); diff --git a/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl b/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl index d7abf3f29070..f0e52f1fdbc0 100644 --- a/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl +++ b/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl @@ -1,6 +1,6 @@ #include -_CLC_DEF size_t get_num_groups(uint dim) { +_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) { switch (dim) { case 0: return __nvvm_read_ptx_sreg_nctaid_x(); case 1: return __nvvm_read_ptx_sreg_nctaid_y(); diff --git a/libclc/r600/lib/synchronization/barrier.cl b/libclc/r600/lib/synchronization/barrier.cl index 98200e7eda92..6a28ee3201de 100644 --- a/libclc/r600/lib/synchronization/barrier.cl +++ b/libclc/r600/lib/synchronization/barrier.cl @@ -2,8 +2,7 @@ _CLC_DEF void __clc_r600_barrier(void) __asm("llvm.r600.group.barrier"); -_CLC_DEF void barrier(uint flags) -{ +_CLC_DEF _CLC_OVERLOAD void barrier(uint flags) { // We should call mem_fence here, but that is not implemented for r600 yet __clc_r600_barrier(); } diff --git a/libclc/r600/lib/workitem/get_global_offset.cl b/libclc/r600/lib/workitem/get_global_offset.cl index b38ae3377570..7c2e403ea6ec 100644 --- a/libclc/r600/lib/workitem/get_global_offset.cl +++ b/libclc/r600/lib/workitem/get_global_offset.cl @@ -1,11 +1,10 @@ #include -_CLC_DEF uint get_global_offset(uint dim) -{ - __attribute__((address_space(7))) uint * ptr = - (__attribute__((address_space(7))) uint *) - __builtin_r600_implicitarg_ptr(); - if (dim < 3) - return ptr[dim + 1]; - return 0; +_CLC_DEF _CLC_OVERLOAD uint get_global_offset(uint dim) { + __attribute__((address_space(7))) uint *ptr = + (__attribute__((address_space(7))) + uint *)__builtin_r600_implicitarg_ptr(); + if (dim < 3) + return ptr[dim + 1]; + return 0; } diff --git a/libclc/r600/lib/workitem/get_global_size.cl b/libclc/r600/lib/workitem/get_global_size.cl index d356929c4948..628136150d84 100644 --- a/libclc/r600/lib/workitem/get_global_size.cl +++ b/libclc/r600/lib/workitem/get_global_size.cl @@ -4,12 +4,15 @@ uint __clc_r600_get_global_size_x(void) __asm("llvm.r600.read.global.size.x"); uint __clc_r600_get_global_size_y(void) __asm("llvm.r600.read.global.size.y"); uint __clc_r600_get_global_size_z(void) __asm("llvm.r600.read.global.size.z"); -_CLC_DEF size_t get_global_size(uint dim) -{ - switch (dim) { - case 0: return __clc_r600_get_global_size_x(); - case 1: return __clc_r600_get_global_size_y(); - case 2: return __clc_r600_get_global_size_z(); - default: return 1; - } +_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) { + switch (dim) { + case 0: + return __clc_r600_get_global_size_x(); + case 1: + return __clc_r600_get_global_size_y(); + case 2: + return __clc_r600_get_global_size_z(); + default: + return 1; + } } diff --git a/libclc/r600/lib/workitem/get_group_id.cl b/libclc/r600/lib/workitem/get_group_id.cl index e5efc0a85778..1fb993ace72e 100644 --- a/libclc/r600/lib/workitem/get_group_id.cl +++ b/libclc/r600/lib/workitem/get_group_id.cl @@ -1,11 +1,14 @@ #include -_CLC_DEF uint get_group_id(uint dim) -{ - switch(dim) { - case 0: return __builtin_r600_read_tgid_x(); - case 1: return __builtin_r600_read_tgid_y(); - case 2: return __builtin_r600_read_tgid_z(); - default: return 1; - } +_CLC_DEF _CLC_OVERLOAD uint get_group_id(uint dim) { + switch (dim) { + case 0: + return __builtin_r600_read_tgid_x(); + case 1: + return __builtin_r600_read_tgid_y(); + case 2: + return __builtin_r600_read_tgid_z(); + default: + return 1; + } } diff --git a/libclc/r600/lib/workitem/get_local_id.cl b/libclc/r600/lib/workitem/get_local_id.cl index a871a5d77f0c..80fdc344193b 100644 --- a/libclc/r600/lib/workitem/get_local_id.cl +++ b/libclc/r600/lib/workitem/get_local_id.cl @@ -1,11 +1,14 @@ #include -_CLC_DEF uint get_local_id(uint dim) -{ - switch(dim) { - case 0: return __builtin_r600_read_tidig_x(); - case 1: return __builtin_r600_read_tidig_y(); - case 2: return __builtin_r600_read_tidig_z(); - default: return 1; - } +_CLC_DEF _CLC_OVERLOAD uint get_local_id(uint dim) { + switch (dim) { + case 0: + return __builtin_r600_read_tidig_x(); + case 1: + return __builtin_r600_read_tidig_y(); + case 2: + return __builtin_r600_read_tidig_z(); + default: + return 1; + } } diff --git a/libclc/r600/lib/workitem/get_local_size.cl b/libclc/r600/lib/workitem/get_local_size.cl index 89e2612786e4..6edab7c46c2f 100644 --- a/libclc/r600/lib/workitem/get_local_size.cl +++ b/libclc/r600/lib/workitem/get_local_size.cl @@ -4,12 +4,15 @@ uint __clc_r600_get_local_size_x(void) __asm("llvm.r600.read.local.size.x"); uint __clc_r600_get_local_size_y(void) __asm("llvm.r600.read.local.size.y"); uint __clc_r600_get_local_size_z(void) __asm("llvm.r600.read.local.size.z"); -_CLC_DEF size_t get_local_size(uint dim) -{ - switch (dim) { - case 0: return __clc_r600_get_local_size_x(); - case 1: return __clc_r600_get_local_size_y(); - case 2: return __clc_r600_get_local_size_z(); - default: return 1; - } +_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) { + switch (dim) { + case 0: + return __clc_r600_get_local_size_x(); + case 1: + return __clc_r600_get_local_size_y(); + case 2: + return __clc_r600_get_local_size_z(); + default: + return 1; + } } diff --git a/libclc/r600/lib/workitem/get_num_groups.cl b/libclc/r600/lib/workitem/get_num_groups.cl index dfe6cef22f8e..ab4f5f629c27 100644 --- a/libclc/r600/lib/workitem/get_num_groups.cl +++ b/libclc/r600/lib/workitem/get_num_groups.cl @@ -4,12 +4,15 @@ uint __clc_r600_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x"); uint __clc_r600_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y"); uint __clc_r600_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z"); -_CLC_DEF size_t get_num_groups(uint dim) -{ - switch (dim) { - case 0: return __clc_r600_get_num_groups_x(); - case 1: return __clc_r600_get_num_groups_y(); - case 2: return __clc_r600_get_num_groups_z(); - default: return 1; - } +_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) { + switch (dim) { + case 0: + return __clc_r600_get_num_groups_x(); + case 1: + return __clc_r600_get_num_groups_y(); + case 2: + return __clc_r600_get_num_groups_z(); + default: + return 1; + } } diff --git a/libclc/r600/lib/workitem/get_work_dim.cl b/libclc/r600/lib/workitem/get_work_dim.cl index fccf716cf7c9..e18a83b8dd95 100644 --- a/libclc/r600/lib/workitem/get_work_dim.cl +++ b/libclc/r600/lib/workitem/get_work_dim.cl @@ -1,9 +1,8 @@ #include -_CLC_DEF uint get_work_dim(void) -{ - __attribute__((address_space(7))) uint * ptr = - (__attribute__((address_space(7))) uint *) - __builtin_r600_implicitarg_ptr(); - return ptr[0]; +_CLC_DEF _CLC_OVERLOAD uint get_work_dim(void) { + __attribute__((address_space(7))) uint *ptr = + (__attribute__((address_space(7))) + uint *)__builtin_r600_implicitarg_ptr(); + return ptr[0]; } -- cgit v1.2.1