diff options
author | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2021-11-19 20:18:23 +0000 |
---|---|---|
committer | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2021-11-19 20:18:23 +0000 |
commit | 04954824ee158fffe6653c7eb51c07347b36ff21 (patch) | |
tree | ff546e7f59b2bcaee5052322a4fb7337a66c900f | |
parent | b07b5bd72716625e0976a84d23652d94d8d0165a (diff) | |
download | llvm-04954824ee158fffe6653c7eb51c07347b36ff21.tar.gz |
[openmp][amdgpu][nfc] Simplify implicit args handling
Removes a +x/-x pair on the only store/load of a variable
and deletes some nearby dead code. Also reduces the size of the implicit
struct to reflect the code currently emitted by clang.
Differential Revision: https://reviews.llvm.org/D114270
-rw-r--r-- | openmp/libomptarget/plugins/amdgpu/impl/internal.h | 13 | ||||
-rw-r--r-- | openmp/libomptarget/plugins/amdgpu/impl/system.cpp | 30 | ||||
-rw-r--r-- | openmp/libomptarget/plugins/amdgpu/src/rtl.cpp | 12 |
3 files changed, 10 insertions, 45 deletions
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h index 8bca165dd5ae..bdac98c1e101 100644 --- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h @@ -38,13 +38,11 @@ typedef struct impl_implicit_args_s { unsigned long offset_y; unsigned long offset_z; unsigned long hostcall_ptr; - char num_gpu_queues; - unsigned long gpu_queue_ptr; - char num_cpu_queues; - unsigned long cpu_worker_signals; - unsigned long cpu_queue_ptr; - unsigned long kernarg_template_ptr; + unsigned long unused0; + unsigned long unused1; + unsigned long unused2; } impl_implicit_args_t; +static_assert(sizeof(impl_implicit_args_t) == 56, ""); // ---------------------- Kernel Start ------------- typedef struct atl_kernel_info_s { @@ -57,9 +55,6 @@ typedef struct atl_kernel_info_s { uint32_t vgpr_spill_count; uint32_t kernel_segment_size; uint32_t num_args; - std::vector<uint64_t> arg_alignments; - std::vector<uint64_t> arg_offsets; - std::vector<uint64_t> arg_sizes; } atl_kernel_info_t; typedef struct atl_symbol_info_s { diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp index 9151da95d6e4..4d661dc38100 100644 --- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp @@ -62,26 +62,16 @@ public: }; KernelArgMD() - : name_(std::string()), typeName_(std::string()), size_(0), offset_(0), - align_(0), valueKind_(ValueKind::Unknown) {} + : name_(std::string()), size_(0), offset_(0), + valueKind_(ValueKind::Unknown) {} // fields std::string name_; - std::string typeName_; uint32_t size_; uint32_t offset_; - uint32_t align_; ValueKind valueKind_; }; -class KernelMD { -public: - KernelMD() : kernargSegmentSize_(0ull) {} - - // fields - uint64_t kernargSegmentSize_; -}; - static const std::map<std::string, KernelArgMD::ValueKind> ArgValueKind = { // v3 // {"by_value", KernelArgMD::ValueKind::ByValue}, @@ -320,10 +310,6 @@ int populate_kernelArgMD(msgpack::byte_range args_element, foronly_string(value, [&](size_t N, const unsigned char *str) { kernelarg->name_ = std::string(str, str + N); }); - } else if (message_is_string(key, ".type_name")) { - foronly_string(value, [&](size_t N, const unsigned char *str) { - kernelarg->typeName_ = std::string(str, str + N); - }); } else if (message_is_string(key, ".size")) { foronly_unsigned(value, [&](uint64_t x) { kernelarg->size_ = x; }); } else if (message_is_string(key, ".offset")) { @@ -395,7 +381,7 @@ static hsa_status_t get_code_object_custom_metadata( return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } - atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0, {}, {}, {}}; + atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0}; uint64_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count; msgpack_errors += map_lookup_uint64_t(element, ".sgpr_count", &sgpr_count); @@ -479,13 +465,10 @@ static hsa_status_t get_code_object_custom_metadata( "iterate args map in kernel args metadata"); return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } - // populate info with sizes and offsets - info.arg_sizes.push_back(lcArg.size_); // v3 has offset field and not align field size_t new_offset = lcArg.offset_; size_t padding = new_offset - offset; offset = new_offset; - info.arg_offsets.push_back(lcArg.offset_); DP("Arg[%lu] \"%s\" (%u, %u)\n", i, lcArg.name_.c_str(), lcArg.size_, lcArg.offset_); offset += lcArg.size_; @@ -501,12 +484,9 @@ static hsa_status_t get_code_object_custom_metadata( } } - // add size of implicit args, e.g.: offset x, y and z and pipe pointer, but - // do not count the compiler set implicit args, but set your own implicit - // args by discounting the compiler set implicit args + // TODO: Probably don't want this arithmetic info.kernel_segment_size = - (hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size) + - sizeof(impl_implicit_args_t); + (hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size); DP("[%s: kernarg seg size] (%lu --> %u)\n", kernelName.c_str(), kernel_segment_size, info.kernel_segment_size); diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index 906b856c6311..71321be3e83c 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -1631,22 +1631,12 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id, auto It = KernelInfoMap.find(kernelStr); if (It != KernelInfoMap.end()) { atl_kernel_info_t info = It->second; - // return the size for non-implicit args - kernarg_segment_size = - info.kernel_segment_size - sizeof(impl_implicit_args_t); + kernarg_segment_size = info.kernel_segment_size; } else { err = HSA_STATUS_ERROR; } } - // each arg is a void * in this openmp implementation - uint32_t arg_num = kernarg_segment_size / sizeof(void *); - std::vector<size_t> arg_sizes(arg_num); - for (std::vector<size_t>::iterator it = arg_sizes.begin(); - it != arg_sizes.end(); it++) { - *it = sizeof(void *); - } - // default value GENERIC (in case symbol is missing from cubin file) llvm::omp::OMPTgtExecModeFlags ExecModeVal = llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC; |