diff options
author | Martin Liska <mliska@suse.cz> | 2016-11-23 13:27:13 +0100 |
---|---|---|
committer | Martin Jambor <jamborm@gcc.gnu.org> | 2016-11-23 13:27:13 +0100 |
commit | b8d89b03db5f212919e4571671ebb4f5f8b1e19d (patch) | |
tree | ca8d3a25bea7a0869227ad33aafc39ca9eff7e0e /libgomp/plugin/plugin-hsa.c | |
parent | 3615816da830d41f67a5d8955ae588eba7f0b6fb (diff) | |
download | gcc-b8d89b03db5f212919e4571671ebb4f5f8b1e19d.tar.gz |
Remove build dependence on HSA run-time
2016-11-23 Martin Liska <mliska@suse.cz>
Martin Jambor <mjambor@suse.cz>
gcc/
* doc/install.texi: Remove entry about --with-hsa-kmt-lib.
libgomp/
* plugin/hsa.h: New file.
* plugin/hsa_ext_finalize.h: New file.
* plugin/configfrag.ac: Remove hsa-kmt-lib test. Added checks for
header file unistd.h, and functions secure_getenv, __secure_getenv,
getuid, geteuid, getgid and getegid.
* plugin/Makefrag.am (libgomp_plugin_hsa_la_CPPFLAGS): Added
-D_GNU_SOURCE.
* plugin/plugin-hsa.c: Include config.h, inttypes.h and stdbool.h.
Handle various cases of secure_getenv presence, add an implementation
when we can test effective UID and GID.
(struct hsa_runtime_fn_info): New structure.
(hsa_runtime_fn_info hsa_fns): New variable.
(hsa_runtime_lib): Likewise.
(support_cpu_devices): Likewise.
(init_enviroment_variables): Load newly introduced ENV
variables.
(hsa_warn): Call hsa run-time functions via hsa_fns structure.
(hsa_fatal): Likewise.
(DLSYM_FN): New macro.
(init_hsa_runtime_functions): New function.
(suitable_hsa_agent_p): Call hsa run-time functions via hsa_fns
structure. Depending on environment, also allow CPU devices.
(init_hsa_context): Call hsa run-time functions via hsa_fns structure.
(get_kernarg_memory_region): Likewise.
(GOMP_OFFLOAD_init_device): Likewise.
(destroy_hsa_program): Likewise.
(init_basic_kernel_info): New function.
(GOMP_OFFLOAD_load_image): Use it.
(create_and_finalize_hsa_program): Call hsa run-time functions via
hsa_fns structure.
(create_single_kernel_dispatch): Likewise.
(release_kernel_dispatch): Likewise.
(init_single_kernel): Likewise.
(parse_target_attributes): Allow up multiple HSA grid dimensions.
(get_group_size): New function.
(run_kernel): Likewise.
(GOMP_OFFLOAD_run): Outline most functionality to run_kernel.
(GOMP_OFFLOAD_fini_device): Call hsa run-time functions via hsa_fns
structure.
* testsuite/lib/libgomp.exp: Remove hsa_kmt_lib support.
* testsuite/libgomp-test-support.exp.in: Likewise.
* Makefile.in: Regenerated.
* aclocal.m4: Likewise.
* config.h.in: Likewise.
* configure: Likewise.
* testsuite/Makefile.in: Likewise.
Co-Authored-By: Martin Jambor <mjambor@suse.cz>
From-SVN: r242749
Diffstat (limited to 'libgomp/plugin/plugin-hsa.c')
-rw-r--r-- | libgomp/plugin/plugin-hsa.c | 505 |
1 files changed, 388 insertions, 117 deletions
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c index bed8555fb90..b829c8ca81f 100644 --- a/libgomp/plugin/plugin-hsa.c +++ b/libgomp/plugin/plugin-hsa.c @@ -27,16 +27,129 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ +#include "config.h" #include <stdio.h> #include <stdlib.h> #include <string.h> #include <pthread.h> -#include <hsa.h> -#include <hsa_ext_finalize.h> +#include <inttypes.h> +#include <stdbool.h> +#include <plugin/hsa.h> +#include <plugin/hsa_ext_finalize.h> #include <dlfcn.h> #include "libgomp-plugin.h" #include "gomp-constants.h" +/* Secure getenv() which returns NULL if running as SUID/SGID. */ +#ifndef HAVE_SECURE_GETENV +#ifdef HAVE___SECURE_GETENV +#define secure_getenv __secure_getenv +#elif defined (HAVE_UNISTD_H) && defined(HAVE_GETUID) && defined(HAVE_GETEUID) \ + && defined(HAVE_GETGID) && defined(HAVE_GETEGID) + +#include <unistd.h> + +/* Implementation of secure_getenv() for targets where it is not provided but + we have at least means to test real and effective IDs. */ + +static char * +secure_getenv (const char *name) +{ + if ((getuid () == geteuid ()) && (getgid () == getegid ())) + return getenv (name); + else + return NULL; +} + +#else +#define secure_getenv getenv +#endif +#endif + +/* As an HSA runtime is dlopened, following structure defines function + pointers utilized by the HSA plug-in. */ + +struct hsa_runtime_fn_info +{ + /* HSA runtime. */ + hsa_status_t (*hsa_status_string_fn) (hsa_status_t status, + const char **status_string); + hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent, + hsa_agent_info_t attribute, + void *value); + hsa_status_t (*hsa_init_fn) (void); + hsa_status_t (*hsa_iterate_agents_fn) + (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data); + hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region, + hsa_region_info_t attribute, + void *value); + hsa_status_t (*hsa_queue_create_fn) + (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type, + void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), + void *data, uint32_t private_segment_size, + uint32_t group_segment_size, hsa_queue_t **queue); + hsa_status_t (*hsa_agent_iterate_regions_fn) + (hsa_agent_t agent, + hsa_status_t (*callback)(hsa_region_t region, void *data), void *data); + hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable); + hsa_status_t (*hsa_executable_create_fn) + (hsa_profile_t profile, hsa_executable_state_t executable_state, + const char *options, hsa_executable_t *executable); + hsa_status_t (*hsa_executable_global_variable_define_fn) + (hsa_executable_t executable, const char *variable_name, void *address); + hsa_status_t (*hsa_executable_load_code_object_fn) + (hsa_executable_t executable, hsa_agent_t agent, + hsa_code_object_t code_object, const char *options); + hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable, + const char *options); + hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value, + uint32_t num_consumers, + const hsa_agent_t *consumers, + hsa_signal_t *signal); + hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size, + void **ptr); + hsa_status_t (*hsa_memory_free_fn) (void *ptr); + hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal); + hsa_status_t (*hsa_executable_get_symbol_fn) + (hsa_executable_t executable, const char *module_name, + const char *symbol_name, hsa_agent_t agent, int32_t call_convention, + hsa_executable_symbol_t *symbol); + hsa_status_t (*hsa_executable_symbol_get_info_fn) + (hsa_executable_symbol_t executable_symbol, + hsa_executable_symbol_info_t attribute, void *value); + uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue, + uint64_t value); + uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue); + void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal, + hsa_signal_value_t value); + void (*hsa_signal_store_release_fn) (hsa_signal_t signal, + hsa_signal_value_t value); + hsa_signal_value_t (*hsa_signal_wait_acquire_fn) + (hsa_signal_t signal, hsa_signal_condition_t condition, + hsa_signal_value_t compare_value, uint64_t timeout_hint, + hsa_wait_state_t wait_state_hint); + hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal); + hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue); + + /* HSA finalizer. */ + hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program, + hsa_ext_module_t module); + hsa_status_t (*hsa_ext_program_create_fn) + (hsa_machine_model_t machine_model, hsa_profile_t profile, + hsa_default_float_rounding_mode_t default_float_rounding_mode, + const char *options, hsa_ext_program_t *program); + hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program); + hsa_status_t (*hsa_ext_program_finalize_fn) + (hsa_ext_program_t program,hsa_isa_t isa, + int32_t call_convention, hsa_ext_control_directives_t control_directives, + const char *options, hsa_code_object_type_t code_object_type, + hsa_code_object_t *code_object); +}; + +/* HSA runtime functions that are initialized in init_hsa_context. */ + +static struct hsa_runtime_fn_info hsa_fns; + /* Keep the following GOMP prefixed structures in sync with respective parts of the compiler. */ @@ -129,20 +242,36 @@ static bool debug; static bool suppress_host_fallback; +/* Flag to locate HSA runtime shared library that is dlopened + by this plug-in. */ + +static const char *hsa_runtime_lib; + +/* Flag to decide if the runtime should support also CPU devices (can be + a simulator). */ + +static bool support_cpu_devices; + /* Initialize debug and suppress_host_fallback according to the environment. */ static void init_enviroment_variables (void) { - if (getenv ("HSA_DEBUG")) + if (secure_getenv ("HSA_DEBUG")) debug = true; else debug = false; - if (getenv ("HSA_SUPPRESS_HOST_FALLBACK")) + if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK")) suppress_host_fallback = true; else suppress_host_fallback = false; + + hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB"); + if (hsa_runtime_lib == NULL) + hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so"; + + support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES"); } /* Print a logging message with PREFIX to stderr if HSA_DEBUG value @@ -176,7 +305,7 @@ hsa_warn (const char *str, hsa_status_t status) return; const char *hsa_error_msg; - hsa_status_string (status, &hsa_error_msg); + hsa_fns.hsa_status_string_fn (status, &hsa_error_msg); fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg); } @@ -188,7 +317,7 @@ static void hsa_fatal (const char *str, hsa_status_t status) { const char *hsa_error_msg; - hsa_status_string (status, &hsa_error_msg); + hsa_fns.hsa_status_string_fn (status, &hsa_error_msg); GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str, hsa_error_msg); } @@ -200,7 +329,7 @@ static bool hsa_error (const char *str, hsa_status_t status) { const char *hsa_error_msg; - hsa_status_string (status, &hsa_error_msg); + hsa_fns.hsa_status_string_fn (status, &hsa_error_msg); GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str, hsa_error_msg); return false; @@ -359,6 +488,50 @@ struct hsa_context_info static struct hsa_context_info hsa_context; +#define DLSYM_FN(function) \ + hsa_fns.function##_fn = dlsym (handle, #function); \ + if (hsa_fns.function##_fn == NULL) \ + return false; + +static bool +init_hsa_runtime_functions (void) +{ + void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY); + if (handle == NULL) + return false; + + DLSYM_FN (hsa_status_string) + DLSYM_FN (hsa_agent_get_info) + DLSYM_FN (hsa_init) + DLSYM_FN (hsa_iterate_agents) + DLSYM_FN (hsa_region_get_info) + DLSYM_FN (hsa_queue_create) + DLSYM_FN (hsa_agent_iterate_regions) + DLSYM_FN (hsa_executable_destroy) + DLSYM_FN (hsa_executable_create) + DLSYM_FN (hsa_executable_global_variable_define) + DLSYM_FN (hsa_executable_load_code_object) + DLSYM_FN (hsa_executable_freeze) + DLSYM_FN (hsa_signal_create) + DLSYM_FN (hsa_memory_allocate) + DLSYM_FN (hsa_memory_free) + DLSYM_FN (hsa_signal_destroy) + DLSYM_FN (hsa_executable_get_symbol) + DLSYM_FN (hsa_executable_symbol_get_info) + DLSYM_FN (hsa_queue_add_write_index_release) + DLSYM_FN (hsa_queue_load_read_index_acquire) + DLSYM_FN (hsa_signal_wait_acquire) + DLSYM_FN (hsa_signal_store_relaxed) + DLSYM_FN (hsa_signal_store_release) + DLSYM_FN (hsa_signal_load_acquire) + DLSYM_FN (hsa_queue_destroy) + DLSYM_FN (hsa_ext_program_add_module) + DLSYM_FN (hsa_ext_program_create) + DLSYM_FN (hsa_ext_program_destroy) + DLSYM_FN (hsa_ext_program_finalize) + return true; +} + /* Find kernel for an AGENT by name provided in KERNEL_NAME. */ static struct kernel_info * @@ -386,17 +559,32 @@ suitable_hsa_agent_p (hsa_agent_t agent) { hsa_device_type_t device_type; hsa_status_t status - = hsa_agent_get_info (agent, HSA_AGENT_INFO_DEVICE, &device_type); - if (status != HSA_STATUS_SUCCESS || device_type != HSA_DEVICE_TYPE_GPU) + = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE, + &device_type); + if (status != HSA_STATUS_SUCCESS) return false; + switch (device_type) + { + case HSA_DEVICE_TYPE_GPU: + break; + case HSA_DEVICE_TYPE_CPU: + if (!support_cpu_devices) + return false; + break; + default: + return false; + } + uint32_t features = 0; - status = hsa_agent_get_info (agent, HSA_AGENT_INFO_FEATURE, &features); + status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE, + &features); if (status != HSA_STATUS_SUCCESS || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) return false; hsa_queue_type_t queue_type; - status = hsa_agent_get_info (agent, HSA_AGENT_INFO_QUEUE_TYPE, &queue_type); + status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE, + &queue_type); if (status != HSA_STATUS_SUCCESS || (queue_type != HSA_QUEUE_TYPE_MULTI)) return false; @@ -443,11 +631,16 @@ init_hsa_context (void) if (hsa_context.initialized) return true; init_enviroment_variables (); - status = hsa_init (); + if (!init_hsa_runtime_functions ()) + { + HSA_DEBUG ("Run-time could not be dynamically opened\n"); + return false; + } + status = hsa_fns.hsa_init_fn (); if (status != HSA_STATUS_SUCCESS) return hsa_error ("Run-time could not be initialized", status); HSA_DEBUG ("HSA run-time initialized\n"); - status = hsa_iterate_agents (count_gpu_agents, NULL); + status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL); if (status != HSA_STATUS_SUCCESS) return hsa_error ("HSA GPU devices could not be enumerated", status); HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count); @@ -455,7 +648,7 @@ init_hsa_context (void) hsa_context.agents = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count * sizeof (struct agent_info)); - status = hsa_iterate_agents (assign_agent_ids, &agent_index); + status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index); if (agent_index != hsa_context.agent_count) { GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents"); @@ -485,14 +678,16 @@ get_kernarg_memory_region (hsa_region_t region, void *data) hsa_status_t status; hsa_region_segment_t segment; - status = hsa_region_get_info (region, HSA_REGION_INFO_SEGMENT, &segment); + status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT, + &segment); if (status != HSA_STATUS_SUCCESS) return status; if (segment != HSA_REGION_SEGMENT_GLOBAL) return HSA_STATUS_SUCCESS; uint32_t flags; - status = hsa_region_get_info (region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); + status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS, + &flags); if (status != HSA_STATUS_SUCCESS) return status; if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) @@ -546,29 +741,36 @@ GOMP_OFFLOAD_init_device (int n) uint32_t queue_size; hsa_status_t status; - status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_QUEUE_MAX_SIZE, - &queue_size); + status = hsa_fns.hsa_agent_get_info_fn (agent->id, + HSA_AGENT_INFO_QUEUE_MAX_SIZE, + &queue_size); if (status != HSA_STATUS_SUCCESS) return hsa_error ("Error requesting maximum queue size of the HSA agent", - status); - status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_ISA, &agent->isa); + status); + status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA, + &agent->isa); if (status != HSA_STATUS_SUCCESS) return hsa_error ("Error querying the ISA of the agent", status); - status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI, - queue_callback, NULL, UINT32_MAX, UINT32_MAX, - &agent->command_q); + status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size, + HSA_QUEUE_TYPE_MULTI, + queue_callback, NULL, UINT32_MAX, + UINT32_MAX, + &agent->command_q); if (status != HSA_STATUS_SUCCESS) return hsa_error ("Error creating command queue", status); - status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI, - queue_callback, NULL, UINT32_MAX, UINT32_MAX, - &agent->kernel_dispatch_command_q); + status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size, + HSA_QUEUE_TYPE_MULTI, + queue_callback, NULL, UINT32_MAX, + UINT32_MAX, + &agent->kernel_dispatch_command_q); if (status != HSA_STATUS_SUCCESS) return hsa_error ("Error creating kernel dispatch command queue", status); agent->kernarg_region.handle = (uint64_t) -1; - status = hsa_agent_iterate_regions (agent->id, get_kernarg_memory_region, - &agent->kernarg_region); + status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id, + get_kernarg_memory_region, + &agent->kernarg_region); if (agent->kernarg_region.handle == (uint64_t) -1) { GOMP_PLUGIN_error ("Could not find suitable memory region for kernel " @@ -646,7 +848,7 @@ destroy_hsa_program (struct agent_info *agent) HSA_DEBUG ("Destroying the current HSA program.\n"); - status = hsa_executable_destroy (agent->executable); + status = hsa_fns.hsa_executable_destroy_fn (agent->executable); if (status != HSA_STATUS_SUCCESS) return hsa_error ("Could not destroy HSA executable", status); @@ -661,6 +863,29 @@ destroy_hsa_program (struct agent_info *agent) return true; } +/* Initialize KERNEL from D and other parameters. Return true on success. */ + +static bool +init_basic_kernel_info (struct kernel_info *kernel, + struct hsa_kernel_description *d, + struct agent_info *agent, + struct module_info *module) +{ + kernel->agent = agent; + kernel->module = module; + kernel->name = d->name; + kernel->omp_data_size = d->omp_data_size; + kernel->gridified_kernel_p = d->gridified_kernel_p; + kernel->dependencies_count = d->kernel_dependencies_count; + kernel->dependencies = d->kernel_dependencies; + if (pthread_mutex_init (&kernel->init_mutex, NULL)) + { + GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex"); + return false; + } + return true; +} + /* Part of the libgomp plugin interface. Load BRIG module described by struct brig_image_desc in TARGET_DATA and return references to kernel descriptors in TARGET_TABLE. */ @@ -715,19 +940,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, void *target_data, pair->end = (uintptr_t) (kernel + 1); struct hsa_kernel_description *d = &image_desc->kernel_infos[i]; - kernel->agent = agent; - kernel->module = module; - kernel->name = d->name; - kernel->omp_data_size = d->omp_data_size; - kernel->gridified_kernel_p = d->gridified_kernel_p; - kernel->dependencies_count = d->kernel_dependencies_count; - kernel->dependencies = d->kernel_dependencies; - if (pthread_mutex_init (&kernel->init_mutex, NULL)) - { - GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex"); - return -1; - } - + if (!init_basic_kernel_info (kernel, d, agent, module)) + return -1; kernel++; pair++; } @@ -799,9 +1013,10 @@ create_and_finalize_hsa_program (struct agent_info *agent) if (agent->prog_finalized) goto final; - status = hsa_ext_program_create (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL, - HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, - NULL, &prog_handle); + status = hsa_fns.hsa_ext_program_create_fn + (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, &prog_handle); if (status != HSA_STATUS_SUCCESS) hsa_fatal ("Could not create an HSA program", status); @@ -810,8 +1025,8 @@ create_and_finalize_hsa_program (struct agent_info *agent) struct module_info *module = agent->first_module; while (module) { - status = hsa_ext_program_add_module (prog_handle, - module->image_desc->brig_module); + status = hsa_fns.hsa_ext_program_add_module_fn + (prog_handle, module->image_desc->brig_module); if (status != HSA_STATUS_SUCCESS) hsa_fatal ("Could not add a module to the HSA program", status); module = module->next; @@ -837,7 +1052,8 @@ create_and_finalize_hsa_program (struct agent_info *agent) continue; } - status = hsa_ext_program_add_module (prog_handle, library->image); + status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle, + library->image); if (status != HSA_STATUS_SUCCESS) hsa_warn ("Could not add a shared BRIG library the HSA program", status); @@ -849,11 +1065,9 @@ create_and_finalize_hsa_program (struct agent_info *agent) hsa_ext_control_directives_t control_directives; memset (&control_directives, 0, sizeof (control_directives)); hsa_code_object_t code_object; - status = hsa_ext_program_finalize (prog_handle, agent->isa, - HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, - control_directives, "", - HSA_CODE_OBJECT_TYPE_PROGRAM, - &code_object); + status = hsa_fns.hsa_ext_program_finalize_fn + (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, + control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object); if (status != HSA_STATUS_SUCCESS) { hsa_warn ("Finalization of the HSA program failed", status); @@ -861,11 +1075,12 @@ create_and_finalize_hsa_program (struct agent_info *agent) } HSA_DEBUG ("Finalization done\n"); - hsa_ext_program_destroy (prog_handle); + hsa_fns.hsa_ext_program_destroy_fn (prog_handle); status - = hsa_executable_create (HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, - "", &agent->executable); + = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL, + HSA_EXECUTABLE_STATE_UNFROZEN, + "", &agent->executable); if (status != HSA_STATUS_SUCCESS) hsa_fatal ("Could not create HSA executable", status); @@ -877,9 +1092,8 @@ create_and_finalize_hsa_program (struct agent_info *agent) { struct global_var_info *var; var = &module->image_desc->global_variables[i]; - status - = hsa_executable_global_variable_define (agent->executable, - var->name, var->address); + status = hsa_fns.hsa_executable_global_variable_define_fn + (agent->executable, var->name, var->address); HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name, var->address); @@ -892,11 +1106,12 @@ create_and_finalize_hsa_program (struct agent_info *agent) module = module->next; } - status = hsa_executable_load_code_object (agent->executable, agent->id, - code_object, ""); + status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable, + agent->id, + code_object, ""); if (status != HSA_STATUS_SUCCESS) hsa_fatal ("Could not add a code object to the HSA executable", status); - status = hsa_executable_freeze (agent->executable, ""); + status = hsa_fns.hsa_executable_freeze_fn (agent->executable, ""); if (status != HSA_STATUS_SUCCESS) hsa_fatal ("Could not freeze the HSA executable", status); @@ -937,7 +1152,7 @@ create_single_kernel_dispatch (struct kernel_info *kernel, shadow->object = kernel->object; hsa_signal_t sync_signal; - hsa_status_t status = hsa_signal_create (1, 0, NULL, &sync_signal); + hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal); if (status != HSA_STATUS_SUCCESS) hsa_fatal ("Error creating the HSA sync signal", status); @@ -946,8 +1161,9 @@ create_single_kernel_dispatch (struct kernel_info *kernel, shadow->group_segment_size = kernel->group_segment_size; status - = hsa_memory_allocate (agent->kernarg_region, kernel->kernarg_segment_size, - &shadow->kernarg_address); + = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region, + kernel->kernarg_segment_size, + &shadow->kernarg_address); if (status != HSA_STATUS_SUCCESS) hsa_fatal ("Could not allocate memory for HSA kernel arguments", status); @@ -962,11 +1178,11 @@ release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow) HSA_DEBUG ("Released kernel dispatch: %p has value: %lu (%p)\n", shadow, shadow->debug, (void *) shadow->debug); - hsa_memory_free (shadow->kernarg_address); + hsa_fns.hsa_memory_free_fn (shadow->kernarg_address); hsa_signal_t s; s.handle = shadow->signal; - hsa_signal_destroy (s); + hsa_fns.hsa_signal_destroy_fn (s); free (shadow->omp_data_memory); @@ -986,31 +1202,30 @@ init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size) hsa_status_t status; struct agent_info *agent = kernel->agent; hsa_executable_symbol_t kernel_symbol; - status = hsa_executable_get_symbol (agent->executable, NULL, kernel->name, - agent->id, 0, &kernel_symbol); + status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL, + kernel->name, agent->id, + 0, &kernel_symbol); if (status != HSA_STATUS_SUCCESS) { hsa_warn ("Could not find symbol for kernel in the code object", status); goto failure; } HSA_DEBUG ("Located kernel %s\n", kernel->name); - status - = hsa_executable_symbol_get_info (kernel_symbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, - &kernel->object); + status = hsa_fns.hsa_executable_symbol_get_info_fn + (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object); if (status != HSA_STATUS_SUCCESS) hsa_fatal ("Could not extract a kernel object from its symbol", status); - status = hsa_executable_symbol_get_info + status = hsa_fns.hsa_executable_symbol_get_info_fn (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kernel->kernarg_segment_size); if (status != HSA_STATUS_SUCCESS) hsa_fatal ("Could not get info about kernel argument size", status); - status = hsa_executable_symbol_get_info + status = hsa_fns.hsa_executable_symbol_get_info_fn (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &kernel->group_segment_size); if (status != HSA_STATUS_SUCCESS) hsa_fatal ("Could not get info about kernel group segment size", status); - status = hsa_executable_symbol_get_info + status = hsa_fns.hsa_executable_symbol_get_info_fn (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &kernel->private_segment_size); if (status != HSA_STATUS_SUCCESS) @@ -1209,18 +1424,43 @@ parse_target_attributes (void **input, struct GOMP_kernel_launch_attributes *kla; kla = (struct GOMP_kernel_launch_attributes *) *input; *result = kla; - if (kla->ndim != 1) - GOMP_PLUGIN_fatal ("HSA does not yet support number of dimensions " - "different from one."); - if (kla->gdims[0] == 0) - return false; - - HSA_DEBUG ("GOMP_OFFLOAD_run called with grid size %u and group size %u\n", - kla->gdims[0], kla->wdims[0]); + if (kla->ndim == 0 || kla->ndim > 3) + GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim); + HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim); + unsigned i; + for (i = 0; i < kla->ndim; i++) + { + HSA_DEBUG (" Dimension %u: grid size %u and group size %u\n", i, + kla->gdims[i], kla->wdims[i]); + if (kla->gdims[i] == 0) + return false; + } return true; } +/* Return the group size given the requested GROUP size, GRID size and number + of grid dimensions NDIM. */ + +static uint32_t +get_group_size (uint32_t ndim, uint32_t grid, uint32_t group) +{ + if (group == 0) + { + /* TODO: Provide a default via environment or device characteristics. */ + if (ndim == 1) + group = 64; + else if (ndim == 2) + group = 8; + else + group = 4; + } + + if (group > grid) + group = grid; + return group; +} + /* Return true if the HSA runtime can run function FN_PTR. */ bool @@ -1254,22 +1494,14 @@ packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest) __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE); } -/* Part of the libgomp plugin interface. Run a kernel on device N and pass it - an array of pointers in VARS as a parameter. The kernel is identified by - FN_PTR which must point to a kernel_info structure. */ +/* Run KERNEL on its agent, pass VARS to it as arguments and take + launchattributes from KLA. */ void -GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args) +run_kernel (struct kernel_info *kernel, void *vars, + struct GOMP_kernel_launch_attributes *kla) { - struct kernel_info *kernel = (struct kernel_info *) fn_ptr; struct agent_info *agent = kernel->agent; - struct GOMP_kernel_launch_attributes def; - struct GOMP_kernel_launch_attributes *kla; - if (!parse_target_attributes (args, &def, &kla)) - { - HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n"); - return; - } if (pthread_rwlock_rdlock (&agent->modules_rwlock)) GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock"); @@ -1288,11 +1520,12 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args) print_kernel_dispatch (shadow, 2); } - uint64_t index = hsa_queue_add_write_index_release (agent->command_q, 1); + uint64_t index + = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1); HSA_DEBUG ("Got AQL index %llu\n", (long long int) index); /* Wait until the queue is not full before writing the packet. */ - while (index - hsa_queue_load_read_index_acquire (agent->command_q) + while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q) >= agent->command_q->size) ; @@ -1302,17 +1535,33 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args) memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4); packet->grid_size_x = kla->gdims[0]; - uint32_t wgs = kla->wdims[0]; - if (wgs == 0) - /* TODO: Provide a default via environment. */ - wgs = 64; - else if (wgs > kla->gdims[0]) - wgs = kla->gdims[0]; - packet->workgroup_size_x = wgs; - packet->grid_size_y = 1; - packet->workgroup_size_y = 1; - packet->grid_size_z = 1; - packet->workgroup_size_z = 1; + packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0], + kla->wdims[0]); + + if (kla->ndim >= 2) + { + packet->grid_size_y = kla->gdims[1]; + packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1], + kla->wdims[1]); + } + else + { + packet->grid_size_y = 1; + packet->workgroup_size_y = 1; + } + + if (kla->ndim == 3) + { + packet->grid_size_z = kla->gdims[2]; + packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2], + kla->wdims[2]); + } + else + { + packet->grid_size_z = 1; + packet->workgroup_size_z = 1; + } + packet->private_segment_size = kernel->private_segment_size; packet->group_segment_size = kernel->group_segment_size; packet->kernel_object = kernel->object; @@ -1320,7 +1569,7 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args) hsa_signal_t s; s.handle = shadow->signal; packet->completion_signal = s; - hsa_signal_store_relaxed (s, 1); + hsa_fns.hsa_signal_store_relaxed_fn (s, 1); memcpy (shadow->kernarg_address, &vars, sizeof (vars)); /* PR hsa/70337. */ @@ -1344,9 +1593,10 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args) HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name); packet_store_release ((uint32_t *) packet, header, - 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS); + (uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS); - hsa_signal_store_release (agent->command_q->doorbell_signal, index); + hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal, + index); /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for signal wait and signal load operations on their own and we need to @@ -1357,8 +1607,9 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args) HSA_DEBUG ("Kernel dispatched, waiting for completion\n"); /* Root signal waits with 1ms timeout. */ - while (hsa_signal_wait_acquire (s, HSA_SIGNAL_CONDITION_LT, 1, 1000 * 1000, - HSA_WAIT_STATE_BLOCKED) != 0) + while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1, + 1000 * 1000, + HSA_WAIT_STATE_BLOCKED) != 0) for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++) { hsa_signal_t child_s; @@ -1366,7 +1617,7 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args) HSA_DEBUG ("Waiting for children completion signal: %lu\n", shadow->children_dispatches[i]->signal); - hsa_signal_load_acquire (child_s); + hsa_fns.hsa_signal_load_acquire_fn (child_s); } release_kernel_dispatch (shadow); @@ -1375,6 +1626,26 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args) GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock"); } +/* Part of the libgomp plugin interface. Run a kernel on device N (the number + is actually ignored, we assume the FN_PTR has been mapped using the correct + device) and pass it an array of pointers in VARS as a parameter. The kernel + is identified by FN_PTR which must point to a kernel_info structure. */ + +void +GOMP_OFFLOAD_run (int n __attribute__((unused)), + void *fn_ptr, void *vars, void **args) +{ + struct kernel_info *kernel = (struct kernel_info *) fn_ptr; + struct GOMP_kernel_launch_attributes def; + struct GOMP_kernel_launch_attributes *kla; + if (!parse_target_attributes (args, &def, &kla)) + { + HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n"); + return; + } + run_kernel (kernel, vars, kla); +} + /* Information to be passed to a thread running a kernel asycnronously. */ struct async_run_info @@ -1534,10 +1805,10 @@ GOMP_OFFLOAD_fini_device (int n) release_agent_shared_libraries (agent); - hsa_status_t status = hsa_queue_destroy (agent->command_q); + hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q); if (status != HSA_STATUS_SUCCESS) return hsa_error ("Error destroying command queue", status); - status = hsa_queue_destroy (agent->kernel_dispatch_command_q); + status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q); if (status != HSA_STATUS_SUCCESS) return hsa_error ("Error destroying kernel dispatch command queue", status); if (pthread_mutex_destroy (&agent->prog_mutex)) |