summaryrefslogtreecommitdiff
path: root/libgomp/plugin/plugin-hsa.c
diff options
context:
space:
mode:
authorMartin Liska <mliska@suse.cz>2016-11-23 13:27:13 +0100
committerMartin Jambor <jamborm@gcc.gnu.org>2016-11-23 13:27:13 +0100
commitb8d89b03db5f212919e4571671ebb4f5f8b1e19d (patch)
treeca8d3a25bea7a0869227ad33aafc39ca9eff7e0e /libgomp/plugin/plugin-hsa.c
parent3615816da830d41f67a5d8955ae588eba7f0b6fb (diff)
downloadgcc-b8d89b03db5f212919e4571671ebb4f5f8b1e19d.tar.gz
Remove build dependence on HSA run-time
2016-11-23 Martin Liska <mliska@suse.cz> Martin Jambor <mjambor@suse.cz> gcc/ * doc/install.texi: Remove entry about --with-hsa-kmt-lib. libgomp/ * plugin/hsa.h: New file. * plugin/hsa_ext_finalize.h: New file. * plugin/configfrag.ac: Remove hsa-kmt-lib test. Added checks for header file unistd.h, and functions secure_getenv, __secure_getenv, getuid, geteuid, getgid and getegid. * plugin/Makefrag.am (libgomp_plugin_hsa_la_CPPFLAGS): Added -D_GNU_SOURCE. * plugin/plugin-hsa.c: Include config.h, inttypes.h and stdbool.h. Handle various cases of secure_getenv presence, add an implementation when we can test effective UID and GID. (struct hsa_runtime_fn_info): New structure. (hsa_runtime_fn_info hsa_fns): New variable. (hsa_runtime_lib): Likewise. (support_cpu_devices): Likewise. (init_enviroment_variables): Load newly introduced ENV variables. (hsa_warn): Call hsa run-time functions via hsa_fns structure. (hsa_fatal): Likewise. (DLSYM_FN): New macro. (init_hsa_runtime_functions): New function. (suitable_hsa_agent_p): Call hsa run-time functions via hsa_fns structure. Depending on environment, also allow CPU devices. (init_hsa_context): Call hsa run-time functions via hsa_fns structure. (get_kernarg_memory_region): Likewise. (GOMP_OFFLOAD_init_device): Likewise. (destroy_hsa_program): Likewise. (init_basic_kernel_info): New function. (GOMP_OFFLOAD_load_image): Use it. (create_and_finalize_hsa_program): Call hsa run-time functions via hsa_fns structure. (create_single_kernel_dispatch): Likewise. (release_kernel_dispatch): Likewise. (init_single_kernel): Likewise. (parse_target_attributes): Allow up multiple HSA grid dimensions. (get_group_size): New function. (run_kernel): Likewise. (GOMP_OFFLOAD_run): Outline most functionality to run_kernel. (GOMP_OFFLOAD_fini_device): Call hsa run-time functions via hsa_fns structure. * testsuite/lib/libgomp.exp: Remove hsa_kmt_lib support. * testsuite/libgomp-test-support.exp.in: Likewise. * Makefile.in: Regenerated. * aclocal.m4: Likewise. * config.h.in: Likewise. * configure: Likewise. * testsuite/Makefile.in: Likewise. Co-Authored-By: Martin Jambor <mjambor@suse.cz> From-SVN: r242749
Diffstat (limited to 'libgomp/plugin/plugin-hsa.c')
-rw-r--r--libgomp/plugin/plugin-hsa.c505
1 files changed, 388 insertions, 117 deletions
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
index bed8555fb90..b829c8ca81f 100644
--- a/libgomp/plugin/plugin-hsa.c
+++ b/libgomp/plugin/plugin-hsa.c
@@ -27,16 +27,129 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
+#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
-#include <hsa.h>
-#include <hsa_ext_finalize.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <plugin/hsa.h>
+#include <plugin/hsa_ext_finalize.h>
#include <dlfcn.h>
#include "libgomp-plugin.h"
#include "gomp-constants.h"
+/* Secure getenv() which returns NULL if running as SUID/SGID. */
+#ifndef HAVE_SECURE_GETENV
+#ifdef HAVE___SECURE_GETENV
+#define secure_getenv __secure_getenv
+#elif defined (HAVE_UNISTD_H) && defined(HAVE_GETUID) && defined(HAVE_GETEUID) \
+ && defined(HAVE_GETGID) && defined(HAVE_GETEGID)
+
+#include <unistd.h>
+
+/* Implementation of secure_getenv() for targets where it is not provided but
+ we have at least means to test real and effective IDs. */
+
+static char *
+secure_getenv (const char *name)
+{
+ if ((getuid () == geteuid ()) && (getgid () == getegid ()))
+ return getenv (name);
+ else
+ return NULL;
+}
+
+#else
+#define secure_getenv getenv
+#endif
+#endif
+
+/* As an HSA runtime is dlopened, following structure defines function
+ pointers utilized by the HSA plug-in. */
+
+struct hsa_runtime_fn_info
+{
+ /* HSA runtime. */
+ hsa_status_t (*hsa_status_string_fn) (hsa_status_t status,
+ const char **status_string);
+ hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
+ hsa_agent_info_t attribute,
+ void *value);
+ hsa_status_t (*hsa_init_fn) (void);
+ hsa_status_t (*hsa_iterate_agents_fn)
+ (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data);
+ hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region,
+ hsa_region_info_t attribute,
+ void *value);
+ hsa_status_t (*hsa_queue_create_fn)
+ (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
+ void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
+ void *data, uint32_t private_segment_size,
+ uint32_t group_segment_size, hsa_queue_t **queue);
+ hsa_status_t (*hsa_agent_iterate_regions_fn)
+ (hsa_agent_t agent,
+ hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
+ hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable);
+ hsa_status_t (*hsa_executable_create_fn)
+ (hsa_profile_t profile, hsa_executable_state_t executable_state,
+ const char *options, hsa_executable_t *executable);
+ hsa_status_t (*hsa_executable_global_variable_define_fn)
+ (hsa_executable_t executable, const char *variable_name, void *address);
+ hsa_status_t (*hsa_executable_load_code_object_fn)
+ (hsa_executable_t executable, hsa_agent_t agent,
+ hsa_code_object_t code_object, const char *options);
+ hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable,
+ const char *options);
+ hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value,
+ uint32_t num_consumers,
+ const hsa_agent_t *consumers,
+ hsa_signal_t *signal);
+ hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
+ void **ptr);
+ hsa_status_t (*hsa_memory_free_fn) (void *ptr);
+ hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal);
+ hsa_status_t (*hsa_executable_get_symbol_fn)
+ (hsa_executable_t executable, const char *module_name,
+ const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
+ hsa_executable_symbol_t *symbol);
+ hsa_status_t (*hsa_executable_symbol_get_info_fn)
+ (hsa_executable_symbol_t executable_symbol,
+ hsa_executable_symbol_info_t attribute, void *value);
+ uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
+ uint64_t value);
+ uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
+ void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
+ hsa_signal_value_t value);
+ void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
+ hsa_signal_value_t value);
+ hsa_signal_value_t (*hsa_signal_wait_acquire_fn)
+ (hsa_signal_t signal, hsa_signal_condition_t condition,
+ hsa_signal_value_t compare_value, uint64_t timeout_hint,
+ hsa_wait_state_t wait_state_hint);
+ hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal);
+ hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue);
+
+ /* HSA finalizer. */
+ hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program,
+ hsa_ext_module_t module);
+ hsa_status_t (*hsa_ext_program_create_fn)
+ (hsa_machine_model_t machine_model, hsa_profile_t profile,
+ hsa_default_float_rounding_mode_t default_float_rounding_mode,
+ const char *options, hsa_ext_program_t *program);
+ hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program);
+ hsa_status_t (*hsa_ext_program_finalize_fn)
+ (hsa_ext_program_t program,hsa_isa_t isa,
+ int32_t call_convention, hsa_ext_control_directives_t control_directives,
+ const char *options, hsa_code_object_type_t code_object_type,
+ hsa_code_object_t *code_object);
+};
+
+/* HSA runtime functions that are initialized in init_hsa_context. */
+
+static struct hsa_runtime_fn_info hsa_fns;
+
/* Keep the following GOMP prefixed structures in sync with respective parts of
the compiler. */
@@ -129,20 +242,36 @@ static bool debug;
static bool suppress_host_fallback;
+/* Flag to locate HSA runtime shared library that is dlopened
+ by this plug-in. */
+
+static const char *hsa_runtime_lib;
+
+/* Flag to decide if the runtime should support also CPU devices (can be
+ a simulator). */
+
+static bool support_cpu_devices;
+
/* Initialize debug and suppress_host_fallback according to the environment. */
static void
init_enviroment_variables (void)
{
- if (getenv ("HSA_DEBUG"))
+ if (secure_getenv ("HSA_DEBUG"))
debug = true;
else
debug = false;
- if (getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
+ if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
suppress_host_fallback = true;
else
suppress_host_fallback = false;
+
+ hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB");
+ if (hsa_runtime_lib == NULL)
+ hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so";
+
+ support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES");
}
/* Print a logging message with PREFIX to stderr if HSA_DEBUG value
@@ -176,7 +305,7 @@ hsa_warn (const char *str, hsa_status_t status)
return;
const char *hsa_error_msg;
- hsa_status_string (status, &hsa_error_msg);
+ hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
}
@@ -188,7 +317,7 @@ static void
hsa_fatal (const char *str, hsa_status_t status)
{
const char *hsa_error_msg;
- hsa_status_string (status, &hsa_error_msg);
+ hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
hsa_error_msg);
}
@@ -200,7 +329,7 @@ static bool
hsa_error (const char *str, hsa_status_t status)
{
const char *hsa_error_msg;
- hsa_status_string (status, &hsa_error_msg);
+ hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
hsa_error_msg);
return false;
@@ -359,6 +488,50 @@ struct hsa_context_info
static struct hsa_context_info hsa_context;
+#define DLSYM_FN(function) \
+ hsa_fns.function##_fn = dlsym (handle, #function); \
+ if (hsa_fns.function##_fn == NULL) \
+ return false;
+
+static bool
+init_hsa_runtime_functions (void)
+{
+ void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
+ if (handle == NULL)
+ return false;
+
+ DLSYM_FN (hsa_status_string)
+ DLSYM_FN (hsa_agent_get_info)
+ DLSYM_FN (hsa_init)
+ DLSYM_FN (hsa_iterate_agents)
+ DLSYM_FN (hsa_region_get_info)
+ DLSYM_FN (hsa_queue_create)
+ DLSYM_FN (hsa_agent_iterate_regions)
+ DLSYM_FN (hsa_executable_destroy)
+ DLSYM_FN (hsa_executable_create)
+ DLSYM_FN (hsa_executable_global_variable_define)
+ DLSYM_FN (hsa_executable_load_code_object)
+ DLSYM_FN (hsa_executable_freeze)
+ DLSYM_FN (hsa_signal_create)
+ DLSYM_FN (hsa_memory_allocate)
+ DLSYM_FN (hsa_memory_free)
+ DLSYM_FN (hsa_signal_destroy)
+ DLSYM_FN (hsa_executable_get_symbol)
+ DLSYM_FN (hsa_executable_symbol_get_info)
+ DLSYM_FN (hsa_queue_add_write_index_release)
+ DLSYM_FN (hsa_queue_load_read_index_acquire)
+ DLSYM_FN (hsa_signal_wait_acquire)
+ DLSYM_FN (hsa_signal_store_relaxed)
+ DLSYM_FN (hsa_signal_store_release)
+ DLSYM_FN (hsa_signal_load_acquire)
+ DLSYM_FN (hsa_queue_destroy)
+ DLSYM_FN (hsa_ext_program_add_module)
+ DLSYM_FN (hsa_ext_program_create)
+ DLSYM_FN (hsa_ext_program_destroy)
+ DLSYM_FN (hsa_ext_program_finalize)
+ return true;
+}
+
/* Find kernel for an AGENT by name provided in KERNEL_NAME. */
static struct kernel_info *
@@ -386,17 +559,32 @@ suitable_hsa_agent_p (hsa_agent_t agent)
{
hsa_device_type_t device_type;
hsa_status_t status
- = hsa_agent_get_info (agent, HSA_AGENT_INFO_DEVICE, &device_type);
- if (status != HSA_STATUS_SUCCESS || device_type != HSA_DEVICE_TYPE_GPU)
+ = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE,
+ &device_type);
+ if (status != HSA_STATUS_SUCCESS)
return false;
+ switch (device_type)
+ {
+ case HSA_DEVICE_TYPE_GPU:
+ break;
+ case HSA_DEVICE_TYPE_CPU:
+ if (!support_cpu_devices)
+ return false;
+ break;
+ default:
+ return false;
+ }
+
uint32_t features = 0;
- status = hsa_agent_get_info (agent, HSA_AGENT_INFO_FEATURE, &features);
+ status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE,
+ &features);
if (status != HSA_STATUS_SUCCESS
|| !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
return false;
hsa_queue_type_t queue_type;
- status = hsa_agent_get_info (agent, HSA_AGENT_INFO_QUEUE_TYPE, &queue_type);
+ status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE,
+ &queue_type);
if (status != HSA_STATUS_SUCCESS
|| (queue_type != HSA_QUEUE_TYPE_MULTI))
return false;
@@ -443,11 +631,16 @@ init_hsa_context (void)
if (hsa_context.initialized)
return true;
init_enviroment_variables ();
- status = hsa_init ();
+ if (!init_hsa_runtime_functions ())
+ {
+ HSA_DEBUG ("Run-time could not be dynamically opened\n");
+ return false;
+ }
+ status = hsa_fns.hsa_init_fn ();
if (status != HSA_STATUS_SUCCESS)
return hsa_error ("Run-time could not be initialized", status);
HSA_DEBUG ("HSA run-time initialized\n");
- status = hsa_iterate_agents (count_gpu_agents, NULL);
+ status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL);
if (status != HSA_STATUS_SUCCESS)
return hsa_error ("HSA GPU devices could not be enumerated", status);
HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
@@ -455,7 +648,7 @@ init_hsa_context (void)
hsa_context.agents
= GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
* sizeof (struct agent_info));
- status = hsa_iterate_agents (assign_agent_ids, &agent_index);
+ status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index);
if (agent_index != hsa_context.agent_count)
{
GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
@@ -485,14 +678,16 @@ get_kernarg_memory_region (hsa_region_t region, void *data)
hsa_status_t status;
hsa_region_segment_t segment;
- status = hsa_region_get_info (region, HSA_REGION_INFO_SEGMENT, &segment);
+ status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT,
+ &segment);
if (status != HSA_STATUS_SUCCESS)
return status;
if (segment != HSA_REGION_SEGMENT_GLOBAL)
return HSA_STATUS_SUCCESS;
uint32_t flags;
- status = hsa_region_get_info (region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
+ status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
+ &flags);
if (status != HSA_STATUS_SUCCESS)
return status;
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
@@ -546,29 +741,36 @@ GOMP_OFFLOAD_init_device (int n)
uint32_t queue_size;
hsa_status_t status;
- status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
- &queue_size);
+ status = hsa_fns.hsa_agent_get_info_fn (agent->id,
+ HSA_AGENT_INFO_QUEUE_MAX_SIZE,
+ &queue_size);
if (status != HSA_STATUS_SUCCESS)
return hsa_error ("Error requesting maximum queue size of the HSA agent",
- status);
- status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_ISA, &agent->isa);
+ status);
+ status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA,
+ &agent->isa);
if (status != HSA_STATUS_SUCCESS)
return hsa_error ("Error querying the ISA of the agent", status);
- status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
- queue_callback, NULL, UINT32_MAX, UINT32_MAX,
- &agent->command_q);
+ status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
+ HSA_QUEUE_TYPE_MULTI,
+ queue_callback, NULL, UINT32_MAX,
+ UINT32_MAX,
+ &agent->command_q);
if (status != HSA_STATUS_SUCCESS)
return hsa_error ("Error creating command queue", status);
- status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
- queue_callback, NULL, UINT32_MAX, UINT32_MAX,
- &agent->kernel_dispatch_command_q);
+ status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
+ HSA_QUEUE_TYPE_MULTI,
+ queue_callback, NULL, UINT32_MAX,
+ UINT32_MAX,
+ &agent->kernel_dispatch_command_q);
if (status != HSA_STATUS_SUCCESS)
return hsa_error ("Error creating kernel dispatch command queue", status);
agent->kernarg_region.handle = (uint64_t) -1;
- status = hsa_agent_iterate_regions (agent->id, get_kernarg_memory_region,
- &agent->kernarg_region);
+ status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
+ get_kernarg_memory_region,
+ &agent->kernarg_region);
if (agent->kernarg_region.handle == (uint64_t) -1)
{
GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
@@ -646,7 +848,7 @@ destroy_hsa_program (struct agent_info *agent)
HSA_DEBUG ("Destroying the current HSA program.\n");
- status = hsa_executable_destroy (agent->executable);
+ status = hsa_fns.hsa_executable_destroy_fn (agent->executable);
if (status != HSA_STATUS_SUCCESS)
return hsa_error ("Could not destroy HSA executable", status);
@@ -661,6 +863,29 @@ destroy_hsa_program (struct agent_info *agent)
return true;
}
+/* Initialize KERNEL from D and other parameters. Return true on success. */
+
+static bool
+init_basic_kernel_info (struct kernel_info *kernel,
+ struct hsa_kernel_description *d,
+ struct agent_info *agent,
+ struct module_info *module)
+{
+ kernel->agent = agent;
+ kernel->module = module;
+ kernel->name = d->name;
+ kernel->omp_data_size = d->omp_data_size;
+ kernel->gridified_kernel_p = d->gridified_kernel_p;
+ kernel->dependencies_count = d->kernel_dependencies_count;
+ kernel->dependencies = d->kernel_dependencies;
+ if (pthread_mutex_init (&kernel->init_mutex, NULL))
+ {
+ GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
+ return false;
+ }
+ return true;
+}
+
/* Part of the libgomp plugin interface. Load BRIG module described by struct
brig_image_desc in TARGET_DATA and return references to kernel descriptors
in TARGET_TABLE. */
@@ -715,19 +940,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, void *target_data,
pair->end = (uintptr_t) (kernel + 1);
struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
- kernel->agent = agent;
- kernel->module = module;
- kernel->name = d->name;
- kernel->omp_data_size = d->omp_data_size;
- kernel->gridified_kernel_p = d->gridified_kernel_p;
- kernel->dependencies_count = d->kernel_dependencies_count;
- kernel->dependencies = d->kernel_dependencies;
- if (pthread_mutex_init (&kernel->init_mutex, NULL))
- {
- GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
- return -1;
- }
-
+ if (!init_basic_kernel_info (kernel, d, agent, module))
+ return -1;
kernel++;
pair++;
}
@@ -799,9 +1013,10 @@ create_and_finalize_hsa_program (struct agent_info *agent)
if (agent->prog_finalized)
goto final;
- status = hsa_ext_program_create (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
- HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
- NULL, &prog_handle);
+ status = hsa_fns.hsa_ext_program_create_fn
+ (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
+ HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+ NULL, &prog_handle);
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Could not create an HSA program", status);
@@ -810,8 +1025,8 @@ create_and_finalize_hsa_program (struct agent_info *agent)
struct module_info *module = agent->first_module;
while (module)
{
- status = hsa_ext_program_add_module (prog_handle,
- module->image_desc->brig_module);
+ status = hsa_fns.hsa_ext_program_add_module_fn
+ (prog_handle, module->image_desc->brig_module);
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Could not add a module to the HSA program", status);
module = module->next;
@@ -837,7 +1052,8 @@ create_and_finalize_hsa_program (struct agent_info *agent)
continue;
}
- status = hsa_ext_program_add_module (prog_handle, library->image);
+ status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle,
+ library->image);
if (status != HSA_STATUS_SUCCESS)
hsa_warn ("Could not add a shared BRIG library the HSA program",
status);
@@ -849,11 +1065,9 @@ create_and_finalize_hsa_program (struct agent_info *agent)
hsa_ext_control_directives_t control_directives;
memset (&control_directives, 0, sizeof (control_directives));
hsa_code_object_t code_object;
- status = hsa_ext_program_finalize (prog_handle, agent->isa,
- HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
- control_directives, "",
- HSA_CODE_OBJECT_TYPE_PROGRAM,
- &code_object);
+ status = hsa_fns.hsa_ext_program_finalize_fn
+ (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+ control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object);
if (status != HSA_STATUS_SUCCESS)
{
hsa_warn ("Finalization of the HSA program failed", status);
@@ -861,11 +1075,12 @@ create_and_finalize_hsa_program (struct agent_info *agent)
}
HSA_DEBUG ("Finalization done\n");
- hsa_ext_program_destroy (prog_handle);
+ hsa_fns.hsa_ext_program_destroy_fn (prog_handle);
status
- = hsa_executable_create (HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN,
- "", &agent->executable);
+ = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
+ HSA_EXECUTABLE_STATE_UNFROZEN,
+ "", &agent->executable);
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Could not create HSA executable", status);
@@ -877,9 +1092,8 @@ create_and_finalize_hsa_program (struct agent_info *agent)
{
struct global_var_info *var;
var = &module->image_desc->global_variables[i];
- status
- = hsa_executable_global_variable_define (agent->executable,
- var->name, var->address);
+ status = hsa_fns.hsa_executable_global_variable_define_fn
+ (agent->executable, var->name, var->address);
HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
var->address);
@@ -892,11 +1106,12 @@ create_and_finalize_hsa_program (struct agent_info *agent)
module = module->next;
}
- status = hsa_executable_load_code_object (agent->executable, agent->id,
- code_object, "");
+ status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable,
+ agent->id,
+ code_object, "");
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Could not add a code object to the HSA executable", status);
- status = hsa_executable_freeze (agent->executable, "");
+ status = hsa_fns.hsa_executable_freeze_fn (agent->executable, "");
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Could not freeze the HSA executable", status);
@@ -937,7 +1152,7 @@ create_single_kernel_dispatch (struct kernel_info *kernel,
shadow->object = kernel->object;
hsa_signal_t sync_signal;
- hsa_status_t status = hsa_signal_create (1, 0, NULL, &sync_signal);
+ hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal);
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Error creating the HSA sync signal", status);
@@ -946,8 +1161,9 @@ create_single_kernel_dispatch (struct kernel_info *kernel,
shadow->group_segment_size = kernel->group_segment_size;
status
- = hsa_memory_allocate (agent->kernarg_region, kernel->kernarg_segment_size,
- &shadow->kernarg_address);
+ = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region,
+ kernel->kernarg_segment_size,
+ &shadow->kernarg_address);
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
@@ -962,11 +1178,11 @@ release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
HSA_DEBUG ("Released kernel dispatch: %p has value: %lu (%p)\n", shadow,
shadow->debug, (void *) shadow->debug);
- hsa_memory_free (shadow->kernarg_address);
+ hsa_fns.hsa_memory_free_fn (shadow->kernarg_address);
hsa_signal_t s;
s.handle = shadow->signal;
- hsa_signal_destroy (s);
+ hsa_fns.hsa_signal_destroy_fn (s);
free (shadow->omp_data_memory);
@@ -986,31 +1202,30 @@ init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
hsa_status_t status;
struct agent_info *agent = kernel->agent;
hsa_executable_symbol_t kernel_symbol;
- status = hsa_executable_get_symbol (agent->executable, NULL, kernel->name,
- agent->id, 0, &kernel_symbol);
+ status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+ kernel->name, agent->id,
+ 0, &kernel_symbol);
if (status != HSA_STATUS_SUCCESS)
{
hsa_warn ("Could not find symbol for kernel in the code object", status);
goto failure;
}
HSA_DEBUG ("Located kernel %s\n", kernel->name);
- status
- = hsa_executable_symbol_get_info (kernel_symbol,
- HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
- &kernel->object);
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object);
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Could not extract a kernel object from its symbol", status);
- status = hsa_executable_symbol_get_info
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
(kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
&kernel->kernarg_segment_size);
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Could not get info about kernel argument size", status);
- status = hsa_executable_symbol_get_info
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
(kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
&kernel->group_segment_size);
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Could not get info about kernel group segment size", status);
- status = hsa_executable_symbol_get_info
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
(kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
&kernel->private_segment_size);
if (status != HSA_STATUS_SUCCESS)
@@ -1209,18 +1424,43 @@ parse_target_attributes (void **input,
struct GOMP_kernel_launch_attributes *kla;
kla = (struct GOMP_kernel_launch_attributes *) *input;
*result = kla;
- if (kla->ndim != 1)
- GOMP_PLUGIN_fatal ("HSA does not yet support number of dimensions "
- "different from one.");
- if (kla->gdims[0] == 0)
- return false;
-
- HSA_DEBUG ("GOMP_OFFLOAD_run called with grid size %u and group size %u\n",
- kla->gdims[0], kla->wdims[0]);
+ if (kla->ndim == 0 || kla->ndim > 3)
+ GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim);
+ HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim);
+ unsigned i;
+ for (i = 0; i < kla->ndim; i++)
+ {
+ HSA_DEBUG (" Dimension %u: grid size %u and group size %u\n", i,
+ kla->gdims[i], kla->wdims[i]);
+ if (kla->gdims[i] == 0)
+ return false;
+ }
return true;
}
+/* Return the group size given the requested GROUP size, GRID size and number
+ of grid dimensions NDIM. */
+
+static uint32_t
+get_group_size (uint32_t ndim, uint32_t grid, uint32_t group)
+{
+ if (group == 0)
+ {
+ /* TODO: Provide a default via environment or device characteristics. */
+ if (ndim == 1)
+ group = 64;
+ else if (ndim == 2)
+ group = 8;
+ else
+ group = 4;
+ }
+
+ if (group > grid)
+ group = grid;
+ return group;
+}
+
/* Return true if the HSA runtime can run function FN_PTR. */
bool
@@ -1254,22 +1494,14 @@ packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest)
__atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE);
}
-/* Part of the libgomp plugin interface. Run a kernel on device N and pass it
- an array of pointers in VARS as a parameter. The kernel is identified by
- FN_PTR which must point to a kernel_info structure. */
+/* Run KERNEL on its agent, pass VARS to it as arguments and take
+ launchattributes from KLA. */
void
-GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
+run_kernel (struct kernel_info *kernel, void *vars,
+ struct GOMP_kernel_launch_attributes *kla)
{
- struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
struct agent_info *agent = kernel->agent;
- struct GOMP_kernel_launch_attributes def;
- struct GOMP_kernel_launch_attributes *kla;
- if (!parse_target_attributes (args, &def, &kla))
- {
- HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
- return;
- }
if (pthread_rwlock_rdlock (&agent->modules_rwlock))
GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
@@ -1288,11 +1520,12 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
print_kernel_dispatch (shadow, 2);
}
- uint64_t index = hsa_queue_add_write_index_release (agent->command_q, 1);
+ uint64_t index
+ = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1);
HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
/* Wait until the queue is not full before writing the packet. */
- while (index - hsa_queue_load_read_index_acquire (agent->command_q)
+ while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q)
>= agent->command_q->size)
;
@@ -1302,17 +1535,33 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
packet->grid_size_x = kla->gdims[0];
- uint32_t wgs = kla->wdims[0];
- if (wgs == 0)
- /* TODO: Provide a default via environment. */
- wgs = 64;
- else if (wgs > kla->gdims[0])
- wgs = kla->gdims[0];
- packet->workgroup_size_x = wgs;
- packet->grid_size_y = 1;
- packet->workgroup_size_y = 1;
- packet->grid_size_z = 1;
- packet->workgroup_size_z = 1;
+ packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0],
+ kla->wdims[0]);
+
+ if (kla->ndim >= 2)
+ {
+ packet->grid_size_y = kla->gdims[1];
+ packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1],
+ kla->wdims[1]);
+ }
+ else
+ {
+ packet->grid_size_y = 1;
+ packet->workgroup_size_y = 1;
+ }
+
+ if (kla->ndim == 3)
+ {
+ packet->grid_size_z = kla->gdims[2];
+ packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2],
+ kla->wdims[2]);
+ }
+ else
+ {
+ packet->grid_size_z = 1;
+ packet->workgroup_size_z = 1;
+ }
+
packet->private_segment_size = kernel->private_segment_size;
packet->group_segment_size = kernel->group_segment_size;
packet->kernel_object = kernel->object;
@@ -1320,7 +1569,7 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
hsa_signal_t s;
s.handle = shadow->signal;
packet->completion_signal = s;
- hsa_signal_store_relaxed (s, 1);
+ hsa_fns.hsa_signal_store_relaxed_fn (s, 1);
memcpy (shadow->kernarg_address, &vars, sizeof (vars));
/* PR hsa/70337. */
@@ -1344,9 +1593,10 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
packet_store_release ((uint32_t *) packet, header,
- 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
+ (uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
- hsa_signal_store_release (agent->command_q->doorbell_signal, index);
+ hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal,
+ index);
/* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
signal wait and signal load operations on their own and we need to
@@ -1357,8 +1607,9 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
/* Root signal waits with 1ms timeout. */
- while (hsa_signal_wait_acquire (s, HSA_SIGNAL_CONDITION_LT, 1, 1000 * 1000,
- HSA_WAIT_STATE_BLOCKED) != 0)
+ while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1,
+ 1000 * 1000,
+ HSA_WAIT_STATE_BLOCKED) != 0)
for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
{
hsa_signal_t child_s;
@@ -1366,7 +1617,7 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
HSA_DEBUG ("Waiting for children completion signal: %lu\n",
shadow->children_dispatches[i]->signal);
- hsa_signal_load_acquire (child_s);
+ hsa_fns.hsa_signal_load_acquire_fn (child_s);
}
release_kernel_dispatch (shadow);
@@ -1375,6 +1626,26 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
}
+/* Part of the libgomp plugin interface. Run a kernel on device N (the number
+ is actually ignored, we assume the FN_PTR has been mapped using the correct
+ device) and pass it an array of pointers in VARS as a parameter. The kernel
+ is identified by FN_PTR which must point to a kernel_info structure. */
+
+void
+GOMP_OFFLOAD_run (int n __attribute__((unused)),
+ void *fn_ptr, void *vars, void **args)
+{
+ struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
+ struct GOMP_kernel_launch_attributes def;
+ struct GOMP_kernel_launch_attributes *kla;
+ if (!parse_target_attributes (args, &def, &kla))
+ {
+ HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
+ return;
+ }
+ run_kernel (kernel, vars, kla);
+}
+
/* Information to be passed to a thread running a kernel asycnronously. */
struct async_run_info
@@ -1534,10 +1805,10 @@ GOMP_OFFLOAD_fini_device (int n)
release_agent_shared_libraries (agent);
- hsa_status_t status = hsa_queue_destroy (agent->command_q);
+ hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q);
if (status != HSA_STATUS_SUCCESS)
return hsa_error ("Error destroying command queue", status);
- status = hsa_queue_destroy (agent->kernel_dispatch_command_q);
+ status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q);
if (status != HSA_STATUS_SUCCESS)
return hsa_error ("Error destroying kernel dispatch command queue", status);
if (pthread_mutex_destroy (&agent->prog_mutex))