summaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
authorjamborm <jamborm@138bc75d-0d04-0410-961f-82ee72b054a4>2016-01-19 10:35:10 +0000
committerjamborm <jamborm@138bc75d-0d04-0410-961f-82ee72b054a4>2016-01-19 10:35:10 +0000
commit5668660879031f78bc693cd566f18bf726c34547 (patch)
treee669eb9cc41ef75177ede8bd306f466709040c9b /libgomp
parent56778b62f2fb68717c7085c4a112f65e8bed4701 (diff)
downloadgcc-5668660879031f78bc693cd566f18bf726c34547.tar.gz
Merge of HSA
2016-01-19 Martin Jambor <mjambor@suse.cz> Martin Liska <mliska@suse.cz> Michael Matz <matz@suse.de> libgomp/ * plugin/Makefrag.am: Add HSA plugin requirements. * plugin/configfrag.ac (HSA_RUNTIME_INCLUDE): New variable. (HSA_RUNTIME_LIB): Likewise. (HSA_RUNTIME_CPPFLAGS): Likewise. (HSA_RUNTIME_INCLUDE): New substitution. (HSA_RUNTIME_LIB): Likewise. (HSA_RUNTIME_LDFLAGS): Likewise. (hsa-runtime): New configure option. (hsa-runtime-include): Likewise. (hsa-runtime-lib): Likewise. (PLUGIN_HSA): New substitution variable. Fill HSA_RUNTIME_INCLUDE and HSA_RUNTIME_LIB according to the new configure options. (PLUGIN_HSA_CPPFLAGS): Likewise. (PLUGIN_HSA_LDFLAGS): Likewise. (PLUGIN_HSA_LIBS): Likewise. Check that we have access to HSA run-time. * libgomp-plugin.h (offload_target_type): New element OFFLOAD_TARGET_TYPE_HSA. * libgomp.h (gomp_target_task): New fields firstprivate_copies and args. (bool gomp_create_target_task): Updated. (gomp_device_descr): Extra parameter of run_func and async_run_func, new field can_run_func. * libgomp_g.h (GOMP_target_ext): Update prototype. * oacc-host.c (host_run): Added a new parameter args. * target.c (calculate_firstprivate_requirements): New function. (copy_firstprivate_data): Likewise. (gomp_target_fallback_firstprivate): Use them. (gomp_target_unshare_firstprivate): New function. (gomp_get_target_fn_addr): Allow returning NULL for shared memory devices. (GOMP_target): Do host fallback for all shared memory devices. Do not pass any args to plugins. (GOMP_target_ext): Introduce device-specific argument parameter args. Allow host fallback if device shares memory. Do not remap data if device has shared memory. (gomp_target_task_fn): Likewise. Also treat shared memory devices like host fallback for mappings. (GOMP_target_data): Treat shared memory devices like host fallback. (GOMP_target_data_ext): Likewise. (GOMP_target_update): Likewise. (GOMP_target_update_ext): Likewise. Also pass NULL as args to gomp_create_target_task. (GOMP_target_enter_exit_data): Likewise. (omp_target_alloc): Treat shared memory devices like host fallback. (omp_target_free): Likewise. (omp_target_is_present): Likewise. (omp_target_memcpy): Likewise. (omp_target_memcpy_rect): Likewise. (omp_target_associate_ptr): Likewise. (gomp_load_plugin_for_device): Also load can_run. * task.c (GOMP_PLUGIN_target_task_completion): Free firstprivate_copies. (gomp_create_target_task): Accept new argument args and store it to ttask. * plugin/plugin-hsa.c: New file. gcc/ * Makefile.in (OBJS): Add new source files. (GTFILES): Add hsa.c. * common.opt (disable_hsa): New variable. (-Whsa): New warning. * config.in (ENABLE_HSA): New. * configure.ac: Treat hsa differently from other accelerators. (OFFLOAD_TARGETS): Define ENABLE_OFFLOADING according to $enable_offloading. (ENABLE_HSA): Define ENABLE_HSA according to $enable_hsa. * doc/install.texi (Configuration): Document --with-hsa-runtime, --with-hsa-runtime-include, --with-hsa-runtime-lib and --with-hsa-kmt-lib. * doc/invoke.texi (-Whsa): Document. (hsa-gen-debug-stores): Likewise. * lto-wrapper.c (compile_images_for_offload_targets): Do not attempt to invoke offload compiler for hsa acclerator. * opts.c (common_handle_option): Determine whether HSA offloading should be performed. * params.def (PARAM_HSA_GEN_DEBUG_STORES): New parameter. * builtin-types.def (BT_FN_VOID_UINT_PTR_INT_PTR): New. (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT): Removed. (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR): New. * gimple-low.c (lower_stmt): Also handle GIMPLE_OMP_GRID_BODY. * gimple-pretty-print.c (dump_gimple_omp_for): Also handle GF_OMP_FOR_KIND_GRID_LOOP. (dump_gimple_omp_block): Also handle GIMPLE_OMP_GRID_BODY. (pp_gimple_stmt_1): Likewise. * gimple-walk.c (walk_gimple_stmt): Likewise. * gimple.c (gimple_build_omp_grid_body): New function. (gimple_copy): Also handle GIMPLE_OMP_GRID_BODY. * gimple.def (GIMPLE_OMP_GRID_BODY): New. * gimple.h (enum gf_mask): Added GF_OMP_PARALLEL_GRID_PHONY, GF_OMP_FOR_KIND_GRID_LOOP, GF_OMP_FOR_GRID_PHONY and GF_OMP_TEAMS_GRID_PHONY. (gimple_statement_omp_single_layout): Updated comments. (gimple_build_omp_grid_body): New function. (gimple_has_substatements): Also handle GIMPLE_OMP_GRID_BODY. (gimple_omp_for_grid_phony): New function. (gimple_omp_for_set_grid_phony): Likewise. (gimple_omp_parallel_grid_phony): Likewise. (gimple_omp_parallel_set_grid_phony): Likewise. (gimple_omp_teams_grid_phony): Likewise. (gimple_omp_teams_set_grid_phony): Likewise. (gimple_return_set_retbnd): Also handle GIMPLE_OMP_GRID_BODY. * omp-builtins.def (BUILT_IN_GOMP_OFFLOAD_REGISTER): New. (BUILT_IN_GOMP_OFFLOAD_UNREGISTER): Likewise. (BUILT_IN_GOMP_TARGET): Updated type. * omp-low.c: Include symbol-summary.h, hsa.h and params.h. (adjust_for_condition): New function. (get_omp_for_step_from_incr): Likewise. (extract_omp_for_data): Moved parts to adjust_for_condition and get_omp_for_step_from_incr. (build_outer_var_ref): Handle GIMPLE_OMP_GRID_BODY. (fixup_child_record_type): Bail out if receiver_decl is NULL. (scan_sharing_clauses): Handle OMP_CLAUSE__GRIDDIM_. (scan_omp_parallel): Do not create child functions for phony constructs. (check_omp_nesting_restrictions): Handle GIMPLE_OMP_GRID_BODY. (scan_omp_1_op): Checking assert we are not remapping to ERROR_MARK. Also also handle GIMPLE_OMP_GRID_BODY. (parallel_needs_hsa_kernel_p): New function. (expand_parallel_call): Register apprpriate parallel child functions as HSA kernels. (grid_launch_attributes_trees): New type. (grid_attr_trees): New variable. (grid_create_kernel_launch_attr_types): New function. (grid_insert_store_range_dim): Likewise. (grid_get_kernel_launch_attributes): Likewise. (get_target_argument_identifier_1): Likewise. (get_target_argument_identifier): Likewise. (get_target_argument_value): Likewise. (push_target_argument_according_to_value): Likewise. (get_target_arguments): Likewise. (expand_omp_target): Call get_target_arguments instead of looking up for teams and thread limit. (grid_expand_omp_for_loop): New function. (grid_arg_decl_map): New type. (grid_remap_kernel_arg_accesses): New function. (grid_expand_target_kernel_body): New function. (expand_omp): Call it. (lower_omp_for): Do not emit phony constructs. (lower_omp_taskreg): Do not emit phony constructs but create for them a temporary variable receiver_decl. (lower_omp_taskreg): Do not emit phony constructs. (lower_omp_teams): Likewise. (lower_omp_grid_body): New function. (lower_omp_1): Call it. (grid_reg_assignment_to_local_var_p): New function. (grid_seq_only_contains_local_assignments): Likewise. (grid_find_single_omp_among_assignments_1): Likewise. (grid_find_single_omp_among_assignments): Likewise. (grid_find_ungridifiable_statement): Likewise. (grid_target_follows_gridifiable_pattern): Likewise. (grid_remap_prebody_decls): Likewise. (grid_copy_leading_local_assignments): Likewise. (grid_process_kernel_body_copy): Likewise. (grid_attempt_target_gridification): Likewise. (grid_gridify_all_targets_stmt): Likewise. (grid_gridify_all_targets): Likewise. (execute_lower_omp): Call grid_gridify_all_targets. (make_gimple_omp_edges): Handle GIMPLE_OMP_GRID_BODY. * tree-core.h (omp_clause_code): Added OMP_CLAUSE__GRIDDIM_. (tree_omp_clause): Added union field dimension. * tree-pretty-print.c (dump_omp_clause): Handle OMP_CLAUSE__GRIDDIM_. * tree.c (omp_clause_num_ops): Added number of arguments of OMP_CLAUSE__GRIDDIM_. (omp_clause_code_name): Added name of OMP_CLAUSE__GRIDDIM_. (walk_tree_1): Handle OMP_CLAUSE__GRIDDIM_. * tree.h (OMP_CLAUSE_GRIDDIM_DIMENSION): New. (OMP_CLAUSE_SET_GRIDDIM_DIMENSION): Likewise. (OMP_CLAUSE_GRIDDIM_SIZE): Likewise. (OMP_CLAUSE_GRIDDIM_GROUP): Likewise. * passes.def: Schedule pass_ipa_hsa and pass_gen_hsail. * tree-pass.h (make_pass_gen_hsail): Declare. (make_pass_ipa_hsa): Likewise. * ipa-hsa.c: New file. * lto-section-in.c (lto_section_name): Add hsa section name. * lto-streamer.h (lto_section_type): Add hsa section. * timevar.def (TV_IPA_HSA): New. * hsa-brig-format.h: New file. * hsa-brig.c: New file. * hsa-dump.c: Likewise. * hsa-gen.c: Likewise. * hsa.c: Likewise. * hsa.h: Likewise. * toplev.c (compile_file): Call hsa_output_brig. * hsa-regalloc.c: New file. gcc/fortran/ * types.def (BT_FN_VOID_UINT_PTR_INT_PTR): New. (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT): Removed. (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR): New. gcc/lto/ * lto-partition.c: Include "hsa.h" (add_symbol_to_partition_1): Put hsa implementations into the same partition as host implementations. liboffloadmic/ * plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_async_run): New unused parameter. (GOMP_OFFLOAD_run): Likewise. include/ * gomp-constants.h (GOMP_DEVICE_HSA): New macro. (GOMP_VERSION_HSA): Likewise. (GOMP_TARGET_ARG_DEVICE_MASK): Likewise. (GOMP_TARGET_ARG_DEVICE_ALL): Likewise. (GOMP_TARGET_ARG_SUBSEQUENT_PARAM): Likewise. (GOMP_TARGET_ARG_ID_MASK): Likewise. (GOMP_TARGET_ARG_NUM_TEAMS): Likewise. (GOMP_TARGET_ARG_THREAD_LIMIT): Likewise. (GOMP_TARGET_ARG_VALUE_SHIFT): Likewise. (GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES): Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@232549 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/ChangeLog61
-rw-r--r--libgomp/Makefile.in49
-rw-r--r--libgomp/config.h.in6
-rwxr-xr-xlibgomp/configure166
-rw-r--r--libgomp/libgomp-plugin.h3
-rw-r--r--libgomp/libgomp.h12
-rw-r--r--libgomp/libgomp_g.h3
-rw-r--r--libgomp/oacc-host.c3
-rw-r--r--libgomp/plugin/Makefrag.am13
-rw-r--r--libgomp/plugin/configfrag.ac102
-rw-r--r--libgomp/plugin/plugin-hsa.c1493
-rw-r--r--libgomp/target.c225
-rw-r--r--libgomp/task.c4
-rw-r--r--libgomp/testsuite/Makefile.in6
14 files changed, 2068 insertions, 78 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 2efc516624a..82619e6ad8e 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,64 @@
+2016-01-19 Martin Jambor <mjambor@suse.cz>
+ Martin Liska <mliska@suse.cz>
+
+ * plugin/Makefrag.am: Add HSA plugin requirements.
+ * plugin/configfrag.ac (HSA_RUNTIME_INCLUDE): New variable.
+ (HSA_RUNTIME_LIB): Likewise.
+ (HSA_RUNTIME_CPPFLAGS): Likewise.
+ (HSA_RUNTIME_INCLUDE): New substitution.
+ (HSA_RUNTIME_LIB): Likewise.
+ (HSA_RUNTIME_LDFLAGS): Likewise.
+ (hsa-runtime): New configure option.
+ (hsa-runtime-include): Likewise.
+ (hsa-runtime-lib): Likewise.
+ (PLUGIN_HSA): New substitution variable.
+ Fill HSA_RUNTIME_INCLUDE and HSA_RUNTIME_LIB according to the new
+ configure options.
+ (PLUGIN_HSA_CPPFLAGS): Likewise.
+ (PLUGIN_HSA_LDFLAGS): Likewise.
+ (PLUGIN_HSA_LIBS): Likewise.
+ Check that we have access to HSA run-time.
+ * libgomp-plugin.h (offload_target_type): New element
+ OFFLOAD_TARGET_TYPE_HSA.
+ * libgomp.h (gomp_target_task): New fields firstprivate_copies and
+ args.
+ (bool gomp_create_target_task): Updated.
+ (gomp_device_descr): Extra parameter of run_func and async_run_func,
+ new field can_run_func.
+ * libgomp_g.h (GOMP_target_ext): Update prototype.
+ * oacc-host.c (host_run): Added a new parameter args.
+ * target.c (calculate_firstprivate_requirements): New function.
+ (copy_firstprivate_data): Likewise.
+ (gomp_target_fallback_firstprivate): Use them.
+ (gomp_target_unshare_firstprivate): New function.
+ (gomp_get_target_fn_addr): Allow returning NULL for shared memory
+ devices.
+ (GOMP_target): Do host fallback for all shared memory devices. Do not
+ pass any args to plugins.
+ (GOMP_target_ext): Introduce device-specific argument parameter args.
+ Allow host fallback if device shares memory. Do not remap data if
+ device has shared memory.
+ (gomp_target_task_fn): Likewise. Also treat shared memory devices
+ like host fallback for mappings.
+ (GOMP_target_data): Treat shared memory devices like host fallback.
+ (GOMP_target_data_ext): Likewise.
+ (GOMP_target_update): Likewise.
+ (GOMP_target_update_ext): Likewise. Also pass NULL as args to
+ gomp_create_target_task.
+ (GOMP_target_enter_exit_data): Likewise.
+ (omp_target_alloc): Treat shared memory devices like host fallback.
+ (omp_target_free): Likewise.
+ (omp_target_is_present): Likewise.
+ (omp_target_memcpy): Likewise.
+ (omp_target_memcpy_rect): Likewise.
+ (omp_target_associate_ptr): Likewise.
+ (gomp_load_plugin_for_device): Also load can_run.
+ * task.c (GOMP_PLUGIN_target_task_completion): Free
+ firstprivate_copies.
+ (gomp_create_target_task): Accept new argument args and store it to
+ ttask.
+ * plugin/plugin-hsa.c: New file.
+
2016-01-18 Tom de Vries <tom@codesourcery.com>
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: New test.
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 7a1c976c5c9..bbfac4e183f 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -17,7 +17,7 @@
# Plugins for offload execution, Makefile.am fragment.
#
-# Copyright (C) 2014-2015 Free Software Foundation, Inc.
+# Copyright (C) 2014-2016 Free Software Foundation, Inc.
#
# Contributed by Mentor Embedded.
#
@@ -89,7 +89,8 @@ DIST_COMMON = $(top_srcdir)/plugin/Makefrag.am ChangeLog \
$(srcdir)/omp_lib.f90.in $(srcdir)/libgomp_f.h.in \
$(srcdir)/libgomp.spec.in $(srcdir)/../depcomp
@PLUGIN_NVPTX_TRUE@am__append_1 = libgomp-plugin-nvptx.la
-@USE_FORTRAN_TRUE@am__append_2 = openacc.f90
+@PLUGIN_HSA_TRUE@am__append_2 = libgomp-plugin-hsa.la
+@USE_FORTRAN_TRUE@am__append_3 = openacc.f90
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@@ -147,6 +148,17 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \
"$(DESTDIR)$(toolexeclibdir)"
LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
am__DEPENDENCIES_1 =
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_DEPENDENCIES = libgomp.la \
+@PLUGIN_HSA_TRUE@ $(am__DEPENDENCIES_1)
+@PLUGIN_HSA_TRUE@am_libgomp_plugin_hsa_la_OBJECTS = \
+@PLUGIN_HSA_TRUE@ libgomp_plugin_hsa_la-plugin-hsa.lo
+libgomp_plugin_hsa_la_OBJECTS = $(am_libgomp_plugin_hsa_la_OBJECTS)
+libgomp_plugin_hsa_la_LINK = $(LIBTOOL) --tag=CC \
+ $(libgomp_plugin_hsa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(libgomp_plugin_hsa_la_LDFLAGS) $(LDFLAGS) -o $@
+@PLUGIN_HSA_TRUE@am_libgomp_plugin_hsa_la_rpath = -rpath \
+@PLUGIN_HSA_TRUE@ $(toolexeclibdir)
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_DEPENDENCIES = libgomp.la \
@PLUGIN_NVPTX_TRUE@ $(am__DEPENDENCIES_1)
@PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_OBJECTS = \
@@ -187,7 +199,8 @@ FCLD = $(FC)
FCLINK = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
-SOURCES = $(libgomp_plugin_nvptx_la_SOURCES) $(libgomp_la_SOURCES)
+SOURCES = $(libgomp_plugin_hsa_la_SOURCES) \
+ $(libgomp_plugin_nvptx_la_SOURCES) $(libgomp_la_SOURCES)
MULTISRCTOP =
MULTIBUILDTOP =
MULTIDIRS =
@@ -255,6 +268,8 @@ FC = @FC@
FCFLAGS = @FCFLAGS@
FGREP = @FGREP@
GREP = @GREP@
+HSA_RUNTIME_INCLUDE = @HSA_RUNTIME_INCLUDE@
+HSA_RUNTIME_LIB = @HSA_RUNTIME_LIB@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
@@ -299,6 +314,10 @@ PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
+PLUGIN_HSA = @PLUGIN_HSA@
+PLUGIN_HSA_CPPFLAGS = @PLUGIN_HSA_CPPFLAGS@
+PLUGIN_HSA_LDFLAGS = @PLUGIN_HSA_LDFLAGS@
+PLUGIN_HSA_LIBS = @PLUGIN_HSA_LIBS@
PLUGIN_NVPTX = @PLUGIN_NVPTX@
PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@
PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@
@@ -391,7 +410,7 @@ libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
AM_CPPFLAGS = $(addprefix -I, $(search_path))
AM_CFLAGS = $(XCFLAGS)
AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS)
-toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1)
+toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) $(am__append_2)
nodist_toolexeclib_HEADERS = libgomp.spec
# -Wc is only a libtool option.
@@ -415,7 +434,7 @@ libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
bar.c ptrlock.c time.c fortran.c affinity.c target.c \
splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
- priority_queue.c $(am__append_2)
+ priority_queue.c $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -426,6 +445,16 @@ libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
@PLUGIN_NVPTX_TRUE@ $(lt_host_flags) $(PLUGIN_NVPTX_LDFLAGS)
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LIBADD = libgomp.la $(PLUGIN_NVPTX_LIBS)
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LIBTOOLFLAGS = --tag=disable-static
+
+# Heterogenous Systems Architecture plugin
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_version_info = -version-info $(libtool_VERSION)
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_SOURCES = plugin/plugin-hsa.c
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_CPPFLAGS = $(AM_CPPFLAGS) $(PLUGIN_HSA_CPPFLAGS)
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LDFLAGS = \
+@PLUGIN_HSA_TRUE@ $(libgomp_plugin_hsa_version_info) \
+@PLUGIN_HSA_TRUE@ $(lt_host_flags) $(PLUGIN_HSA_LDFLAGS)
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBADD = libgomp.la $(PLUGIN_HSA_LIBS)
+@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBTOOLFLAGS = --tag=disable-static
nodist_noinst_HEADERS = libgomp_f.h
nodist_libsubinclude_HEADERS = omp.h openacc.h
@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
@@ -553,6 +582,8 @@ clean-toolexeclibLTLIBRARIES:
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
+libgomp-plugin-hsa.la: $(libgomp_plugin_hsa_la_OBJECTS) $(libgomp_plugin_hsa_la_DEPENDENCIES) $(EXTRA_libgomp_plugin_hsa_la_DEPENDENCIES)
+ $(libgomp_plugin_hsa_la_LINK) $(am_libgomp_plugin_hsa_la_rpath) $(libgomp_plugin_hsa_la_OBJECTS) $(libgomp_plugin_hsa_la_LIBADD) $(LIBS)
libgomp-plugin-nvptx.la: $(libgomp_plugin_nvptx_la_OBJECTS) $(libgomp_plugin_nvptx_la_DEPENDENCIES) $(EXTRA_libgomp_plugin_nvptx_la_DEPENDENCIES)
$(libgomp_plugin_nvptx_la_LINK) $(am_libgomp_plugin_nvptx_la_rpath) $(libgomp_plugin_nvptx_la_OBJECTS) $(libgomp_plugin_nvptx_la_LIBADD) $(LIBS)
libgomp.la: $(libgomp_la_OBJECTS) $(libgomp_la_DEPENDENCIES) $(EXTRA_libgomp_la_DEPENDENCIES)
@@ -575,6 +606,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@
@@ -623,6 +655,13 @@ distclean-compile:
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
+libgomp_plugin_hsa_la-plugin-hsa.lo: plugin/plugin-hsa.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_hsa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_hsa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_hsa_la-plugin-hsa.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Tpo -c -o libgomp_plugin_hsa_la-plugin-hsa.lo `test -f 'plugin/plugin-hsa.c' || echo '$(srcdir)/'`plugin/plugin-hsa.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Tpo $(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='plugin/plugin-hsa.c' object='libgomp_plugin_hsa_la-plugin-hsa.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_hsa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_hsa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_hsa_la-plugin-hsa.lo `test -f 'plugin/plugin-hsa.c' || echo '$(srcdir)/'`plugin/plugin-hsa.c
+
libgomp_plugin_nvptx_la-plugin-nvptx.lo: plugin/plugin-nvptx.c
@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_nvptx_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_nvptx_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_nvptx_la-plugin-nvptx.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo -c -o libgomp_plugin_nvptx_la-plugin-nvptx.lo `test -f 'plugin/plugin-nvptx.c' || echo '$(srcdir)/'`plugin/plugin-nvptx.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo
diff --git a/libgomp/config.h.in b/libgomp/config.h.in
index 2e4c6981aea..226ac5358a7 100644
--- a/libgomp/config.h.in
+++ b/libgomp/config.h.in
@@ -60,6 +60,9 @@
/* Define to 1 if you have the `strtoull' function. */
#undef HAVE_STRTOULL
+/* Define to 1 if the system has the type `struct _Mutex_Control'. */
+#undef HAVE_STRUCT__MUTEX_CONTROL
+
/* Define to 1 if the target runtime linker supports binding the same symbol
to different versions. */
#undef HAVE_SYMVER_SYMBOL_RENAMING_RUNTIME_SUPPORT
@@ -119,6 +122,9 @@
/* Define to the version of this package. */
#undef PACKAGE_VERSION
+/* Define to 1 if the HSA plugin is built, 0 if not. */
+#undef PLUGIN_HSA
+
/* Define to 1 if the NVIDIA plugin is built, 0 if not. */
#undef PLUGIN_NVPTX
diff --git a/libgomp/configure b/libgomp/configure
index aaa17c9a55a..1410bc71223 100755
--- a/libgomp/configure
+++ b/libgomp/configure
@@ -627,10 +627,18 @@ LIBGOMP_BUILD_VERSIONED_SHLIB_FALSE
LIBGOMP_BUILD_VERSIONED_SHLIB_TRUE
OPT_LDFLAGS
SECTION_LDFLAGS
+PLUGIN_HSA_FALSE
+PLUGIN_HSA_TRUE
PLUGIN_NVPTX_FALSE
PLUGIN_NVPTX_TRUE
offload_additional_lib_paths
offload_additional_options
+PLUGIN_HSA_LIBS
+PLUGIN_HSA_LDFLAGS
+PLUGIN_HSA_CPPFLAGS
+PLUGIN_HSA
+HSA_RUNTIME_LIB
+HSA_RUNTIME_INCLUDE
PLUGIN_NVPTX_LIBS
PLUGIN_NVPTX_LDFLAGS
PLUGIN_NVPTX_CPPFLAGS
@@ -782,6 +790,10 @@ enable_maintainer_mode
with_cuda_driver
with_cuda_driver_include
with_cuda_driver_lib
+with_hsa_runtime
+with_hsa_runtime_include
+with_hsa_runtime_lib
+with_hsa_kmt_lib
enable_linux_futex
enable_tls
enable_symvers
@@ -1453,6 +1465,17 @@ Optional Packages:
--with-cuda-driver-lib=PATH
specify directory for the installed CUDA driver
library
+ --with-hsa-runtime=PATH specify prefix directory for installed HSA run-time
+ package. Equivalent to
+ --with-hsa-runtime-include=PATH/include plus
+ --with-hsa-runtime-lib=PATH/lib
+ --with-hsa-runtime-include=PATH
+ specify directory for installed HSA run-time include
+ files
+ --with-hsa-runtime-lib=PATH
+ specify directory for the installed HSA run-time
+ library
+ --with-hsa-kmt-lib=PATH specify directory for installed HSA KMT library.
Some influential environment variables:
CC C compiler command
@@ -11121,7 +11144,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11124 "configure"
+#line 11147 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -11227,7 +11250,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11230 "configure"
+#line 11253 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -15090,7 +15113,7 @@ esac
# Plugins for offload execution, configure.ac fragment. -*- mode: autoconf -*-
#
-# Copyright (C) 2014-2015 Free Software Foundation, Inc.
+# Copyright (C) 2014-2016 Free Software Foundation, Inc.
#
# Contributed by Mentor Embedded.
#
@@ -15225,6 +15248,72 @@ PLUGIN_NVPTX_LIBS=
+# Look for HSA run-time, its includes and libraries
+
+HSA_RUNTIME_INCLUDE=
+HSA_RUNTIME_LIB=
+
+
+HSA_RUNTIME_CPPFLAGS=
+HSA_RUNTIME_LDFLAGS=
+
+
+# Check whether --with-hsa-runtime was given.
+if test "${with_hsa_runtime+set}" = set; then :
+ withval=$with_hsa_runtime;
+fi
+
+
+# Check whether --with-hsa-runtime-include was given.
+if test "${with_hsa_runtime_include+set}" = set; then :
+ withval=$with_hsa_runtime_include;
+fi
+
+
+# Check whether --with-hsa-runtime-lib was given.
+if test "${with_hsa_runtime_lib+set}" = set; then :
+ withval=$with_hsa_runtime_lib;
+fi
+
+if test "x$with_hsa_runtime" != x; then
+ HSA_RUNTIME_INCLUDE=$with_hsa_runtime/include
+ HSA_RUNTIME_LIB=$with_hsa_runtime/lib
+fi
+if test "x$with_hsa_runtime_include" != x; then
+ HSA_RUNTIME_INCLUDE=$with_hsa_runtime_include
+fi
+if test "x$with_hsa_runtime_lib" != x; then
+ HSA_RUNTIME_LIB=$with_hsa_runtime_lib
+fi
+if test "x$HSA_RUNTIME_INCLUDE" != x; then
+ HSA_RUNTIME_CPPFLAGS=-I$HSA_RUNTIME_INCLUDE
+fi
+if test "x$HSA_RUNTIME_LIB" != x; then
+ HSA_RUNTIME_LDFLAGS=-L$HSA_RUNTIME_LIB
+fi
+
+
+# Check whether --with-hsa-kmt-lib was given.
+if test "${with_hsa_kmt_lib+set}" = set; then :
+ withval=$with_hsa_kmt_lib;
+fi
+
+if test "x$with_hsa_kmt_lib" != x; then
+ HSA_RUNTIME_LDFLAGS="$HSA_RUNTIME_LDFLAGS -L$with_hsa_kmt_lib"
+ HSA_RUNTIME_LIB=
+fi
+
+PLUGIN_HSA=0
+PLUGIN_HSA_CPPFLAGS=
+PLUGIN_HSA_LDFLAGS=
+PLUGIN_HSA_LIBS=
+
+
+
+
+
+
+
# Get offload targets and path to install tree of offloading compiler.
offload_additional_options=
offload_additional_lib_paths=
@@ -15277,6 +15366,60 @@ rm -f core conftest.err conftest.$ac_objext \
;;
esac
;;
+ hsa*)
+ case "${target}" in
+ x86_64-*-*)
+ case " ${CC} ${CFLAGS} " in
+ *" -m32 "*)
+ PLUGIN_HSA=0
+ ;;
+ *)
+ tgt_name=hsa
+ PLUGIN_HSA=$tgt
+ PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS
+ PLUGIN_HSA_LDFLAGS=$HSA_RUNTIME_LDFLAGS
+ PLUGIN_HSA_LIBS="-lhsa-runtime64 -lhsakmt"
+
+ PLUGIN_HSA_save_CPPFLAGS=$CPPFLAGS
+ CPPFLAGS="$PLUGIN_HSA_CPPFLAGS $CPPFLAGS"
+ PLUGIN_HSA_save_LDFLAGS=$LDFLAGS
+ LDFLAGS="$PLUGIN_HSA_LDFLAGS $LDFLAGS"
+ PLUGIN_HSA_save_LIBS=$LIBS
+ LIBS="$PLUGIN_HSA_LIBS $LIBS"
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include "hsa.h"
+int
+main ()
+{
+hsa_status_t status = hsa_init ()
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ PLUGIN_HSA=1
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ CPPFLAGS=$PLUGIN_HSA_save_CPPFLAGS
+ LDFLAGS=$PLUGIN_HSA_save_LDFLAGS
+ LIBS=$PLUGIN_HSA_save_LIBS
+ case $PLUGIN_HSA in
+ hsa*)
+ HSA_PLUGIN=0
+ as_fn_error "HSA run-time package required for HSA support" "$LINENO" 5
+ ;;
+ esac
+ ;;
+ esac
+ ;;
+ *-*-*)
+ PLUGIN_HSA=0
+ ;;
+ esac
+ ;;
*)
as_fn_error "unknown offload target specified" "$LINENO" 5
;;
@@ -15313,6 +15456,19 @@ cat >>confdefs.h <<_ACEOF
#define PLUGIN_NVPTX $PLUGIN_NVPTX
_ACEOF
+ if test $PLUGIN_HSA = 1; then
+ PLUGIN_HSA_TRUE=
+ PLUGIN_HSA_FALSE='#'
+else
+ PLUGIN_HSA_TRUE='#'
+ PLUGIN_HSA_FALSE=
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define PLUGIN_HSA $PLUGIN_HSA
+_ACEOF
+
# Check for functions needed.
@@ -16712,6 +16868,10 @@ if test -z "${PLUGIN_NVPTX_TRUE}" && test -z "${PLUGIN_NVPTX_FALSE}"; then
as_fn_error "conditional \"PLUGIN_NVPTX\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${PLUGIN_HSA_TRUE}" && test -z "${PLUGIN_HSA_FALSE}"; then
+ as_fn_error "conditional \"PLUGIN_HSA\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
if test -z "${LIBGOMP_BUILD_VERSIONED_SHLIB_TRUE}" && test -z "${LIBGOMP_BUILD_VERSIONED_SHLIB_FALSE}"; then
as_fn_error "conditional \"LIBGOMP_BUILD_VERSIONED_SHLIB\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 64035e4f44d..53f92483631 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -48,7 +48,8 @@ enum offload_target_type
OFFLOAD_TARGET_TYPE_HOST = 2,
/* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */
OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5,
- OFFLOAD_TARGET_TYPE_INTEL_MIC = 6
+ OFFLOAD_TARGET_TYPE_INTEL_MIC = 6,
+ OFFLOAD_TARGET_TYPE_HSA = 7
};
/* Auxiliary struct, used for transferring pairs of addresses from plugin
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 6ddde56a057..7108a6d0118 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -496,6 +496,10 @@ struct gomp_target_task
struct target_mem_desc *tgt;
struct gomp_task *task;
struct gomp_team *team;
+ /* Copies of firstprivate mapped data for shared memory accelerators. */
+ void *firstprivate_copies;
+ /* Device-specific target arguments. */
+ void **args;
void *hostaddrs[];
};
@@ -750,7 +754,8 @@ extern void gomp_task_maybe_wait_for_dependencies (void **);
extern bool gomp_create_target_task (struct gomp_device_descr *,
void (*) (void *), size_t, void **,
size_t *, unsigned short *, unsigned int,
- void **, enum gomp_target_task_state);
+ void **, void **,
+ enum gomp_target_task_state);
static void inline
gomp_finish_task (struct gomp_task *task)
@@ -937,8 +942,9 @@ struct gomp_device_descr
void *(*dev2host_func) (int, void *, const void *, size_t);
void *(*host2dev_func) (int, void *, const void *, size_t);
void *(*dev2dev_func) (int, void *, const void *, size_t);
- void (*run_func) (int, void *, void *);
- void (*async_run_func) (int, void *, void *, void *);
+ bool (*can_run_func) (void *);
+ void (*run_func) (int, void *, void *, void **);
+ void (*async_run_func) (int, void *, void *, void **, void *);
/* Splay tree containing information about mapped memory regions. */
struct splay_tree_s mem_map;
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index 6229ca07e16..24eebb68e67 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -278,8 +278,7 @@ extern void GOMP_single_copy_end (void *);
extern void GOMP_target (int, void (*) (void *), const void *,
size_t, void **, size_t *, unsigned char *);
extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *,
- unsigned short *, unsigned int, void **,
- int, int);
+ unsigned short *, unsigned int, void **, void **);
extern void GOMP_target_data (int, const void *,
size_t, void **, size_t *, unsigned char *);
extern void GOMP_target_data_ext (int, size_t, void **, size_t *,
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 0760e44d299..1e760f6d83a 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -123,7 +123,8 @@ host_host2dev (int n __attribute__ ((unused)),
}
static void
-host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars)
+host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars,
+ void **args __attribute__((unused)))
{
void (*fn)(void *) = (void (*)(void *)) fn_ptr;
diff --git a/libgomp/plugin/Makefrag.am b/libgomp/plugin/Makefrag.am
index 4efe9632f36..035a6636aaa 100644
--- a/libgomp/plugin/Makefrag.am
+++ b/libgomp/plugin/Makefrag.am
@@ -38,3 +38,16 @@ libgomp_plugin_nvptx_la_LDFLAGS += $(PLUGIN_NVPTX_LDFLAGS)
libgomp_plugin_nvptx_la_LIBADD = libgomp.la $(PLUGIN_NVPTX_LIBS)
libgomp_plugin_nvptx_la_LIBTOOLFLAGS = --tag=disable-static
endif
+
+if PLUGIN_HSA
+# Heterogenous Systems Architecture plugin
+libgomp_plugin_hsa_version_info = -version-info $(libtool_VERSION)
+toolexeclib_LTLIBRARIES += libgomp-plugin-hsa.la
+libgomp_plugin_hsa_la_SOURCES = plugin/plugin-hsa.c
+libgomp_plugin_hsa_la_CPPFLAGS = $(AM_CPPFLAGS) $(PLUGIN_HSA_CPPFLAGS)
+libgomp_plugin_hsa_la_LDFLAGS = $(libgomp_plugin_hsa_version_info) \
+ $(lt_host_flags)
+libgomp_plugin_hsa_la_LDFLAGS += $(PLUGIN_HSA_LDFLAGS)
+libgomp_plugin_hsa_la_LIBADD = libgomp.la $(PLUGIN_HSA_LIBS)
+libgomp_plugin_hsa_la_LIBTOOLFLAGS = --tag=disable-static
+endif
diff --git a/libgomp/plugin/configfrag.ac b/libgomp/plugin/configfrag.ac
index 768954af6d9..2a9d9f907e3 100644
--- a/libgomp/plugin/configfrag.ac
+++ b/libgomp/plugin/configfrag.ac
@@ -81,6 +81,62 @@ AC_SUBST(PLUGIN_NVPTX_CPPFLAGS)
AC_SUBST(PLUGIN_NVPTX_LDFLAGS)
AC_SUBST(PLUGIN_NVPTX_LIBS)
+# Look for HSA run-time, its includes and libraries
+
+HSA_RUNTIME_INCLUDE=
+HSA_RUNTIME_LIB=
+AC_SUBST(HSA_RUNTIME_INCLUDE)
+AC_SUBST(HSA_RUNTIME_LIB)
+HSA_RUNTIME_CPPFLAGS=
+HSA_RUNTIME_LDFLAGS=
+
+AC_ARG_WITH(hsa-runtime,
+ [AS_HELP_STRING([--with-hsa-runtime=PATH],
+ [specify prefix directory for installed HSA run-time package.
+ Equivalent to --with-hsa-runtime-include=PATH/include
+ plus --with-hsa-runtime-lib=PATH/lib])])
+AC_ARG_WITH(hsa-runtime-include,
+ [AS_HELP_STRING([--with-hsa-runtime-include=PATH],
+ [specify directory for installed HSA run-time include files])])
+AC_ARG_WITH(hsa-runtime-lib,
+ [AS_HELP_STRING([--with-hsa-runtime-lib=PATH],
+ [specify directory for the installed HSA run-time library])])
+if test "x$with_hsa_runtime" != x; then
+ HSA_RUNTIME_INCLUDE=$with_hsa_runtime/include
+ HSA_RUNTIME_LIB=$with_hsa_runtime/lib
+fi
+if test "x$with_hsa_runtime_include" != x; then
+ HSA_RUNTIME_INCLUDE=$with_hsa_runtime_include
+fi
+if test "x$with_hsa_runtime_lib" != x; then
+ HSA_RUNTIME_LIB=$with_hsa_runtime_lib
+fi
+if test "x$HSA_RUNTIME_INCLUDE" != x; then
+ HSA_RUNTIME_CPPFLAGS=-I$HSA_RUNTIME_INCLUDE
+fi
+if test "x$HSA_RUNTIME_LIB" != x; then
+ HSA_RUNTIME_LDFLAGS=-L$HSA_RUNTIME_LIB
+fi
+
+AC_ARG_WITH(hsa-kmt-lib,
+ [AS_HELP_STRING([--with-hsa-kmt-lib=PATH],
+ [specify directory for installed HSA KMT library.])])
+if test "x$with_hsa_kmt_lib" != x; then
+ HSA_RUNTIME_LDFLAGS="$HSA_RUNTIME_LDFLAGS -L$with_hsa_kmt_lib"
+ HSA_RUNTIME_LIB=
+fi
+
+PLUGIN_HSA=0
+PLUGIN_HSA_CPPFLAGS=
+PLUGIN_HSA_LDFLAGS=
+PLUGIN_HSA_LIBS=
+AC_SUBST(PLUGIN_HSA)
+AC_SUBST(PLUGIN_HSA_CPPFLAGS)
+AC_SUBST(PLUGIN_HSA_LDFLAGS)
+AC_SUBST(PLUGIN_HSA_LIBS)
+
+
+
# Get offload targets and path to install tree of offloading compiler.
offload_additional_options=
offload_additional_lib_paths=
@@ -122,6 +178,49 @@ if test x"$enable_offload_targets" != x; then
;;
esac
;;
+ hsa*)
+ case "${target}" in
+ x86_64-*-*)
+ case " ${CC} ${CFLAGS} " in
+ *" -m32 "*)
+ PLUGIN_HSA=0
+ ;;
+ *)
+ tgt_name=hsa
+ PLUGIN_HSA=$tgt
+ PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS
+ PLUGIN_HSA_LDFLAGS=$HSA_RUNTIME_LDFLAGS
+ PLUGIN_HSA_LIBS="-lhsa-runtime64 -lhsakmt"
+
+ PLUGIN_HSA_save_CPPFLAGS=$CPPFLAGS
+ CPPFLAGS="$PLUGIN_HSA_CPPFLAGS $CPPFLAGS"
+ PLUGIN_HSA_save_LDFLAGS=$LDFLAGS
+ LDFLAGS="$PLUGIN_HSA_LDFLAGS $LDFLAGS"
+ PLUGIN_HSA_save_LIBS=$LIBS
+ LIBS="$PLUGIN_HSA_LIBS $LIBS"
+
+ AC_LINK_IFELSE(
+ [AC_LANG_PROGRAM(
+ [#include "hsa.h"],
+ [hsa_status_t status = hsa_init ()])],
+ [PLUGIN_HSA=1])
+ CPPFLAGS=$PLUGIN_HSA_save_CPPFLAGS
+ LDFLAGS=$PLUGIN_HSA_save_LDFLAGS
+ LIBS=$PLUGIN_HSA_save_LIBS
+ case $PLUGIN_HSA in
+ hsa*)
+ HSA_PLUGIN=0
+ AC_MSG_ERROR([HSA run-time package required for HSA support])
+ ;;
+ esac
+ ;;
+ esac
+ ;;
+ *-*-*)
+ PLUGIN_HSA=0
+ ;;
+ esac
+ ;;
*)
AC_MSG_ERROR([unknown offload target specified])
;;
@@ -145,3 +244,6 @@ AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets",
AM_CONDITIONAL([PLUGIN_NVPTX], [test $PLUGIN_NVPTX = 1])
AC_DEFINE_UNQUOTED([PLUGIN_NVPTX], [$PLUGIN_NVPTX],
[Define to 1 if the NVIDIA plugin is built, 0 if not.])
+AM_CONDITIONAL([PLUGIN_HSA], [test $PLUGIN_HSA = 1])
+AC_DEFINE_UNQUOTED([PLUGIN_HSA], [$PLUGIN_HSA],
+ [Define to 1 if the HSA plugin is built, 0 if not.])
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
new file mode 100644
index 00000000000..d88849338dc
--- /dev/null
+++ b/libgomp/plugin/plugin-hsa.c
@@ -0,0 +1,1493 @@
+/* Plugin for HSAIL execution.
+
+ Copyright (C) 2013-2016 Free Software Foundation, Inc.
+
+ Contributed by Martin Jambor <mjambor@suse.cz> and
+ Martin Liska <mliska@suse.cz>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <hsa.h>
+#include <hsa_ext_finalize.h>
+#include <dlfcn.h>
+#include "libgomp-plugin.h"
+#include "gomp-constants.h"
+
+/* Keep the following GOMP prefixed structures in sync with respective parts of
+ the compiler. */
+
+/* Structure describing the run-time and grid properties of an HSA kernel
+ lauch. */
+
+struct GOMP_kernel_launch_attributes
+{
+ /* Number of dimensions the workload has. Maximum number is 3. */
+ uint32_t ndim;
+ /* Size of the grid in the three respective dimensions. */
+ uint32_t gdims[3];
+ /* Size of work-groups in the respective dimensions. */
+ uint32_t wdims[3];
+};
+
+/* Collection of information needed for a dispatch of a kernel from a
+ kernel. */
+
+struct GOMP_hsa_kernel_dispatch
+{
+ /* Pointer to a command queue associated with a kernel dispatch agent. */
+ void *queue;
+ /* Pointer to reserved memory for OMP data struct copying. */
+ void *omp_data_memory;
+ /* Pointer to a memory space used for kernel arguments passing. */
+ void *kernarg_address;
+ /* Kernel object. */
+ uint64_t object;
+ /* Synchronization signal used for dispatch synchronization. */
+ uint64_t signal;
+ /* Private segment size. */
+ uint32_t private_segment_size;
+ /* Group segment size. */
+ uint32_t group_segment_size;
+ /* Number of children kernel dispatches. */
+ uint64_t kernel_dispatch_count;
+ /* Debug purpose argument. */
+ uint64_t debug;
+ /* Levels-var ICV. */
+ uint64_t omp_level;
+ /* Kernel dispatch structures created for children kernel dispatches. */
+ struct GOMP_hsa_kernel_dispatch **children_dispatches;
+ /* Number of threads. */
+ uint32_t omp_num_threads;
+};
+
+/* Part of the libgomp plugin interface. Return the name of the accelerator,
+ which is "hsa". */
+
+const char *
+GOMP_OFFLOAD_get_name (void)
+{
+ return "hsa";
+}
+
+/* Part of the libgomp plugin interface. Return the specific capabilities the
+ HSA accelerator have. */
+
+unsigned int
+GOMP_OFFLOAD_get_caps (void)
+{
+ return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
+}
+
+/* Part of the libgomp plugin interface. Identify as HSA accelerator. */
+
+int
+GOMP_OFFLOAD_get_type (void)
+{
+ return OFFLOAD_TARGET_TYPE_HSA;
+}
+
+/* Return the libgomp version number we're compatible with. There is
+ no requirement for cross-version compatibility. */
+
+unsigned
+GOMP_OFFLOAD_version (void)
+{
+ return GOMP_VERSION;
+}
+
+/* Flag to decide whether print to stderr information about what is going on.
+ Set in init_debug depending on environment variables. */
+
+static bool debug;
+
+/* Flag to decide if the runtime should suppress a possible fallback to host
+ execution. */
+
+static bool suppress_host_fallback;
+
+/* Initialize debug and suppress_host_fallback according to the environment. */
+
+static void
+init_enviroment_variables (void)
+{
+ if (getenv ("HSA_DEBUG"))
+ debug = true;
+ else
+ debug = false;
+
+ if (getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
+ suppress_host_fallback = true;
+ else
+ suppress_host_fallback = false;
+}
+
+/* Print a logging message with PREFIX to stderr if HSA_DEBUG value
+ is set to true. */
+
+#define HSA_LOG(prefix, ...) \
+ do \
+ { \
+ if (debug) \
+ { \
+ fprintf (stderr, prefix); \
+ fprintf (stderr, __VA_ARGS__); \
+ } \
+ } \
+ while (false);
+
+/* Print a debugging message to stderr. */
+
+#define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
+
+/* Print a warning message to stderr. */
+
+#define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
+
+/* Print HSA warning STR with an HSA STATUS code. */
+
+static void
+hsa_warn (const char *str, hsa_status_t status)
+{
+ if (!debug)
+ return;
+
+ const char *hsa_error;
+ hsa_status_string (status, &hsa_error);
+
+ fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error);
+}
+
+/* Report a fatal error STR together with the HSA error corresponding to STATUS
+ and terminate execution of the current process. */
+
+static void
+hsa_fatal (const char *str, hsa_status_t status)
+{
+ const char *hsa_error;
+ hsa_status_string (status, &hsa_error);
+ GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
+ hsa_error);
+}
+
+struct hsa_kernel_description
+{
+ const char *name;
+ unsigned omp_data_size;
+ bool gridified_kernel_p;
+ unsigned kernel_dependencies_count;
+ const char **kernel_dependencies;
+};
+
+struct global_var_info
+{
+ const char *name;
+ void *address;
+};
+
+/* Data passed by the static initializer of a compilation unit containing BRIG
+ to GOMP_offload_register. */
+
+struct brig_image_desc
+{
+ hsa_ext_module_t brig_module;
+ const unsigned kernel_count;
+ struct hsa_kernel_description *kernel_infos;
+ const unsigned global_variable_count;
+ struct global_var_info *global_variables;
+};
+
+struct agent_info;
+
+/* Information required to identify, finalize and run any given kernel. */
+
+struct kernel_info
+{
+ /* Name of the kernel, required to locate it within the brig module. */
+ const char *name;
+ /* Size of memory space for OMP data. */
+ unsigned omp_data_size;
+ /* The specific agent the kernel has been or will be finalized for and run
+ on. */
+ struct agent_info *agent;
+ /* The specific module where the kernel takes place. */
+ struct module_info *module;
+ /* Mutex enforcing that at most once thread ever initializes a kernel for
+ use. A thread should have locked agent->modules_rwlock for reading before
+ acquiring it. */
+ pthread_mutex_t init_mutex;
+ /* Flag indicating whether the kernel has been initialized and all fields
+ below it contain valid data. */
+ bool initialized;
+ /* Flag indicating that the kernel has a problem that blocks an execution. */
+ bool initialization_failed;
+ /* The object to be put into the dispatch queue. */
+ uint64_t object;
+ /* Required size of kernel arguments. */
+ uint32_t kernarg_segment_size;
+ /* Required size of group segment. */
+ uint32_t group_segment_size;
+ /* Required size of private segment. */
+ uint32_t private_segment_size;
+ /* List of all kernel dependencies. */
+ const char **dependencies;
+ /* Number of dependencies. */
+ unsigned dependencies_count;
+ /* Maximum OMP data size necessary for kernel from kernel dispatches. */
+ unsigned max_omp_data_size;
+ /* True if the kernel is gridified. */
+ bool gridified_kernel_p;
+};
+
+/* Information about a particular brig module, its image and kernels. */
+
+struct module_info
+{
+ /* The next and previous module in the linked list of modules of an agent. */
+ struct module_info *next, *prev;
+ /* The description with which the program has registered the image. */
+ struct brig_image_desc *image_desc;
+
+ /* Number of kernels in this module. */
+ int kernel_count;
+ /* An array of kernel_info structures describing each kernel in this
+ module. */
+ struct kernel_info kernels[];
+};
+
+/* Information about shared brig library. */
+
+struct brig_library_info
+{
+ char *file_name;
+ hsa_ext_module_t image;
+};
+
+/* Description of an HSA GPU agent and the program associated with it. */
+
+struct agent_info
+{
+ /* The HSA ID of the agent. Assigned when hsa_context is initialized. */
+ hsa_agent_t id;
+ /* Whether the agent has been initialized. The fields below are usable only
+ if it has been. */
+ bool initialized;
+ /* The HSA ISA of this agent. */
+ hsa_isa_t isa;
+ /* Command queue of the agent. */
+ hsa_queue_t *command_q;
+ /* Kernel from kernel dispatch command queue. */
+ hsa_queue_t *kernel_dispatch_command_q;
+ /* The HSA memory region from which to allocate kernel arguments. */
+ hsa_region_t kernarg_region;
+
+ /* Read-write lock that protects kernels which are running or about to be run
+ from interference with loading and unloading of images. Needs to be
+ locked for reading while a kernel is being run, and for writing if the
+ list of modules is manipulated (and thus the HSA program invalidated). */
+ pthread_rwlock_t modules_rwlock;
+ /* The first module in a linked list of modules associated with this
+ kernel. */
+ struct module_info *first_module;
+
+ /* Mutex enforcing that only one thread will finalize the HSA program. A
+ thread should have locked agent->modules_rwlock for reading before
+ acquiring it. */
+ pthread_mutex_t prog_mutex;
+ /* Flag whether the HSA program that consists of all the modules has been
+ finalized. */
+ bool prog_finalized;
+ /* Flag whether the program was finalized but with a failure. */
+ bool prog_finalized_error;
+ /* HSA executable - the finalized program that is used to locate kernels. */
+ hsa_executable_t executable;
+ /* List of BRIG libraries. */
+ struct brig_library_info **brig_libraries;
+ /* Number of loaded shared BRIG libraries. */
+ unsigned brig_libraries_count;
+};
+
+/* Information about the whole HSA environment and all of its agents. */
+
+struct hsa_context_info
+{
+ /* Whether the structure has been initialized. */
+ bool initialized;
+ /* Number of usable GPU HSA agents in the system. */
+ int agent_count;
+ /* Array of agent_info structures describing the individual HSA agents. */
+ struct agent_info *agents;
+};
+
+/* Information about the whole HSA environment and all of its agents. */
+
+static struct hsa_context_info hsa_context;
+
+/* Find kernel for an AGENT by name provided in KERNEL_NAME. */
+
+static struct kernel_info *
+get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
+{
+ struct module_info *module = agent->first_module;
+
+ while (module)
+ {
+ for (unsigned i = 0; i < module->kernel_count; i++)
+ if (strcmp (module->kernels[i].name, kernel_name) == 0)
+ return &module->kernels[i];
+
+ module = module->next;
+ }
+
+ return NULL;
+}
+
+/* Return true if the agent is a GPU and acceptable of concurrent submissions
+ from different threads. */
+
+static bool
+suitable_hsa_agent_p (hsa_agent_t agent)
+{
+ hsa_device_type_t device_type;
+ hsa_status_t status
+ = hsa_agent_get_info (agent, HSA_AGENT_INFO_DEVICE, &device_type);
+ if (status != HSA_STATUS_SUCCESS || device_type != HSA_DEVICE_TYPE_GPU)
+ return false;
+
+ uint32_t features = 0;
+ status = hsa_agent_get_info (agent, HSA_AGENT_INFO_FEATURE, &features);
+ if (status != HSA_STATUS_SUCCESS
+ || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
+ return false;
+ hsa_queue_type_t queue_type;
+ status = hsa_agent_get_info (agent, HSA_AGENT_INFO_QUEUE_TYPE, &queue_type);
+ if (status != HSA_STATUS_SUCCESS
+ || (queue_type != HSA_QUEUE_TYPE_MULTI))
+ return false;
+
+ return true;
+}
+
+/* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
+ agent_count in hsa_context. */
+
+static hsa_status_t
+count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
+{
+ if (suitable_hsa_agent_p (agent))
+ hsa_context.agent_count++;
+ return HSA_STATUS_SUCCESS;
+}
+
+/* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
+ id to the describing structure in the hsa context. The index of the
+ structure is pointed to by DATA, increment it afterwards. */
+
+static hsa_status_t
+assign_agent_ids (hsa_agent_t agent, void *data)
+{
+ if (suitable_hsa_agent_p (agent))
+ {
+ int *agent_index = (int *) data;
+ hsa_context.agents[*agent_index].id = agent;
+ ++*agent_index;
+ }
+ return HSA_STATUS_SUCCESS;
+}
+
+/* Initialize hsa_context if it has not already been done. */
+
+static void
+init_hsa_context (void)
+{
+ hsa_status_t status;
+ int agent_index = 0;
+
+ if (hsa_context.initialized)
+ return;
+ init_enviroment_variables ();
+ status = hsa_init ();
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Run-time could not be initialized", status);
+ HSA_DEBUG ("HSA run-time initialized\n");
+ status = hsa_iterate_agents (count_gpu_agents, NULL);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("HSA GPU devices could not be enumerated", status);
+ HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
+
+ hsa_context.agents
+ = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
+ * sizeof (struct agent_info));
+ status = hsa_iterate_agents (assign_agent_ids, &agent_index);
+ if (agent_index != hsa_context.agent_count)
+ GOMP_PLUGIN_fatal ("Failed to assign IDs to all HSA agents");
+ hsa_context.initialized = true;
+}
+
+/* Callback of dispatch queues to report errors. */
+
+static void
+queue_callback (hsa_status_t status,
+ hsa_queue_t *queue __attribute__ ((unused)),
+ void *data __attribute__ ((unused)))
+{
+ hsa_fatal ("Asynchronous queue error", status);
+}
+
+/* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be
+ used for kernarg allocations and if so write it to the memory pointed to by
+ DATA and break the query. */
+
+static hsa_status_t
+get_kernarg_memory_region (hsa_region_t region, void *data)
+{
+ hsa_status_t status;
+ hsa_region_segment_t segment;
+
+ status = hsa_region_get_info (region, HSA_REGION_INFO_SEGMENT, &segment);
+ if (status != HSA_STATUS_SUCCESS)
+ return status;
+ if (segment != HSA_REGION_SEGMENT_GLOBAL)
+ return HSA_STATUS_SUCCESS;
+
+ uint32_t flags;
+ status = hsa_region_get_info (region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
+ if (status != HSA_STATUS_SUCCESS)
+ return status;
+ if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
+ {
+ hsa_region_t *ret = (hsa_region_t *) data;
+ *ret = region;
+ return HSA_STATUS_INFO_BREAK;
+ }
+ return HSA_STATUS_SUCCESS;
+}
+
+/* Part of the libgomp plugin interface. Return the number of HSA devices on
+ the system. */
+
+int
+GOMP_OFFLOAD_get_num_devices (void)
+{
+ init_hsa_context ();
+ return hsa_context.agent_count;
+}
+
+/* Part of the libgomp plugin interface. Initialize agent number N so that it
+ can be used for computation. */
+
+void
+GOMP_OFFLOAD_init_device (int n)
+{
+ init_hsa_context ();
+ if (n >= hsa_context.agent_count)
+ GOMP_PLUGIN_fatal ("Request to initialize non-existing HSA device %i", n);
+ struct agent_info *agent = &hsa_context.agents[n];
+
+ if (agent->initialized)
+ return;
+
+ if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
+ GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent rwlock");
+ if (pthread_mutex_init (&agent->prog_mutex, NULL))
+ GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent program mutex");
+
+ uint32_t queue_size;
+ hsa_status_t status;
+ status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
+ &queue_size);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error requesting maximum queue size of the HSA agent", status);
+ status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_ISA, &agent->isa);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error querying the ISA of the agent", status);
+ status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
+ queue_callback, NULL, UINT32_MAX, UINT32_MAX,
+ &agent->command_q);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error creating command queue", status);
+
+ status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
+ queue_callback, NULL, UINT32_MAX, UINT32_MAX,
+ &agent->kernel_dispatch_command_q);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error creating kernel dispatch command queue", status);
+
+ agent->kernarg_region.handle = (uint64_t) -1;
+ status = hsa_agent_iterate_regions (agent->id, get_kernarg_memory_region,
+ &agent->kernarg_region);
+ if (agent->kernarg_region.handle == (uint64_t) -1)
+ GOMP_PLUGIN_fatal ("Could not find suitable memory region for kernel "
+ "arguments");
+ HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
+ (long long unsigned) agent->command_q->id);
+ HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
+ (long long unsigned) agent->kernel_dispatch_command_q->id);
+ agent->initialized = true;
+}
+
+/* Verify that hsa_context has already been initialized and return the
+ agent_info structure describing device number N. */
+
+static struct agent_info *
+get_agent_info (int n)
+{
+ if (!hsa_context.initialized)
+ GOMP_PLUGIN_fatal ("Attempt to use uninitialized HSA context.");
+ if (n >= hsa_context.agent_count)
+ GOMP_PLUGIN_fatal ("Request to operate on anon-existing HSA device %i", n);
+ if (!hsa_context.agents[n].initialized)
+ GOMP_PLUGIN_fatal ("Attempt to use an uninitialized HSA agent.");
+ return &hsa_context.agents[n];
+}
+
+/* Insert MODULE to the linked list of modules of AGENT. */
+
+static void
+add_module_to_agent (struct agent_info *agent, struct module_info *module)
+{
+ if (agent->first_module)
+ agent->first_module->prev = module;
+ module->next = agent->first_module;
+ module->prev = NULL;
+ agent->first_module = module;
+}
+
+/* Remove MODULE from the linked list of modules of AGENT. */
+
+static void
+remove_module_from_agent (struct agent_info *agent, struct module_info *module)
+{
+ if (agent->first_module == module)
+ agent->first_module = module->next;
+ if (module->prev)
+ module->prev->next = module->next;
+ if (module->next)
+ module->next->prev = module->prev;
+}
+
+/* Free the HSA program in agent and everything associated with it and set
+ agent->prog_finalized and the initialized flags of all kernels to false. */
+
+static void
+destroy_hsa_program (struct agent_info *agent)
+{
+ if (!agent->prog_finalized || agent->prog_finalized_error)
+ return;
+
+ hsa_status_t status;
+
+ HSA_DEBUG ("Destroying the current HSA program.\n");
+
+ status = hsa_executable_destroy (agent->executable);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not destroy HSA executable", status);
+
+ struct module_info *module;
+ for (module = agent->first_module; module; module = module->next)
+ {
+ int i;
+ for (i = 0; i < module->kernel_count; i++)
+ module->kernels[i].initialized = false;
+ }
+ agent->prog_finalized = false;
+}
+
+/* Part of the libgomp plugin interface. Load BRIG module described by struct
+ brig_image_desc in TARGET_DATA and return references to kernel descriptors
+ in TARGET_TABLE. */
+
+int
+GOMP_OFFLOAD_load_image (int ord, unsigned version, void *target_data,
+ struct addr_pair **target_table)
+{
+ if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
+ GOMP_PLUGIN_fatal ("Offload data incompatible with HSA plugin"
+ " (expected %u, received %u)",
+ GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
+
+ struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
+ struct agent_info *agent;
+ struct addr_pair *pair;
+ struct module_info *module;
+ struct kernel_info *kernel;
+ int kernel_count = image_desc->kernel_count;
+
+ agent = get_agent_info (ord);
+ if (pthread_rwlock_wrlock (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
+ if (agent->prog_finalized)
+ destroy_hsa_program (agent);
+
+ HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
+ pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
+ *target_table = pair;
+ module = (struct module_info *)
+ GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
+ + kernel_count * sizeof (struct kernel_info));
+ module->image_desc = image_desc;
+ module->kernel_count = kernel_count;
+
+ kernel = &module->kernels[0];
+
+ /* Allocate memory for kernel dependencies. */
+ for (unsigned i = 0; i < kernel_count; i++)
+ {
+ pair->start = (uintptr_t) kernel;
+ pair->end = (uintptr_t) (kernel + 1);
+
+ struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
+ kernel->agent = agent;
+ kernel->module = module;
+ kernel->name = d->name;
+ kernel->omp_data_size = d->omp_data_size;
+ kernel->gridified_kernel_p = d->gridified_kernel_p;
+ kernel->dependencies_count = d->kernel_dependencies_count;
+ kernel->dependencies = d->kernel_dependencies;
+ if (pthread_mutex_init (&kernel->init_mutex, NULL))
+ GOMP_PLUGIN_fatal ("Failed to initialize an HSA kernel mutex");
+
+ kernel++;
+ pair++;
+ }
+
+ add_module_to_agent (agent, module);
+ if (pthread_rwlock_unlock (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
+ return kernel_count;
+}
+
+/* Add a shared BRIG library from a FILE_NAME to an AGENT. */
+
+static struct brig_library_info *
+add_shared_library (const char *file_name, struct agent_info *agent)
+{
+ struct brig_library_info *library = NULL;
+
+ void *f = dlopen (file_name, RTLD_NOW);
+ void *start = dlsym (f, "__brig_start");
+ void *end = dlsym (f, "__brig_end");
+
+ if (start == NULL || end == NULL)
+ return NULL;
+
+ unsigned size = end - start;
+ char *buf = (char *) GOMP_PLUGIN_malloc (size);
+ memcpy (buf, start, size);
+
+ library = GOMP_PLUGIN_malloc (sizeof (struct agent_info));
+ library->file_name = (char *) GOMP_PLUGIN_malloc
+ ((strlen (file_name) + 1));
+ strcpy (library->file_name, file_name);
+ library->image = (hsa_ext_module_t) buf;
+
+ return library;
+}
+
+/* Release memory used for BRIG shared libraries that correspond
+ to an AGENT. */
+
+static void
+release_agent_shared_libraries (struct agent_info *agent)
+{
+ for (unsigned i = 0; i < agent->brig_libraries_count; i++)
+ if (agent->brig_libraries[i])
+ {
+ free (agent->brig_libraries[i]->file_name);
+ free (agent->brig_libraries[i]->image);
+ free (agent->brig_libraries[i]);
+ }
+
+ free (agent->brig_libraries);
+}
+
+/* Create and finalize the program consisting of all loaded modules. */
+
+static void
+create_and_finalize_hsa_program (struct agent_info *agent)
+{
+ hsa_status_t status;
+ hsa_ext_program_t prog_handle;
+ int mi = 0;
+
+ if (pthread_mutex_lock (&agent->prog_mutex))
+ GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
+ if (agent->prog_finalized)
+ goto final;
+
+ status = hsa_ext_program_create (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
+ HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+ NULL, &prog_handle);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not create an HSA program", status);
+
+ HSA_DEBUG ("Created a finalized program\n");
+
+ struct module_info *module = agent->first_module;
+ while (module)
+ {
+ status = hsa_ext_program_add_module (prog_handle,
+ module->image_desc->brig_module);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not add a module to the HSA program", status);
+ module = module->next;
+ mi++;
+ }
+
+ /* Load all shared libraries. */
+ const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
+ const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
+
+ agent->brig_libraries_count = libraries_count;
+ agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
+ (sizeof (struct brig_library_info) * libraries_count);
+
+ for (unsigned i = 0; i < libraries_count; i++)
+ {
+ struct brig_library_info *library = add_shared_library (libraries[i],
+ agent);
+ if (library == NULL)
+ {
+ HSA_WARNING ("Could not open a shared BRIG library: %s\n",
+ libraries[i]);
+ continue;
+ }
+
+ status = hsa_ext_program_add_module (prog_handle, library->image);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_warn ("Could not add a shared BRIG library the HSA program",
+ status);
+ else
+ HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
+ libraries[i]);
+ }
+
+ hsa_ext_control_directives_t control_directives;
+ memset (&control_directives, 0, sizeof (control_directives));
+ hsa_code_object_t code_object;
+ status = hsa_ext_program_finalize (prog_handle, agent->isa,
+ HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+ control_directives, "",
+ HSA_CODE_OBJECT_TYPE_PROGRAM,
+ &code_object);
+ if (status != HSA_STATUS_SUCCESS)
+ {
+ hsa_warn ("Finalization of the HSA program failed", status);
+ goto failure;
+ }
+
+ HSA_DEBUG ("Finalization done\n");
+ hsa_ext_program_destroy (prog_handle);
+
+ status
+ = hsa_executable_create (HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN,
+ "", &agent->executable);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not create HSA executable", status);
+
+ module = agent->first_module;
+ while (module)
+ {
+ /* Initialize all global variables declared in the module. */
+ for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
+ {
+ struct global_var_info *var;
+ var = &module->image_desc->global_variables[i];
+ status
+ = hsa_executable_global_variable_define (agent->executable,
+ var->name, var->address);
+
+ HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
+ var->address);
+
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not define a global variable in the HSA program",
+ status);
+ }
+
+ module = module->next;
+ }
+
+ status = hsa_executable_load_code_object (agent->executable, agent->id,
+ code_object, "");
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not add a code object to the HSA executable", status);
+ status = hsa_executable_freeze (agent->executable, "");
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not freeze the HSA executable", status);
+
+ HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
+
+ /* If all goes good, jump to final. */
+ goto final;
+
+failure:
+ agent->prog_finalized_error = true;
+
+final:
+ agent->prog_finalized = true;
+
+ if (pthread_mutex_unlock (&agent->prog_mutex))
+ GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
+}
+
+/* Create kernel dispatch data structure for given KERNEL. */
+
+static struct GOMP_hsa_kernel_dispatch *
+create_single_kernel_dispatch (struct kernel_info *kernel,
+ unsigned omp_data_size)
+{
+ struct agent_info *agent = kernel->agent;
+ struct GOMP_hsa_kernel_dispatch *shadow
+ = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
+
+ shadow->queue = agent->command_q;
+ shadow->omp_data_memory
+ = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
+ unsigned dispatch_count = kernel->dependencies_count;
+ shadow->kernel_dispatch_count = dispatch_count;
+
+ shadow->children_dispatches
+ = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow));
+
+ shadow->object = kernel->object;
+
+ hsa_signal_t sync_signal;
+ hsa_status_t status = hsa_signal_create (1, 0, NULL, &sync_signal);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error creating the HSA sync signal", status);
+
+ shadow->signal = sync_signal.handle;
+ shadow->private_segment_size = kernel->private_segment_size;
+ shadow->group_segment_size = kernel->group_segment_size;
+
+ status
+ = hsa_memory_allocate (agent->kernarg_region, kernel->kernarg_segment_size,
+ &shadow->kernarg_address);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
+
+ return shadow;
+}
+
+/* Release data structure created for a kernel dispatch in SHADOW argument. */
+
+static void
+release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
+{
+ HSA_DEBUG ("Released kernel dispatch: %p has value: %lu (%p)\n", shadow,
+ shadow->debug, (void *) shadow->debug);
+
+ hsa_memory_free (shadow->kernarg_address);
+
+ hsa_signal_t s;
+ s.handle = shadow->signal;
+ hsa_signal_destroy (s);
+
+ free (shadow->omp_data_memory);
+
+ for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
+ release_kernel_dispatch (shadow->children_dispatches[i]);
+
+ free (shadow->children_dispatches);
+ free (shadow);
+}
+
+/* Initialize a KERNEL without its dependencies. MAX_OMP_DATA_SIZE is used
+ to calculate maximum necessary memory for OMP data allocation. */
+
+static void
+init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
+{
+ hsa_status_t status;
+ struct agent_info *agent = kernel->agent;
+ hsa_executable_symbol_t kernel_symbol;
+ status = hsa_executable_get_symbol (agent->executable, NULL, kernel->name,
+ agent->id, 0, &kernel_symbol);
+ if (status != HSA_STATUS_SUCCESS)
+ {
+ hsa_warn ("Could not find symbol for kernel in the code object", status);
+ goto failure;
+ }
+ HSA_DEBUG ("Located kernel %s\n", kernel->name);
+ status
+ = hsa_executable_symbol_get_info (kernel_symbol,
+ HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
+ &kernel->object);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a kernel object from its symbol", status);
+ status = hsa_executable_symbol_get_info
+ (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
+ &kernel->kernarg_segment_size);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not get info about kernel argument size", status);
+ status = hsa_executable_symbol_get_info
+ (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
+ &kernel->group_segment_size);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not get info about kernel group segment size", status);
+ status = hsa_executable_symbol_get_info
+ (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
+ &kernel->private_segment_size);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not get info about kernel private segment size",
+ status);
+
+ HSA_DEBUG ("Kernel structure for %s fully initialized with "
+ "following segment sizes: \n", kernel->name);
+ HSA_DEBUG (" group_segment_size: %u\n",
+ (unsigned) kernel->group_segment_size);
+ HSA_DEBUG (" private_segment_size: %u\n",
+ (unsigned) kernel->private_segment_size);
+ HSA_DEBUG (" kernarg_segment_size: %u\n",
+ (unsigned) kernel->kernarg_segment_size);
+ HSA_DEBUG (" omp_data_size: %u\n", kernel->omp_data_size);
+ HSA_DEBUG (" gridified_kernel_p: %u\n", kernel->gridified_kernel_p);
+
+ if (kernel->omp_data_size > *max_omp_data_size)
+ *max_omp_data_size = kernel->omp_data_size;
+
+ for (unsigned i = 0; i < kernel->dependencies_count; i++)
+ {
+ struct kernel_info *dependency
+ = get_kernel_for_agent (agent, kernel->dependencies[i]);
+
+ if (dependency == NULL)
+ {
+ HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
+ "dependency name: %s\n", kernel->name,
+ kernel->dependencies[i]);
+ goto failure;
+ }
+
+ if (dependency->dependencies_count > 0)
+ {
+ HSA_DEBUG ("HSA does not allow kernel dispatching code with "
+ "a depth bigger than one\n")
+ goto failure;
+ }
+
+ init_single_kernel (dependency, max_omp_data_size);
+ }
+
+ return;
+
+failure:
+ kernel->initialization_failed = true;
+}
+
+/* Indent stream F by INDENT spaces. */
+
+static void
+indent_stream (FILE *f, unsigned indent)
+{
+ fprintf (f, "%*s", indent, "");
+}
+
+/* Dump kernel DISPATCH data structure and indent it by INDENT spaces. */
+
+static void
+print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent)
+{
+ indent_stream (stderr, indent);
+ fprintf (stderr, "this: %p\n", dispatch);
+ indent_stream (stderr, indent);
+ fprintf (stderr, "queue: %p\n", dispatch->queue);
+ indent_stream (stderr, indent);
+ fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
+ indent_stream (stderr, indent);
+ fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
+ indent_stream (stderr, indent);
+ fprintf (stderr, "object: %lu\n", dispatch->object);
+ indent_stream (stderr, indent);
+ fprintf (stderr, "signal: %lu\n", dispatch->signal);
+ indent_stream (stderr, indent);
+ fprintf (stderr, "private_segment_size: %u\n",
+ dispatch->private_segment_size);
+ indent_stream (stderr, indent);
+ fprintf (stderr, "group_segment_size: %u\n",
+ dispatch->group_segment_size);
+ indent_stream (stderr, indent);
+ fprintf (stderr, "children dispatches: %lu\n",
+ dispatch->kernel_dispatch_count);
+ indent_stream (stderr, indent);
+ fprintf (stderr, "omp_num_threads: %u\n",
+ dispatch->omp_num_threads);
+ fprintf (stderr, "\n");
+
+ for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
+ print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
+}
+
+/* Create kernel dispatch data structure for a KERNEL and all its
+ dependencies. */
+
+static struct GOMP_hsa_kernel_dispatch *
+create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
+{
+ struct GOMP_hsa_kernel_dispatch *shadow
+ = create_single_kernel_dispatch (kernel, omp_data_size);
+ shadow->omp_num_threads = 64;
+ shadow->debug = 0;
+ shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
+
+ /* Create kernel dispatch data structures. We do not allow to have
+ a kernel dispatch with depth bigger than one. */
+ for (unsigned i = 0; i < kernel->dependencies_count; i++)
+ {
+ struct kernel_info *dependency
+ = get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
+ shadow->children_dispatches[i]
+ = create_single_kernel_dispatch (dependency, omp_data_size);
+ shadow->children_dispatches[i]->queue
+ = kernel->agent->kernel_dispatch_command_q;
+ shadow->children_dispatches[i]->omp_level = 1;
+ }
+
+ return shadow;
+}
+
+/* Do all the work that is necessary before running KERNEL for the first time.
+ The function assumes the program has been created, finalized and frozen by
+ create_and_finalize_hsa_program. */
+
+static void
+init_kernel (struct kernel_info *kernel)
+{
+ if (pthread_mutex_lock (&kernel->init_mutex))
+ GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
+ if (kernel->initialized)
+ {
+ if (pthread_mutex_unlock (&kernel->init_mutex))
+ GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
+ "mutex");
+
+ return;
+ }
+
+ /* Precomputed maximum size of OMP data necessary for a kernel from kernel
+ dispatch operation. */
+ init_single_kernel (kernel, &kernel->max_omp_data_size);
+
+ if (!kernel->initialization_failed)
+ HSA_DEBUG ("\n");
+
+ kernel->initialized = true;
+ if (pthread_mutex_unlock (&kernel->init_mutex))
+ GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
+ "mutex");
+}
+
+/* Parse the target attributes INPUT provided by the compiler and return true
+ if we should run anything all. If INPUT is NULL, fill DEF with default
+ values, then store INPUT or DEF into *RESULT. */
+
+static bool
+parse_target_attributes (void **input,
+ struct GOMP_kernel_launch_attributes *def,
+ struct GOMP_kernel_launch_attributes **result)
+{
+ if (!input)
+ GOMP_PLUGIN_fatal ("No target arguments provided");
+
+ bool attrs_found = false;
+ while (*input)
+ {
+ uintptr_t id = (uintptr_t) *input;
+ if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
+ && ((id & GOMP_TARGET_ARG_ID_MASK)
+ == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
+ {
+ input++;
+ attrs_found = true;
+ break;
+ }
+
+ if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+ input++;
+ input++;
+ }
+
+ if (!attrs_found)
+ {
+ def->ndim = 1;
+ def->gdims[0] = 1;
+ def->gdims[1] = 1;
+ def->gdims[2] = 1;
+ def->wdims[0] = 1;
+ def->wdims[1] = 1;
+ def->wdims[2] = 1;
+ *result = def;
+ HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
+ return true;
+ }
+
+ struct GOMP_kernel_launch_attributes *kla;
+ kla = (struct GOMP_kernel_launch_attributes *) *input;
+ *result = kla;
+ if (kla->ndim != 1)
+ GOMP_PLUGIN_fatal ("HSA does not yet support number of dimensions "
+ "different from one.");
+ if (kla->gdims[0] == 0)
+ return false;
+
+ HSA_DEBUG ("GOMP_OFFLOAD_run called with grid size %u and group size %u\n",
+ kla->gdims[0], kla->wdims[0]);
+
+ return true;
+}
+
+/* Return true if the HSA runtime can run function FN_PTR. */
+
+bool
+GOMP_OFFLOAD_can_run (void *fn_ptr)
+{
+ struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
+ struct agent_info *agent = kernel->agent;
+ create_and_finalize_hsa_program (agent);
+
+ if (agent->prog_finalized_error)
+ goto failure;
+
+ init_kernel (kernel);
+ if (kernel->initialization_failed)
+ goto failure;
+
+ return true;
+
+failure:
+ if (suppress_host_fallback)
+ GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
+ HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
+ return false;
+}
+
+/* Part of the libgomp plugin interface. Run a kernel on device N and pass it
+ an array of pointers in VARS as a parameter. The kernel is identified by
+ FN_PTR which must point to a kernel_info structure. */
+
+void
+GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
+{
+ struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
+ struct agent_info *agent = kernel->agent;
+ struct GOMP_kernel_launch_attributes def;
+ struct GOMP_kernel_launch_attributes *kla;
+ if (!parse_target_attributes (args, &def, &kla))
+ {
+ HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
+ return;
+ }
+ if (pthread_rwlock_rdlock (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
+
+ if (!agent->initialized)
+ GOMP_PLUGIN_fatal ("Agent must be initialized");
+
+ if (!kernel->initialized)
+ GOMP_PLUGIN_fatal ("Called kernel must be initialized");
+
+ struct GOMP_hsa_kernel_dispatch *shadow
+ = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
+
+ if (debug)
+ {
+ fprintf (stderr, "\nKernel has following dependencies:\n");
+ print_kernel_dispatch (shadow, 2);
+ }
+
+ uint64_t index = hsa_queue_add_write_index_release (agent->command_q, 1);
+ HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
+
+ /* Wait until the queue is not full before writing the packet. */
+ while (index - hsa_queue_load_read_index_acquire (agent->command_q)
+ >= agent->command_q->size)
+ ;
+
+ hsa_kernel_dispatch_packet_t *packet;
+ packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
+ + index % agent->command_q->size;
+
+ memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
+ packet->setup |= (uint16_t) 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+ packet->grid_size_x = kla->gdims[0];
+ uint32_t wgs = kla->wdims[0];
+ if (wgs == 0)
+ /* TODO: Provide a default via environment. */
+ wgs = 64;
+ else if (wgs > kla->gdims[0])
+ wgs = kla->gdims[0];
+ packet->workgroup_size_x = wgs;
+ packet->grid_size_y = 1;
+ packet->workgroup_size_y = 1;
+ packet->grid_size_z = 1;
+ packet->workgroup_size_z = 1;
+ packet->private_segment_size = kernel->private_segment_size;
+ packet->group_segment_size = kernel->group_segment_size;
+ packet->kernel_object = kernel->object;
+ packet->kernarg_address = shadow->kernarg_address;
+ hsa_signal_t s;
+ s.handle = shadow->signal;
+ packet->completion_signal = s;
+ hsa_signal_store_relaxed (s, 1);
+ memcpy (shadow->kernarg_address, &vars, sizeof (vars));
+
+ memcpy (shadow->kernarg_address + sizeof (vars), &shadow,
+ sizeof (struct hsa_kernel_runtime *));
+
+ HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
+
+ uint16_t header;
+ header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+ header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+ header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+
+ HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
+
+ __atomic_store_n ((uint16_t *) (&packet->header), header, __ATOMIC_RELEASE);
+ hsa_signal_store_release (agent->command_q->doorbell_signal, index);
+
+ /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
+ signal wait and signal load operations on their own and we need to
+ periodically call the hsa_signal_load_acquire on completion signals of
+ children kernels in the CPU to make that happen. As soon the
+ limitation will be resolved, this workaround can be removed. */
+
+ HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
+
+ /* Root signal waits with 1ms timeout. */
+ while (hsa_signal_wait_acquire (s, HSA_SIGNAL_CONDITION_LT, 1, 1000 * 1000,
+ HSA_WAIT_STATE_BLOCKED) != 0)
+ for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
+ {
+ hsa_signal_t child_s;
+ child_s.handle = shadow->children_dispatches[i]->signal;
+
+ HSA_DEBUG ("Waiting for children completion signal: %lu\n",
+ shadow->children_dispatches[i]->signal);
+ hsa_signal_load_acquire (child_s);
+ }
+
+ release_kernel_dispatch (shadow);
+
+ if (pthread_rwlock_unlock (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
+}
+
+/* Information to be passed to a thread running a kernel asycnronously. */
+
+struct async_run_info
+{
+ int device;
+ void *tgt_fn;
+ void *tgt_vars;
+ void **args;
+ void *async_data;
+};
+
+/* Thread routine to run a kernel asynchronously. */
+
+static void *
+run_kernel_asynchronously (void *thread_arg)
+{
+ struct async_run_info *info = (struct async_run_info *) thread_arg;
+ int device = info->device;
+ void *tgt_fn = info->tgt_fn;
+ void *tgt_vars = info->tgt_vars;
+ void **args = info->args;
+ void *async_data = info->async_data;
+
+ free (info);
+ GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
+ GOMP_PLUGIN_target_task_completion (async_data);
+ return NULL;
+}
+
+/* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run
+ does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
+ has finished. */
+
+void
+GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
+ void **args, void *async_data)
+{
+ pthread_t pt;
+ struct async_run_info *info;
+ HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n")
+ info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
+
+ info->device = device;
+ info->tgt_fn = tgt_fn;
+ info->tgt_vars = tgt_vars;
+ info->args = args;
+ info->async_data = async_data;
+
+ int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
+ if (err != 0)
+ GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
+ strerror (err));
+ err = pthread_detach (pt);
+ if (err != 0)
+ GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
+ "asynchronously: %s", strerror (err));
+}
+
+/* Deinitialize all information associated with MODULE and kernels within
+ it. */
+
+void
+destroy_module (struct module_info *module)
+{
+ int i;
+ for (i = 0; i < module->kernel_count; i++)
+ if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
+ GOMP_PLUGIN_fatal ("Failed to destroy an HSA kernel initialization "
+ "mutex");
+}
+
+/* Part of the libgomp plugin interface. Unload BRIG module described by
+ struct brig_image_desc in TARGET_DATA from agent number N. */
+
+void
+GOMP_OFFLOAD_unload_image (int n, unsigned version, void *target_data)
+{
+ if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
+ GOMP_PLUGIN_fatal ("Offload data incompatible with HSA plugin"
+ " (expected %u, received %u)",
+ GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
+
+ struct agent_info *agent;
+ agent = get_agent_info (n);
+ if (pthread_rwlock_wrlock (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
+
+ struct module_info *module = agent->first_module;
+ while (module)
+ {
+ if (module->image_desc == target_data)
+ break;
+ module = module->next;
+ }
+ if (!module)
+ GOMP_PLUGIN_fatal ("Attempt to unload an image that has never been "
+ "loaded before");
+
+ remove_module_from_agent (agent, module);
+ destroy_module (module);
+ free (module);
+ destroy_hsa_program (agent);
+ if (pthread_rwlock_unlock (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
+}
+
+/* Part of the libgomp plugin interface. Deinitialize all information and
+ status associated with agent number N. We do not attempt any
+ synchronization, assuming the user and libgomp will not attempt
+ deinitialization of a device that is in any way being used at the same
+ time. */
+
+void
+GOMP_OFFLOAD_fini_device (int n)
+{
+ struct agent_info *agent = get_agent_info (n);
+ if (!agent->initialized)
+ return;
+
+ struct module_info *next_module = agent->first_module;
+ while (next_module)
+ {
+ struct module_info *module = next_module;
+ next_module = module->next;
+ destroy_module (module);
+ free (module);
+ }
+ agent->first_module = NULL;
+ destroy_hsa_program (agent);
+
+ release_agent_shared_libraries (agent);
+
+ hsa_status_t status = hsa_queue_destroy (agent->command_q);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error destroying command queue", status);
+ status = hsa_queue_destroy (agent->kernel_dispatch_command_q);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Error destroying kernel dispatch command queue", status);
+ if (pthread_mutex_destroy (&agent->prog_mutex))
+ GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent program mutex");
+ if (pthread_rwlock_destroy (&agent->modules_rwlock))
+ GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent rwlock");
+ agent->initialized = false;
+}
+
+/* Part of the libgomp plugin interface. Not implemented as it is not required
+ for HSA. */
+
+void *
+GOMP_OFFLOAD_alloc (int ord, size_t size)
+{
+ GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_alloc is not implemented because "
+ "it should never be called");
+}
+
+/* Part of the libgomp plugin interface. Not implemented as it is not required
+ for HSA. */
+
+void
+GOMP_OFFLOAD_free (int ord, void *ptr)
+{
+ GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_free is not implemented because "
+ "it should never be called");
+}
+
+/* Part of the libgomp plugin interface. Not implemented as it is not required
+ for HSA. */
+
+void *
+GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
+{
+ GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
+ "it should never be called");
+}
+
+/* Part of the libgomp plugin interface. Not implemented as it is not required
+ for HSA. */
+
+void *
+GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
+{
+ GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
+ "it should never be called");
+}
+
+/* Part of the libgomp plugin interface. Not implemented as it is not required
+ for HSA. */
+
+void *
+GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
+{
+ GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
+ "it should never be called");
+}
diff --git a/libgomp/target.c b/libgomp/target.c
index bea5822312c..f1f58492ee5 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1329,44 +1329,90 @@ gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
*thr = old_thr;
}
-/* Host fallback with firstprivate map-type handling. */
+/* Calculate alignment and size requirements of a private copy of data shared
+ as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
-static void
-gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
- void **hostaddrs, size_t *sizes,
- unsigned short *kinds)
+static inline void
+calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
+ unsigned short *kinds, size_t *tgt_align,
+ size_t *tgt_size)
{
- size_t i, tgt_align = 0, tgt_size = 0;
- char *tgt = NULL;
+ size_t i;
+ for (i = 0; i < mapnum; i++)
+ if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+ {
+ size_t align = (size_t) 1 << (kinds[i] >> 8);
+ if (*tgt_align < align)
+ *tgt_align = align;
+ *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
+ *tgt_size += sizes[i];
+ }
+}
+
+/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
+
+static inline void
+copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds, size_t tgt_align,
+ size_t tgt_size)
+{
+ uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
+ if (al)
+ tgt += tgt_align - al;
+ tgt_size = 0;
+ size_t i;
for (i = 0; i < mapnum; i++)
if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
{
size_t align = (size_t) 1 << (kinds[i] >> 8);
- if (tgt_align < align)
- tgt_align = align;
tgt_size = (tgt_size + align - 1) & ~(align - 1);
- tgt_size += sizes[i];
+ memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
+ hostaddrs[i] = tgt + tgt_size;
+ tgt_size = tgt_size + sizes[i];
}
+}
+
+/* Host fallback with firstprivate map-type handling. */
+
+static void
+gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
+ void **hostaddrs, size_t *sizes,
+ unsigned short *kinds)
+{
+ size_t tgt_align = 0, tgt_size = 0;
+ calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
+ &tgt_size);
if (tgt_align)
{
- tgt = gomp_alloca (tgt_size + tgt_align - 1);
- uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
- if (al)
- tgt += tgt_align - al;
- tgt_size = 0;
- for (i = 0; i < mapnum; i++)
- if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
- {
- size_t align = (size_t) 1 << (kinds[i] >> 8);
- tgt_size = (tgt_size + align - 1) & ~(align - 1);
- memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
- hostaddrs[i] = tgt + tgt_size;
- tgt_size = tgt_size + sizes[i];
- }
+ char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
+ copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
+ tgt_size);
}
gomp_target_fallback (fn, hostaddrs);
}
+/* Handle firstprivate map-type for shared memory devices and the host
+ fallback. Return the pointer of firstprivate copies which has to be freed
+ after use. */
+
+static void *
+gomp_target_unshare_firstprivate (size_t mapnum, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds)
+{
+ size_t tgt_align = 0, tgt_size = 0;
+ char *tgt = NULL;
+
+ calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
+ &tgt_size);
+ if (tgt_align)
+ {
+ tgt = gomp_malloc (tgt_size + tgt_align - 1);
+ copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
+ tgt_size);
+ }
+ return tgt;
+}
+
/* Helper function of GOMP_target{,_ext} routines. */
static void *
@@ -1390,7 +1436,12 @@ gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
gomp_mutex_unlock (&devicep->lock);
if (tgt_fn == NULL)
- gomp_fatal ("Target function wasn't mapped");
+ {
+ if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return NULL;
+ else
+ gomp_fatal ("Target function wasn't mapped");
+ }
return (void *) tgt_fn->tgt_offset;
}
@@ -1416,13 +1467,16 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
void *fn_addr;
if (devicep == NULL
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ /* All shared memory devices should use the GOMP_target_ext function. */
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
|| !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
return gomp_target_fallback (fn, hostaddrs);
struct target_mem_desc *tgt_vars
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
GOMP_MAP_VARS_TARGET);
- devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
+ devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
+ NULL);
gomp_unmap_vars (tgt_vars, true);
}
@@ -1430,6 +1484,15 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
and several arguments have been added:
FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
DEPEND is array of dependencies, see GOMP_task for details.
+
+ ARGS is a pointer to an array consisting of a variable number of both
+ device-independent and device-specific arguments, which can take one two
+ elements where the first specifies for which device it is intended, the type
+ and optionally also the value. If the value is not present in the first
+ one, the whole second element the actual value. The last element of the
+ array is a single NULL. Among the device independent can be for example
+ NUM_TEAMS and THREAD_LIMIT.
+
NUM_TEAMS is positive if GOMP_teams will be called in the body with
that value, or 1 if teams construct is not present, or 0, if
teams construct does not have num_teams clause and so the choice is
@@ -1443,14 +1506,10 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned short *kinds,
- unsigned int flags, void **depend, int num_teams,
- int thread_limit)
+ unsigned int flags, void **depend, void **args)
{
struct gomp_device_descr *devicep = resolve_device (device);
- (void) num_teams;
- (void) thread_limit;
-
if (flags & GOMP_TARGET_FLAG_NOWAIT)
{
struct gomp_thread *thr = gomp_thread ();
@@ -1487,7 +1546,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
&& !thr->task->final_task)
{
gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
- sizes, kinds, flags, depend,
+ sizes, kinds, flags, depend, args,
GOMP_TARGET_TASK_BEFORE_MAP);
return;
}
@@ -1507,17 +1566,30 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
void *fn_addr;
if (devicep == NULL
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
- || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
+ || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
+ || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
{
gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds);
return;
}
- struct target_mem_desc *tgt_vars
- = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
- GOMP_MAP_VARS_TARGET);
- devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
- gomp_unmap_vars (tgt_vars, true);
+ struct target_mem_desc *tgt_vars;
+ void *fpc = NULL;
+ if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ {
+ fpc = gomp_target_unshare_firstprivate (mapnum, hostaddrs, sizes, kinds);
+ tgt_vars = NULL;
+ }
+ else
+ tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
+ true, GOMP_MAP_VARS_TARGET);
+ devicep->run_func (devicep->target_id, fn_addr,
+ tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
+ args);
+ if (tgt_vars)
+ gomp_unmap_vars (tgt_vars, true);
+ else
+ free (fpc);
}
/* Host fallback for GOMP_target_data{,_ext} routines. */
@@ -1547,7 +1619,8 @@ GOMP_target_data (int device, const void *unused, size_t mapnum,
struct gomp_device_descr *devicep = resolve_device (device);
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
return gomp_target_data_fallback ();
struct target_mem_desc *tgt
@@ -1565,7 +1638,8 @@ GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
struct gomp_device_descr *devicep = resolve_device (device);
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return gomp_target_data_fallback ();
struct target_mem_desc *tgt
@@ -1595,7 +1669,8 @@ GOMP_target_update (int device, const void *unused, size_t mapnum,
struct gomp_device_descr *devicep = resolve_device (device);
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return;
gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
@@ -1626,7 +1701,7 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
mapnum, hostaddrs, sizes, kinds,
flags | GOMP_TARGET_FLAG_UPDATE,
- depend, GOMP_TARGET_TASK_DATA))
+ depend, NULL, GOMP_TARGET_TASK_DATA))
return;
}
else
@@ -1646,7 +1721,8 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
}
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return;
struct gomp_thread *thr = gomp_thread ();
@@ -1756,7 +1832,7 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
{
if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
mapnum, hostaddrs, sizes, kinds,
- flags, depend,
+ flags, depend, NULL,
GOMP_TARGET_TASK_DATA))
return;
}
@@ -1777,7 +1853,8 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
}
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return;
struct gomp_thread *thr = gomp_thread ();
@@ -1815,7 +1892,8 @@ gomp_target_task_fn (void *data)
void *fn_addr;
if (devicep == NULL
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
- || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn)))
+ || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
+ || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
{
ttask->state = GOMP_TARGET_TASK_FALLBACK;
gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum,
@@ -1826,22 +1904,36 @@ gomp_target_task_fn (void *data)
if (ttask->state == GOMP_TARGET_TASK_FINISHED)
{
- gomp_unmap_vars (ttask->tgt, true);
+ if (ttask->tgt)
+ gomp_unmap_vars (ttask->tgt, true);
return false;
}
- ttask->tgt
- = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL,
- ttask->sizes, ttask->kinds, true,
- GOMP_MAP_VARS_TARGET);
+ void *actual_arguments;
+ if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ {
+ ttask->tgt = NULL;
+ ttask->firstprivate_copies
+ = gomp_target_unshare_firstprivate (ttask->mapnum, ttask->hostaddrs,
+ ttask->sizes, ttask->kinds);
+ actual_arguments = ttask->hostaddrs;
+ }
+ else
+ {
+ ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
+ NULL, ttask->sizes, ttask->kinds, true,
+ GOMP_MAP_VARS_TARGET);
+ actual_arguments = (void *) ttask->tgt->tgt_start;
+ }
ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
- devicep->async_run_func (devicep->target_id, fn_addr,
- (void *) ttask->tgt->tgt_start, (void *) ttask);
+ devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
+ ttask->args, (void *) ttask);
return true;
}
else if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return false;
size_t i;
@@ -1891,7 +1983,8 @@ omp_target_alloc (size_t size, int device_num)
if (devicep == NULL)
return NULL;
- if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return malloc (size);
gomp_mutex_lock (&devicep->lock);
@@ -1919,7 +2012,8 @@ omp_target_free (void *device_ptr, int device_num)
if (devicep == NULL)
return;
- if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
{
free (device_ptr);
return;
@@ -1946,7 +2040,8 @@ omp_target_is_present (void *ptr, int device_num)
if (devicep == NULL)
return 0;
- if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return 1;
gomp_mutex_lock (&devicep->lock);
@@ -1976,7 +2071,8 @@ omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
if (dst_devicep == NULL)
return EINVAL;
- if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
dst_devicep = NULL;
}
if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
@@ -1988,7 +2084,8 @@ omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
if (src_devicep == NULL)
return EINVAL;
- if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
src_devicep = NULL;
}
if (src_devicep == NULL && dst_devicep == NULL)
@@ -2118,7 +2215,8 @@ omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
if (dst_devicep == NULL)
return EINVAL;
- if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
dst_devicep = NULL;
}
if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
@@ -2130,7 +2228,8 @@ omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
if (src_devicep == NULL)
return EINVAL;
- if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
src_devicep = NULL;
}
@@ -2166,7 +2265,8 @@ omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
if (devicep == NULL)
return EINVAL;
- if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return EINVAL;
gomp_mutex_lock (&devicep->lock);
@@ -2309,6 +2409,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
{
DLSYM (run);
DLSYM (async_run);
+ DLSYM_OPT (can_run, can_run);
DLSYM (dev2dev);
}
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
diff --git a/libgomp/task.c b/libgomp/task.c
index b18b6e26b7d..0f45c444623 100644
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -582,6 +582,7 @@ GOMP_PLUGIN_target_task_completion (void *data)
return;
}
ttask->state = GOMP_TARGET_TASK_FINISHED;
+ free (ttask->firstprivate_copies);
gomp_target_task_completion (team, task);
gomp_mutex_unlock (&team->task_lock);
}
@@ -594,7 +595,7 @@ bool
gomp_create_target_task (struct gomp_device_descr *devicep,
void (*fn) (void *), size_t mapnum, void **hostaddrs,
size_t *sizes, unsigned short *kinds,
- unsigned int flags, void **depend,
+ unsigned int flags, void **depend, void **args,
enum gomp_target_task_state state)
{
struct gomp_thread *thr = gomp_thread ();
@@ -654,6 +655,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
ttask->devicep = devicep;
ttask->fn = fn;
ttask->mapnum = mapnum;
+ ttask->args = args;
memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
diff --git a/libgomp/testsuite/Makefile.in b/libgomp/testsuite/Makefile.in
index c25d21fa971..1fae9e8cdcb 100644
--- a/libgomp/testsuite/Makefile.in
+++ b/libgomp/testsuite/Makefile.in
@@ -111,6 +111,8 @@ FC = @FC@
FCFLAGS = @FCFLAGS@
FGREP = @FGREP@
GREP = @GREP@
+HSA_RUNTIME_INCLUDE = @HSA_RUNTIME_INCLUDE@
+HSA_RUNTIME_LIB = @HSA_RUNTIME_LIB@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
@@ -155,6 +157,10 @@ PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
+PLUGIN_HSA = @PLUGIN_HSA@
+PLUGIN_HSA_CPPFLAGS = @PLUGIN_HSA_CPPFLAGS@
+PLUGIN_HSA_LDFLAGS = @PLUGIN_HSA_LDFLAGS@
+PLUGIN_HSA_LIBS = @PLUGIN_HSA_LIBS@
PLUGIN_NVPTX = @PLUGIN_NVPTX@
PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@
PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@