summaryrefslogtreecommitdiff
path: root/openmp/docs
diff options
context:
space:
mode:
authorJohannes Doerfert <johannes@jdoerfert.de>2023-01-07 16:14:48 -0800
committerJohannes Doerfert <johannes@jdoerfert.de>2023-01-21 18:46:36 -0800
commit40f9bf082ff0c837b8801e907f582990828b78b9 (patch)
tree09bba968b27dbadacb9a3d66c585499f5ba04d4b /openmp/docs
parent99fd12f20ee8d4559bad67146e28ed6523fd2a06 (diff)
downloadllvm-40f9bf082ff0c837b8801e907f582990828b78b9.tar.gz
[OpenMP] Introduce the `ompx_dyn_cgroup_mem(<N>)` clause
Dynamic memory allows users to allocate fast shared memory when a kernel is launched. We already support a single size for all kernels via the `LIBOMPTARGET_SHARED_MEMORY_SIZE` environment variable, but now the size can be controlled per kernel invocation, which also allows computed values. Note: Only the nextgen plugins will allocate memory based on the clause; with the old plugins the program will be silently miscompiled. Differential Revision: https://reviews.llvm.org/D141233
Diffstat (limited to 'openmp/docs')
-rw-r--r--openmp/docs/design/Runtimes.rst31
1 files changed, 27 insertions, 4 deletions
diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index 2db14aaeddb4..02424114635c 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -1200,8 +1200,9 @@ buffer. This pointer can be obtained using the
``llvm_omp_target_dynamic_shared_alloc`` extension. If this function is called
from the host it will simply return a null pointer. In order to use this buffer
the kernel must be launched with an adequate amount of dynamic shared memory
-allocated. Currently this is done using the ``LIBOMPTARGET_SHARED_MEMORY_SIZE``
-environment variable. An example is given below.
+allocated. This can be done using the ``LIBOMPTARGET_SHARED_MEMORY_SIZE``
+environment variable or the ``ompx_dyn_cgroup_mem(<N>)`` target directive
+clause. Examples for both are given below.
.. code-block:: c++
@@ -1210,20 +1211,42 @@ environment variable. An example is given below.
#pragma omp target parallel map(from : x)
{
int *buf = llvm_omp_target_dynamic_shared_alloc();
- #pragma omp barrier
if (omp_get_thread_num() == 0)
*buf = 1;
#pragma omp barrier
if (omp_get_thread_num() == 1)
x = *buf;
}
+ assert(x == 1);
}
.. code-block:: console
- $ clang++ -fopenmp -fopenmp-targets=nvptx64 shared.c
+ $ clang++ -fopenmp --offload-arch=sm_80 -O3 shared.c -o shared
$ env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 ./shared
+.. code-block:: c++
+
+ void foo(int N) {
+ int x;
+ #pragma omp target parallel map(from : x) ompx_dyn_cgroup_mem(N * sizeof(int))
+ {
+ int *buf = (int *)llvm_omp_target_dynamic_shared_alloc();
+ if (omp_get_thread_num() == 0)
+ buf[N - 1] = 1;
+ #pragma omp barrier
+ if (omp_get_thread_num() == 1)
+ x = buf[N - 1];
+ }
+ assert(x == 1);
+ }
+
+.. code-block:: console
+
+ $ clang++ -fopenmp --offload-arch=gfx90a -O3 shared.c -o shared
+ $ env LIBOMPTARGET_NEXTGEN_PLUGINS=1 ./shared
+
+
.. _libomptarget_device_debugging:
Debugging