summaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
author Cesar Philippidis <cesar@codesourcery.com> 2018-06-22 03:04:14 -0700
committer Thomas Schwinge <tschwinge@gcc.gnu.org> 2018-06-22 12:04:14 +0200
commit 31dd69b7ff60979b615e45229f759613873989e6 (patch)
tree d3f7ad7604a2aa1dc7c92c08dba3aa31c5e791e7 /libgomp
parent ebbb116851bd0b43e6c86cd719b7a70684991d66 (diff)
download gcc-31dd69b7ff60979b615e45229f759613873989e6.tar.gz
Update OpenACC testcases
gcc/testsuite/ * c-c++-common/goacc/deviceptr-4.c: New file. * c-c++-common/goacc/kernels-counter-var-redundant-load.c: Likewise. * c-c++-common/goacc/kernels-loop-data-2.c: Likewise. * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise. * c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise. * c-c++-common/goacc/kernels-loop-data-update.c: Likewise. * c-c++-common/goacc/kernels-loop-data.c: Likewise. * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: Likewise. * c-c++-common/goacc/parallel-reduction.c: Likewise. * c-c++-common/goacc/private-reduction-1.c: Likewise. * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: Likewise. * gfortran.dg/goacc/modules.f95: Likewise. * gfortran.dg/goacc/routine-8.f90: Likewise. * gfortran.dg/goacc/routine-level-of-parallelism-1.f90: Likewise. libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Don't force "-O2". * testsuite/libgomp.oacc-c-c++-common/data-2.c: Update. * testsuite/libgomp.oacc-c-c++-common/host_data-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: Likewise. * testsuite/libgomp.oacc-fortran/data-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/data-2.f90: Likewise. * testsuite/libgomp.oacc-c++/non-scalar-data.C: New file. * testsuite/libgomp.oacc-c-c++-common/declare-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/enter-data.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c: Likewise. 
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h: Likewise. * testsuite/libgomp.oacc-fortran/cublas-fixed.h: Likewise. 
* testsuite/libgomp.oacc-fortran/dummy-array.f90: Likewise. * testsuite/libgomp.oacc-fortran/host_data-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/host_data-3.f: Likewise. * testsuite/libgomp.oacc-fortran/host_data-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-collapse-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-collapse-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-independent.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-loop-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-map-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90: Likewise. * testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-12.f90: Likewise. 
* testsuite/libgomp.oacc-fortran/lib-13.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-14.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-15.f90: Likewise. * testsuite/libgomp.oacc-fortran/parallel-loop-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/reference-reductions.f90: Likewise. * testsuite/libgomp.oacc-fortran/vector-routine.f90: Likewise. Co-Authored-By: James Norris <jnorris@codesourcery.com> Co-Authored-By: Julian Brown <julian@codesourcery.com> Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com> Co-Authored-By: Tom de Vries <tom@codesourcery.com> From-SVN: r261884
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/ChangeLog125
-rw-r--r--libgomp/testsuite/libgomp.oacc-c++/non-scalar-data.C110
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c27
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c61
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/enter-data.c23
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c48
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c53
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c51
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c48
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c50
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c47
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c49
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c54
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c49
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c55
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c58
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c51
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c27
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c31
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c31
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c35
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c32
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c40
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c51
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c46
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c36
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c43
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c54
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c49
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c51
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c55
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c54
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c24
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c3
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c270
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c38
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h20
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h280
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/cublas-fixed.h16
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/data-1.f90231
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/data-2.f9050
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/dummy-array.f9028
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f9098
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/host_data-3.f85
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/host_data-4.f90101
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction-2.f9026
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction.f9021
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-3.f9030
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-4.f9041
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-independent.f9042
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-1.f9066
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-map-1.f90116
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f9536
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f9023
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f9028
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f9028
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f9036
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f9041
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f9038
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f9027
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f9036
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f9048
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f9045
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f9048
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f9049
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f9044
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f9019
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/lib-12.f9027
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/lib-13.f9034
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/lib-14.f9082
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/lib-15.f9052
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/parallel-loop-1.f9077
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/reference-reductions.f9038
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/vector-routine.f9041
74 files changed, 4020 insertions, 57 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index c4ba406386b..d8277394851 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,128 @@
+2018-06-22 Cesar Philippidis <cesar@codesourcery.com>
+ James Norris <jnorris@codesourcery.com>
+ Julian Brown <julian@codesourcery.com>
+ Thomas Schwinge <thomas@codesourcery.com>
+ Tom de Vries <tom@codesourcery.com>
+
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Don't force "-O2".
+ * testsuite/libgomp.oacc-c-c++-common/data-2.c: Update.
+ * testsuite/libgomp.oacc-c-c++-common/host_data-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-2.f90: Likewise.
+ * testsuite/libgomp.oacc-c++/non-scalar-data.C: New file.
+ * testsuite/libgomp.oacc-c-c++-common/declare-3.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/enter-data.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h: Likewise.
+ * testsuite/libgomp.oacc-fortran/cublas-fixed.h: Likewise.
+ * testsuite/libgomp.oacc-fortran/dummy-array.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/host_data-2.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/host_data-3.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/host_data-4.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction-2.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-collapse-3.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-collapse-4.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-independent.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-loop-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-map-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90:
+ Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-12.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-13.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-14.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-15.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/parallel-loop-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/reference-reductions.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/vector-routine.f90: Likewise.
+
2018-06-20 Chung-Lin Tang <cltang@codesourcery.com>
Thomas Schwinge <thomas@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
diff --git a/libgomp/testsuite/libgomp.oacc-c++/non-scalar-data.C b/libgomp/testsuite/libgomp.oacc-c++/non-scalar-data.C
new file mode 100644
index 00000000000..8e4b296382b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c++/non-scalar-data.C
@@ -0,0 +1,110 @@
+// Ensure that non-scalar dummy arguments which are implicitly used inside
+// offloaded regions are properly mapped using present_or_copy semantics.
+
+// { dg-xfail-if "TODO" { *-*-* } }
+// { dg-excess-errors "ICE" }
+
+#include <cassert>
+
+const int n = 100;
+
+struct data {
+ int v;
+};
+
+void
+kernels_present (data &d, int &x)
+{
+#pragma acc kernels present (d, x) default (none)
+ {
+ d.v = x;
+ }
+}
+
+void
+parallel_present (data &d, int &x)
+{
+#pragma acc parallel present (d, x) default (none)
+ {
+ d.v = x;
+ }
+}
+
+void
+kernels_implicit (data &d, int &x)
+{
+#pragma acc kernels
+ {
+ d.v = x;
+ }
+}
+
+void
+parallel_implicit (data &d, int &x)
+{
+#pragma acc parallel
+ {
+ d.v = x;
+ }
+}
+
+void
+reference_data (data &d, int &x)
+{
+#pragma acc data copy(d, x)
+ {
+ kernels_present (d, x);
+
+#pragma acc update host(d)
+ assert (d.v == x);
+
+ x = 200;
+#pragma acc update device(x)
+
+ parallel_present (d, x);
+ }
+
+ assert (d.v == x);
+
+ x = 300;
+ kernels_implicit (d, x);
+ assert (d.v == x);
+
+ x = 400;
+ parallel_implicit (d, x);
+ assert (d.v == x);
+}
+
+int
+main ()
+{
+ data d;
+ int x = 100;
+
+#pragma acc data copy(d, x)
+ {
+ kernels_present (d, x);
+
+#pragma acc update host(d)
+ assert (d.v == x);
+
+ x = 200;
+#pragma acc update device(x)
+
+ parallel_present (d, x);
+ }
+
+ assert (d.v == x);
+
+ x = 300;
+ kernels_implicit (d, x);
+ assert (d.v == x);
+
+ x = 400;
+ parallel_implicit (d, x);
+ assert (d.v == x);
+
+ reference_data (d, x);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
index c1c0825919d..0c6abe69dc1 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
@@ -1,6 +1,7 @@
/* Test 'acc enter/exit data' regions. */
/* { dg-do run } */
+/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } } */
#include <stdlib.h>
@@ -46,6 +47,32 @@ main (int argc, char **argv)
for (i = 0; i < N; i++)
{
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc enter data copyin (a[0:N]) async
+#pragma acc enter data copyin (b[0:N]) async wait
+#pragma acc enter data copyin (N) async wait
+#pragma acc parallel async wait
+#pragma acc loop
+ for (i = 0; i < N; i++)
+ b[i] = a[i];
+
+#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) delete (N) wait async
+#pragma acc wait
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 3.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
a[i] = 2.0;
b[i] = 0.0;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c
new file mode 100644
index 00000000000..c3a21876312
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+float *b;
+#pragma acc declare deviceptr (b)
+
+#pragma acc routine
+float *
+subr2 (void)
+{
+ return b;
+}
+
+float
+subr1 (float a)
+{
+ float b;
+#pragma acc declare present_or_copy (b)
+ float c;
+#pragma acc declare present_or_copyin (c)
+ float d;
+#pragma acc declare present_or_create (d)
+ float e;
+#pragma acc declare present_or_copyout (e)
+
+#pragma acc parallel copy (a)
+ {
+ b = a;
+ c = b;
+ d = c;
+ e = d;
+ a = e;
+ }
+
+ return a;
+}
+
+int
+main (int argc, char **argv)
+{
+ float a;
+ float *c;
+
+ a = 2.0;
+
+ a = subr1 (a);
+
+ if (a != 2.0)
+ abort ();
+
+ b = (float *) acc_malloc (sizeof (float));
+
+ c = subr2 ();
+
+ if (b != c)
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/enter-data.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/enter-data.c
new file mode 100644
index 00000000000..0f566c9d844
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/enter-data.c
@@ -0,0 +1,23 @@
+/* This test verifies that the present data clauses to acc enter data
+ don't cause duplicate mapping failures at runtime. */
+
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main (void)
+{
+ int a;
+
+#pragma acc enter data copyin (a)
+#pragma acc enter data pcopyin (a)
+#pragma acc enter data pcreate (a)
+#pragma acc exit data delete (a)
+
+#pragma acc enter data create (a)
+#pragma acc enter data pcreate (a)
+#pragma acc exit data delete (a)
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c
index 51745ba726d..21d2139af27 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c
@@ -1,14 +1,16 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
-/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+/* { dg-additional-options "-lm -lcuda -lcublas -lcudart -Wall -Wextra" } */
#include <stdlib.h>
+#include <math.h>
#include <openacc.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cublas_v2.h>
+#pragma acc routine
void
-saxpy_host (int n, float a, float *x, float *y)
+saxpy (int n, float a, float *x, float *y)
{
int i;
@@ -16,18 +18,18 @@ saxpy_host (int n, float a, float *x, float *y)
y[i] = y[i] + a * x[i];
}
-#pragma acc routine
void
-saxpy_target (int n, float a, float *x, float *y)
+validate_results (int n, float *a, float *b)
{
int i;
for (i = 0; i < n; i++)
- y[i] = y[i] + a * x[i];
+ if (fabs (a[i] - b[i]) > .00001)
+ abort ();
}
int
-main(int argc, char **argv)
+main()
{
#define N 8
int i;
@@ -42,7 +44,7 @@ main(int argc, char **argv)
y[i] = y_ref[i] = 3.0;
}
- saxpy_host (N, a, x_ref, y_ref);
+ saxpy (N, a, x_ref, y_ref);
cublasCreate (&h);
@@ -54,11 +56,7 @@ main(int argc, char **argv)
}
}
- for (i = 0; i < N; i++)
- {
- if (y[i] != y_ref[i])
- abort ();
- }
+ validate_results (N, y, y_ref);
#pragma acc data create (x[0:N]) copyout (y[0:N])
{
@@ -74,11 +72,7 @@ main(int argc, char **argv)
cublasDestroy (h);
- for (i = 0; i < N; i++)
- {
- if (y[i] != y_ref[i])
- abort ();
- }
+ validate_results (N, y, y_ref);
for (i = 0; i < N; i++)
y[i] = 3.0;
@@ -87,14 +81,24 @@ main(int argc, char **argv)
#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
{
#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
- saxpy_target (N, a, x, y);
+ saxpy (N, a, x, y);
}
+ validate_results (N, y, y_ref);
+
+ /* Exercise host_data with data transferred with acc enter data. */
+
for (i = 0; i < N; i++)
- {
- if (y[i] != y_ref[i])
- abort ();
- }
+ y[i] = 3.0;
+
+#pragma acc enter data copyin (x, a, y)
+#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
+ {
+ saxpy (N, a, x, y);
+ }
+#pragma acc exit data delete (x, a) copyout (y)
+
+ validate_results (N, y, y_ref);
return 0;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c
new file mode 100644
index 00000000000..607c35018df
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c
@@ -0,0 +1,53 @@
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+#pragma acc data copyout (a[0:N])
+ {
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+ }
+
+#pragma acc data copyout (b[0:N])
+ {
+#pragma acc kernels present (b[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+ }
+
+#pragma acc data copyin (a[0:N], b[0:N]) copyout (c[0:N])
+ {
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+ }
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c
new file mode 100644
index 00000000000..8b9dd5f815a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c
@@ -0,0 +1,51 @@
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+#pragma acc enter data create (a[0:N])
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+#pragma acc exit data copyout (a[0:N])
+
+#pragma acc enter data create (b[0:N])
+#pragma acc kernels present (b[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+#pragma acc exit data copyout (b[0:N])
+
+
+#pragma acc enter data copyin (a[0:N], b[0:N]) create (c[0:N])
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+#pragma acc exit data copyout (c[0:N])
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c
new file mode 100644
index 00000000000..5d5da6fcc01
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c
@@ -0,0 +1,48 @@
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
+
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+
+#pragma acc kernels present (b[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+
+#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N])
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c
new file mode 100644
index 00000000000..c111c8f56e7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c
@@ -0,0 +1,50 @@
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
+
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+
+#pragma acc update device (b[0:N])
+
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+
+#pragma acc exit data copyout (a[0:N], c[0:N])
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
+
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c
new file mode 100644
index 00000000000..947bcdac452
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c
@@ -0,0 +1,47 @@
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+/* Run three kernels regions inside one structured data region that lists all
+ arrays in a "copyout" clause, then verify the results on the host.
+ Returns 0 on success; calls abort () if a result does not match. */
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+ /* The kernels regions below only use "present" clauses; the mapping itself
+ comes from this enclosing data construct. */
+#pragma acc data copyout (a[0:N], b[0:N], c[0:N])
+ {
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+
+#pragma acc kernels present (b[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+ }
+
+ /* Host-side verification after the data region has ended. */
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c
new file mode 100644
index 00000000000..ebcc6e14d9a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c
@@ -0,0 +1,49 @@
+#include <stdlib.h>
+
+#define N (1024 * 512)
+#define COUNTERTYPE unsigned int
+
+/* Mix kernels and parallel regions operating on arrays mapped with
+ enter data / exit data, then verify the results on the host.
+ Returns 0 on success; calls abort () if a result does not match. */
+int
+main (void)
+{
+ unsigned int *__restrict a;
+ unsigned int *__restrict b;
+ unsigned int *__restrict c;
+
+ a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+ c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
+
+ /* Map all three arrays with a "create" clause. */
+#pragma acc enter data create (a[0:N], b[0:N], c[0:N])
+
+#pragma acc kernels present (a[0:N])
+ {
+ for (COUNTERTYPE i = 0; i < N; i++)
+ a[i] = i * 2;
+ }
+
+ /* b is produced by a parallel construct with an explicit loop directive,
+ unlike a and c which use kernels constructs. */
+#pragma acc parallel present (b[0:N])
+ {
+#pragma acc loop
+ for (COUNTERTYPE i = 0; i < N; i++)
+ b[i] = i * 4;
+ }
+
+#pragma acc kernels present (a[0:N], b[0:N], c[0:N])
+ {
+ for (COUNTERTYPE ii = 0; ii < N; ii++)
+ c[ii] = a[ii] + b[ii];
+ }
+
+ /* All three arrays are named in the copyout clause before the host check. */
+#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N])
+
+ for (COUNTERTYPE i = 0; i < N; i++)
+ if (c[i] != a[i] + b[i])
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c
new file mode 100644
index 00000000000..bcbe28a6778
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c
@@ -0,0 +1,54 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Back-to-back worker loops. */
+
+/* Each gang-loop iteration runs two worker loops back to back. Each worker
+ loop body declares its own local x, which the nested vector loop reads.
+ argc and argv are unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ /* Note: parses as i ^ (j * 3). */
+ int x = i ^ j * 3;
+
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+
+ /* Second worker loop over the same elements, with a different x. */
+ #pragma acc loop worker(num:32)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ /* Note: parses as i | (j * 5). */
+ int x = i | j * 5;
+
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ /* Recompute the expected value of every element on the host: the initial
+ value plus both increments. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c
new file mode 100644
index 00000000000..a944486fac3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c
@@ -0,0 +1,49 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Successive vector loops. */
+
+/* One worker loop per gang iteration; its body declares a local x that is
+ assigned twice, each assignment followed by a vector loop that reads it.
+ argc and argv are unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ /* NOTE(review): this function-scope x is shadowed by the block-scope x
+ declared in the worker loop and is never read -- presumably deliberate;
+ confirm. */
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ int x = i ^ j * 3;
+
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+
+ /* Reassign x between the two vector loops; the second loop must
+ see the new value. */
+ x = i | j * 5;
+
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ /* Host check: initial value plus both increments. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c
new file mode 100644
index 00000000000..ba0b44dc5be
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c
@@ -0,0 +1,55 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Aggregate worker variable. */
+
+/* Two-member aggregate; main declares one instance of it inside the
+ worker loop body. */
+typedef struct
+{
+ int x, y;
+} vec2;
+
+/* The worker loop body declares a local struct (pt); both members are set
+ before two successive vector loops that each read one member.
+ argc and argv are unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ vec2 pt;
+
+ pt.x = i ^ j * 3;
+ pt.y = i | j * 5;
+
+ /* First vector loop reads pt.x. */
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.x * k;
+
+ /* Second vector loop reads pt.y. */
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.y * k;
+ }
+ }
+ }
+
+ /* Host check: initial value plus both increments. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c
new file mode 100644
index 00000000000..7189d2a99cd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c
@@ -0,0 +1,58 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Addressable worker variable. */
+
+/* Two-member aggregate; main declares one instance of it inside the
+ worker loop body and also accesses it through a pointer. */
+typedef struct
+{
+ int x, y;
+} vec2;
+
+/* The worker loop body declares a local struct (pt) whose address is taken.
+ The struct is accessed both directly and through the pointer around two
+ vector loops. argc and argv are unused. Returns 0; a wrong result trips
+ the assert. */
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ vec2 pt, *ptp;
+
+ /* Taking the address makes pt addressable. */
+ ptp = &pt;
+
+ /* Written directly, read through the pointer below. */
+ pt.x = i ^ j * 3;
+
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += ptp->x * k;
+
+ /* Written through the pointer, read directly below. */
+ ptp->y = i | j * 5;
+
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.y * k;
+ }
+ }
+ }
+
+ /* Host check: initial value plus both increments. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c
new file mode 100644
index 00000000000..854ad7e9b3b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c
@@ -0,0 +1,51 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Array worker variable. */
+
+/* The worker loop body declares a local two-element array (pt); each element
+ is written just before the vector loop that reads it.
+ argc and argv are unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ int pt[2];
+
+ pt[0] = i ^ j * 3;
+
+ /* First vector loop reads pt[0]. */
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt[0] * k;
+
+ pt[1] = i | j * 5;
+
+ /* Second vector loop reads pt[1]. */
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt[1] * k;
+ }
+ }
+ }
+
+ /* Host check: initial value plus both increments. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c
new file mode 100644
index 00000000000..5bc90c2367b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c
@@ -0,0 +1,27 @@
+#include <assert.h>
+
+/* Test of gang-private variables declared on loop directive. */
+
+/* x is listed in a private clause on a gang loop; each iteration assigns it
+ and adds it to its own array element. argc and argv are unused.
+ Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32];
+
+ for (i = 0; i < 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ #pragma acc loop gang(num:32) private(x)
+ for (i = 0; i < 32; i++)
+ {
+ x = i * 2;
+ arr[i] += x;
+ }
+ }
+
+ /* arr[i] started as i and had i * 2 added, hence i * 3. */
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == i * 3);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c
new file mode 100644
index 00000000000..3eb11670e36
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c
@@ -0,0 +1,31 @@
+#include <assert.h>
+
+/* Test of gang-private variables declared on loop directive, with broadcasting
+ to partitioned workers. */
+
+/* x is listed in a private clause on a gang loop, assigned once per gang
+ iteration, and then read by a nested worker loop. argc and argv are
+ unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ #pragma acc loop gang(num:32) private(x)
+ for (i = 0; i < 32; i++)
+ {
+ x = i * 2;
+
+ /* Every iteration of the worker loop reads the value set above. */
+ #pragma acc loop worker(num:32)
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += x;
+ }
+ }
+
+ /* i / 32 recovers the gang-loop index of element i. */
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i / 32) * 2);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c
new file mode 100644
index 00000000000..86b9a7179e1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c
@@ -0,0 +1,31 @@
+#include <assert.h>
+
+/* Test of gang-private variables declared on loop directive, with broadcasting
+ to partitioned vectors. */
+
+/* x is listed in a private clause on a gang loop, assigned once per gang
+ iteration, and then read by a nested vector loop. argc and argv are
+ unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ #pragma acc loop gang(num:32) private(x)
+ for (i = 0; i < 32; i++)
+ {
+ x = i * 2;
+
+ /* Every iteration of the vector loop reads the value set above. */
+ #pragma acc loop vector(length:32)
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += x;
+ }
+ }
+
+ /* i / 32 recovers the gang-loop index of element i. */
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i / 32) * 2);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c
new file mode 100644
index 00000000000..4174248ee4e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c
@@ -0,0 +1,35 @@
+#include <assert.h>
+
+/* Test of gang-private addressable variable declared on loop directive, with
+ broadcasting to partitioned workers. */
+
+/* As the plain gang-private/worker-loop case, but the address of x is taken
+ inside the gang loop and x is modified through that pointer.
+ argc and argv are unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ #pragma acc loop gang(num:32) private(x)
+ for (i = 0; i < 32; i++)
+ {
+ /* Taking the address makes x addressable. */
+ int *p = &x;
+
+ x = i * 2;
+
+ #pragma acc loop worker(num:32)
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += x;
+
+ /* Modifies x through the pointer after its last read; x is
+ reassigned at the top of each iteration, so the expected
+ values below do not depend on this decrement. */
+ (*p)--;
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i / 32) * 2);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c
new file mode 100644
index 00000000000..b160eaa604d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c
@@ -0,0 +1,32 @@
+#include <assert.h>
+
+/* Test of gang-private array variable declared on loop directive, with
+ broadcasting to partitioned workers. */
+
+/* An eight-element array x is listed in a private clause on a gang loop,
+ filled once per gang iteration, and then read by a nested worker loop.
+ argc and argv are unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int x[8], i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ #pragma acc loop gang(num:32) private(x)
+ for (i = 0; i < 32; i++)
+ {
+ /* No loop directive here: a plain loop fills x[j] with j * 2. */
+ for (int j = 0; j < 8; j++)
+ x[j] = j * 2;
+
+ #pragma acc loop worker(num:32)
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += x[j % 8];
+ }
+ }
+
+ /* For element i = gang * 32 + j, i % 8 equals j % 8, which selects the
+ x element that was added. */
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i % 8) * 2);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c
new file mode 100644
index 00000000000..88ab245b0ce
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c
@@ -0,0 +1,40 @@
+#include <assert.h>
+
+/* Test of gang-private aggregate variable declared on loop directive, with
+ broadcasting to partitioned workers. */
+
+/* Aggregate with three scalar members and a 13-element array member; main
+ names one instance of it in a private clause. */
+typedef struct {
+ int x, y, z;
+ int attr[13];
+} vec3;
+
+/* A struct (pt) is listed in a private clause on a gang loop, partially
+ filled in once per gang iteration, and then read by a nested worker loop.
+ argc and argv are unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32];
+ vec3 pt;
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ /* Unlike the sibling tests, the gang and worker clauses here carry no
+ explicit size argument. */
+ #pragma acc loop gang private(pt)
+ for (i = 0; i < 32; i++)
+ {
+ pt.x = i;
+ pt.y = i * 2;
+ pt.z = i * 4;
+ pt.attr[5] = i * 6;
+
+ #pragma acc loop worker
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += pt.x + pt.y + pt.z + pt.attr[5];
+ }
+ }
+
+ /* The four members sum to (1 + 2 + 4 + 6) * gang index = 13 * (i / 32). */
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i / 32) * 13);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c
new file mode 100644
index 00000000000..df4add11df4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c
@@ -0,0 +1,51 @@
+#include <assert.h>
+
+/* Test of vector-private variables declared on loop directive. */
+
+/* The function-scope variable x is listed in a private clause on two
+ successive vector loops; each loop iteration assigns x and then uses it.
+ argc and argv are unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int x, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ #pragma acc loop vector(length:32) private(x)
+ for (k = 0; k < 32; k++)
+ {
+ /* x is written and read within the same iteration. */
+ x = i ^ j * 3;
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+
+ #pragma acc loop vector(length:32) private(x)
+ for (k = 0; k < 32; k++)
+ {
+ x = i | j * 5;
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+ }
+
+ /* Host check: initial value plus both increments. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c
new file mode 100644
index 00000000000..53c56b2d362
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c
@@ -0,0 +1,46 @@
+#include <assert.h>
+
+/* Test of vector-private variables declared on loop directive. Array type. */
+
+/* The function-scope two-element array pt is listed in a private clause on a
+ vector loop; each iteration fills both elements and then uses them.
+ argc and argv are unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int pt[2], i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ #pragma acc loop vector(length:32) private(pt)
+ for (k = 0; k < 32; k++)
+ {
+ /* Both elements are written and read within the same
+ iteration. */
+ pt[0] = i ^ j * 3;
+ pt[1] = i | j * 5;
+ arr[i * 1024 + j * 32 + k] += pt[0] * k;
+ arr[i * 1024 + j * 32 + k] += pt[1] * k;
+ }
+ }
+ }
+ }
+
+ /* Host check: initial value plus both increments. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c
new file mode 100644
index 00000000000..95db2f8912e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c
@@ -0,0 +1,36 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive. */
+
+/* x is listed in a private clause on a worker loop nested in a gang loop;
+ each worker iteration assigns x and adds it to its own array element.
+ argc and argv are unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32) private(x)
+ for (j = 0; j < 32; j++)
+ {
+ x = i ^ j * 3;
+ /* Try to ensure that accesses to 'x' don't get optimized into a
+ temporary. */
+ __asm__ __volatile__ ("");
+ arr[i * 32 + j] += x;
+ }
+ }
+ }
+
+ /* i / 32 and i % 32 recover the gang and worker loop indices of
+ element i. */
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + ((i / 32) ^ (i % 32) * 3));
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c
new file mode 100644
index 00000000000..ceaa3ee9ecd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c
@@ -0,0 +1,43 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. */
+
+/* x is listed in a private clause on a worker loop, assigned once per worker
+ iteration, and then read by a nested vector loop. argc and argv are
+ unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32) private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ x = i ^ j * 3;
+
+ /* Every iteration of the vector loop reads the value set
+ above. */
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ /* Host check: initial value plus the single increment. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c
new file mode 100644
index 00000000000..193a1d1063b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c
@@ -0,0 +1,54 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. Back-to-back worker loops. */
+
+/* Each gang-loop iteration runs two worker loops back to back; both list the
+ same function-scope x in a private clause and pass it to a nested vector
+ loop. argc and argv are unused. Returns 0; a wrong result trips the
+ assert. */
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32) private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ x = i ^ j * 3;
+
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+
+ /* Second worker loop over the same elements, with a different x. */
+ #pragma acc loop worker(num:32) private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ x = i | j * 5;
+
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ /* Host check: initial value plus both increments. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c
new file mode 100644
index 00000000000..4320cd81e69
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c
@@ -0,0 +1,49 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. Successive vector loops. */
+
+/* x is listed in a private clause on a worker loop and assigned twice per
+ worker iteration, each assignment followed by a vector loop that reads it.
+ argc and argv are unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32) private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ x = i ^ j * 3;
+
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+
+ /* Reassign x between the two vector loops; the second loop must
+ see the new value. */
+ x = i | j * 5;
+
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ /* Host check: initial value plus both increments. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c
new file mode 100644
index 00000000000..80992eed0f8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c
@@ -0,0 +1,51 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. Addressable worker variable. */
+
+/* x is listed in a private clause on a worker loop and its address is taken
+ inside the loop; x is written once directly and once through the pointer,
+ each write followed by a vector loop that reads x. argc and argv are
+ unused. Returns 0; a wrong result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32) private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ /* Taking the address makes x addressable. */
+ int *p = &x;
+
+ x = i ^ j * 3;
+
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+
+ /* Second value is stored through the pointer but read back
+ through the name x below. */
+ *p = i | j * 5;
+
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ /* Host check: initial value plus both increments. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c
new file mode 100644
index 00000000000..005ba60a341
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c
@@ -0,0 +1,55 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. Aggregate worker variable. */
+
+/* Two-member aggregate; main names one instance of it in a private clause
+ on a worker loop. */
+typedef struct
+{
+ int x, y;
+} vec2;
+
+/* A function-scope struct (pt) is listed in a private clause on a worker
+ loop; both members are set once per worker iteration and then read by two
+ successive vector loops. argc and argv are unused. Returns 0; a wrong
+ result trips the assert. */
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32 * 32];
+ vec2 pt;
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker(num:32) private(pt)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ pt.x = i ^ j * 3;
+ pt.y = i | j * 5;
+
+ /* First vector loop reads pt.x. */
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.x * k;
+
+ /* Second vector loop reads pt.y. */
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.y * k;
+ }
+ }
+ }
+
+ /* Host check: initial value plus both increments. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c
new file mode 100644
index 00000000000..8d367fb00e0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c
@@ -0,0 +1,54 @@
+#include <assert.h>
+
+/* Test of worker-private variables declared on loop directive, broadcasting
+ to vector-partitioned mode. Array worker variable. */
+
+/* A function-scope two-element array (pt) is listed in a private clause on a
+ worker loop; each element is written just before the vector loop that
+ reads it. argc and argv are unused. Returns 0; a wrong result trips the
+ assert. */
+int
+main (int argc, char* argv[])
+{
+ int i, arr[32 * 32 * 32];
+ int pt[2];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ /* "pt" is treated as "present_or_copy" on the kernels directive because it
+ is an array variable. */
+ #pragma acc kernels copy(arr)
+ {
+ int j;
+
+ #pragma acc loop gang(num:32)
+ for (i = 0; i < 32; i++)
+ {
+ /* But here, it is made private per-worker. */
+ #pragma acc loop worker(num:32) private(pt)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ pt[0] = i ^ j * 3;
+
+ /* First vector loop reads pt[0]. */
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt[0] * k;
+
+ pt[1] = i | j * 5;
+
+ /* Second vector loop reads pt[1]. */
+ #pragma acc loop vector(length:32)
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt[1] * k;
+ }
+ }
+ }
+
+ /* Host check: initial value plus both increments. */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c
new file mode 100644
index 00000000000..95f1b77986c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c
@@ -0,0 +1,24 @@
+/* Verify that a simple, explicit acc loop reduction works inside
+ a kernels region. */
+
+#include <stdlib.h>
+
+#define N 100
+
+/* Count N loop iterations into "red" using an explicit "+" reduction on an
+ acc loop inside a kernels region. Returns 0 on success; calls abort ()
+ if the final count is not N. */
+int
+main ()
+{
+ int i, red = 0;
+
+ /* The kernels directive itself carries no data clauses. */
+#pragma acc kernels
+ {
+#pragma acc loop reduction (+:red)
+ for (i = 0; i < N; i++)
+ red++;
+ }
+
+ /* One increment per iteration, so the reduced value must equal N. */
+ if (red != N)
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
index 6743afaca6a..71d3969f7b6 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
@@ -1,6 +1,3 @@
-/* { dg-do run } */
-/* { dg-additional-options "-O2" } */
-
#include <stdio.h>
#include <openacc.h>
#include <gomp-constants.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c
index 2394ac8cbd6..4474c127992 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c
@@ -74,6 +74,57 @@ void t2()
}
+/* Test conditional vector-partitioned loops.
+
+ Each gang-loop iteration j picks one of two vector loops depending on the
+ parity of j: even rows of arr are incremented, odd rows are decremented.
+ The counters in n are bumped by a gang loop before and after, so every
+ n[i] must end up as 2. Failures are reported through assert. */
+
+void t3()
+{
+ int n[32], arr[1024], i;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = 0;
+
+ for (i = 0; i < 32; i++)
+ n[i] = 0;
+
+ #pragma acc parallel copy(n, arr) \
+ num_gangs(32) num_workers(1) vector_length(32)
+ {
+ int j, k;
+
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ if ((j % 2) == 0)
+ {
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k]++;
+ }
+ else
+ {
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k]--;
+ }
+ }
+
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (n[i] == 2);
+
+ /* (i % 64) < 32 is true exactly for elements in even rows. The
+ conditional expression must be parenthesized: "==" binds tighter than
+ "?:", so without the parentheses the assert argument would be 1 or -1,
+ never zero, and the check could not fail. */
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == (((i % 64) < 32) ? 1 : -1));
+}
+
+
/* Test conditions inside vector-partitioned loops. */
void t4()
@@ -156,6 +207,79 @@ void t5()
}
+/* Test switch containing vector-partitioned loops inside gang-partitioned
+ loops. */
+
+/* n[j] is initialized to j % 5 and incremented once before the switch, so
+ the switch sees values 1..5 and adds that value to every element of row j
+ of arr. A final gang loop increments n[j] again. Failures are reported
+ through assert. */
+void t6()
+{
+ int n[32], arr[1024], i;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = 0;
+
+ for (i = 0; i < 32; i++)
+ n[i] = i % 5;
+
+ #pragma acc parallel copy(n, arr) \
+ num_gangs(32) num_workers(1) vector_length(32)
+ {
+ int j, k;
+
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+
+ /* Each case holds its own vector loop adding the case value. */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ switch (n[j])
+ {
+ case 1:
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k] += 1;
+ break;
+
+ case 2:
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k] += 2;
+ break;
+
+ case 3:
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k] += 3;
+ break;
+
+ case 4:
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k] += 4;
+ break;
+
+ case 5:
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k] += 5;
+ break;
+
+ /* n[j] is in 1..5 at this point, so reaching here means the
+ earlier increment went wrong. */
+ default:
+ abort ();
+ }
+
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ }
+
+ /* Initial i % 5 plus the two increments. */
+ for (i = 0; i < 32; i++)
+ assert (n[i] == (i % 5) + 2);
+
+ /* i / 32 is the row; the row received (row % 5) + 1. */
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == ((i / 32) % 5) + 1);
+}
+
+
/* Test trivial operation of vector-single mode. */
void t7()
@@ -381,6 +505,100 @@ void t13()
}
+/* Test condition in worker-partitioned mode. */
+
+/* Inside a worker loop, an if/else on the parity of the worker index k picks
+ one of two vector loops: even k adds 1 to the 32 elements of its chunk,
+ odd k adds 2. Failures are reported through assert. */
+void t14()
+{
+ int arr[32 * 32 * 8], i;
+
+ for (i = 0; i < 32 * 32 * 8; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(8) num_workers(8) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ #pragma acc loop worker
+ for (k = 0; k < 8; k++)
+ {
+ int m;
+ if ((k % 2) == 0)
+ {
+ #pragma acc loop vector
+ for (m = 0; m < 32; m++)
+ arr[j * 32 * 8 + k * 32 + m]++;
+ }
+ else
+ {
+ #pragma acc loop vector
+ for (m = 0; m < 32; m++)
+ arr[j * 32 * 8 + k * 32 + m] += 2;
+ }
+ }
+ }
+ }
+
+ /* i / 32 equals j * 8 + k, whose parity is that of k; the increment is
+ therefore (k % 2) + 1. */
+ for (i = 0; i < 32 * 32 * 8; i++)
+ assert (arr[i] == i + ((i / 32) % 2) + 1);
+}
+
+
+/* Test switch in worker-partitioned mode. */
+
+/* Inside a worker loop, a three-way switch picks the vector loop that adds
+ 1, 2 or 3 to the 32 elements of chunk (j, k). Failures are reported
+ through assert. */
+void t15()
+{
+ int arr[32 * 32 * 8], i;
+
+ for (i = 0; i < 32 * 32 * 8; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(8) num_workers(8) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ #pragma acc loop worker
+ for (k = 0; k < 8; k++)
+ {
+ int m;
+ /* (j * 32 + k) % 3 equals (j * 8 + k) % 3 because 32 and 8 are
+ congruent modulo 3; the host check below relies on this. */
+ switch ((j * 32 + k) % 3)
+ {
+ case 0:
+ #pragma acc loop vector
+ for (m = 0; m < 32; m++)
+ arr[j * 32 * 8 + k * 32 + m]++;
+ break;
+
+ case 1:
+ #pragma acc loop vector
+ for (m = 0; m < 32; m++)
+ arr[j * 32 * 8 + k * 32 + m] += 2;
+ break;
+
+ case 2:
+ #pragma acc loop vector
+ for (m = 0; m < 32; m++)
+ arr[j * 32 * 8 + k * 32 + m] += 3;
+ break;
+
+ /* Not reachable for a non-negative value modulo 3. */
+ default: ;
+ }
+ }
+ }
+ }
+
+ /* i / 32 equals j * 8 + k, so (i / 32) % 3 is the selected case. */
+ for (i = 0; i < 32 * 32 * 8; i++)
+ assert (arr[i] == i + ((i / 32) % 3) + 1);
+}
+
+
/* Test worker-single/worker-partitioned transitions. */
void t16()
@@ -790,6 +1008,53 @@ void t25()
}
+/* Test multiple conditional vector-partitioned loops in worker-single
+ mode. */
+
+/* Inside a gang loop, an if / else-if chain on j % 3 picks between two vector
+ loops (adding 3 or 7 with an atomic update); rows with j % 3 == 2 run
+ neither and are left unchanged. Failures are reported through assert. */
+void t26()
+{
+ int arr[32 * 32], i;
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(8) num_workers(8) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ if ((j % 3) == 0)
+ {
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ {
+ #pragma acc atomic
+ arr[j * 32 + k] += 3;
+ }
+ }
+ else if ((j % 3) == 1)
+ {
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ {
+ #pragma acc atomic
+ arr[j * 32 + k] += 7;
+ }
+ }
+ }
+ }
+
+ /* i / 32 is the row j; expect +3, +7 or no change according to j % 3. */
+ for (i = 0; i < 32 * 32; i++)
+ {
+ int j = (i / 32) % 3;
+ assert (arr[i] == i + ((j == 0) ? 3 : (j == 1) ? 7 : 0));
+ }
+}
+
+
/* Test worker-single, vector-partitioned, gang-redundant mode. */
#define ACTUAL_GANGS 8
@@ -869,8 +1134,10 @@ int main()
{
t1();
t2();
+ t3();
t4();
t5();
+ t6();
t7();
t8();
t9();
@@ -878,6 +1145,8 @@ int main()
t11();
t12();
t13();
+ t14();
+ t15();
t16();
t17();
t18();
@@ -888,6 +1157,7 @@ int main()
t23();
t24();
t25();
+ t26();
t27();
t28();
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c
new file mode 100644
index 00000000000..4bc71415688
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define PK parallel
+#define M(x, y, z) O(x, y, z)
+#define O(x, y, z) x ## _ ## y ## _ ## z
+
+#define F
+#define G none
+#define L
+#include "parallel-loop-1.h"
+#undef L
+#undef F
+#undef G
+
+#define F num_gangs (10)
+#define G gangs
+#define L gang
+#include "parallel-loop-1.h"
+#undef L
+#undef F
+#undef G
+
+int
+main ()
+{
+  /* Run each generated variant in order and abort on the first one that
+     reports a failure; later variants are not run after a failure.  */
+  if (test_none_none ())
+    abort ();
+  if (test_none_auto ())
+    abort ();
+  if (test_none_independent ())
+    abort ();
+  if (test_none_seq ())
+    abort ();
+  if (test_gangs_none ())
+    abort ();
+  if (test_gangs_auto ())
+    abort ();
+  if (test_gangs_independent ())
+    abort ();
+  if (test_gangs_seq ())
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h
new file mode 100644
index 00000000000..fd83dd4ada5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h
@@ -0,0 +1,20 @@
+#define S
+#define N(x) M(x, G, none)
+#include "parallel-loop-2.h"
+#undef S
+#undef N
+#define S auto
+#define N(x) M(x, G, auto)
+#include "parallel-loop-2.h"
+#undef S
+#undef N
+#define S independent
+#define N(x) M(x, G, independent)
+#include "parallel-loop-2.h"
+#undef S
+#undef N
+#define S seq
+#define N(x) M(x, G, seq)
+#include "parallel-loop-2.h"
+#undef S
+#undef N
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h
new file mode 100644
index 00000000000..5691b7e845f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h
@@ -0,0 +1,280 @@
+#ifndef VARS
+#define VARS
+int a[1500];
+float b[10][15][10];
+#pragma acc routine
+__attribute__((noreturn)) void
+noreturn (void)
+{
+ for (;;);
+}
+#endif
+#ifndef SC
+#define SC
+#endif
+
+__attribute__((noinline, noclone)) void
+N(f0) (void)
+{
+ int i;
+#pragma acc PK loop L F
+ for (i = 0; i < 1500; i++)
+ a[i] += 2;
+}
+
+__attribute__((noinline, noclone)) void
+N(f1) (void)
+{
+#pragma acc PK loop L F
+ for (unsigned int i = __INT_MAX__; i < 3000U + __INT_MAX__; i += 2)
+ a[(i - __INT_MAX__) >> 1] -= 2;
+}
+
+__attribute__((noinline, noclone)) void
+N(f2) (void)
+{
+ unsigned long long i;
+#pragma acc PK loop L F
+ for (i = __LONG_LONG_MAX__ + 4500ULL - 27;
+ i > __LONG_LONG_MAX__ - 27ULL; i -= 3)
+ a[(i + 26LL - __LONG_LONG_MAX__) / 3] -= 4;
+}
+
+__attribute__((noinline, noclone)) void
+N(f3) (long long n1, long long n2, long long s3)
+{
+#pragma acc PK loop L F
+ for (long long i = n1 + 23; i > n2 - 25; i -= s3)
+ a[i + 48] += 7;
+}
+
+__attribute__((noinline, noclone)) void
+N(f4) (void)
+{
+ unsigned int i;
+#pragma acc PK loop L F
+ for (i = 30; i < 20; i += 2)
+ a[i] += 10;
+}
+
+__attribute__((noinline, noclone)) void
+N(f5) (int n11, int n12, int n21, int n22, int n31, int n32,
+ int s1, int s2, int s3)
+{
+ SC int v1, v2, v3;
+#pragma acc PK loop L F
+ for (v1 = n11; v1 < n12; v1 += s1)
+#pragma acc loop S
+ for (v2 = n21; v2 < n22; v2 += s2)
+ for (v3 = n31; v3 < n32; v3 += s3)
+ b[v1][v2][v3] += 2.5;
+}
+
+__attribute__((noinline, noclone)) void
+N(f6) (int n11, int n12, int n21, int n22, long long n31, long long n32,
+ int s1, int s2, long long int s3)
+{
+ SC int v1, v2;
+ SC long long v3;
+#pragma acc PK loop L F
+ for (v1 = n11; v1 > n12; v1 += s1)
+#pragma acc loop S
+ for (v2 = n21; v2 > n22; v2 += s2)
+ for (v3 = n31; v3 > n32; v3 += s3)
+ b[v1][v2 / 2][v3] -= 4.5;
+}
+
+__attribute__((noinline, noclone)) void
+N(f7) (void)
+{
+ SC unsigned int v1, v3;
+ SC unsigned long long v2;
+#pragma acc PK loop L F
+ for (v1 = 0; v1 < 20; v1 += 2)
+#pragma acc loop S
+ for (v2 = __LONG_LONG_MAX__ + 16ULL;
+ v2 > __LONG_LONG_MAX__ - 29ULL; v2 -= 3)
+ for (v3 = 10; v3 > 0; v3--)
+ b[v1 >> 1][(v2 - __LONG_LONG_MAX__ + 64) / 3 - 12][v3 - 1] += 5.5;
+}
+
+__attribute__((noinline, noclone)) void
+N(f8) (void)
+{
+ SC long long v1, v2, v3;
+#pragma acc PK loop L F
+ for (v1 = 0; v1 < 20; v1 += 2)
+#pragma acc loop S
+ for (v2 = 30; v2 < 20; v2++)
+ for (v3 = 10; v3 < 0; v3--)
+ b[v1][v2][v3] += 5.5;
+}
+
+__attribute__((noinline, noclone)) void
+N(f9) (void)
+{
+ int i;
+#pragma acc PK loop L F
+ for (i = 20; i < 10; i++)
+ {
+ a[i] += 2;
+ noreturn ();
+ a[i] -= 4;
+ }
+}
+
+__attribute__((noinline, noclone)) void
+N(f10) (void)
+{
+ SC int i;
+#pragma acc PK loop L F
+ for (i = 0; i < 10; i++)
+#pragma acc loop S
+ for (int j = 10; j < 8; j++)
+ for (long k = -10; k < 10; k++)
+ {
+ b[i][j][k] += 4;
+ noreturn ();
+ b[i][j][k] -= 8;
+ }
+}
+
+__attribute__((noinline, noclone)) void
+N(f11) (int n)
+{
+ int i;
+#pragma acc PK loop L F
+ for (i = 20; i < n; i++)
+ {
+ a[i] += 8;
+ noreturn ();
+ a[i] -= 16;
+ }
+}
+
+__attribute__((noinline, noclone)) void
+N(f12) (int n)
+{
+ SC int i;
+#pragma acc PK loop L F
+ for (i = 0; i < 10; i++)
+#pragma acc loop S
+ for (int j = n; j < 8; j++)
+ for (long k = -10; k < 10; k++)
+ {
+ b[i][j][k] += 16;
+ noreturn ();
+ b[i][j][k] -= 32;
+ }
+}
+
+__attribute__((noinline, noclone)) void
+N(f13) (void)
+{
+ int *i;
+#pragma acc PK loop L F
+ for (i = a; i < &a[1500]; i++)
+ i[0] += 2;
+}
+
+__attribute__((noinline, noclone)) void
+N(f14) (void)
+{
+ SC float *i;
+#pragma acc PK loop L F
+ for (i = &b[0][0][0]; i < &b[0][0][10]; i++)
+#pragma acc loop S
+ for (float *j = &b[0][15][0]; j > &b[0][0][0]; j -= 10)
+ for (float *k = &b[0][0][10]; k > &b[0][0][0]; --k)
+ b[i - &b[0][0][0]][(j - &b[0][0][0]) / 10 - 1][(k - &b[0][0][0]) - 1]
+ -= 3.5;
+}
+
+__attribute__((noinline, noclone)) int
+N(test) (void)
+{
+ int i, j, k;
+ for (i = 0; i < 1500; i++)
+ a[i] = i - 25;
+ N(f0) ();
+ for (i = 0; i < 1500; i++)
+ if (a[i] != i - 23)
+ return 1;
+ N(f1) ();
+ for (i = 0; i < 1500; i++)
+ if (a[i] != i - 25)
+ return 1;
+ N(f2) ();
+ for (i = 0; i < 1500; i++)
+ if (a[i] != i - 29)
+ return 1;
+ N(f3) (1500LL - 1 - 23 - 48, -1LL + 25 - 48, 1LL);
+ for (i = 0; i < 1500; i++)
+ if (a[i] != i - 22)
+ return 1;
+ N(f3) (1500LL - 1 - 23 - 48, 1500LL - 1, 7LL);
+ for (i = 0; i < 1500; i++)
+ if (a[i] != i - 22)
+ return 1;
+ N(f4) ();
+ for (i = 0; i < 1500; i++)
+ if (a[i] != i - 22)
+ return 1;
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 15; j++)
+ for (k = 0; k < 10; k++)
+ b[i][j][k] = i - 2.5 + 1.5 * j - 1.5 * k;
+ N(f5) (0, 10, 0, 15, 0, 10, 1, 1, 1);
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 15; j++)
+ for (k = 0; k < 10; k++)
+ if (b[i][j][k] != i + 1.5 * j - 1.5 * k)
+ return 1;
+ N(f5) (0, 10, 30, 15, 0, 10, 4, 5, 6);
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 15; j++)
+ for (k = 0; k < 10; k++)
+ if (b[i][j][k] != i + 1.5 * j - 1.5 * k)
+ return 1;
+ N(f6) (9, -1, 29, 0, 9, -1, -1, -2, -1);
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 15; j++)
+ for (k = 0; k < 10; k++)
+ if (b[i][j][k] != i - 4.5 + 1.5 * j - 1.5 * k)
+ return 1;
+ N(f7) ();
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 15; j++)
+ for (k = 0; k < 10; k++)
+ if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k)
+ return 1;
+ N(f8) ();
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 15; j++)
+ for (k = 0; k < 10; k++)
+ if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k)
+ return 1;
+ N(f9) ();
+ N(f10) ();
+ N(f11) (10);
+ N(f12) (12);
+ for (i = 0; i < 1500; i++)
+ if (a[i] != i - 22)
+ return 1;
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 15; j++)
+ for (k = 0; k < 10; k++)
+ if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k)
+ return 1;
+ N(f13) ();
+ N(f14) ();
+ for (i = 0; i < 1500; i++)
+ if (a[i] != i - 20)
+ return 1;
+ for (i = 0; i < 10; i++)
+ for (j = 0; j < 15; j++)
+ for (k = 0; k < 10; k++)
+ if (b[i][j][k] != i - 2.5 + 1.5 * j - 1.5 * k)
+ return 1;
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/cublas-fixed.h b/libgomp/testsuite/libgomp.oacc-fortran/cublas-fixed.h
new file mode 100644
index 00000000000..4a5f61ae560
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/cublas-fixed.h
@@ -0,0 +1,16 @@
+! CUDA BLAS interface binding for SAXPY.
+
+ use iso_c_binding
+ interface
+ subroutine cublassaxpy(N, alpha, x, incx, y, incy)
+ 1 bind(c, name="cublasSaxpy")
+ use iso_c_binding
+ integer(kind=c_int), value :: N
+ real(kind=c_float), value :: alpha
+ type(*), dimension(*) :: x
+ integer(kind=c_int), value :: incx
+ type(*), dimension(*) :: y
+ integer(kind=c_int), value :: incy
+ end subroutine cublassaxpy
+ end interface
+
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-1.f90
index f4e90539818..bf323b3f540 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/data-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-1.f90
@@ -1,45 +1,212 @@
! { dg-do run }
+! { dg-additional-options "-cpp" }
-program test
- integer, parameter :: N = 8
- real, allocatable :: a(:), b(:)
+function is_mapped (n) result (rc)
+ use openacc
- allocate (a(N))
- allocate (b(N))
+ integer, intent (in) :: n
+ logical rc
- a(:) = 3.0
- b(:) = 0.0
+#if ACC_MEM_SHARED
+ integer i
- !$acc enter data copyin (a(1:N), b(1:N))
+ rc = .TRUE.
+ i = n
+#else
+ rc = acc_is_present (n, sizeof (n))
+#endif
- !$acc parallel
- do i = 1, n
- b(i) = a (i)
- end do
- !$acc end parallel
+end function is_mapped
- !$acc exit data copyout (a(1:N), b(1:N))
+program main
+ integer i, j
+ logical is_mapped
- do i = 1, n
- if (a(i) .ne. 3.0) STOP 1
- if (b(i) .ne. 3.0) STOP 2
- end do
+ i = -1
+ j = -2
- a(:) = 5.0
- b(:) = 1.0
+ !$acc data copyin (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
- !$acc enter data copyin (a(1:N), b(1:N))
+ if (i .ne. -1 .or. j .ne. -2) call abort
- !$acc parallel
- do i = 1, n
- b(i) = a (i)
- end do
- !$acc end parallel
+ i = 2
+ j = 1
- !$acc exit data copyout (a(1:N), b(1:N))
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
- do i = 1, n
- if (a(i) .ne. 5.0) STOP 3
- if (b(i) .ne. 5.0) STOP 4
- end do
-end program test
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data copyout (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ !$acc parallel present (i, j)
+ i = 4
+ j = 2
+ !$acc end parallel
+ !$acc end data
+
+ if (i .ne. 4 .or. j .ne. 2) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data create (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data present_or_copyin (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data present_or_copyout (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ !$acc parallel present (i, j)
+ i = 4
+ j = 2
+ !$acc end parallel
+ !$acc end data
+
+ if (i .ne. 4 .or. j .ne. 2) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data present_or_copy (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+
+#if ACC_MEM_SHARED
+ if (i .ne. 2 .or. j .ne. 1) call abort
+#else
+ if (i .ne. -1 .or. j .ne. -2) call abort
+#endif
+
+ i = -1
+ j = -2
+
+ !$acc data present_or_create (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data copyin (i, j)
+ !$acc data present (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+ !$acc end data
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data copyin (i, j)
+ !$acc data present (i, j)
+ if (is_mapped (i) .eqv. .FALSE.) call abort
+ if (is_mapped (j) .eqv. .FALSE.) call abort
+
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+ !$acc end data
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+ i = -1
+ j = -2
+
+ !$acc data
+#if !ACC_MEM_SHARED
+ if (is_mapped (i) .eqv. .TRUE.) call abort
+ if (is_mapped (j) .eqv. .TRUE.) call abort
+#endif
+ if (i .ne. -1 .or. j .ne. -2) call abort
+
+ i = 2
+ j = 1
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+ !$acc end data
+
+ if (i .ne. 2 .or. j .ne. 1) call abort
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
index 22525b8f59e..83a540070e6 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
@@ -1,8 +1,14 @@
! { dg-do run }
program test
+ use openacc
integer, parameter :: N = 8
real, allocatable :: a(:,:), b(:,:)
+ real, allocatable :: c(:), d(:)
+ integer i, j
+
+ i = 0
+ j = 0
allocate (a(N,N))
allocate (b(N,N))
@@ -28,4 +34,48 @@ program test
if (b(j,i) .ne. 3.0) STOP 2
end do
end do
+
+ allocate (c(N))
+ allocate (d(N))
+
+ c(:) = 3.0
+ d(:) = 0.0
+
+ !$acc enter data copyin (c(1:N)) create (d(1:N)) async
+ !$acc wait
+
+ !$acc parallel
+ do i = 1, N
+ d(i) = c(i) + 1
+ end do
+ !$acc end parallel
+
+ !$acc exit data copyout (c(1:N), d(1:N)) async
+ !$acc wait
+
+ do i = 1, N
+ if (d(i) .ne. 4.0) call abort
+ end do
+
+ c(:) = 3.0
+ d(:) = 0.0
+
+ !$acc enter data copyin (c(1:N)) async
+ !$acc enter data create (d(1:N)) wait
+ !$acc wait
+
+ !$acc parallel
+ do i = 1, N
+ d(i) = c(i) + 1
+ end do
+ !$acc end parallel
+
+ !$acc exit data copyout (d(1:N)) async
+ !$acc exit data async
+ !$acc wait
+
+ do i = 1, N
+ if (d(i) .ne. 4.0) call abort
+ end do
+
end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/dummy-array.f90 b/libgomp/testsuite/libgomp.oacc-fortran/dummy-array.f90
new file mode 100644
index 00000000000..e95563cd406
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/dummy-array.f90
@@ -0,0 +1,28 @@
+! Ensure that dummy arrays are transferred to the accelerator
+! via an implicit pcopy.
+
+! { dg-do run }
+
+program main
+  integer, parameter :: n = 1000
+  integer :: a(n)
+  integer :: i
+
+  ! Start from a value the offloaded loop never writes.
+  a = -1
+
+  call dummy_array (a, n)
+
+  ! Every element must now hold its own 1-based index.
+  if (any (a .ne. [(i, i = 1, n)])) call abort
+end program main
+
+! Fill the explicit-shape dummy array A(1:N) with its own indices inside an
+! offloaded loop.  A has no explicit data clause on the construct, so it is
+! mapped implicitly.
+subroutine dummy_array (a, n)
+  implicit none
+  integer :: n
+  integer :: a(n)
+  integer :: i
+
+  !$acc parallel loop num_gangs (100) gang
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel loop
+end subroutine dummy_array
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f90
new file mode 100644
index 00000000000..ff0921863f6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/host_data-2.f90
@@ -0,0 +1,98 @@
+! Test host_data interoperability with CUDA blas. This test was
+! derived from libgomp.oacc-c-c++-common/host_data-1.c.
+
+! { dg-do run { target openacc_nvidia_accel_selected } }
+! { dg-additional-options "-lcublas -Wall -Wextra" }
+
+program test
+  implicit none
+
+  integer, parameter :: N = 10
+  integer :: i
+  real*4 :: x_ref(N), y_ref(N), x(N), y(N), a
+
+  interface
+     subroutine cublassaxpy(N, alpha, x, incx, y, incy) bind(c, name="cublasSaxpy")
+       use iso_c_binding
+       integer(kind=c_int), value :: N
+       real(kind=c_float), value :: alpha
+       type(*), dimension(*) :: x
+       integer(kind=c_int), value :: incx
+       type(*), dimension(*) :: y
+       integer(kind=c_int), value :: incy
+     end subroutine cublassaxpy
+  end interface
+
+  a = 2.0
+
+  do i = 1, N
+     x(i) = 4.0 * i
+     y(i) = 3.0
+     x_ref(i) = x(i)
+     y_ref(i) = y(i)
+  end do
+
+  ! Host reference result, kept in y_ref for all later comparisons.
+  call saxpy (N, a, x_ref, y_ref)
+
+  ! Case 1: x and y copied in; cuBLAS works on the device addresses.
+  !$acc data copyin (x) copy (y)
+  !$acc host_data use_device (x, y)
+  call cublassaxpy(N, a, x, 1, y, 1)
+  !$acc end host_data
+  !$acc end data
+
+  call validate_results (N, y, y_ref)
+
+  ! Case 2: create (x) allocates x on the device without copying the host
+  ! values, so both x and y are filled on the device before cuBLAS reads
+  ! them.  Leaving x unset would make the result depend on stale device
+  ! memory.
+  !$acc data create (x) copyout (y)
+  !$acc parallel loop
+  do i = 1, N
+     x(i) = 4.0 * i
+     y(i) = 3.0
+  end do
+  !$acc end parallel loop
+
+  !$acc host_data use_device (x, y)
+  call cublassaxpy(N, a, x, 1, y, 1)
+  !$acc end host_data
+  !$acc end data
+
+  call validate_results (N, y, y_ref)
+
+  y(:) = 3.0
+
+  ! Case 3: structured data region, saxpy called as an acc routine.
+  !$acc data copyin (x) copyin (a) copy (y)
+  !$acc parallel present (x) pcopy (y) present (a)
+  call saxpy (N, a, x, y)
+  !$acc end parallel
+  !$acc end data
+
+  call validate_results (N, y, y_ref)
+
+  y(:) = 3.0
+
+  ! Case 4: same computation with unstructured enter/exit data.
+  !$acc enter data copyin (x, a, y)
+  !$acc parallel present (x) pcopy (y) present (a)
+  call saxpy (N, a, x, y)
+  !$acc end parallel
+  !$acc exit data delete (x, a) copyout (y)
+
+  call validate_results (N, y, y_ref)
+end program test
+
+subroutine saxpy (nn, aa, xx, yy)
+ integer :: nn
+ real*4 :: aa, xx(nn), yy(nn)
+ integer i
+ !$acc routine
+
+ do i = 1, nn
+ yy(i) = yy(i) + aa * xx(i)
+ end do
+end subroutine saxpy
+
+! Abort if any element of A differs from the matching element of B by more
+! than an absolute tolerance of 0.0001.
+subroutine validate_results (n, a, b)
+  implicit none
+  integer :: n
+  real*4 :: a(n), b(n)
+  integer :: i
+
+  do i = 1, n
+     if (abs(a(i) - b(i)) > 0.0001) call abort
+  end do
+end subroutine validate_results
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/host_data-3.f b/libgomp/testsuite/libgomp.oacc-fortran/host_data-3.f
new file mode 100644
index 00000000000..05ed949ee5c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/host_data-3.f
@@ -0,0 +1,85 @@
+! Fixed-mode host_data interaction with CUDA BLAS.
+
+! { dg-do run { target openacc_nvidia_accel_selected } }
+! { dg-additional-options "-lcublas -Wall -Wextra" }
+
+      include "cublas-fixed.h"
+
+      integer, parameter :: N = 10
+      integer :: i
+      real*4 :: x_ref(N), y_ref(N), x(N), y(N), a
+
+      a = 2.0
+
+      do i = 1, N
+         x(i) = 4.0 * i
+         y(i) = 3.0
+         x_ref(i) = x(i)
+         y_ref(i) = y(i)
+      end do
+
+! Host reference result, kept in y_ref for all later comparisons.
+      call saxpy (N, a, x_ref, y_ref)
+
+!$acc data copyin (x) copy (y)
+!$acc host_data use_device (x, y)
+      call cublassaxpy(N, a, x, 1, y, 1)
+!$acc end host_data
+!$acc end data
+
+      call validate_results (N, y, y_ref)
+
+! create (x) allocates x on the device without copying the host values,
+! so x is filled on the device together with y before cuBLAS reads it.
+!$acc data create (x) copyout (y)
+!$acc parallel loop
+      do i = 1, N
+         x(i) = 4.0 * i
+         y(i) = 3.0
+      end do
+!$acc end parallel loop
+
+!$acc host_data use_device (x, y)
+      call cublassaxpy(N, a, x, 1, y, 1)
+!$acc end host_data
+!$acc end data
+
+      call validate_results (N, y, y_ref)
+
+      y(:) = 3.0
+
+!$acc data copyin (x) copyin (a) copy (y)
+!$acc parallel present (x) pcopy (y) present (a)
+      call saxpy (N, a, x, y)
+!$acc end parallel
+!$acc end data
+
+      call validate_results (N, y, y_ref)
+
+      y(:) = 3.0
+
+!$acc enter data copyin (x, a, y)
+!$acc parallel present (x) pcopy (y) present (a)
+      call saxpy (N, a, x, y)
+!$acc end parallel
+!$acc exit data delete (x, a) copyout (y)
+
+      call validate_results (N, y, y_ref)
+      end
+
+ subroutine saxpy (nn, aa, xx, yy)
+ integer :: nn
+ real*4 :: aa, xx(nn), yy(nn)
+ integer i
+!$acc routine
+
+ do i = 1, nn
+ yy(i) = yy(i) + aa * xx(i)
+ end do
+ end subroutine saxpy
+
+! Abort if any element of A differs from the matching element of B by
+! more than an absolute tolerance of 0.0001.
+      subroutine validate_results (n, a, b)
+      implicit none
+      integer :: n
+      real*4 :: a(n), b(n)
+      integer :: i
+
+      do i = 1, n
+         if (abs(a(i) - b(i)) > 0.0001) call abort
+      end do
+      end subroutine validate_results
+
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/host_data-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/host_data-4.f90
new file mode 100644
index 00000000000..6e379b5485b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/host_data-4.f90
@@ -0,0 +1,101 @@
+! Test host_data interoperability with CUDA blas using modules.
+
+! { dg-do run { target openacc_nvidia_accel_selected } }
+! { dg-additional-options "-lcublas -Wall -Wextra" }
+
+! Binding for the cuBLAS SAXPY entry point plus the host-side helpers used
+! by the test program: a reference SAXPY and a tolerance-based comparison.
+module cublas
+  implicit none
+
+  interface
+     subroutine cublassaxpy(N, alpha, x, incx, y, incy) bind(c, name="cublasSaxpy")
+       use iso_c_binding
+       integer(kind=c_int), value :: N
+       real(kind=c_float), value :: alpha
+       type(*), dimension(*) :: x
+       integer(kind=c_int), value :: incx
+       type(*), dimension(*) :: y
+       integer(kind=c_int), value :: incy
+     end subroutine cublassaxpy
+  end interface
+
+contains
+  ! yy = yy + aa * xx over nn elements; also callable from device code.
+  subroutine saxpy (nn, aa, xx, yy)
+    integer :: nn
+    real*4 :: aa, xx(nn), yy(nn)
+    integer i
+    !$acc routine
+
+    do i = 1, nn
+       yy(i) = yy(i) + aa * xx(i)
+    end do
+  end subroutine saxpy
+
+  ! Abort if any element of A differs from the matching element of B by
+  ! more than an absolute tolerance of 0.0001.
+  subroutine validate_results (n, a, b)
+    integer :: n
+    real*4 :: a(n), b(n)
+    integer :: i
+
+    do i = 1, n
+       if (abs(a(i) - b(i)) > 0.0001) call abort
+    end do
+  end subroutine validate_results
+end module cublas
+
+program test
+  use cublas
+  implicit none
+
+  integer, parameter :: N = 10
+  integer :: i
+  real*4 :: x_ref(N), y_ref(N), x(N), y(N), a
+
+  a = 2.0
+
+  do i = 1, N
+     x(i) = 4.0 * i
+     y(i) = 3.0
+     x_ref(i) = x(i)
+     y_ref(i) = y(i)
+  end do
+
+  ! Host reference result, kept in y_ref for all later comparisons.
+  call saxpy (N, a, x_ref, y_ref)
+
+  ! Case 1: x and y copied in; cuBLAS works on the device addresses.
+  !$acc data copyin (x) copy (y)
+  !$acc host_data use_device (x, y)
+  call cublassaxpy(N, a, x, 1, y, 1)
+  !$acc end host_data
+  !$acc end data
+
+  call validate_results (N, y, y_ref)
+
+  ! Case 2: create (x) allocates x on the device without copying the host
+  ! values, so both x and y are filled on the device before cuBLAS reads
+  ! them.  Leaving x unset would make the result depend on stale device
+  ! memory.
+  !$acc data create (x) copyout (y)
+  !$acc parallel loop
+  do i = 1, N
+     x(i) = 4.0 * i
+     y(i) = 3.0
+  end do
+  !$acc end parallel loop
+
+  !$acc host_data use_device (x, y)
+  call cublassaxpy(N, a, x, 1, y, 1)
+  !$acc end host_data
+  !$acc end data
+
+  call validate_results (N, y, y_ref)
+
+  y(:) = 3.0
+
+  ! Case 3: structured data region, saxpy called as an acc routine.
+  !$acc data copyin (x) copyin (a) copy (y)
+  !$acc parallel present (x) pcopy (y) present (a)
+  call saxpy (N, a, x, y)
+  !$acc end parallel
+  !$acc end data
+
+  call validate_results (N, y, y_ref)
+
+  y(:) = 3.0
+
+  ! Case 4: same computation with unstructured enter/exit data.
+  !$acc enter data copyin (x, a, y)
+  !$acc parallel present (x) pcopy (y) present (a)
+  call saxpy (N, a, x, y)
+  !$acc end parallel
+  !$acc exit data delete (x, a) copyout (y)
+
+  call validate_results (N, y, y_ref)
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction-2.f90
new file mode 100644
index 00000000000..fdf9409bde4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction-2.f90
@@ -0,0 +1,26 @@
+program foo
+  implicit none
+  integer :: vol = 0
+
+  ! bar runs a 2 x 2 loop nest, adding one per iteration.
+  call bar (vol)
+
+  if (vol .ne. 4) call abort
+end program foo
+
+! Add one to VOL for each iteration of a 2 x 2 loop nest inside a kernels
+! region; both loop levels carry a sum reduction on VOL.
+subroutine bar(vol)
+  implicit none
+
+  integer :: vol
+  integer :: j, k
+
+  !$acc kernels
+  !$acc loop reduction(+:vol)
+  do k = 1, 2
+     !$acc loop reduction(+:vol)
+     do j = 1, 2
+        vol = vol + 1
+     end do
+  end do
+  !$acc end kernels
+end subroutine bar
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction.f90
new file mode 100644
index 00000000000..912a22b5153
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction.f90
@@ -0,0 +1,21 @@
+program foo
+  implicit none
+  integer :: vol = 0
+
+  ! bar runs a two-iteration loop, adding one per iteration.
+  call bar (vol)
+
+  if (vol .ne. 2) call abort
+end program foo
+
+! Add one to VOL for each of two loop iterations inside a kernels region,
+! using a sum reduction on VOL.
+subroutine bar(vol)
+  implicit none
+  integer :: vol
+  integer :: j
+
+  !$acc kernels
+  !$acc loop reduction(+:vol)
+  do j = 1, 2
+     vol = vol + 1
+  end do
+  !$acc end kernels
+end subroutine bar
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-3.f90
new file mode 100644
index 00000000000..4ef99cd3475
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-3.f90
@@ -0,0 +1,30 @@
+! Test the collapse clause inside a kernels region.
+
+! { dg-do run }
+
+program collapse3
+ integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+ !$acc kernels
+ !$acc loop collapse(3)
+ do 115 k=1,3
+dokk: do kk=1,3
+ do kkk=1,3
+ a(k,kk,kkk) = 1
+ enddo
+ enddo dokk
+115 continue
+ !$acc end kernels
+ if (any(a(1:3,1:3,1:3).ne.1)) call abort
+
+ !$acc kernels
+ !$acc loop collapse(3)
+dol: do 120 l=1,3
+doll: do ll=1,3
+ do lll=1,3
+ a(l,ll,lll) = 2
+ enddo
+ enddo doll
+120 end do dol
+ !$acc end kernels
+ if (any(a(1:3,1:3,1:3).ne.2)) call abort
+end program collapse3
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-4.f90
new file mode 100644
index 00000000000..db382a7deb6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-4.f90
@@ -0,0 +1,41 @@
+! Test the collapse and reduction loop clauses inside a kernels region.
+
+! { dg-do run }
+
+program collapse4
+  integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+  logical :: l, r
+  l = .false.
+  r = .false.
+  a(:, :, :) = 0
+  b(:, :, :) = 0
+
+  ! Device version: L flags any iteration whose indices fall outside the
+  ! loop bounds; A counts the in-range iterations.
+  !$acc kernels
+  !$acc loop collapse (3) reduction (.or.:l)
+  do i = 2, 6
+    do j = -2, 4
+      do k = 13, 18
+        l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+        l = l.or.k.lt.13.or.k.gt.18
+        if (.not.l) a(i, j, k) = a(i, j, k) + 1
+      end do
+    end do
+  end do
+  !$acc end kernels
+
+  ! Host reference: same computation into R and B.  The guard uses R, not
+  ! the device result L, so the reference stays independent of what the
+  ! offloaded region produced.
+  do i = 2, 6
+    do j = -2, 4
+      do k = 13, 18
+        r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+        r = r.or.k.lt.13.or.k.gt.18
+        if (.not.r) b(i, j, k) = b(i, j, k) + 1
+      end do
+    end do
+  end do
+
+  if (l .neqv. r) call abort
+  do i = 2, 6
+    do j = -2, 4
+      do k = 13, 18
+        if (a(i, j, k) .ne. b(i, j, k)) call abort
+      end do
+    end do
+  end do
+end program collapse4
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-independent.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-independent.f90
new file mode 100644
index 00000000000..a881fbbe5cc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-independent.f90
@@ -0,0 +1,42 @@
+! { dg-do run }
+! { dg-additional-options "-cpp" }
+
+#define N (1024 * 512)
+
+! Fill a and b on the host, add them element-wise inside a kernels region
+! with an independent loop, then compare the result on the host.
+subroutine foo (a, b, c)
+  integer, parameter :: n = N
+  integer, dimension (n) :: a, b, c
+  integer :: i, ii
+
+  ! Host-side inputs.
+  do i = 1, n
+     a(i) = i * 2
+     b(i) = i * 4
+  end do
+
+  !$acc kernels copyin (a(1:n), b(1:n)) copyout (c(1:n))
+  !$acc loop independent
+  do ii = 1, n
+     c(ii) = a(ii) + b(ii)
+  end do
+  !$acc end kernels
+
+  ! Every copied-out sum must match the host inputs.
+  do i = 1, n
+     if (c(i) .ne. a(i) + b(i)) call abort
+  end do
+
+end subroutine foo
+
+program main
+  integer, parameter :: n = N
+  integer, dimension (n) :: a, b, c
+
+  ! foo initializes, computes and checks all three arrays itself.
+  call foo (a, b, c)
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-1.f90
new file mode 100644
index 00000000000..edcdc56ec1c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-1.f90
@@ -0,0 +1,66 @@
+! Exercise the auto, independent and seq loop clauses inside
+! kernels regions.
+
+! { dg-do run }
+
+program loops
+  integer, parameter :: n = 20
+  integer :: i, a(n), b(n)
+
+  a = 0
+  ! Reference values 1..n, built on the host.
+  b = [(i, i = 1, n)]
+
+  ! COPY with "loop auto"
+
+  !$acc kernels copy (a)
+  !$acc loop auto
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+  ! COPYOUT with "loop independent"
+
+  a = 0
+
+  !$acc kernels copyout (a)
+  !$acc loop independent
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+  ! COPYIN with "loop seq"; the device loop reads b
+
+  a = 0
+
+  !$acc kernels copyout (a) copyin (b)
+  !$acc loop seq
+  do i = 1, n
+     a(i) = b(i)
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+end program loops
+
+! Abort unless the first N elements of A and B are pairwise equal.
+subroutine check (a, b, n)
+  integer :: n, a(n), b(n)
+
+  if (any (a .ne. b)) call abort
+end subroutine check
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-map-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-map-1.f90
new file mode 100644
index 00000000000..704ff622854
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-map-1.f90
@@ -0,0 +1,116 @@
+! Test the copy, copyin, copyout, pcopy, pcopyin, pcopyout, and pcreate
+! clauses on kernels constructs.
+
+! { dg-do run }
+
+program map
+  integer, parameter :: n = 20
+  integer :: i, a(n), b(n), d(n)
+
+  a(:) = 0
+  b(:) = 0
+
+  ! COPY
+
+  !$acc kernels copy (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  ! Host reference values 1..n used by every later comparison.
+  do i = 1, n
+     b(i) = i
+  end do
+
+  call check (a, b, n)
+
+  ! COPYOUT
+
+  a(:) = 0
+
+  !$acc kernels copyout (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+  ! COPYIN
+  ! The device loop reads b so that the copyin clause is what supplies
+  ! the values that end up in a.
+
+  a(:) = 0
+
+  !$acc kernels copyout (a) copyin (b)
+  !$acc loop
+  do i = 1, n
+     a(i) = b(i)
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPY
+  ! Reset a first so a correct result left over from the previous section
+  ! cannot hide a failure here.
+
+  a(:) = 0
+
+  !$acc kernels pcopy (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYOUT
+
+  a(:) = 0
+
+  !$acc kernels pcopyout (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYIN
+  ! As for COPYIN, the device loop reads b.
+
+  a(:) = 0
+
+  !$acc kernels pcopyout (a) pcopyin (b)
+  !$acc loop
+  do i = 1, n
+     a(i) = b(i)
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_CREATE
+  ! d is device-only scratch: written and read inside the region.
+
+  a(:) = 0
+
+  !$acc kernels pcopyout (a) pcreate (d)
+  !$acc loop
+  do i = 1, n
+     d(i) = i
+     a(i) = d(i)
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+end program map
+
+! Abort unless the first N elements of A and B are pairwise equal.
+subroutine check (a, b, n)
+  integer :: n, a(n), b(n)
+
+  if (any (a .ne. b)) call abort
+end subroutine check
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95
new file mode 100644
index 00000000000..fe1088c0d04
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95
@@ -0,0 +1,36 @@
+! { dg-do run }
+
+program main
+  implicit none
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i, ii
+
+  ! Allocate all three arrays on the device; every value is produced there.
+  !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1))
+
+  !$acc kernels present (a(0:n-1))
+  do i = 0, n - 1
+     a(i) = i * 2
+  end do
+  !$acc end kernels
+
+  !$acc parallel present (b(0:n-1))
+  !$acc loop
+  do i = 0, n - 1
+     b(i) = i * 4
+  end do
+  !$acc end parallel
+
+  !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1))
+  do ii = 0, n - 1
+     c(ii) = a(ii) + b(ii)
+  end do
+  !$acc end kernels
+
+  !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1))
+
+  ! Check a and b against their expected values as well as c against
+  ! their sum: comparing only c with a + b would also accept arrays that
+  ! were never written (for example all zeros).
+  do i = 0, n - 1
+     if (a(i) .ne. i * 2) call abort
+     if (b(i) .ne. i * 4) call abort
+     if (c(i) .ne. a(i) + b(i)) call abort
+  end do
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90
new file mode 100644
index 00000000000..5119fabadaf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90
@@ -0,0 +1,23 @@
+! Test of gang-private variables declared on loop directive.
+
+! { dg-do run }
+
+program main
+  integer :: x, i, arr(32)
+
+  ! Seed each element with its own 1-based index.
+  arr = [(i, i = 1, 32)]
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32) private(x)
+  do i = 1, 32
+     ! x is private on the loop; each iteration adds twice its index.
+     x = i * 2
+     arr(i) = arr(i) + x
+  end do
+  !$acc end kernels
+
+  ! i + 2 * i is expected in every slot.
+  do i = 1, 32
+     if (arr(i) .ne. i * 3) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90
new file mode 100644
index 00000000000..5e46287497d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90
@@ -0,0 +1,28 @@
+! Test of gang-private variables declared on loop directive, with broadcasting
+! to partitioned workers.
+
+! { dg-do run }
+
+program main
+  integer :: x, i, j, arr(0:32*32)
+  ! Host initialisation of the 32*32 elements that are checked below.
+  do i = 0, 32*32 - 1
+    arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32) private(x)
+  do i = 0, 31
+    x = i * 2
+
+    !$acc loop worker(num:32)
+    do j = 0, 31
+      arr(i * 32 + j) = arr(i * 32 + j) + x
+    end do
+  end do
+  !$acc end kernels
+  ! Row i (32 consecutive elements) must have been bumped by 2*i.
+  do i = 0, 32 * 32 - 1
+    if (arr(i) /= i + (i / 32) * 2) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90
new file mode 100644
index 00000000000..5cc3378f459
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90
@@ -0,0 +1,28 @@
+! Test of gang-private variables declared on loop directive, with broadcasting
+! to partitioned vectors.
+
+! { dg-do run }
+
+program main
+  integer :: x, i, j, arr(0:32*32)
+  ! Host initialisation of the 32*32 elements that are checked below.
+  do i = 0, 32*32 - 1
+    arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32) private(x)
+  do i = 0, 31
+    x = i * 2
+
+    !$acc loop vector(length:32)
+    do j = 0, 31
+      arr(i * 32 + j) = arr(i * 32 + j) + x
+    end do
+  end do
+  !$acc end kernels
+  ! Row i (32 consecutive elements) must have been bumped by 2*i.
+  do i = 0, 32 * 32 - 1
+    if (arr(i) /= i + (i / 32) * 2) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90
new file mode 100644
index 00000000000..1e41555aa1c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90
@@ -0,0 +1,36 @@
+! Test of gang-private addressable variable declared on loop directive, with
+! broadcasting to partitioned vectors.
+
+! { dg-do run }
+
+program main
+  type vec3
+    integer x, y, z, attr(13)
+  end type vec3
+
+  integer i, j, arr(0:32*32)
+  type(vec3) pt
+  ! Host initialisation: arr(i) = i.
+  do i = 0, 32*32-1
+    arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32) private(pt)
+  do i = 0, 31
+    pt%x = i
+    pt%y = i * 2
+    pt%z = i * 4
+    pt%attr(5) = i * 6
+
+    !$acc loop vector(length:32)
+    do j = 0, 31
+      arr(i * 32 + j) = arr(i * 32 + j) + pt%x + pt%y + pt%z + pt%attr(5)
+    end do
+  end do
+  !$acc end kernels
+  ! pt contributes i + 2*i + 4*i + 6*i = 13*i to every element of row i.
+  do i = 0, 32 * 32 - 1
+    if (arr(i) .ne. i + (i / 32) * 13) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90
new file mode 100644
index 00000000000..3efd9fe473b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90
@@ -0,0 +1,41 @@
+! Test of vector-private variables declared on loop directive.
+
+! { dg-do run }
+
+program main
+  integer :: x, i, j, k, idx, arr(0:32*32*32)
+  ! Host initialisation: arr(i) = i.
+  do i = 0, 32*32*32 - 1
+    arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+    !$acc loop worker(num:8)
+    do j = 0, 31
+      !$acc loop vector(length:32) private(x)
+      do k = 0, 31
+        x = ieor(i, j * 3)
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+      end do
+      !$acc loop vector(length:32) private(x)
+      do k = 0, 31
+        x = ior(i, j * 5)
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+      end do
+    end do
+  end do
+  !$acc end kernels
+  ! Recompute both contributions on the host and compare element by element.
+  do i = 0, 31
+    do j = 0, 31
+      do k = 0, 31
+        idx = i * 1024 + j * 32 + k
+        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+          call abort
+        end if
+      end do
+    end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90
new file mode 100644
index 00000000000..1cf3b9818ef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90
@@ -0,0 +1,38 @@
+! Test of vector-private variables declared on loop directive. Array type.
+
+! { dg-do run }
+
+program main
+  integer :: i, j, k, idx, arr(0:32*32*32), pt(2)
+  ! Host initialisation: arr(i) = i.
+  do i = 0, 32*32*32-1
+    arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+    !$acc loop worker(num:8)
+    do j = 0, 31
+      !$acc loop vector(length:32) private(pt)
+      do k = 0, 31
+        pt(1) = ieor(i, j * 3)
+        pt(2) = ior(i, j * 5)
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
+      end do
+    end do
+  end do
+  !$acc end kernels
+  ! Recompute both pt contributions on the host and compare element by element.
+  do i = 0, 32 - 1
+    do j = 0, 32 - 1
+      do k = 0, 32 - 1
+        idx = i * 1024 + j * 32 + k
+        if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+          call abort
+        end if
+      end do
+    end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90
new file mode 100644
index 00000000000..55e98e05c03
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90
@@ -0,0 +1,27 @@
+! Test of worker-private variables declared on a loop directive.
+
+! { dg-do run }
+
+program main
+  integer :: x, i, j, arr(0:32*32)
+  common x
+  ! Host initialisation: arr(i) = i.
+  do i = 0, 32*32 - 1
+    arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32) private(x)
+  do i = 0, 31
+    !$acc loop worker(num:8) private(x)
+    do j = 0, 31
+      x = ieor(i, j * 3)
+      arr(i * 32 + j) = arr(i * 32 + j) + x
+    end do
+  end do
+  !$acc end kernels
+  ! Element i belongs to row i/32, column mod(i,32).
+  do i = 0, 32 * 32 - 1
+    if (arr(i) /= i + ieor(i / 32, mod(i, 32) * 3)) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90
new file mode 100644
index 00000000000..7924e7f13a6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90
@@ -0,0 +1,36 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode.
+
+! { dg-do run }
+
+program main
+  integer :: x, i, j, k, idx, arr(0:32*32*32)
+  ! Host initialisation: arr(i) = i.
+  do i = 0, 32*32*32 - 1
+    arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+    !$acc loop worker(num:8) private(x)
+    do j = 0, 31
+      x = ieor(i, j * 3)
+
+      !$acc loop vector(length:32)
+      do k = 0, 31
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+      end do
+    end do
+  end do
+  !$acc end kernels
+  ! Recompute the expected value on the host for every (i, j, k).
+  do i = 0, 31
+    do j = 0, 31
+      do k = 0, 31
+        idx = i * 1024 + j * 32 + k
+        if (arr(idx) /= idx + ieor(i, j * 3) * k) call abort
+      end do
+    end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90
new file mode 100644
index 00000000000..598c6fd7226
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90
@@ -0,0 +1,48 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode. Back-to-back worker loops.
+
+! { dg-do run }
+
+program main
+  integer :: x, i, j, k, idx, arr(0:32*32*32)
+  ! Host initialisation: arr(i) = i.
+  do i = 0, 32*32*32 - 1
+    arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+    !$acc loop worker(num:8) private(x)
+    do j = 0, 31
+      x = ieor(i, j * 3)
+
+      !$acc loop vector(length:32)
+      do k = 0, 31
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+      end do
+    end do
+
+    !$acc loop worker(num:8) private(x)
+    do j = 0, 31
+      x = ior(i, j * 5)
+
+      !$acc loop vector(length:32)
+      do k = 0, 31
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+      end do
+    end do
+  end do
+  !$acc end kernels
+  ! Both worker loops contribute; recompute the sum on the host.
+  do i = 0, 31
+    do j = 0, 31
+      do k = 0, 31
+        idx = i * 1024 + j * 32 + k
+        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+          call abort
+        end if
+      end do
+    end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90
new file mode 100644
index 00000000000..8512d7c3966
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90
@@ -0,0 +1,45 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode. Successive vector loops.
+
+! { dg-do run }
+
+program main
+  integer :: x, i, j, k, idx, arr(0:32*32*32)
+  ! Host initialisation: arr(i) = i.
+  do i = 0, 32*32*32 - 1
+    arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+    !$acc loop worker(num:8) private(x)
+    do j = 0, 31
+      x = ieor(i, j * 3)
+
+      !$acc loop vector(length:32)
+      do k = 0, 31
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+      end do
+
+      x = ior(i, j * 5)
+
+      !$acc loop vector(length:32)
+      do k = 0, 31
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+      end do
+    end do
+  end do
+  !$acc end kernels
+  ! Both vector loops contribute; recompute the sum on the host.
+  do i = 0, 31
+    do j = 0, 31
+      do k = 0, 31
+        idx = i * 1024 + j * 32 + k
+        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+          call abort
+        end if
+      end do
+    end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90
new file mode 100644
index 00000000000..c3ebf744578
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90
@@ -0,0 +1,48 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode. Addressable worker variable.
+
+! { dg-do run }
+
+program main
+  integer :: i, j, k, idx, arr(0:32*32*32)
+  integer, target :: x
+  integer, pointer :: p
+  ! Host initialisation: arr(i) = i.
+  do i = 0, 32*32*32 - 1
+    arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+    !$acc loop worker(num:8) private(x, p)
+    do j = 0, 31
+      p => x
+      x = ieor(i, j * 3)
+
+      !$acc loop vector(length:32)
+      do k = 0, 31
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+      end do
+
+      p = ior(i, j * 5)
+
+      !$acc loop vector(length:32)
+      do k = 0, 31
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+      end do
+    end do
+  end do
+  !$acc end kernels
+  ! The second vector loop reads x after it was updated through p.
+  do i = 0, 31
+    do j = 0, 31
+      do k = 0, 31
+        idx = i * 1024 + j * 32 + k
+        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+          call abort
+        end if
+      end do
+    end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90
new file mode 100644
index 00000000000..2a8a5905895
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90
@@ -0,0 +1,49 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode. Aggregate worker variable.
+
+! { dg-do run }
+
+program main
+  type vec2
+    integer x, y
+  end type vec2
+
+  integer :: i, j, k, idx, arr(0:32*32*32)
+  type(vec2) :: pt
+  ! Host initialisation: arr(i) = i.
+  do i = 0, 32*32*32 - 1
+    arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+    !$acc loop worker(num:8) private(pt)
+    do j = 0, 31
+      pt%x = ieor(i, j * 3)
+      pt%y = ior(i, j * 5)
+
+      !$acc loop vector(length:32)
+      do k = 0, 31
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%x * k
+      end do
+
+      !$acc loop vector(length:32)
+      do k = 0, 31
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%y * k
+      end do
+    end do
+  end do
+  !$acc end kernels
+  ! One vector loop adds pt%x * k, the other pt%y * k; recompute on the host.
+  do i = 0, 31
+    do j = 0, 31
+      do k = 0, 31
+        idx = i * 1024 + j * 32 + k
+        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+          call abort
+        end if
+      end do
+    end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90
new file mode 100644
index 00000000000..7dd1d3da7be
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90
@@ -0,0 +1,44 @@
+! Test of worker-private variables declared on loop directive, broadcasting
+! to vector-partitioned mode. Array worker variable.
+
+! { dg-do run }
+
+program main
+  integer :: i, j, k, idx, arr(0:32*32*32), pt(2)
+  ! Host initialisation: arr(i) = i.
+  do i = 0, 32*32*32 - 1
+    arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+    !$acc loop worker(num:8) private(pt)
+    do j = 0, 31
+      pt(1) = ieor(i, j * 3)
+      pt(2) = ior(i, j * 5)
+
+      !$acc loop vector(length:32)
+      do k = 0, 31
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
+      end do
+
+      !$acc loop vector(length:32)
+      do k = 0, 31
+        arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
+      end do
+    end do
+  end do
+  !$acc end kernels
+  ! One vector loop adds pt(1) * k, the other pt(2) * k; recompute on the host.
+  do i = 0, 31
+    do j = 0, 31
+      do k = 0, 31
+        idx = i * 1024 + j * 32 + k
+        if (arr(idx) /= idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+          call abort
+        end if
+      end do
+    end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90
new file mode 100644
index 00000000000..c7a52ed98f6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90
@@ -0,0 +1,19 @@
+! Test a simple acc loop reduction inside a kernels region.
+
+! { dg-do run }
+
+program reduction
+  integer, parameter :: n = 20
+  integer :: i, red
+
+  red = 0
+
+  !$acc kernels
+  !$acc loop reduction (+:red)
+  do i = 1, n
+    red = red + 1
+  end do
+  !$acc end kernels
+  ! One increment per iteration, so the sum must equal the trip count.
+  if (red /= n) call abort
+end program reduction
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90
new file mode 100644
index 00000000000..e307dfde374
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-12.f90
@@ -0,0 +1,27 @@
+! { dg-do run }
+! { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "-O0" "-O1" } { "" } }
+
+program main
+  use openacc
+  implicit none
+  ! Trip count of the asynchronous loop, as a named constant.
+  integer, parameter :: n = 1000000
+  integer :: i, j
+
+  j = 0
+
+  !$acc parallel async (0) copy (j)
+  do i = 1, n
+    j = j + 1
+  end do
+  !$acc end parallel
+  ! Enqueue on queue 1 a wait for the work submitted on queue 0.
+  call acc_wait_async (0, 1)
+  ! The test expects both queues to report completion at this point.
+  if (acc_async_test (0) .neqv. .TRUE.) call abort
+
+  if (acc_async_test (1) .neqv. .TRUE.) call abort
+
+  call acc_wait (1)
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90
new file mode 100644
index 00000000000..6d713b1cd95
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90
@@ -0,0 +1,34 @@
+! { dg-do run }
+! { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } { "-O0" "-O1" } { "" } }
+
+program main
+  use openacc
+  implicit none
+
+  integer :: i, j
+  integer, parameter :: N = 1000000
+  integer, parameter :: nprocs = 2
+  integer :: k(nprocs)
+
+  k(:) = 0
+  ! Launch one asynchronous parallel region per queue 1..nprocs.
+  !$acc data copy (k(1:nprocs))
+  do j = 1, nprocs
+    !$acc parallel async (j)
+    do i = 1, N
+      k(j) = k(j) + 1
+    end do
+    !$acc end parallel
+  end do
+  !$acc end data
+  ! The test expects both queues to report completion here.
+  if (.not. acc_async_test (1)) call abort
+  if (.not. acc_async_test (2)) call abort
+
+  call acc_wait_all_async (nprocs + 1)
+
+  if (.not. acc_async_test (nprocs + 1)) call abort
+
+  call acc_wait_all ()
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90
new file mode 100644
index 00000000000..eb0206ccce1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-14.f90
@@ -0,0 +1,82 @@
+! Exercise the data movement runtime library functions on non-shared memory
+! targets.
+
+! { dg-do run { target openacc_nvidia_accel_selected } }
+
+program main
+  use openacc
+  implicit none
+
+  integer, parameter :: N = 256
+  integer, allocatable :: h(:)
+  integer :: i
+
+  allocate (h(N))
+
+  do i = 1, N
+    h(i) = i
+  end do
+  ! Round trip 1: present_or_copyin followed by copyout.
+  call acc_present_or_copyin (h)
+
+  if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+  call acc_copyout (h)
+
+  if (acc_is_present (h) .neqv. .FALSE.) call abort
+
+  do i = 1, N
+    if (h(i) /= i) call abort
+  end do
+
+  do i = 1, N
+    h(i) = i + i
+  end do
+  ! Round trip 2: pcopyin with an explicit byte length, then copyout.
+  call acc_pcopyin (h, sizeof (h))
+
+  if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+  call acc_copyout (h)
+
+  if (acc_is_present (h) .neqv. .FALSE.) call abort
+
+  do i = 1, N
+    if (h(i) /= i + i) call abort
+  end do
+  ! Create the device copy, fill it in a compute region, then copy it out.
+  call acc_create (h)
+
+  if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+  !$acc parallel loop
+  do i = 1, N
+    h(i) = i
+  end do
+  !$acc end parallel loop
+
+  call acc_copyout (h)
+
+  if (acc_is_present (h) .neqv. .FALSE.) call abort
+
+  do i = 1, N
+    if (h(i) /= i) call abort
+  end do
+  ! present_or_create / pcreate must map h; delete must unmap it again.
+  call acc_present_or_create (h, sizeof (h))
+
+  if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+  call acc_delete (h)
+
+  if (acc_is_present (h) .neqv. .FALSE.) call abort
+
+  call acc_pcreate (h)
+
+  if (acc_is_present (h) .neqv. .TRUE.) call abort
+
+  call acc_delete (h)
+
+  if (acc_is_present (h) .neqv. .FALSE.) call abort
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-15.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-15.f90
new file mode 100644
index 00000000000..3a834dbb22a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-15.f90
@@ -0,0 +1,52 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+program main
+  use openacc
+  implicit none
+
+  integer, parameter :: N = 256
+  integer, allocatable :: h(:)
+  integer :: i
+
+  allocate (h(N))
+
+  do i = 1, N
+    h(i) = i
+  end do
+
+  call acc_copyin (h)
+  ! Change the host copy, then push the new values to the device.
+  do i = 1, N
+    h(i) = i + i
+  end do
+
+  call acc_update_device (h, sizeof (h))
+
+  if (.not. acc_is_present (h)) call abort
+  ! Clear the host copy so the check below only sees copied-out data.
+  h(:) = 0
+
+  call acc_copyout (h, sizeof (h))
+
+  do i = 1, N
+    if (h(i) /= i + i) call abort
+  end do
+
+  call acc_copyin (h, sizeof (h))
+  ! Clear the host copy again, then refresh it from the device.
+  h(:) = 0
+
+  call acc_update_self (h, sizeof (h))
+
+  if (.not. acc_is_present (h)) call abort
+
+  do i = 1, N
+    if (h(i) /= i + i) call abort
+  end do
+
+  call acc_delete (h)
+
+  if (acc_is_present (h)) call abort
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/parallel-loop-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/parallel-loop-1.f90
new file mode 100644
index 00000000000..754b833a4ba
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/parallel-loop-1.f90
@@ -0,0 +1,77 @@
+! Exercise the auto, independent, seq and tile loop clauses inside
+! parallel regions.
+
+! { dg-do run }
+
+program loops
+  integer, parameter :: n = 20
+  integer :: i, a(n), b(n)
+
+  a(:) = 0
+  b(:) = 0
+
+  ! COPY
+
+  !$acc parallel copy (a)
+  !$acc loop auto
+  do i = 1, n
+    a(i) = i
+  end do
+  !$acc end parallel
+
+  do i = 1, n
+    b(i) = i
+  end do
+
+  call check (a, b, n)
+
+  ! COPYOUT
+
+  a(:) = 0
+
+  !$acc parallel copyout (a)
+  !$acc loop independent
+  do i = 1, n
+    a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! COPYIN
+
+  a(:) = 0
+
+  !$acc parallel copyout (a) copyin (b)
+  !$acc loop seq
+  do i = 1, n
+    a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPY
+
+  ! Reset a so stale values from the previous section cannot pass the check.
+  a(:) = 0
+
+  !$acc parallel pcopy (a)
+  !$acc loop tile (*)
+  do i = 1, n
+    a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+end program loops
+
+subroutine check (a, b, n)
+  ! Abort unless a and b agree in all n elements.
+  integer :: n, a(n), b(n)
+  integer :: i
+  do i = 1, n
+    if (a(i) /= b(i)) call abort
+  end do
+end subroutine check
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reference-reductions.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reference-reductions.f90
new file mode 100644
index 00000000000..a684d07977c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reference-reductions.f90
@@ -0,0 +1,38 @@
+! Test reductions on dummy arguments inside modules.
+
+! { dg-do run }
+
+module prm
+  implicit none
+
+contains
+
+  subroutine param_reduction(var)
+    implicit none
+    integer(kind=8) :: var
+    integer :: j, k
+    ! Nested gang/vector reduction: var grows by one per iteration (10 x 100).
+    !$acc parallel copy(var)
+    !$acc loop reduction(+ : var) gang
+    do k = 1, 10
+      !$acc loop vector reduction(+ : var)
+      do j = 1, 100
+        var = var + 1.0
+      end do
+    end do
+    !$acc end parallel
+  end subroutine param_reduction
+
+end module prm
+
+program test
+  use prm
+  implicit none
+
+  integer(8) :: r
+
+  r = 10.0
+  call param_reduction (r)
+  ! 10 initially, plus one per iteration of the 10 x 100 loop nest.
+  if (r /= 1010) call abort ()
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/vector-routine.f90 b/libgomp/testsuite/libgomp.oacc-fortran/vector-routine.f90
new file mode 100644
index 00000000000..1edcee48677
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/vector-routine.f90
@@ -0,0 +1,41 @@
+! { dg-do run }
+
+module param
+ integer, parameter :: N = 32 ! extent of the arrays declared as a(N) in program main and its internal routine
+end module param
+
+program main
+  use param
+  integer :: i
+  integer :: a(N)
+  ! Host initialisation: a(i) = i.
+  do i = 1, N
+    a(i) = i
+  end do
+
+  !$acc parallel copy (a)
+  !$acc loop worker
+  do i = 1, N
+    call vector (a)
+  end do
+  !$acc end parallel
+  ! The routine subtracts every element from itself, so all must be zero.
+  do i = 1, N
+    if (a(i) /= 0) call abort
+  end do
+
+contains
+
+  subroutine vector (a)
+    !$acc routine vector
+    integer, intent (inout) :: a(N)
+    integer :: i
+
+    !$acc loop vector
+    do i = 1, N
+      a(i) = a(i) - a(i)
+    end do
+
+  end subroutine vector
+
+end program main