summaryrefslogtreecommitdiff
path: root/polly/test
diff options
context:
space:
mode:
authorMichael Kruse <llvm-project@meinersbur.de>2023-01-25 14:03:57 -0600
committerMichael Kruse <llvm-project@meinersbur.de>2023-03-08 17:33:04 -0600
commit19afbfe33156d211fa959dadeea46cd17b9c723c (patch)
treedb53498143b16127c6c0e22a671a8d11eece4152 /polly/test
parent115c7beda74f3cfaf83b91d14bc97a39bff4cf19 (diff)
downloadllvm-19afbfe33156d211fa959dadeea46cd17b9c723c.tar.gz
[Polly] Remove Polly-ACC.
Polly-ACC is unmaintained and since it has never been ported to the NPM pipeline, since D136621 it is not even accessible anymore without manually specifying the passes on the `opt` command line. Since there is no plan to put it to a maintainable state, remove it from Polly. Reviewed By: grosser Differential Revision: https://reviews.llvm.org/D142580
Diffstat (limited to 'polly/test')
-rw-r--r--polly/test/GPGPU/Inputs/libdevice-functions-copied-into-kernel_libdevice.ll9
-rw-r--r--polly/test/GPGPU/add-scalars-in-scop-to-kills.ll71
-rw-r--r--polly/test/GPGPU/align-params-in-schedule.ll53
-rw-r--r--polly/test/GPGPU/array-with-elem-type-smaller-than-byte.ll50
-rw-r--r--polly/test/GPGPU/bounds-construction-with-ignore-param-bounds.ll55
-rw-r--r--polly/test/GPGPU/cuda-annotations.ll37
-rw-r--r--polly/test/GPGPU/cuda-managed-memory-simple.ll118
-rw-r--r--polly/test/GPGPU/debug-metadata-leak.ll104
-rw-r--r--polly/test/GPGPU/double-parallel-loop.ll254
-rw-r--r--polly/test/GPGPU/failing-invariant-load-handling.ll57
-rw-r--r--polly/test/GPGPU/failing-invariant-load-hoisting.ll41
-rw-r--r--polly/test/GPGPU/host-control-flow.ll176
-rw-r--r--polly/test/GPGPU/host-statement.ll204
-rw-r--r--polly/test/GPGPU/ignore-parameter-bounds.ll41
-rw-r--r--polly/test/GPGPU/intrinsic-copied-into-kernel.ll76
-rw-r--r--polly/test/GPGPU/invalid-kernel-assert-verifymodule.ll47
-rw-r--r--polly/test/GPGPU/invalid-kernel.ll73
-rw-r--r--polly/test/GPGPU/invariant-load-array-access.ll70
-rw-r--r--polly/test/GPGPU/invariant-load-escaping-values.ll30
-rw-r--r--polly/test/GPGPU/invariant-load-hoisting-of-array.ll101
-rw-r--r--polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll47
-rw-r--r--polly/test/GPGPU/invariant-load-hoisting-with-variable-bounds.ll62
-rw-r--r--polly/test/GPGPU/invariant-load-hoisting-with-variable-lower-bound.ll56
-rw-r--r--polly/test/GPGPU/invariant-load-hoisting-with-variable-upper-bound.ll57
-rw-r--r--polly/test/GPGPU/invariant-load-hoisting.ll116
-rw-r--r--polly/test/GPGPU/invariant-load-of-scalar.ll81
-rw-r--r--polly/test/GPGPU/kernel-params-only-some-arrays.ll106
-rw-r--r--polly/test/GPGPU/kernel-params-scop-parameter.ll38
-rw-r--r--polly/test/GPGPU/kernels-names-across-scops-funcs.ll124
-rw-r--r--polly/test/GPGPU/libdevice-functions-copied-into-kernel.ll89
-rw-r--r--polly/test/GPGPU/live-range-reordering-with-privatization.ll78
-rw-r--r--polly/test/GPGPU/loops-outside-scop.ll67
-rw-r--r--polly/test/GPGPU/managed-memory-rewrite-alloca.ll60
-rw-r--r--polly/test/GPGPU/managed-memory-rewrite-malloc-free-inside-constexpr.ll93
-rw-r--r--polly/test/GPGPU/managed-memory-rewrite-malloc-free.ll91
-rw-r--r--polly/test/GPGPU/memory-only-referenced-from-access.ll44
-rw-r--r--polly/test/GPGPU/mostly-sequential.ll105
-rw-r--r--polly/test/GPGPU/non-read-only-scalars.ll168
-rw-r--r--polly/test/GPGPU/non-zero-array-offset.ll116
-rw-r--r--polly/test/GPGPU/only-part-of-array-modified.ll40
-rw-r--r--polly/test/GPGPU/parametric-loop-bound.ll62
-rw-r--r--polly/test/GPGPU/partial_writes.ll49
-rw-r--r--polly/test/GPGPU/partial_writes___%bb2---%bb14.jscop47
-rw-r--r--polly/test/GPGPU/phi-nodes-in-kernel.ll86
-rw-r--r--polly/test/GPGPU/private-memory.ll82
-rw-r--r--polly/test/GPGPU/privatization-simple.ll58
-rw-r--r--polly/test/GPGPU/privatization.ll62
-rw-r--r--polly/test/GPGPU/region-stmt.ll81
-rw-r--r--polly/test/GPGPU/remove-dead-instructions-in-stmt-2.ll39
-rw-r--r--polly/test/GPGPU/remove-dead-instructions-in-stmt.ll62
-rw-r--r--polly/test/GPGPU/run-time-check.ll58
-rw-r--r--polly/test/GPGPU/scalar-param-and-value-32-bit.ll41
-rw-r--r--polly/test/GPGPU/scalar-param-and-value-use.ll67
-rw-r--r--polly/test/GPGPU/scalar-parameter-fp128.ll39
-rw-r--r--polly/test/GPGPU/scalar-parameter-half.ll35
-rw-r--r--polly/test/GPGPU/scalar-parameter-i120.ll39
-rw-r--r--polly/test/GPGPU/scalar-parameter-i128.ll34
-rw-r--r--polly/test/GPGPU/scalar-parameter-i3000.ll38
-rw-r--r--polly/test/GPGPU/scalar-parameter-i80.ll39
-rw-r--r--polly/test/GPGPU/scalar-parameter-ppc_fp128.ll38
-rw-r--r--polly/test/GPGPU/scalar-parameter-x86_fp80.ll39
-rw-r--r--polly/test/GPGPU/scalar-parameter.ll411
-rw-r--r--polly/test/GPGPU/scalar-writes-in-scop-requires-abort.ll65
-rw-r--r--polly/test/GPGPU/scheduler-timeout.ll174
-rw-r--r--polly/test/GPGPU/shared-memory-scalar.ll65
-rw-r--r--polly/test/GPGPU/shared-memory-two-dimensional.ll103
-rw-r--r--polly/test/GPGPU/shared-memory.ll83
-rw-r--r--polly/test/GPGPU/simple-managed-memory-rewrite.ll71
-rw-r--r--polly/test/GPGPU/size-cast.ll63
-rw-r--r--polly/test/GPGPU/spir-codegen.ll118
-rw-r--r--polly/test/GPGPU/spir-typesize.ll90
-rw-r--r--polly/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll82
-rw-r--r--polly/test/GPGPU/untouched-arrays.ll270
-rw-r--r--polly/test/Unit/lit.site.cfg.in1
-rw-r--r--polly/test/lit.cfg2
-rw-r--r--polly/test/lit.site.cfg.in4
76 files changed, 0 insertions, 6002 deletions
diff --git a/polly/test/GPGPU/Inputs/libdevice-functions-copied-into-kernel_libdevice.ll b/polly/test/GPGPU/Inputs/libdevice-functions-copied-into-kernel_libdevice.ll
deleted file mode 100644
index 3f4c4a0aa610..000000000000
--- a/polly/test/GPGPU/Inputs/libdevice-functions-copied-into-kernel_libdevice.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-define float @__nv_expf(float %a) {
- ret float %a
-}
-define float @__nv_cosf(float %a) {
- ret float %a
-}
-define float @__nv_logf(float %a) {
- ret float %a
-}
diff --git a/polly/test/GPGPU/add-scalars-in-scop-to-kills.ll b/polly/test/GPGPU/add-scalars-in-scop-to-kills.ll
deleted file mode 100644
index 64b4cc4aa100..000000000000
--- a/polly/test/GPGPU/add-scalars-in-scop-to-kills.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=SCOP
-; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s -check-prefix=HOST-IR
-
-; REQUIRES: pollyacc
-
-; Check that we detect a scop.
-; SCOP: Function: checkScalarKill
-; SCOP-NEXT: Region: %XLoopInit---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-
-; Check that we have a scalar that is not a phi node in the scop.
-; SCOP: i32 MemRef_x_0; // Element size 4
-
-; Check that kernel launch is generated in host IR.
-; the declare would not be generated unless a call to a kernel exists.
-; HOST-IR: declare void @polly_launchKernel(ptr, i32, i32, i32, i32, i32, ptr)
-
-; Check that we add variables that are local to a scop into the kills that we
-; pass to PPCG. This should enable PPCG to codegen this example.
-; void checkScalarKill(int A[], int B[], int C[], const int control1, int control2) {
-; int x;
-; #pragma scop
-; for(int i = 0; i < 1000; i++) {
-; XLoopInit: x = 0;
-;
-; if (control1 > 2)
-; C1Add: x += 10;
-; if (control2 > 3)
-; C2Add: x += A[i];
-;
-; BLoopAccumX: B[i] += x;
-; }
-;
-; #pragma endscop
-; }
-; ModuleID = 'test.ll'
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @checkScalarKill(ptr %A, ptr %B, ptr %C, i32 %control1, i32 %control2) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- br label %XLoopInit
-
-XLoopInit: ; preds = %entry.split, %BLoopAccumX
- %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %BLoopAccumX ]
- %cmp1 = icmp sgt i32 %control1, 2
- %x.0 = select i1 %cmp1, i32 10, i32 0
- %cmp2 = icmp sgt i32 %control2, 3
- br i1 %cmp2, label %C2Add, label %BLoopAccumX
-
-C2Add: ; preds = %XLoopInit
- %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
- %tmp6 = load i32, ptr %arrayidx, align 4
- %add4 = add nsw i32 %tmp6, %x.0
- br label %BLoopAccumX
-
-BLoopAccumX: ; preds = %XLoopInit, %C2Add
- %x.1 = phi i32 [ %add4, %C2Add ], [ %x.0, %XLoopInit ]
- %arrayidx7 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
- %tmp11 = load i32, ptr %arrayidx7, align 4
- %add8 = add nsw i32 %tmp11, %x.1
- store i32 %add8, ptr %arrayidx7, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp ne i64 %indvars.iv.next, 1000
- br i1 %exitcond, label %XLoopInit, label %for.end
-
-for.end: ; preds = %BLoopAccumX
- ret void
-}
diff --git a/polly/test/GPGPU/align-params-in-schedule.ll b/polly/test/GPGPU/align-params-in-schedule.ll
deleted file mode 100644
index fa9a8f3eb4e5..000000000000
--- a/polly/test/GPGPU/align-params-in-schedule.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: opt %loadPolly -S -polly-process-unprofitable -polly-codegen-ppcg \
-; RUN: -polly-invariant-load-hoisting -polly-ignore-parameter-bounds < %s | \
-; RUN: FileCheck %s
-
-; REQUIRES: pollyacc
-
-; CHECK: polly_launchKernel
-
-; Verify that this program compiles. At some point, this compilation crashed
-; due to insufficient parameters being available.
-
-source_filename = "bugpoint-output-4d01492.bc"
-target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-%struct.barney = type { ptr, i64, i64, [2 x %struct.widget] }
-%struct.widget = type { i64, i64, i64 }
-
-@global = external unnamed_addr global %struct.barney, align 32
-
-; Function Attrs: nounwind uwtable
-define void @wobble(ptr noalias %arg) #0 {
-bb:
- %tmp = load i32, ptr %arg, align 4
- br label %bb1
-
-bb1: ; preds = %bb13, %bb
- %tmp2 = phi i32 [ %tmp15, %bb13 ], [ 1, %bb ]
- br label %bb3
-
-bb3: ; preds = %bb3, %bb1
- %tmp4 = load ptr, ptr @global, align 32
- %tmp5 = sext i32 %tmp2 to i64
- %tmp6 = load i64, ptr getelementptr inbounds (%struct.barney, ptr @global, i64 0, i32 3, i64 1, i32 0), align 8
- %tmp7 = mul i64 %tmp6, %tmp5
- %tmp8 = add i64 %tmp7, 0
- %tmp9 = load i64, ptr getelementptr inbounds (%struct.barney, ptr @global, i64 0, i32 1), align 8
- %tmp10 = add i64 %tmp8, %tmp9
- %tmp11 = getelementptr i32, ptr %tmp4, i64 %tmp10
- store i32 undef, ptr %tmp11, align 4
- %tmp12 = icmp eq i32 0, 0
- br i1 %tmp12, label %bb13, label %bb3
-
-bb13: ; preds = %bb3
- %tmp14 = icmp eq i32 %tmp2, %tmp
- %tmp15 = add i32 %tmp2, 1
- br i1 %tmp14, label %bb16, label %bb1
-
-bb16: ; preds = %bb13
- ret void
-}
-
-attributes #0 = { nounwind uwtable }
diff --git a/polly/test/GPGPU/array-with-elem-type-smaller-than-byte.ll b/polly/test/GPGPU/array-with-elem-type-smaller-than-byte.ll
deleted file mode 100644
index 12b872d55192..000000000000
--- a/polly/test/GPGPU/array-with-elem-type-smaller-than-byte.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: opt %loadPolly -S -polly-codegen-ppcg \
-; RUN: -polly-use-llvm-names < %s
-; ModuleID = 'test/GPGPU/zero-size-array.ll'
-
-; REQUIRES: pollyacc
-
-target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-
-; We used to divide the element size by 8 to arrive at the 'actual' size
-; of an array element. This used to cause arrays that have an element size
-; of less than 8 to collapse to size 0. This test makes sure that it does
-; not happen anymore.
-
-; f(int *niters_ptr, int *arr[0]) {
-; const int inters = *niters_ptr;
-; for(int i = 0; i < niters; i++) {
-; arr[0][i + 1] = 0
-; }
-; }
-
-; Function Attrs: nounwind uwtable
-define void @f(ptr noalias %niters.ptr, ptr noalias %arr) #0 {
-entry:
- %niters = load i32, ptr %niters.ptr, align 4
- br label %loop.body
-
-loop.body: ; preds = %loop.body, %entry
- %indvar = phi i32 [ %indvar.next, %loop.body ], [ 1, %entry ]
- %indvar.sext = sext i32 %indvar to i64
- %arr.slot = getelementptr [0 x i32], ptr %arr, i64 0, i64 %indvar.sext
- store i32 0, ptr %arr.slot, align 4
- %tmp8 = icmp eq i32 %indvar, %niters
- %indvar.next = add i32 %indvar, 1
- br i1 %tmp8, label %loop.exit, label %loop.body
-
-loop.exit: ; preds = %loop.body
- %tmp10 = icmp sgt i32 undef, 0
- br label %auxiliary.loop
-
-auxiliary.loop: ; preds = %"101", %loop.exit
- %tmp11 = phi i1 [ %tmp10, %loop.exit ], [ undef, %auxiliary.loop ]
- br i1 undef, label %auxiliary.loop, label %exit
-
-exit: ; preds = %auxiliary.loop
- ret void
-}
-
-attributes #0 = { nounwind uwtable }
diff --git a/polly/test/GPGPU/bounds-construction-with-ignore-param-bounds.ll b/polly/test/GPGPU/bounds-construction-with-ignore-param-bounds.ll
deleted file mode 100644
index a60744289885..000000000000
--- a/polly/test/GPGPU/bounds-construction-with-ignore-param-bounds.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: opt %loadPolly -S -polly-codegen-ppcg \
-; RUN: -polly-ignore-parameter-bounds \
-; RUN: -polly-invariant-load-hoisting < %s| FileCheck %s -check-prefix=HOST-IR
-;
-; REQUIRES: pollyacc
-
-; When we have `-polly-ignore-parameter-bounds`, `Scop::Context` does not contain
-; all the parameters present in the program.
-;
-; The construction of the `isl_multi_pw_aff` requires all the indivisual `pw_aff`
-; to have the same parameter dimensions. To achieve this, we used to realign
-; every `pw_aff` with `Scop::Context`. However, in conjunction with
-; `-polly-ignore-parameter-bounds`, this is now incorrect, since `Scop::Context`
-; does not contain all parameters.
-;
-; We check that Polly does the right thing in this case and sets up the parameter
-; dimensions correctly.
-
-
-; Check that kernel launch is generated in host IR.
-; the declare would not be generated unless a call to a kernel exists.
-; HOST-IR: declare void @polly_launchKernel(ptr, i32, i32, i32, i32, i32, ptr)
-; ModuleID = 'test/GPGPU/bounds-construction-with-ignore-param-bounds.ll'
-
-; C pseudocode
-; ------------
-; void f(int *arr, long niters, long stride) {
-; for(int i = 0; i < niters; i++) {
-; arr[i * stride] = 1;
-; }
-; }
-
-target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Function Attrs: nounwind uwtable
-define void @f(ptr %arr, i64 %niters, i64 %stride) unnamed_addr #1 {
-entry:
- br label %loop
-
-loop: ; preds = %loop, %entry
- %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
- %idx = mul nuw nsw i64 %indvar, %stride
- %slot = getelementptr i32, ptr %arr, i64 %idx
- store i32 1, ptr %slot, align 4
- %indvar.next = add nuw nsw i64 %indvar, 1
- %check = icmp sgt i64 %indvar.next, %niters
- br i1 %check, label %exit, label %loop
-
-exit: ; preds = %loop
- ret void
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind uwtable }
diff --git a/polly/test/GPGPU/cuda-annotations.ll b/polly/test/GPGPU/cuda-annotations.ll
deleted file mode 100644
index cbb0296d48ef..000000000000
--- a/polly/test/GPGPU/cuda-annotations.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=KERNEL %s
-
-; REQUIRES: pollyacc
-
-; KERNEL: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0(ptr addrspace(1) %MemRef_A, i64 %n) #0 {
-
-; KERNEL: !nvvm.annotations = !{!0}
-
-; KERNEL: !0 = !{ptr @FUNC_foo_SCOP_0_KERNEL_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1}
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo(ptr %A, i64 %n) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb6, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ]
- %tmp = icmp slt i64 %i.0, %n
- br i1 %tmp, label %bb2, label %bb8
-
-bb2: ; preds = %bb1
- %tmp3 = getelementptr inbounds i64, ptr %A, i64 %i.0
- %tmp4 = load i64, ptr %tmp3, align 8
- %tmp5 = add nsw i64 %tmp4, 100
- store i64 %tmp5, ptr %tmp3, align 8
- br label %bb6
-
-bb6: ; preds = %bb2
- %tmp7 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb8: ; preds = %bb1
- ret void
-}
diff --git a/polly/test/GPGPU/cuda-managed-memory-simple.ll b/polly/test/GPGPU/cuda-managed-memory-simple.ll
deleted file mode 100644
index 8ef7e336cfad..000000000000
--- a/polly/test/GPGPU/cuda-managed-memory-simple.ll
+++ /dev/null
@@ -1,118 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -S -polly-process-unprofitable -polly-acc-mincompute=0 -polly-codegen-ppcg -polly-acc-codegen-managed-memory < %s | \
-; RUN: FileCheck %s
-
-; REQUIRES: pollyacc
-
-;
-; #include <cuda_runtime.h>
-;
-; static const int N = 45;
-;
-; void copy(int *R, int *A) {
-; for (int i = 0; i < N; i++) {
-; R[i] = A[i] * 10;
-; }
-; }
-;
-; int main() {
-; int *A, *R;
-;
-; cudaMallocManaged((void **)(&A), sizeof(int) * N, cudaMemAttachGlobal);
-; cudaMallocManaged((void **)(&R), sizeof(int) * N, cudaMemAttachGlobal);
-;
-; for (int i = 0; i < N; i++) {
-; A[i] = i;
-; R[i] = 0;
-; }
-; copy(R, A);
-;
-; return 0;
-; }
-;
-
-; CHECK-NOT: polly_copyFromHostToDevice
-; CHECK-NOT: polly_copyFromDeviceToHost
-; CHECK-NOT: polly_freeDeviceMemory
-; CHECK-NOT: polly_allocateMemoryForDevice
-
-; CHECK: %[[REGCTX:[0-9]+]] = call i8* @polly_initContextCUDA()
-; CHECK-NEXT: %[[REGCA:[0-9]+]] = bitcast i32* %A to i8*
-; CHECK-NEXT: %[[REGCR:[0-9]+]] = bitcast i32* %R to i8*
-; CHECK-NEXT: %[[REGGEP0:[0-9]+]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 0
-; CHECK-NEXT: store i8* %[[REGCA]], i8** %polly_launch_0_param_0
-; CHECK-NEXT: %[[REGCP0:[0-9]+]] = bitcast i8** %polly_launch_0_param_0 to i8*
-; CHECK-NEXT: store i8* %[[REGCP0]], i8** %[[REGGEP0]]
-; CHECK-NEXT: %[[REGGEP1:[0-9]+]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 1
-; CHECK-NEXT: store i8* %[[REGCR]], i8** %polly_launch_0_param_1
-; CHECK-NEXT: %[[REGCP1:[0-9]+]] = bitcast i8** %polly_launch_0_param_1 to i8*
-; CHECK-NEXT: store i8* %[[REGCP1]], i8** %[[REGGEP1]]
-; CHECK-NEXT: %[[REGKERNEL:[0-9]+]] = call i8* @polly_getKernel(i8* getelementptr inbounds ([863 x i8], [863 x i8]* @FUNC_copy_SCOP_0_KERNEL_0, i32 0, i32 0), i8* getelementptr inbounds ([26 x i8], [26 x i8]* @FUNC_copy_SCOP_0_KERNEL_0_name, i32 0, i32 0))
-; CHECK-NEXT: call void @polly_launchKernel(i8* %[[REGKERNEL]], i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
-; CHECK-NEXT: call void @polly_freeKernel(i8* %[[REGKERNEL]])
-; CHECK-NEXT: call void @polly_synchronizeDevice()
-; CHECK-NEXT: call void @polly_freeContext(i8* %[[REGCTX]])
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @copy(i32* %R, i32* %A) {
-entry:
- br label %for.cond
-
-for.cond: ; preds = %for.inc, %entry
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
- %exitcond = icmp ne i64 %indvars.iv, 45
- br i1 %exitcond, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
- %tmp = load i32, i32* %arrayidx, align 4
- %mul = mul nsw i32 %tmp, 10
- %arrayidx2 = getelementptr inbounds i32, i32* %R, i64 %indvars.iv
- store i32 %mul, i32* %arrayidx2, align 4
- br label %for.inc
-
-for.inc: ; preds = %for.body
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- br label %for.cond
-
-for.end: ; preds = %for.cond
- ret void
-}
-
-define i32 @main() {
-entry:
- %A = alloca i32*, align 8
- %R = alloca i32*, align 8
- %tmp = bitcast i32** %A to i8**
- %call = call i32 @cudaMallocManaged(i8** nonnull %tmp, i64 180, i32 1) #2
- %tmp1 = bitcast i32** %R to i8**
- %call1 = call i32 @cudaMallocManaged(i8** nonnull %tmp1, i64 180, i32 1) #2
- br label %for.cond
-
-for.cond: ; preds = %for.inc, %entry
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
- %exitcond = icmp ne i64 %indvars.iv, 45
- br i1 %exitcond, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %tmp2 = load i32*, i32** %A, align 8
- %arrayidx = getelementptr inbounds i32, i32* %tmp2, i64 %indvars.iv
- %tmp3 = trunc i64 %indvars.iv to i32
- store i32 %tmp3, i32* %arrayidx, align 4
- %tmp4 = load i32*, i32** %R, align 8
- %arrayidx3 = getelementptr inbounds i32, i32* %tmp4, i64 %indvars.iv
- store i32 0, i32* %arrayidx3, align 4
- br label %for.inc
-
-for.inc: ; preds = %for.body
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- br label %for.cond
-
-for.end: ; preds = %for.cond
- %tmp5 = load i32*, i32** %R, align 8
- %tmp6 = load i32*, i32** %A, align 8
- call void @copy(i32* %tmp5, i32* %tmp6)
- ret i32 0
-}
-
-declare i32 @cudaMallocManaged(i8**, i64, i32) #1
diff --git a/polly/test/GPGPU/debug-metadata-leak.ll b/polly/test/GPGPU/debug-metadata-leak.ll
deleted file mode 100644
index c90926c318e8..000000000000
--- a/polly/test/GPGPU/debug-metadata-leak.ll
+++ /dev/null
@@ -1,104 +0,0 @@
-; RUN: opt %loadPolly %s -polly-process-unprofitable -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: | FileCheck --check-prefix=KERNEL-IR %s
-
-; REQUIRES: pollyacc
-
-; KERNEL-IR: define ptx_kernel void @FUNC_vec_add_1_SCOP_0_KERNEL_0(ptr addrspace(1) %MemRef_arr, i32 %N) #0 {
-
-; The instruction marked <<<LeakyInst>>> is copied into the GPUModule,
-; with changes only to the parameters to access data on the device instead of
-; the host, i.e., MemRef_arr becomes polly.access.cast.MemRef_arr. Since the
-; instruction is annotated with a DILocation, copying the instruction also copies
-; the metadata into the GPUModule. This stops codegenerating the ptx_kernel by
-; failing the verification of the Module in GPUNodeBuilder::finalize, due to the
-; copied DICompileUnit not being listed in a llvm.dbg.cu which was neither copied
-; nor created.
-;
-; https://reviews.llvm.org/D35630 removes this debug metadata before the
-; instruction is copied to the GPUModule.
-;
-; vec_add_1.c:
-; void vec_add_1(int N, int arr[N]) {
-; int i=0;
-; for( i=0 ; i<N ; i++) arr[i] += 1;
-; }
-;
-source_filename = "vec_add_1.c"
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @vec_add_1(i32 %N, ptr %arr) !dbg !7 {
-entry:
- call void @llvm.dbg.value(metadata i32 %N, i64 0, metadata !13, metadata !16), !dbg !17
- call void @llvm.dbg.value(metadata ptr %arr, i64 0, metadata !14, metadata !16), !dbg !18
- call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !16), !dbg !19
- %tmp = sext i32 %N to i64, !dbg !20
- br label %for.cond, !dbg !20
-
-for.cond: ; preds = %for.inc, %entry
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !15, metadata !16), !dbg !19
- %cmp = icmp slt i64 %indvars.iv, %tmp, !dbg !22
- br i1 %cmp, label %for.body, label %for.end, !dbg !24
-
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %indvars.iv, !dbg !25
- %tmp1 = load i32, ptr %arrayidx, align 4, !dbg !26, !tbaa !27
- %add = add nsw i32 %tmp1, 1, !dbg !26 ; <<<LeakyInst>>>
- store i32 %add, ptr %arrayidx, align 4, !dbg !26, !tbaa !27
- br label %for.inc, !dbg !25
-
-for.inc: ; preds = %for.body
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !31
- call void @llvm.dbg.value(metadata !2, i64 0, metadata !15, metadata !16), !dbg !19
- br label %for.cond, !dbg !32, !llvm.loop !33
-
-for.end: ; preds = %for.cond
- ret void, !dbg !35
-}
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata)
-
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
-
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4, !5}
-!llvm.ident = !{!6}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
-!1 = !DIFile(filename: "vec_add_1.c", directory: "/tmp")
-!2 = !{}
-!3 = !{i32 2, !"Dwarf Version", i32 4}
-!4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = !{i32 1, !"wchar_size", i32 4}
-!6 = !{!"clang version 5.0.0"}
-!7 = distinct !DISubprogram(name: "vec_add_1", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
-!8 = !DISubroutineType(types: !9)
-!9 = !{null, !10, !11}
-!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
-!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64)
-!12 = !{!13, !14, !15}
-!13 = !DILocalVariable(name: "N", arg: 1, scope: !7, file: !1, line: 1, type: !10)
-!14 = !DILocalVariable(name: "arr", arg: 2, scope: !7, file: !1, line: 1, type: !11)
-!15 = !DILocalVariable(name: "i", scope: !7, file: !1, line: 2, type: !10)
-!16 = !DIExpression()
-!17 = !DILocation(line: 1, column: 20, scope: !7)
-!18 = !DILocation(line: 1, column: 27, scope: !7)
-!19 = !DILocation(line: 2, column: 7, scope: !7)
-!20 = !DILocation(line: 3, column: 8, scope: !21)
-!21 = distinct !DILexicalBlock(scope: !7, file: !1, line: 3, column: 3)
-!22 = !DILocation(line: 3, column: 15, scope: !23)
-!23 = distinct !DILexicalBlock(scope: !21, file: !1, line: 3, column: 3)
-!24 = !DILocation(line: 3, column: 3, scope: !21)
-!25 = !DILocation(line: 3, column: 25, scope: !23)
-!26 = !DILocation(line: 3, column: 32, scope: !23)
-!27 = !{!28, !28, i64 0}
-!28 = !{!"int", !29, i64 0}
-!29 = !{!"omnipotent char", !30, i64 0}
-!30 = !{!"Simple C/C++ TBAA"}
-!31 = !DILocation(line: 3, column: 21, scope: !23)
-!32 = !DILocation(line: 3, column: 3, scope: !23)
-!33 = distinct !{!33, !24, !34}
-!34 = !DILocation(line: 3, column: 35, scope: !21)
-!35 = !DILocation(line: 4, column: 1, scope: !7)
diff --git a/polly/test/GPGPU/double-parallel-loop.ll b/polly/test/GPGPU/double-parallel-loop.ll
deleted file mode 100644
index 4aeee035a407..000000000000
--- a/polly/test/GPGPU/double-parallel-loop.ll
+++ /dev/null
@@ -1,254 +0,0 @@
-; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-schedule \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=SCHED %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
-; RUN: FileCheck %s -check-prefix=IR
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck %s -check-prefix=KERNEL-IR
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-asm \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck %s -check-prefix=KERNEL-ASM
-
-; XFAIL: *
-
-; REQUIRES: pollyacc, target=nvptx{{.*}}
-
-; This fails today due to extensive output differences from when the test was written.
-
-; CHECK: Stmt_bb5
-; CHECK-NEXT: Domain :=
-; CHECK-NEXT: { Stmt_bb5[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 };
-; CHECK-NEXT: Schedule :=
-; CHECK-NEXT: { Stmt_bb5[i0, i1] -> [i0, i1] };
-; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] };
-; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] };
-
-; SCHED: domain: "{ Stmt_bb5[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }"
-; SCHED-NEXT: child:
-; SCHED-NEXT: context: "{ [] }"
-; SCHED-NEXT: child:
-; SCHED-NEXT: extension: "{ [] -> from_device_MemRef_A[]; [] -> to_device_MemRef_A[] }"
-; SCHED-NEXT: child:
-; SCHED-NEXT: sequence:
-; SCHED-NEXT: - filter: "{ to_device_MemRef_A[] }"
-; SCHED-NEXT: child:
-; SCHED-NEXT: set:
-; SCHED-NEXT: - filter: "{ to_device_MemRef_A[] }"
-; SCHED-NEXT: child:
-; SCHED-NEXT: guard: "{ [] }"
-; SCHED-NEXT: - filter: "{ Stmt_bb5[i0, i1] }"
-; SCHED-NEXT: child:
-; SCHED-NEXT: guard: "{ [] }"
-; SCHED-NEXT: child:
-; SCHED-NEXT: mark: "kernel"
-; SCHED-NEXT: child:
-; SCHED-NEXT: context: "[b0, b1, t0, t1] -> { [] : 0 <= b0 <= 31 and 0 <= b1 <= 31 and 0 <= t0 <= 31 and 0 <= t1 <= 15 }"
-; SCHED-NEXT: child:
-; SCHED-NEXT: filter: "[b0, b1] -> { Stmt_bb5[i0, i1] : -31 - 32b0 + i0 <= 8192*floor((i0)/8192) <= -32b0 + i0 and -31 - 32b1 + i1 <= 8192*floor((i1)/8192) <= -32b1 + i1 }"
-; SCHED-NEXT: child:
-; SCHED-NEXT: schedule: "[{ Stmt_bb5[i0, i1] -> [(floor((i0)/8192))] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/8192))] }]"
-; SCHED-NEXT: permutable: 1
-; SCHED-NEXT: coincident: [ 1, 1 ]
-; SCHED-NEXT: child:
-; SCHED-NEXT: filter: "[t0, t1] -> { Stmt_bb5[i0, i1] : 32*floor((-t0 + i0)/32) = -t0 + i0 and 16*floor((-t1 + i1)/16) = -t1 + i1 and 0 <= t0 <= 31 and 0 <= t1 <= 15 }"
-; SCHED-NEXT: child:
-; SCHED-NEXT: schedule: "[{ Stmt_bb5[i0, i1] -> [(0)] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/16) - 2*floor((i1)/32))] }]"
-; SCHED-NEXT: permutable: 1
-; SCHED-NEXT: coincident: [ 1, 1 ]
-; SCHED-NEXT: - filter: "{ from_device_MemRef_A[] }"
-; SCHED-NEXT: child:
-; SCHED-NEXT: set:
-; SCHED-NEXT: - filter: "{ from_device_MemRef_A[] }"
-; SCHED-NEXT: child:
-; SCHED-NEXT: guard: "{ [] }"
-
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * (1024) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(16, 32);
-; CODE-NEXT: dim3 k0_dimGrid(32, 32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * (1024) * sizeof(float), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: for (int c3 = 0; c3 <= 1; c3 += 1)
-; CODE-NEXT: Stmt_bb5(32 * b0 + t0, 32 * b1 + t1 + 16 * c3);
-
-; IR: polly.split_new_and_old:
-; IR-NEXT: %0 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 1, i64 1024)
-; IR-NEXT: %.obit = extractvalue { i64, i1 } %0, 1
-; IR-NEXT: %polly.overflow.state = or i1 false, %.obit
-; IR-NEXT: %.res = extractvalue { i64, i1 } %0, 0
-; IR-NEXT: %1 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %.res, i64 1024)
-; IR-NEXT: %.obit1 = extractvalue { i64, i1 } %1, 1
-; IR-NEXT: %polly.overflow.state2 = or i1 %polly.overflow.state, %.obit1
-; IR-NEXT: %.res3 = extractvalue { i64, i1 } %1, 0
-; IR-NEXT: %2 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 7, i64 %.res3)
-; IR-NEXT: %.obit4 = extractvalue { i64, i1 } %2, 1
-; IR-NEXT: %polly.overflow.state5 = or i1 %polly.overflow.state2, %.obit4
-; IR-NEXT: %.res6 = extractvalue { i64, i1 } %2, 0
-; IR-NEXT: %3 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 0, i64 %.res6)
-; IR-NEXT: %.obit7 = extractvalue { i64, i1 } %3, 1
-; IR-NEXT: %polly.overflow.state8 = or i1 %polly.overflow.state5, %.obit7
-; IR-NEXT: %.res9 = extractvalue { i64, i1 } %3, 0
-; IR-NEXT: %4 = icmp sge i64 %.res9, 2621440
-; IR-NEXT: %5 = and i1 true, %4
-; IR-NEXT: %polly.rtc.overflown = xor i1 %polly.overflow.state8, true
-; IR-NEXT: %polly.rtc.result = and i1 %5, %polly.rtc.overflown
-; IR-NEXT: br i1 %polly.rtc.result, label %polly.start, label %bb2
-
-; IR: polly.start:
-; IR-NEXT: br label %polly.acc.initialize
-
-; IR: polly.acc.initialize:
-; IR-NEXT: [[GPUContext:%.*]] = call ptr @polly_initContext()
-; IR-NEXT: %p_dev_array_MemRef_A = call ptr @polly_allocateMemoryForDevice(i64 4194304)
-; IR-NEXT: call void @polly_copyFromHostToDevice(ptr %A, ptr %p_dev_array_MemRef_A, i64 4194304)
-; IR-NEXT: [[DevPtr:%.*]] = call ptr @polly_getDevicePtr(ptr %p_dev_array_MemRef_A)
-; IR-NEXT: store ptr [[DevPtr]], ptr %polly_launch_0_param_0
-; IR-NEXT: store ptr %polly_launch_0_param_0, ptr %polly_launch_0_params
-; IR-NEXT: call ptr @polly_getKernel
-; IR-NEXT: call void @polly_launchKernel(ptr %11, i32 32, i32 32, i32 32, i32 16, i32 1, ptr %polly_launch_0_params_i8ptr)
-; IR-NEXT: call void @polly_freeKernel
-; IR-NEXT: call void @polly_copyFromDeviceToHost(ptr %p_dev_array_MemRef_A, ptr %A, i64 4194304)
-; IR-NEXT: call void @polly_freeDeviceMemory(ptr %p_dev_array_MemRef_A)
-; IR-NEXT: call void @polly_freeContext(ptr [[GPUContext]])
-; IR-NEXT: br label %polly.exiting
-
-; IR: polly.exiting:
-; IR-NEXT: br label %polly.merge_new_and_old
-
-; KERNEL-IR-LABEL: define ptx_kernel void @kernel_0(ptr %MemRef_A) #0 {
-; KERNEL-IR-NEXT: entry:
-; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
-; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
-; KERNEL-IR-NEXT: %1 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
-; KERNEL-IR-NEXT: %b1 = zext i32 %1 to i64
-; KERNEL-IR-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-; KERNEL-IR-NEXT: %t0 = zext i32 %2 to i64
-; KERNEL-IR-NEXT: %3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
-; KERNEL-IR-NEXT: %t1 = zext i32 %3 to i64
-; KERNEL-IR-NEXT: br label %polly.loop_preheader
-
-; KERNEL-IR-LABEL: polly.loop_exit: ; preds = %polly.stmt.bb5
-; KERNEL-IR-NEXT: ret void
-
-; KERNEL-IR-LABEL: polly.loop_header: ; preds = %polly.stmt.bb5, %polly.loop_preheader
-; KERNEL-IR-NEXT: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.bb5 ]
-; KERNEL-IR-NEXT: %4 = mul nsw i64 32, %b0
-; KERNEL-IR-NEXT: %5 = add nsw i64 %4, %t0
-; KERNEL-IR-NEXT: %6 = mul nsw i64 32, %b1
-; KERNEL-IR-NEXT: %7 = add nsw i64 %6, %t1
-; KERNEL-IR-NEXT: %8 = mul nsw i64 16, %polly.indvar
-; KERNEL-IR-NEXT: %9 = add nsw i64 %7, %8
-; KERNEL-IR-NEXT: br label %polly.stmt.bb5
-
-; KERNEL-IR-LABEL: polly.stmt.bb5: ; preds = %polly.loop_header
-; KERNEL-IR-NEXT: %10 = mul i64 %5, %9
-; KERNEL-IR-NEXT: %p_tmp6 = sitofp i64 %10 to float
-; KERNEL-IR-NEXT: %11 = mul nsw i64 32, %b0
-; KERNEL-IR-NEXT: %12 = add nsw i64 %11, %t0
-; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A = mul nsw i64 %12, 1024
-; KERNEL-IR-NEXT: %13 = mul nsw i64 32, %b1
-; KERNEL-IR-NEXT: %14 = add nsw i64 %13, %t1
-; KERNEL-IR-NEXT: %15 = mul nsw i64 16, %polly.indvar
-; KERNEL-IR-NEXT: %16 = add nsw i64 %14, %15
-; KERNEL-IR-NEXT: %polly.access.add.MemRef_A = add nsw i64 %polly.access.mul.MemRef_A, %16
-; KERNEL-IR-NEXT: %polly.access.MemRef_A = getelementptr float, ptr %MemRef_A, i64 %polly.access.add.MemRef_A
-; KERNEL-IR-NEXT: %tmp8_p_scalar_ = load float, ptr %polly.access.MemRef_A, align 4
-; KERNEL-IR-NEXT: %p_tmp9 = fadd float %tmp8_p_scalar_, %p_tmp6
-; KERNEL-IR-NEXT: %17 = mul nsw i64 32, %b0
-; KERNEL-IR-NEXT: %18 = add nsw i64 %17, %t0
-; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A2 = mul nsw i64 %18, 1024
-; KERNEL-IR-NEXT: %19 = mul nsw i64 32, %b1
-; KERNEL-IR-NEXT: %20 = add nsw i64 %19, %t1
-; KERNEL-IR-NEXT: %21 = mul nsw i64 16, %polly.indvar
-; KERNEL-IR-NEXT: %22 = add nsw i64 %20, %21
-; KERNEL-IR-NEXT: %polly.access.add.MemRef_A3 = add nsw i64 %polly.access.mul.MemRef_A2, %22
-; KERNEL-IR-NEXT: %polly.access.MemRef_A4 = getelementptr float, ptr %MemRef_A, i64 %polly.access.add.MemRef_A3
-; KERNEL-IR-NEXT: store float %p_tmp9, ptr %polly.access.MemRef_A4, align 4
-; KERNEL-IR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1
-; KERNEL-IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar, 0
-; KERNEL-IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
-
-; KERNEL-IR-LABEL: polly.loop_preheader: ; preds = %entry
-; KERNEL-IR-NEXT: br label %polly.loop_header
-
-; KERNEL-IR: attributes #0 = { "polly.skip.fn" }
-
-; KERNEL-ASM: .version 3.2
-; KERNEL-ASM-NEXT: .target sm_30
-; KERNEL-ASM-NEXT: .address_size 64
-
-; KERNEL-ASM: // .globl kernel_0
-
-; KERNEL-ASM: .visible .entry kernel_0(
-; KERNEL-ASM-NEXT: .param .u64 kernel_0_param_0
-; KERNEL-ASM-NEXT: )
-
-; void double_parallel_loop(float A[][1024]) {
-; for (long i = 0; i < 1024; i++)
-; for (long j = 0; j < 1024; j++)
-; A[i][j] += i * j;
-; }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @double_parallel_loop(ptr %A) {
-bb:
- br label %bb2
-
-bb2: ; preds = %bb13, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp14, %bb13 ]
- %exitcond1 = icmp ne i64 %i.0, 1024
- br i1 %exitcond1, label %bb3, label %bb15
-
-bb3: ; preds = %bb2
- br label %bb4
-
-bb4: ; preds = %bb10, %bb3
- %j.0 = phi i64 [ 0, %bb3 ], [ %tmp11, %bb10 ]
- %exitcond = icmp ne i64 %j.0, 1024
- br i1 %exitcond, label %bb5, label %bb12
-
-bb5: ; preds = %bb4
- %tmp = mul nuw nsw i64 %i.0, %j.0
- %tmp6 = sitofp i64 %tmp to float
- %tmp7 = getelementptr inbounds [1024 x float], ptr %A, i64 %i.0, i64 %j.0
- %tmp8 = load float, ptr %tmp7, align 4
- %tmp9 = fadd float %tmp8, %tmp6
- store float %tmp9, ptr %tmp7, align 4
- br label %bb10
-
-bb10: ; preds = %bb5
- %tmp11 = add nuw nsw i64 %j.0, 1
- br label %bb4
-
-bb12: ; preds = %bb4
- br label %bb13
-
-bb13: ; preds = %bb12
- %tmp14 = add nuw nsw i64 %i.0, 1
- br label %bb2
-
-bb15: ; preds = %bb2
- ret void
-}
diff --git a/polly/test/GPGPU/failing-invariant-load-handling.ll b/polly/test/GPGPU/failing-invariant-load-handling.ll
deleted file mode 100644
index 70f88667bd60..000000000000
--- a/polly/test/GPGPU/failing-invariant-load-handling.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt %loadPolly -polly-process-unprofitable -polly-invariant-load-hoisting -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=SCOPS
-; RUN: opt %loadPolly -S < %s -polly-codegen-ppcg -polly-process-unprofitable -polly-invariant-load-hoisting | FileCheck %s -check-prefix=CODEGEN
-
-; REQUIRES: pollyacc
-
-target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
-
-%S = type { i32, i32, [12 x %L] }
-%L = type { i32, i32, double, i32, i32, i32, i32, i32 }
-
-define void @test(ptr %cpi, i1 %b) {
-; SCOPS-LABEL: Region: %if.then14---%exit
-; SCOPS: Invariant Accesses: {
-; SCOPS-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOPS-NEXT: [l2, l1] -> { Stmt_for_body_i[i0] -> MemRef_cpi[0, 0] };
-; SCOPS-NEXT: Execution Context: [l2, l1] -> { : }
-; SCOPS-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOPS-NEXT: [l2, l1] -> { Stmt_for_body_lr_ph_i[] -> MemRef_cpi[0, 1] };
-; SCOPS-NEXT: Execution Context: [l2, l1] -> { : l2 > 0 }
-; SCOPS-NEXT: }
-; SCOPS: Arrays {
-; SCOPS-NEXT: i32 MemRef_cpi[*][(10 * %l1)]; // Element size 4
-; SCOPS-NEXT: }
-
-; Check that we gracefully handle failing invariant loads.
-; This test case is taken from:
-; test/Isl/CodeGen/invariant-load-dimension.ll
-
-; FIXME: Figure out how to actually generate code for this loop.
-; CODEGEN-NOT: LLVM ERROR: preloading invariant loads failed in function
-
-entry:
- %nt = getelementptr inbounds %S, ptr %cpi, i32 0, i32 1
- br i1 %b, label %if.then14, label %exit
-
-if.then14:
- %l0 = load i32, ptr %cpi, align 8
- %cmp12.i = icmp sgt i32 %l0, 0
- br i1 %cmp12.i, label %for.body.lr.ph.i, label %exit
-
-for.body.lr.ph.i:
- %l1 = load i32, ptr %nt, align 4
- br label %for.body.i
-
-for.body.i:
- %phi = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc, %for.body.i ]
- %mul.i163 = mul nsw i32 %phi, %l1
- %cv = getelementptr inbounds %S, ptr %cpi, i32 0, i32 2, i32 %mul.i163, i32 0
- store i32 0, ptr %cv, align 8
- %inc = add nuw nsw i32 %phi, 1
- %l2 = load i32, ptr %cpi, align 8
- %cmp.i164 = icmp slt i32 %inc, %l2
- br i1 %cmp.i164, label %for.body.i, label %exit
-
-exit:
- ret void
-}
diff --git a/polly/test/GPGPU/failing-invariant-load-hoisting.ll b/polly/test/GPGPU/failing-invariant-load-hoisting.ll
deleted file mode 100644
index aa62921e1af5..000000000000
--- a/polly/test/GPGPU/failing-invariant-load-hoisting.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: opt %loadPolly -S < %s -polly-codegen-ppcg \
-; RUN: -polly-invariant-load-hoisting | FileCheck %s -check-prefix=CODEGEN
-
-; REQUIRES: pollyacc
-
-target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
-
-%S = type { i32, i32, [12 x %L] }
-%L = type { i32, i32, double, i32, i32, i32, i32, i32 }
-
-define void @test(ptr %cpi, i1 %b) {
-; CODEGEN-LABEL: @test(
-; CODEGEN: polly.preload.begin:
-; CODEGEN-NEXT: br i1 false
-
-entry:
- %nt = getelementptr inbounds %S, ptr %cpi, i32 0, i32 1
- br i1 %b, label %if.then14, label %exit
-
-if.then14:
- %l0 = load i32, ptr %cpi, align 8
- %cmp12.i = icmp sgt i32 %l0, 0
- br i1 %cmp12.i, label %for.body.lr.ph.i, label %exit
-
-for.body.lr.ph.i:
- %l1 = load i32, ptr %nt, align 4
- br label %for.body.i
-
-for.body.i:
- %phi = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc, %for.body.i ]
- %mul.i163 = mul nsw i32 %phi, %l1
- %cv = getelementptr inbounds %S, ptr %cpi, i32 0, i32 2, i32 %mul.i163, i32 0
- store i32 0, ptr %cv, align 8
- %inc = add nuw nsw i32 %phi, 1
- %l2 = load i32, ptr %cpi, align 8
- %cmp.i164 = icmp slt i32 %inc, %l2
- br i1 %cmp.i164, label %for.body.i, label %exit
-
-exit:
- ret void
-}
diff --git a/polly/test/GPGPU/host-control-flow.ll b/polly/test/GPGPU/host-control-flow.ll
deleted file mode 100644
index 5ba65d60819c..000000000000
--- a/polly/test/GPGPU/host-control-flow.ll
+++ /dev/null
@@ -1,176 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg -disable-output \
-; RUN: -polly-acc-dump-code < %s | FileCheck %s -check-prefix=CODE
-
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg -disable-output \
-; RUN: -polly-acc-dump-kernel-ir < %s | FileCheck %s -check-prefix=KERNEL-IR
-
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg \
-; RUN: -S < %s | FileCheck %s -check-prefix=IR
-; void foo(float A[2][100]) {
-; for (long t = 0; t < 100; t++)
-; for (long i = 1; i < 99; i++)
-; A[(t + 1) % 2][i] += A[t % 2][i - 1] + A[t % 2][i] + A[t % 2][i + 1];
-; }
-
-; REQUIRES: pollyacc
-
-; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (2) * (100) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: for (int c0 = 0; c0 <= 99; c0 += 1)
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(4);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, c0);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (2) * (100) * sizeof(float), cudaMemcpyDeviceToHost));
-; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_A));
-; CODE-NEXT: }
-
-; IR-LABEL: polly.loop_header: ; preds = %polly.loop_header, %polly.loop_preheader
-; IR-NEXT: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.loop_header ]
-; ...
-; IR: store i64 %polly.indvar, i64* %polly_launch_0_param_1
-; IR-NEXT: [[REGA:%.+]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 1
-; IR-NEXT: [[REGB:%.+]] = bitcast i64* %polly_launch_0_param_1 to i8*
-; IR-NEXT: store i8* [[REGB]], i8** [[REGA]]
-; IR: call i8* @polly_getKernel
-; ...
-; IR: call void @polly_freeKernel
-; IR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1
-; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 99
-; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
-
-; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %c0)
-; KERNEL-IR-LABEL: entry:
-; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
-; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
-; KERNEL-IR-NEXT: %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-; KERNEL-IR-NEXT: %t0 = zext i32 %1 to i64
-; KERNEL-IR-NEXT: br label %polly.cond
-
-; KERNEL-IR-LABEL: polly.cond: ; preds = %entry
-; KERNEL-IR-NEXT: %2 = mul nsw i64 32, %b0
-; KERNEL-IR-NEXT: %3 = add nsw i64 %2, %t0
-; KERNEL-IR-NEXT: %4 = icmp sle i64 %3, 97
-; KERNEL-IR-NEXT: br i1 %4, label %polly.then, label %polly.else
-
-; KERNEL-IR-LABEL: polly.merge: ; preds = %polly.else, %polly.stmt.for.body3
-; KERNEL-IR-NEXT: ret void
-
-; KERNEL-IR-LABEL: polly.then: ; preds = %polly.cond
-; KERNEL-IR-NEXT: %5 = mul nsw i64 32, %b0
-; KERNEL-IR-NEXT: %6 = add nsw i64 %5, %t0
-; KERNEL-IR-NEXT: br label %polly.stmt.for.body3
-
-; KERNEL-IR-LABEL: polly.stmt.for.body3: ; preds = %polly.then
-; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
-; KERNEL-IR-NEXT: %pexp.pdiv_r = urem i64 %c0, 2
-; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A = mul nsw i64 %pexp.pdiv_r, 100
-; KERNEL-IR-NEXT: %7 = mul nsw i64 32, %b0
-; KERNEL-IR-NEXT: %8 = add nsw i64 %7, %t0
-; KERNEL-IR-NEXT: %polly.access.add.MemRef_A = add nsw i64 %polly.access.mul.MemRef_A, %8
-; KERNEL-IR-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %polly.access.add.MemRef_A
-; KERNEL-IR-NEXT: %tmp_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A, align 4
-; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A1 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
-; KERNEL-IR-NEXT: %pexp.pdiv_r2 = urem i64 %c0, 2
-; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A3 = mul nsw i64 %pexp.pdiv_r2, 100
-; KERNEL-IR-NEXT: %9 = mul nsw i64 32, %b0
-; KERNEL-IR-NEXT: %10 = add nsw i64 %9, %t0
-; KERNEL-IR-NEXT: %11 = add nsw i64 %10, 1
-; KERNEL-IR-NEXT: %polly.access.add.MemRef_A4 = add nsw i64 %polly.access.mul.MemRef_A3, %11
-; KERNEL-IR-NEXT: %polly.access.MemRef_A5 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A1, i64 %polly.access.add.MemRef_A4
-; KERNEL-IR-NEXT: %tmp2_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A5, align 4
-; KERNEL-IR-NEXT: %p_add = fadd float %tmp_p_scalar_, %tmp2_p_scalar_
-; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A6 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
-; KERNEL-IR-NEXT: %pexp.pdiv_r7 = urem i64 %c0, 2
-; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A8 = mul nsw i64 %pexp.pdiv_r7, 100
-; KERNEL-IR-NEXT: %12 = mul nsw i64 32, %b0
-; KERNEL-IR-NEXT: %13 = add nsw i64 %12, %t0
-; KERNEL-IR-NEXT: %14 = add nsw i64 %13, 2
-; KERNEL-IR-NEXT: %polly.access.add.MemRef_A9 = add nsw i64 %polly.access.mul.MemRef_A8, %14
-; KERNEL-IR-NEXT: %polly.access.MemRef_A10 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A6, i64 %polly.access.add.MemRef_A9
-; KERNEL-IR-NEXT: %tmp3_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A10, align 4
-; KERNEL-IR-NEXT: %p_add12 = fadd float %p_add, %tmp3_p_scalar_
-; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A11 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
-; KERNEL-IR-NEXT: %15 = add nsw i64 %c0, 1
-; KERNEL-IR-NEXT: %pexp.pdiv_r12 = urem i64 %15, 2
-; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A13 = mul nsw i64 %pexp.pdiv_r12, 100
-; KERNEL-IR-NEXT: %16 = mul nsw i64 32, %b0
-; KERNEL-IR-NEXT: %17 = add nsw i64 %16, %t0
-; KERNEL-IR-NEXT: %18 = add nsw i64 %17, 1
-; KERNEL-IR-NEXT: %polly.access.add.MemRef_A14 = add nsw i64 %polly.access.mul.MemRef_A13, %18
-; KERNEL-IR-NEXT: %polly.access.MemRef_A15 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A11, i64 %polly.access.add.MemRef_A14
-; KERNEL-IR-NEXT: %tmp4_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A15, align 4
-; KERNEL-IR-NEXT: %p_add17 = fadd float %tmp4_p_scalar_, %p_add12
-; KERNEL-IR-NEXT: %polly.access.cast.MemRef_A16 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
-; KERNEL-IR-NEXT: %19 = add nsw i64 %c0, 1
-; KERNEL-IR-NEXT: %pexp.pdiv_r17 = urem i64 %19, 2
-; KERNEL-IR-NEXT: %polly.access.mul.MemRef_A18 = mul nsw i64 %pexp.pdiv_r17, 100
-; KERNEL-IR-NEXT: %20 = mul nsw i64 32, %b0
-; KERNEL-IR-NEXT: %21 = add nsw i64 %20, %t0
-; KERNEL-IR-NEXT: %22 = add nsw i64 %21, 1
-; KERNEL-IR-NEXT: %polly.access.add.MemRef_A19 = add nsw i64 %polly.access.mul.MemRef_A18, %22
-; KERNEL-IR-NEXT: %polly.access.MemRef_A20 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A16, i64 %polly.access.add.MemRef_A19
-; KERNEL-IR-NEXT: store float %p_add17, float addrspace(1)* %polly.access.MemRef_A20, align 4
-; KERNEL-IR-NEXT: br label %polly.merge
-
-; KERNEL-IR-LABEL: polly.else: ; preds = %polly.cond
-; KERNEL-IR-NEXT: br label %polly.merge
-; KERNEL-IR-NEXT: }
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo([100 x float]* %A) {
-entry:
- br label %for.cond
-
-for.cond: ; preds = %for.inc18, %entry
- %t.0 = phi i64 [ 0, %entry ], [ %inc19, %for.inc18 ]
- %exitcond1 = icmp ne i64 %t.0, 100
- br i1 %exitcond1, label %for.body, label %for.end20
-
-for.body: ; preds = %for.cond
- br label %for.cond1
-
-for.cond1: ; preds = %for.inc, %for.body
- %i.0 = phi i64 [ 1, %for.body ], [ %inc, %for.inc ]
- %exitcond = icmp ne i64 %i.0, 99
- br i1 %exitcond, label %for.body3, label %for.end
-
-for.body3: ; preds = %for.cond1
- %sub = add nsw i64 %i.0, -1
- %rem = srem i64 %t.0, 2
- %arrayidx4 = getelementptr inbounds [100 x float], [100 x float]* %A, i64 %rem, i64 %sub
- %tmp = load float, float* %arrayidx4, align 4
- %rem5 = srem i64 %t.0, 2
- %arrayidx7 = getelementptr inbounds [100 x float], [100 x float]* %A, i64 %rem5, i64 %i.0
- %tmp2 = load float, float* %arrayidx7, align 4
- %add = fadd float %tmp, %tmp2
- %add8 = add nuw nsw i64 %i.0, 1
- %rem9 = srem i64 %t.0, 2
- %arrayidx11 = getelementptr inbounds [100 x float], [100 x float]* %A, i64 %rem9, i64 %add8
- %tmp3 = load float, float* %arrayidx11, align 4
- %add12 = fadd float %add, %tmp3
- %add13 = add nuw nsw i64 %t.0, 1
- %rem14 = srem i64 %add13, 2
- %arrayidx16 = getelementptr inbounds [100 x float], [100 x float]* %A, i64 %rem14, i64 %i.0
- %tmp4 = load float, float* %arrayidx16, align 4
- %add17 = fadd float %tmp4, %add12
- store float %add17, float* %arrayidx16, align 4
- br label %for.inc
-
-for.inc: ; preds = %for.body3
- %inc = add nuw nsw i64 %i.0, 1
- br label %for.cond1
-
-for.end: ; preds = %for.cond1
- br label %for.inc18
-
-for.inc18: ; preds = %for.end
- %inc19 = add nuw nsw i64 %t.0, 1
- br label %for.cond
-
-for.end20: ; preds = %for.cond
- ret void
-}
diff --git a/polly/test/GPGPU/host-statement.ll b/polly/test/GPGPU/host-statement.ll
deleted file mode 100644
index d7232b2fa538..000000000000
--- a/polly/test/GPGPU/host-statement.ll
+++ /dev/null
@@ -1,204 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -polly-invariant-load-hoisting=false \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -polly-invariant-load-hoisting=false \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=KERNEL-IR %s
-
-; REQUIRES: pollyacc
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-declare void @llvm.lifetime.start(i64, ptr nocapture) #0
-
-; This test case tests that we can correctly handle a ScopStmt that is
-; scheduled on the host, instead of within a kernel.
-
-; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_R, MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyHostToDevice));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_Q, MemRef_Q, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(16);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: if (p_0 <= 510 && p_1 <= 510) {
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k1_dimBlock(32);
-; CODE-NEXT: dim3 k1_dimGrid(p_1 <= -1048034 ? 32768 : -p_1 + floord(31 * p_1 + 30, 32) + 16);
-; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: {
-; CODE-NEXT: dim3 k2_dimBlock(16, 32);
-; CODE-NEXT: dim3 k2_dimGrid(16, p_1 <= -7650 ? 256 : -p_1 + floord(31 * p_1 + 30, 32) + 16);
-; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: }
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_Q, dev_MemRef_Q, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
-; CODE-NEXT: Stmt_for_cond33_preheader_last();
-
-; CODE: }
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_for_body16(32 * b0 + t0);
-
-; CODE: # kernel1
-; CODE-NEXT: for (int c0 = 0; c0 <= (-p_1 - 32 * b0 + 510) / 1048576; c0 += 1)
-; CODE-NEXT: for (int c1 = 0; c1 <= 15; c1 += 1) {
-; CODE-NEXT: if (p_1 + 32 * b0 + t0 + 1048576 * c0 <= 510 && c1 == 0)
-; CODE-NEXT: Stmt_for_body35(32 * b0 + t0 + 1048576 * c0);
-; CODE-NEXT: if (p_1 + 32 * b0 + t0 + 1048576 * c0 <= 510)
-; CODE-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
-; CODE-NEXT: Stmt_for_body42(32 * b0 + t0 + 1048576 * c0, 32 * c1 + c3);
-; CODE-NEXT: sync0();
-; CODE-NEXT: }
-
-; CODE: # kernel2
-; CODE-NEXT: for (int c0 = 0; c0 <= (-p_1 - 32 * b0 + 510) / 8192; c0 += 1)
-; CODE-NEXT: if (p_1 + 32 * b0 + t0 + 8192 * c0 <= 510)
-; CODE-NEXT: for (int c3 = 0; c3 <= 1; c3 += 1)
-; CODE-NEXT: Stmt_for_body62(32 * b0 + t0 + 8192 * c0, 32 * b1 + t1 + 16 * c3);
-
-; KERNEL-IR: call void @llvm.nvvm.barrier0()
-
-; Function Attrs: nounwind uwtable
-define internal void @kernel_gramschmidt(i32 %ni, i32 %nj, ptr %A, ptr %R, ptr %Q) #1 {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- br label %for.cond1.preheader
-
-for.cond1.preheader: ; preds = %entry.split, %for.inc86
- %indvars.iv24 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next25, %for.inc86 ]
- %indvars.iv19 = phi i64 [ 1, %entry.split ], [ %indvars.iv.next20, %for.inc86 ]
- br label %for.inc
-
-for.inc: ; preds = %for.cond1.preheader, %for.inc
- %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
- %nrm.02 = phi double [ 0.000000e+00, %for.cond1.preheader ], [ %add, %for.inc ]
- %arrayidx5 = getelementptr inbounds [512 x double], ptr %A, i64 %indvars.iv, i64 %indvars.iv24
- %tmp = load double, ptr %arrayidx5, align 8, !tbaa !1
- %arrayidx9 = getelementptr inbounds [512 x double], ptr %A, i64 %indvars.iv, i64 %indvars.iv24
- %tmp27 = load double, ptr %arrayidx9, align 8, !tbaa !1
- %mul = fmul double %tmp, %tmp27
- %add = fadd double %nrm.02, %mul
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp ne i64 %indvars.iv.next, 512
- br i1 %exitcond, label %for.inc, label %for.end
-
-for.end: ; preds = %for.inc
- %add.lcssa = phi double [ %add, %for.inc ]
- %call = tail call double @sqrt(double %add.lcssa) #2
- %arrayidx13 = getelementptr inbounds [512 x double], ptr %R, i64 %indvars.iv24, i64 %indvars.iv24
- store double %call, ptr %arrayidx13, align 8, !tbaa !1
- br label %for.body16
-
-for.cond33.preheader: ; preds = %for.body16
- %indvars.iv.next25 = add nuw nsw i64 %indvars.iv24, 1
- %cmp347 = icmp slt i64 %indvars.iv.next25, 512
- br i1 %cmp347, label %for.body35.lr.ph, label %for.inc86
-
-for.body35.lr.ph: ; preds = %for.cond33.preheader
- br label %for.body35
-
-for.body16: ; preds = %for.end, %for.body16
- %indvars.iv10 = phi i64 [ 0, %for.end ], [ %indvars.iv.next11, %for.body16 ]
- %arrayidx20 = getelementptr inbounds [512 x double], ptr %A, i64 %indvars.iv10, i64 %indvars.iv24
- %tmp28 = load double, ptr %arrayidx20, align 8, !tbaa !1
- %arrayidx24 = getelementptr inbounds [512 x double], ptr %R, i64 %indvars.iv24, i64 %indvars.iv24
- %tmp29 = load double, ptr %arrayidx24, align 8, !tbaa !1
- %div = fdiv double %tmp28, %tmp29
- %arrayidx28 = getelementptr inbounds [512 x double], ptr %Q, i64 %indvars.iv10, i64 %indvars.iv24
- store double %div, ptr %arrayidx28, align 8, !tbaa !1
- %indvars.iv.next11 = add nuw nsw i64 %indvars.iv10, 1
- %exitcond12 = icmp ne i64 %indvars.iv.next11, 512
- br i1 %exitcond12, label %for.body16, label %for.cond33.preheader
-
-for.cond33.loopexit: ; preds = %for.body62
- %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
- %lftr.wideiv = trunc i64 %indvars.iv.next22 to i32
- %exitcond23 = icmp ne i32 %lftr.wideiv, 512
- br i1 %exitcond23, label %for.body35, label %for.cond33.for.inc86_crit_edge
-
-for.body35: ; preds = %for.body35.lr.ph, %for.cond33.loopexit
- %indvars.iv21 = phi i64 [ %indvars.iv19, %for.body35.lr.ph ], [ %indvars.iv.next22, %for.cond33.loopexit ]
- %arrayidx39 = getelementptr inbounds [512 x double], ptr %R, i64 %indvars.iv24, i64 %indvars.iv21
- store double 0.000000e+00, ptr %arrayidx39, align 8, !tbaa !1
- br label %for.body42
-
-for.cond60.preheader: ; preds = %for.body42
- br label %for.body62
-
-for.body42: ; preds = %for.body35, %for.body42
- %indvars.iv13 = phi i64 [ 0, %for.body35 ], [ %indvars.iv.next14, %for.body42 ]
- %arrayidx46 = getelementptr inbounds [512 x double], ptr %Q, i64 %indvars.iv13, i64 %indvars.iv24
- %tmp30 = load double, ptr %arrayidx46, align 8, !tbaa !1
- %arrayidx50 = getelementptr inbounds [512 x double], ptr %A, i64 %indvars.iv13, i64 %indvars.iv21
- %tmp31 = load double, ptr %arrayidx50, align 8, !tbaa !1
- %mul51 = fmul double %tmp30, %tmp31
- %arrayidx55 = getelementptr inbounds [512 x double], ptr %R, i64 %indvars.iv24, i64 %indvars.iv21
- %tmp32 = load double, ptr %arrayidx55, align 8, !tbaa !1
- %add56 = fadd double %tmp32, %mul51
- store double %add56, ptr %arrayidx55, align 8, !tbaa !1
- %indvars.iv.next14 = add nuw nsw i64 %indvars.iv13, 1
- %exitcond15 = icmp ne i64 %indvars.iv.next14, 512
- br i1 %exitcond15, label %for.body42, label %for.cond60.preheader
-
-for.body62: ; preds = %for.cond60.preheader, %for.body62
- %indvars.iv16 = phi i64 [ 0, %for.cond60.preheader ], [ %indvars.iv.next17, %for.body62 ]
- %arrayidx66 = getelementptr inbounds [512 x double], ptr %A, i64 %indvars.iv16, i64 %indvars.iv21
- %tmp33 = load double, ptr %arrayidx66, align 8, !tbaa !1
- %arrayidx70 = getelementptr inbounds [512 x double], ptr %Q, i64 %indvars.iv16, i64 %indvars.iv24
- %tmp34 = load double, ptr %arrayidx70, align 8, !tbaa !1
- %arrayidx74 = getelementptr inbounds [512 x double], ptr %R, i64 %indvars.iv24, i64 %indvars.iv21
- %tmp35 = load double, ptr %arrayidx74, align 8, !tbaa !1
- %mul75 = fmul double %tmp34, %tmp35
- %sub = fsub double %tmp33, %mul75
- %arrayidx79 = getelementptr inbounds [512 x double], ptr %A, i64 %indvars.iv16, i64 %indvars.iv21
- store double %sub, ptr %arrayidx79, align 8, !tbaa !1
- %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1
- %exitcond18 = icmp ne i64 %indvars.iv.next17, 512
- br i1 %exitcond18, label %for.body62, label %for.cond33.loopexit
-
-for.cond33.for.inc86_crit_edge: ; preds = %for.cond33.loopexit
- br label %for.inc86
-
-for.inc86: ; preds = %for.cond33.for.inc86_crit_edge, %for.cond33.preheader
- %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
- %exitcond26 = icmp ne i64 %indvars.iv.next25, 512
- br i1 %exitcond26, label %for.cond1.preheader, label %for.end88
-
-for.end88: ; preds = %for.inc86
- ret void
-}
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end(i64, ptr nocapture) #0
-
-; Function Attrs: nounwind
-declare double @sqrt(double) #2
-
-attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 3.9.0 (trunk 275267) (llvm/trunk 275268)"}
-!1 = !{!2, !2, i64 0}
-!2 = !{!"double", !3, i64 0}
-!3 = !{!"omnipotent char", !4, i64 0}
-!4 = !{!"Simple C/C++ TBAA"}
diff --git a/polly/test/GPGPU/ignore-parameter-bounds.ll b/polly/test/GPGPU/ignore-parameter-bounds.ll
deleted file mode 100644
index 1d0b5482941e..000000000000
--- a/polly/test/GPGPU/ignore-parameter-bounds.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; REQUIRES: pollyacc
-
-; CODE: Code
-; CODE: ====
-; CODE: No code generated
-
-source_filename = "bugpoint-output-83bcdeb.bc"
-target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-@__data_radiation_MOD_cobi = external global [168 x double], align 32
-
-; Function Attrs: nounwind uwtable
-define void @__radiation_rg_MOD_coe_so() #0 {
-entry:
- %polly.access.kspec.load = load i32, ptr undef, align 4
- %0 = or i1 undef, undef
- br label %polly.preload.cond29
-
-polly.preload.cond29: ; preds = %entry
- br i1 %0, label %polly.preload.exec31, label %polly.preload.merge30
-
-polly.preload.merge30: ; preds = %polly.preload.exec31, %polly.preload.cond29
- %polly.preload..merge32 = phi double [ %polly.access.__data_radiation_MOD_cobi.load, %polly.preload.exec31 ], [ 0.000000e+00, %polly.preload.cond29 ]
- ret void
-
-polly.preload.exec31: ; preds = %polly.preload.cond29
- %1 = sext i32 %polly.access.kspec.load to i64
- %2 = mul nsw i64 7, %1
- %3 = add nsw i64 0, %2
- %4 = add nsw i64 %3, 48
- %polly.access.__data_radiation_MOD_cobi = getelementptr double, ptr @__data_radiation_MOD_cobi, i64 %4
- %polly.access.__data_radiation_MOD_cobi.load = load double, ptr %polly.access.__data_radiation_MOD_cobi, align 8
- br label %polly.preload.merge30
-}
-
-attributes #0 = { nounwind uwtable }
diff --git a/polly/test/GPGPU/intrinsic-copied-into-kernel.ll b/polly/test/GPGPU/intrinsic-copied-into-kernel.ll
deleted file mode 100644
index 7c1e3672abb5..000000000000
--- a/polly/test/GPGPU/intrinsic-copied-into-kernel.ll
+++ /dev/null
@@ -1,76 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s --check-prefix=SCOP
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir -disable-output < %s | FileCheck %s --check-prefix=KERNEL-IR
-; RUN: opt -opaque-pointers=0 %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s --check-prefix=HOST-IR
-
-; Test that we do recognise and codegen a kernel that has intrinsics.
-
-; REQUIRES: pollyacc
-
-; Check that we model the kernel as a scop.
-; SCOP: Function: f
-; SCOP-NEXT: Region: %entry.split---%for.end
-
-; Check that the intrinsic call is present in the kernel IR.
-; KERNEL-IR: %p_sqrt = tail call float @llvm.sqrt.f32(float %A.arr.i.val_p_scalar_)
-; KERNEL-IR: declare float @llvm.sqrt.f32(float)
-; KERNEL-IR: declare float @llvm.fabs.f32(float)
-
-
-; Check that kernel launch is generated in host IR.
-; the declare would not be generated unless a call to a kernel exists.
-; HOST-IR: declare void @polly_launchKernel(i8*, i32, i32, i32, i32, i32, i8*)
-
-
-; void f(float *A, float *B, int N) {
-; for(int i = 0; i < N; i++) {
-; float tmp0 = A[i];
-; float tmp1 = sqrt(tmp1);
-; float tmp2 = fabs(tmp2);
-; float tmp3 = copysignf(tmp1, tmp2);
-; B[i] = tmp4;
-; }
-; }
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @f(float* %A, float* %B, i32 %N) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- %cmp1 = icmp sgt i32 %N, 0
- br i1 %cmp1, label %for.body.lr.ph, label %for.end
-
-for.body.lr.ph: ; preds = %entry.split
- br label %for.body
-
-for.body: ; preds = %for.body.lr.ph, %for.body
- %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
- %A.arr.i = getelementptr inbounds float, float* %A, i64 %indvars.iv
- %A.arr.i.val = load float, float* %A.arr.i, align 4
- ; Call to intrinsics that should be part of the kernel.
- %sqrt = tail call float @llvm.sqrt.f32(float %A.arr.i.val)
- %fabs = tail call float @llvm.fabs.f32(float %sqrt);
- %copysign = tail call float @llvm.copysign.f32(float %sqrt, float %fabs);
- %B.arr.i = getelementptr inbounds float, float* %B, i64 %indvars.iv
- store float %copysign, float* %B.arr.i, align 4
-
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %wide.trip.count = zext i32 %N to i64
- %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
- br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge: ; preds = %for.body
- br label %for.end
-
-for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
- ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @llvm.sqrt.f32(float) #0
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.copysign.f32(float, float) #0
-
-attributes #0 = { nounwind readnone }
-
diff --git a/polly/test/GPGPU/invalid-kernel-assert-verifymodule.ll b/polly/test/GPGPU/invalid-kernel-assert-verifymodule.ll
deleted file mode 100644
index 4b9139f0b44c..000000000000
--- a/polly/test/GPGPU/invalid-kernel-assert-verifymodule.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-fail-on-verify-module-failure \
-; RUN: -disable-output < %s
-
-; Make sure that if -polly-acc-fail-on-verify-module-failure is on, we actually
-; fail on an illegal module.
-
-; REQUIRES: pollyacc, asserts
-; XFAIL: *
-;
-; void foo(long A[1024], long B[1024]) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += (B[i] + (long)&B[i]);
-; }
-
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo(ptr %A, ptr %B) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb10, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp11, %bb10 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb12
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds i64, ptr %B, i64 %i.0
- %tmp3 = load i64, ptr %tmp, align 8
- %tmp4 = getelementptr inbounds i64, ptr %B, i64 %i.0
- %tmp5 = ptrtoint ptr %tmp4 to i64
- %tmp6 = add nsw i64 %tmp3, %tmp5
- %tmp7 = getelementptr inbounds i64, ptr %A, i64 %i.0
- %tmp8 = load i64, ptr %tmp7, align 8
- %tmp9 = add nsw i64 %tmp8, %tmp6
- store i64 %tmp9, ptr %tmp7, align 8
- br label %bb10
-
-bb10: ; preds = %bb2
- %tmp11 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb12: ; preds = %bb1
- ret void
-}
diff --git a/polly/test/GPGPU/invalid-kernel.ll b/polly/test/GPGPU/invalid-kernel.ll
deleted file mode 100644
index 9dd32eac97c0..000000000000
--- a/polly/test/GPGPU/invalid-kernel.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -disable-output < %s | \
-; RUN: not FileCheck %s -check-prefix=KERNEL-IR
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
-; RUN: FileCheck %s -check-prefix=IR
-
-; REQUIRES: pollyacc
-;
-; void foo(long A[1024], long B[1024]) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += (B[i] + (long)&B[i]);
-; }
-
-; This kernel loads/stores a pointer address we model. This is a rare case,
-; where we still lack proper code-generation support. We check here that we
-; detect the invalid IR and bail out gracefully.
-
-; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (1024) * sizeof(i64), cudaMemcpyHostToDevice));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(i64), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_B, dev_MemRef_A);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(i64), cudaMemcpyDeviceToHost));
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_bb2(32 * b0 + t0);
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
-; RUN: FileCheck %s -check-prefix=IR
-
-; KERNEL-IR: kernel
-
-; IR: br i1 false, label %polly.start, label %bb1
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo(ptr %A, ptr %B) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb10, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp11, %bb10 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb12
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds i64, ptr %B, i64 %i.0
- %tmp3 = load i64, ptr %tmp, align 8
- %tmp4 = getelementptr inbounds i64, ptr %B, i64 %i.0
- %tmp5 = ptrtoint ptr %tmp4 to i64
- %tmp6 = add nsw i64 %tmp3, %tmp5
- %tmp7 = getelementptr inbounds i64, ptr %A, i64 %i.0
- %tmp8 = load i64, ptr %tmp7, align 8
- %tmp9 = add nsw i64 %tmp8, %tmp6
- store i64 %tmp9, ptr %tmp7, align 8
- br label %bb10
-
-bb10: ; preds = %bb2
- %tmp11 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb12: ; preds = %bb1
- ret void
-}
diff --git a/polly/test/GPGPU/invariant-load-array-access.ll b/polly/test/GPGPU/invariant-load-array-access.ll
deleted file mode 100644
index 02c0330a7e7e..000000000000
--- a/polly/test/GPGPU/invariant-load-array-access.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: opt %loadPolly -polly-invariant-load-hoisting -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=SCOP
-
-; RUN: opt %loadPolly -S -polly-codegen-ppcg \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR
-
-
-; REQUIRES: pollyacc
-
-; Check that we detect a scop.
-; SCOP: Function: f
-; SCOP-NEXT: Region: %for.body---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-; SCOP-NEXT: Invariant Accesses: {
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [tmp] -> { Stmt_for_body[i0] -> MemRef_control[0] };
-; SCOP-NEXT: Execution Context: [tmp] -> { : }
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [tmp] -> { Stmt_if_then[i0] -> MemRef_readarr[0] };
-; SCOP-NEXT: Execution Context: [tmp] -> { : tmp >= 4 }
-; SCOP-NEXT: }
-
-; Check that kernel launch is generated in host IR.
-; the declare would not be generated unless a call to a kernel exists.
-; HOST-IR: declare void @polly_launchKernel(ptr, i32, i32, i32, i32, i32, ptr)
-
-; This test makes sure that such an access pattern is handled correctly
-; by PPCGCodeGeneration. It appears that not calling `preloadInvariantLoads`
-; was the main reason that caused this test case to crash.
-;
-; void f(int *arr, const int *control, const int *readarr) {
-; for(int i = 0; i < 1000; i++) {
-; int t = 0;
-; if (*control > 3) {
-; t += *readarr;
-; }
-; arr[i] = t;
-; }
-; }
-
-
-target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
-target triple = "i386-apple-macosx10.12.0"
-define void @f(ptr %arr, ptr %control, ptr %readarr) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- br label %for.body
-
-for.body: ; preds = %entry.split, %if.end
- %i.01 = phi i32 [ 0, %entry.split ], [ %inc, %if.end ]
- %tmp = load i32, ptr %control, align 4
- %cmp1 = icmp sgt i32 %tmp, 3
- br i1 %cmp1, label %if.then, label %if.end
-
-if.then: ; preds = %for.body
- %tmp1 = load i32, ptr %readarr, align 4
- br label %if.end
-
-if.end: ; preds = %if.then, %for.body
- %t.0 = phi i32 [ %tmp1, %if.then ], [ 0, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %arr, i32 %i.01
- store i32 %t.0, ptr %arrayidx, align 4
- %inc = add nuw nsw i32 %i.01, 1
- %exitcond = icmp eq i32 %inc, 1000
- br i1 %exitcond, label %for.end, label %for.body
-
-for.end: ; preds = %if.end
- ret void
-}
diff --git a/polly/test/GPGPU/invariant-load-escaping-values.ll b/polly/test/GPGPU/invariant-load-escaping-values.ll
deleted file mode 100644
index 54f4b43fdb92..000000000000
--- a/polly/test/GPGPU/invariant-load-escaping-values.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: opt %loadPolly -S -polly-codegen-ppcg \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s
-
-; REQUIRES: pollyacc
-
-; CHECK: store i64 %polly.access.B.load, ptr %invariant.preload.s2a
-; CHECK: %invariant.final_reload = load i64, ptr %invariant.preload.s2a
-
-; Verify that the final reload of an invariant scalar memory access uses the
-; same stack slot into which the invariant memory access was stored
-; originally. Earlier, this was broken as we introduced a new stack slot alongside
-; the preload stack slot, which remained uninitialized and caused our escaping
-; loads to contain garbage.
-
-define i64 @foo(ptr %A, ptr %B) {
-entry:
- br label %loop
-
-loop:
- %indvar = phi i64 [0, %entry], [%indvar.next, %loop]
- %indvar.next = add nsw i64 %indvar, 1
- %idx = getelementptr float, ptr %A, i64 %indvar
- store float 42.0, ptr %idx
- %invariant = load i64, ptr %B
- %cmp = icmp sle i64 %indvar, 1024
- br i1 %cmp, label %loop, label %exit
-
-exit:
- ret i64 %invariant
-}
diff --git a/polly/test/GPGPU/invariant-load-hoisting-of-array.ll b/polly/test/GPGPU/invariant-load-hoisting-of-array.ll
deleted file mode 100644
index 015a3dacbe10..000000000000
--- a/polly/test/GPGPU/invariant-load-hoisting-of-array.ll
+++ /dev/null
@@ -1,101 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-invariant-load-hoisting -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=SCOP
-
-; RUN: opt -opaque-pointers=0 %loadPolly -S -polly-codegen-ppcg \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR
-
-; REQUIRES: pollyacc
-
-; Entry: Contains (%loaded.ptr.preload.s2a = alloca double*) which is
-; | invariant load hoisted `%loaded.ptr`
-; v
-; Run-time check --(failure branch)--> { old code - contains `%loaded.ptr` }
-; |
-; (success branch)
-; |
-; v
-; New Code: Should refer to `%loaded.ptr.preload.s2a`, which is
-; the invariant load hoisted value, NOT `%loaded.ptr`.
-
-; In Polly, we preserve the old code and create a separate branch that executes
-; the GPU code if a run-time check succeeds.
-
-; We need to make sure that in the new branch, we pick up invariant load hoisted
-; values. The old values will belong to the old code branch.
-
-; In this case, we used to try to load the 'original' %loaded.ptr in the
-; 'New Code' branch, which is wrong. Check that this does not happen.
-
-; Check that we have a Scop with an invariant load of the array.
-; SCOP: Function: f
-; SCOP-NEXT: Region: %arrload---%for.exit
-; SCOP-NEXT: Max Loop Depth: 1
-; SCOP-NEXT: Invariant Accesses: {
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: { Stmt_arrload[] -> MemRef_arr_of_ptrs[0] };
-
-
-
-; Check that we have the preloaded array.
-; HOST-IR: entry:
-; HOST-IR-NEXT: %loaded.ptr.preload.s2a = alloca double*
-
-; Check that we store the correct value in the preload.
-; polly.preload.begin: ; preds = %polly.split_new_and_old
-; HOST-IR: %polly.access.arr.of.ptrs = getelementptr double*, double** %arr.of.ptrs, i64 0
-; HOST-IR-NEXT: %polly.access.arr.of.ptrs.load = load double*, double** %polly.access.arr.of.ptrs
-; HOST-IR-NEXT: store double* %polly.access.arr.of.ptrs.load, double** %loaded.ptr.preload.s2a
-
-; Check that we get back data from the kernel.
-; HOST-IR: polly.acc.initialize: ; preds = %polly.start
-; HOST-IR: [[FIRSTINDEX:%.+]] = getelementptr double, double* %polly.access.arr.of.ptrs.load, i64 1
-; HOST-IR: [[BITCASTED:%.+]] = bitcast double* [[FIRSTINDEX]] to i8*
-; HOST-IR: call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_loaded_ptr, i8* [[BITCASTED]], i64 800)
-
-; Check that the kernel launch is generated in the host IR.
-; This declaration would not have been generated unless a kernel launch exists.
-; HOST-IR: declare void @polly_launchKernel(i8*, i32, i32, i32, i32, i32, i8*)
-
-
-; C pseudocode equivalent
-; void f(double **arr_of_ptrs) {
-; double *loaded_ptr = arr_of_ptrs[0];
-; if (false) { return; }
-; else {
-; for(int i = 1; i < 100; i++) {
-; loaded_ptr[i] = 42.0;
-; }
-; }
-; }
-
-
-target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-
-; Function Attrs: nounwind uwtable
-define void @f(double **%arr.of.ptrs) #0 {
-entry:
- br label %arrload
-
-arrload: ; preds = %"7"
- %loaded.ptr = load double*, double** %arr.of.ptrs, align 8
- br i1 false, label %"for.exit", label %"for.preheader"
-
-"for.preheader": ; preds = %"51"
- br label %"for.body"
-
-"for.body": ; preds = %"53", %"53.lr.ph"
- %indvar = phi i64 [ 1, %"for.preheader" ], [ %indvar.next, %"for.body" ]
- %slot = getelementptr double, double* %loaded.ptr, i64 %indvar
- store double 42.0, double* %slot, align 8
-
- %indvar.next = add nuw nsw i64 %indvar, 1
-
- %check = icmp sgt i64 %indvar.next, 100
- br i1 %check, label %"for.exit", label %"for.body"
-
-"for.exit": ; preds = %"52.54_crit_edge", %"51"
- ret void
-}
-
-attributes #0 = { nounwind uwtable }
diff --git a/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll b/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll
deleted file mode 100644
index ad30ef6f9b24..000000000000
--- a/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-invariant-load-hoisting \
-; RUN: -S < %s | \
-; RUN: FileCheck -check-prefix=HOST-IR %s
-
-; RUN: opt %loadPolly -disable-output -polly-acc-dump-kernel-ir \
-; RUN: -polly-codegen-ppcg -polly-scops \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck -check-prefix=KERNEL-IR %s
-
-; REQUIRES: pollyacc
-
-; Verify that invariant loads used in a kernel statement are correctly forwarded
-; as subtree value to the GPU kernel.
-
-; HOST-IR: store float %polly.access.p.load, ptr %invariant.preload.s2a, align 4
-
-; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_2({{.*}}ptr addrspace(1) %MemRef_indvar2f__phi{{.*}})
-; KERNEL-IR: %indvar2f.phiops.reload = load float, ptr %indvar2f.phiops, align 4
-; KERNEL-IR: store float %indvar2f.phiops.reload, ptr addrspace(1) %polly.access.MemRef_A, align 4
-
-; FIXME: store float %indvar2f.phiops.reload, ptr %indvar2f.phiops, align 4
-; For some reason the above instruction is emitted, which stores back to the address it was just loaded from.
-
-define void @foo(ptr %A, ptr %p) {
-entry:
- br label %loop
-
-loop:
- %indvar = phi i64 [0, %entry], [%indvar.next, %loop]
- %indvar.next = add i64 %indvar, 1
- %invariant = load float, ptr %p
- %ptr = getelementptr float, ptr %A, i64 %indvar
- store float 42.0, ptr %ptr
- %cmp = icmp sle i64 %indvar, 1024
- br i1 %cmp, label %loop, label %anotherloop
-
-anotherloop:
- %indvar2 = phi i64 [0, %loop], [%indvar2.next, %anotherloop]
- %indvar2f = phi float [%invariant, %loop], [%indvar2f, %anotherloop]
- %indvar2.next = add i64 %indvar2, 1
- store float %indvar2f, ptr %A
- %cmp2 = icmp sle i64 %indvar2, 1024
- br i1 %cmp2, label %anotherloop, label %end
-
-end:
- ret void
-
-}
diff --git a/polly/test/GPGPU/invariant-load-hoisting-with-variable-bounds.ll b/polly/test/GPGPU/invariant-load-hoisting-with-variable-bounds.ll
deleted file mode 100644
index 7a650eeb22ee..000000000000
--- a/polly/test/GPGPU/invariant-load-hoisting-with-variable-bounds.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; RUN: opt %loadPolly -polly-invariant-load-hoisting -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=SCOP
-
-
-; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR
-
-; REQUIRES: pollyacc
-
-; SCOP: Function: f
-; SCOP-NEXT: Region: %entry.split---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-; SCOP-NEXT: Invariant Accesses: {
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [tmp1, tmp4] -> { Stmt_entry_split[] -> MemRef_begin[0] };
-; SCOP-NEXT: Execution Context: [tmp1, tmp4] -> { : }
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [tmp1, tmp4] -> { Stmt_for_body[i0] -> MemRef_end[0] };
-; SCOP-NEXT: Execution Context: [tmp1, tmp4] -> { : }
-; SCOP-NEXT: }
-
-
-; Check that the kernel launch is generated in the host IR.
-; This declaration would not have been generated unless a kernel launch exists.
-; HOST-IR: declare void @polly_launchKernel(ptr, i32, i32, i32, i32, i32, ptr)
-
-; void f(int *begin, int *end, int *arr) {
-; for (int i = *begin; i < *end; i++) {
-; arr[i] = 0;
-; }
-; }
-;
-
-target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
-
-define void @f(ptr %begin, ptr %end, ptr %arr) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- %tmp1 = load i32, ptr %begin, align 4
- %tmp41 = load i32, ptr %end, align 4
- %cmp2 = icmp slt i32 %tmp1, %tmp41
- br i1 %cmp2, label %for.body.lr.ph, label %for.end
-
-for.body.lr.ph: ; preds = %entry.split
- br label %for.body
-
-for.body: ; preds = %for.body.lr.ph, %for.body
- %i.03 = phi i32 [ %tmp1, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %arr, i32 %i.03
- store i32 0, ptr %arrayidx, align 4
- %inc = add nsw i32 %i.03, 1
- %tmp4 = load i32, ptr %end, align 4
- %cmp = icmp slt i32 %inc, %tmp4
- br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge: ; preds = %for.body
- br label %for.end
-
-for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
- ret void
-}
diff --git a/polly/test/GPGPU/invariant-load-hoisting-with-variable-lower-bound.ll b/polly/test/GPGPU/invariant-load-hoisting-with-variable-lower-bound.ll
deleted file mode 100644
index a637cc44c7a3..000000000000
--- a/polly/test/GPGPU/invariant-load-hoisting-with-variable-lower-bound.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: opt %loadPolly -polly-invariant-load-hoisting -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=SCOP
-
-
-; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR
-
-; REQUIRES: pollyacc
-
-; Check that we detect a scop with invariant accesses.
-; SCOP: Function: f
-; SCOP-NEXT: Region: %entry.split---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-; SCOP-NEXT: Invariant Accesses: {
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [beginval] -> { Stmt_entry_split[] -> MemRef_begin[0] };
-; SCOP-NEXT: Execution Context: [beginval] -> { : }
-; SCOP-NEXT: }
-
-; Check that the kernel launch is generated in the host IR.
-; This declaration would not have been generated unless a kernel launch exists.
-; HOST-IR: declare void @polly_launchKernel(ptr, i32, i32, i32, i32, i32, ptr)
-
-;
-; void f(int *begin, int *arr) {
-; for (int i = *begin; i < 100; i++) {
-; arr[i] = 0;
-; }
-; }
-
-target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
-
-define void @f(ptr %begin, ptr %arr) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- %beginval = load i32, ptr %begin, align 4
- %cmp1 = icmp slt i32 %beginval, 100
- br i1 %cmp1, label %for.body, label %for.end
-
-
-
-for.body: ; preds = %for.body.lr.ph, %for.body
- %ival = phi i32 [ %beginval, %entry.split ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %arr, i32 %ival
- store i32 0, ptr %arrayidx, align 4
- %inc = add nsw i32 %ival, 1
- %cmp = icmp slt i32 %ival, 99
- br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge: ; preds = %for.body
- br label %for.end
-
-for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
- ret void
-}
diff --git a/polly/test/GPGPU/invariant-load-hoisting-with-variable-upper-bound.ll b/polly/test/GPGPU/invariant-load-hoisting-with-variable-upper-bound.ll
deleted file mode 100644
index 3c19a306734a..000000000000
--- a/polly/test/GPGPU/invariant-load-hoisting-with-variable-upper-bound.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt %loadPolly -polly-invariant-load-hoisting -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=SCOP
-; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR
-
-; REQUIRES: pollyacc
-
-; Check that we detect a scop with invariant accesses.
-; SCOP: Function: f
-; SCOP-NEXT: Region: %entry.split---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-; SCOP-NEXT: Invariant Accesses: {
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [tmp2] -> { Stmt_for_body[i0] -> MemRef_idx[0] };
-; SCOP-NEXT: Execution Context: [tmp2] -> { : }
-; SCOP-NEXT: }
-
-; Check that kernel launch is generated in host IR.
-; the declare would not be generated unless a call to a kernel exists.
-; HOST-IR: declare void @polly_launchKernel(ptr, i32, i32, i32, i32, i32, ptr)
-
-; Check if we generate GPU code for a simple loop with a variable upper bound.
-; This always worked, but we have this test to prevent regressions.
-; void f(int *idx, int *arr) {
-; for (int i = 0; i < *idx; i++) {
-; arr[i] = 0;
-; }
-; }
-;
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @f(ptr %idx, ptr %arr) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- %tmp21 = load i32, ptr %idx, align 4
- %cmp2 = icmp sgt i32 %tmp21, 0
- br i1 %cmp2, label %for.body.lr.ph, label %for.end
-
-for.body.lr.ph: ; preds = %entry.split
- br label %for.body
-
-for.body: ; preds = %for.body.lr.ph, %for.body
- %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %arr, i64 %indvars.iv
- store i32 0, ptr %arrayidx, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %tmp2 = load i32, ptr %idx, align 4
- %0 = sext i32 %tmp2 to i64
- %cmp = icmp slt i64 %indvars.iv.next, %0
- br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge: ; preds = %for.body
- br label %for.end
-
-for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
- ret void
-}
diff --git a/polly/test/GPGPU/invariant-load-hoisting.ll b/polly/test/GPGPU/invariant-load-hoisting.ll
deleted file mode 100644
index 5ae1cfae255d..000000000000
--- a/polly/test/GPGPU/invariant-load-hoisting.ll
+++ /dev/null
@@ -1,116 +0,0 @@
-; RUN: opt %loadPolly -polly-invariant-load-hoisting -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=SCOP
-;
-; RUN: opt %loadPolly -polly-scops -S -polly-invariant-load-hoisting \
-; RUN: -polly-codegen-ppcg < %s | FileCheck %s -check-prefix=HOST-IR
-;
-; RUN: opt %loadPolly -polly-invariant-load-hoisting -polly-codegen-ppcg -polly-acc-dump-kernel-ir -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=KERNEL-IR
-;
-; REQUIRES: pollyacc
-;
-; SCOP: Function: f
-; SCOP-NEXT: Region: %entry.split---%for.end26
-; SCOP-NEXT: Max Loop Depth: 3
-; SCOP-NEXT: Invariant Accesses: {
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [n, tmp12] -> { Stmt_for_body6[i0, i1, i2] -> MemRef_invariant[0] };
-; SCOP-NEXT: Execution Context: [n, tmp12] -> { : n > 0 }
-; SCOP-NEXT: }
-; HOST-IR: call void @polly_launchKernel(ptr %[[REGC:[0-9]+]], i32 %{{[0-9]+}}, i32 1, i32 32, i32 1, i32 1, ptr %polly_launch_0_params_i8ptr)
-; HOST-IR-NEXT: call void @polly_freeKernel(ptr %[[REGC]])
-
-; KERNEL-IR: define ptx_kernel void @FUNC_f_SCOP_0_KERNEL_0(ptr addrspace(1) %MemRef_B, ptr addrspace(1) %MemRef_A, i32 %n, i32 %tmp12, i32 %polly.preload.tmp21.merge)
-
-
-; Check that we generate correct GPU code in case of invariant load hoisting.
-;
-;
-; static const int N = 3000;
-;
-; void f(int A[N][N], int *invariant, int B[N][N], int n) {
-; for (int i = 0; i < n; i++) {
-; for (int j = 0; j < n; j++) {
-; for (int k = 0; k < n; k++) {
-;
-; A[*invariant][k] = B[k][k];
-; A[k][*invariant] += B[k][k];
-; }
-; }
-; }
-; }
-;
-
-define void @f(ptr %A, ptr %invariant, ptr %B, i32 %n) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- %cmp6 = icmp sgt i32 %n, 0
- br i1 %cmp6, label %for.cond1.preheader.lr.ph, label %for.end26
-
-for.cond1.preheader.lr.ph: ; preds = %entry.split
- br label %for.cond1.preheader
-
-for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc24
- %i.07 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc25, %for.inc24 ]
- %cmp23 = icmp sgt i32 %n, 0
- br i1 %cmp23, label %for.cond4.preheader.lr.ph, label %for.inc24
-
-for.cond4.preheader.lr.ph: ; preds = %for.cond1.preheader
- br label %for.cond4.preheader
-
-for.cond4.preheader: ; preds = %for.cond4.preheader.lr.ph, %for.inc21
- %j.04 = phi i32 [ 0, %for.cond4.preheader.lr.ph ], [ %inc22, %for.inc21 ]
- %cmp51 = icmp sgt i32 %n, 0
- br i1 %cmp51, label %for.body6.lr.ph, label %for.inc21
-
-for.body6.lr.ph: ; preds = %for.cond4.preheader
- br label %for.body6
-
-for.body6: ; preds = %for.body6.lr.ph, %for.body6
- %k.02 = phi i32 [ 0, %for.body6.lr.ph ], [ %inc, %for.body6 ]
- %idxprom = sext i32 %k.02 to i64
- %idxprom7 = sext i32 %k.02 to i64
- %arrayidx8 = getelementptr inbounds [3000 x i32], ptr %B, i64 %idxprom, i64 %idxprom7
- %tmp9 = load i32, ptr %arrayidx8, align 4
- %tmp12 = load i32, ptr %invariant, align 4
- %idxprom9 = sext i32 %tmp12 to i64
- %idxprom11 = sext i32 %k.02 to i64
- %arrayidx12 = getelementptr inbounds [3000 x i32], ptr %A, i64 %idxprom9, i64 %idxprom11
- store i32 %tmp9, ptr %arrayidx12, align 4
- %idxprom13 = sext i32 %k.02 to i64
- %idxprom15 = sext i32 %k.02 to i64
- %arrayidx16 = getelementptr inbounds [3000 x i32], ptr %B, i64 %idxprom13, i64 %idxprom15
- %tmp17 = load i32, ptr %arrayidx16, align 4
- %idxprom17 = sext i32 %k.02 to i64
- %tmp21 = load i32, ptr %invariant, align 4
- %idxprom19 = sext i32 %tmp21 to i64
- %arrayidx20 = getelementptr inbounds [3000 x i32], ptr %A, i64 %idxprom17, i64 %idxprom19
- %tmp22 = load i32, ptr %arrayidx20, align 4
- %add = add nsw i32 %tmp22, %tmp17
- store i32 %add, ptr %arrayidx20, align 4
- %inc = add nuw nsw i32 %k.02, 1
- %cmp5 = icmp slt i32 %inc, %n
- br i1 %cmp5, label %for.body6, label %for.cond4.for.inc21_crit_edge
-
-for.cond4.for.inc21_crit_edge: ; preds = %for.body6
- br label %for.inc21
-
-for.inc21: ; preds = %for.cond4.for.inc21_crit_edge, %for.cond4.preheader
- %inc22 = add nuw nsw i32 %j.04, 1
- %cmp2 = icmp slt i32 %inc22, %n
- br i1 %cmp2, label %for.cond4.preheader, label %for.cond1.for.inc24_crit_edge
-
-for.cond1.for.inc24_crit_edge: ; preds = %for.inc21
- br label %for.inc24
-
-for.inc24: ; preds = %for.cond1.for.inc24_crit_edge, %for.cond1.preheader
- %inc25 = add nuw nsw i32 %i.07, 1
- %cmp = icmp slt i32 %inc25, %n
- br i1 %cmp, label %for.cond1.preheader, label %for.cond.for.end26_crit_edge
-
-for.cond.for.end26_crit_edge: ; preds = %for.inc24
- br label %for.end26
-
-for.end26: ; preds = %for.cond.for.end26_crit_edge, %entry.split
- ret void
-}
diff --git a/polly/test/GPGPU/invariant-load-of-scalar.ll b/polly/test/GPGPU/invariant-load-of-scalar.ll
deleted file mode 100644
index fbc1d4d7ecee..000000000000
--- a/polly/test/GPGPU/invariant-load-of-scalar.ll
+++ /dev/null
@@ -1,81 +0,0 @@
-; RUN: opt %loadPolly -polly-invariant-load-hoisting -polly-print-scops -disable-output < %s | FileCheck -check-prefix=SCOP %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-invariant-load-hoisting \
-; RUN: -S < %s | \
-; RUN: FileCheck -check-prefix=HOST-IR %s
-
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-invariant-load-hoisting \
-; RUN: -disable-output -polly-acc-dump-kernel-ir < %s | \
-; RUN: FileCheck -check-prefix=KERNEL-IR %s
-
-; REQUIRES: pollyacc
-
-; Check that we offload invariant loads of scalars correctly.
-
-; Check that invariant loads are present.
-; SCOP: Function: checkPrivatization
-; SCOP-NEXT: Region: %entry.split---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-; SCOP-NEXT: Invariant Accesses: {
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [tmp, tmp2] -> { Stmt_entry_split[] -> MemRef_begin[0] };
-; SCOP-NEXT: Execution Context: [tmp, tmp2] -> { : }
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [tmp, tmp2] -> { Stmt_for_body[i0] -> MemRef_end[0] };
-; SCOP-NEXT: Execution Context: [tmp, tmp2] -> { : }
-; SCOP-NEXT: }
-;
-
-; Check that we do not actually allocate arrays for %begin, %end, since they are
-; invariant load hoisted.
-; HOST-IR: %p_dev_array_MemRef_A = call ptr @polly_allocateMemoryForDevice
-; HOST-IR-NOT: call ptr @polly_allocateMemoryForDevice
-
-; Check that we send the invariant loaded scalars as parameters to the
-; kernel function.
-; KERNEL-IR: define ptx_kernel void @FUNC_checkPrivatization_SCOP_0_KERNEL_0
-; KERNEL-IR-SAME: (ptr addrspace(1) %MemRef_A, i32 %tmp,
-; KERNEL-IR-SAME: i32 %tmp2, i32 %polly.access.begin.load)
-
-
-; void checkScalarPointerOffload(int A[], int *begin, int *end) {
-; for(int i = *begin; i < *end; i++) {
-; A[i] = 10;
-; }
-; }
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.12.0"
-
-define void @checkPrivatization(ptr %A, ptr %begin, ptr %end) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- %tmp = load i32, ptr %begin, align 4
- %tmp21 = load i32, ptr %end, align 4
- %cmp3 = icmp slt i32 %tmp, %tmp21
- br i1 %cmp3, label %for.body.lr.ph, label %for.end
-
-for.body.lr.ph: ; preds = %entry.split
- %tmp1 = sext i32 %tmp to i64
- br label %for.body
-
-for.body: ; preds = %for.body.lr.ph, %for.body
- %indvars.iv4 = phi i64 [ %tmp1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv4
- store i32 10, ptr %arrayidx, align 4
- %indvars.iv.next = add i64 %indvars.iv4, 1
- %tmp2 = load i32, ptr %end, align 4
- %tmp3 = sext i32 %tmp2 to i64
- %cmp = icmp slt i64 %indvars.iv.next, %tmp3
- br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge: ; preds = %for.body
- br label %for.end
-
-for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
- ret void
-}
-
diff --git a/polly/test/GPGPU/kernel-params-only-some-arrays.ll b/polly/test/GPGPU/kernel-params-only-some-arrays.ll
deleted file mode 100644
index 87ae470e29bc..000000000000
--- a/polly/test/GPGPU/kernel-params-only-some-arrays.ll
+++ /dev/null
@@ -1,106 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=KERNEL %s
-
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg \
-; RUN: -S < %s | \
-; RUN: FileCheck -check-prefix=IR %s
-
-; REQUIRES: pollyacc
-;
-; void kernel_params_only_some_arrays(float A[], float B[]) {
-; for (long i = 0; i < 32; i++)
-; A[i] += 42;
-;
-; for (long i = 0; i < 32; i++)
-; B[i] += 42;
-; }
-
-; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0'
-; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0"
-; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
-
-; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_B)
-; KERNEL-NEXT: entry:
-; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
-; KERNEL-NEXT: %b0 = zext i32 %0 to i64
-; KERNEL-NEXT: %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-; KERNEL-NEXT: %t0 = zext i32 %1 to i64
-
-; KERNEL: ret void
-; KERNEL-NEXT: }
-
-; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1'
-; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1"
-; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
-
-; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1(i8 addrspace(1)* %MemRef_A)
-; KERNEL-NEXT: entry:
-; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
-; KERNEL-NEXT: %b0 = zext i32 %0 to i64
-; KERNEL-NEXT: %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-; KERNEL-NEXT: %t0 = zext i32 %1 to i64
-
-; KERNEL: ret void
-; KERNEL-NEXT: }
-
-
-; IR: [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_B)
-; IR-NEXT: [[SLOT:%.*]] = getelementptr [1 x i8*], [1 x i8*]* %polly_launch_0_params, i64 0, i64 0
-; IR-NEXT: store i8* [[DEVPTR]], i8** %polly_launch_0_param_0
-; IR-NEXT: [[DATA:%.*]] = bitcast i8** %polly_launch_0_param_0 to i8*
-; IR-NEXT: store i8* [[DATA]], i8** [[SLOT]]
-
-; IR: [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_A)
-; IR-NEXT: [[SLOT:%.*]] = getelementptr [1 x i8*], [1 x i8*]* %polly_launch_1_params, i64 0, i64 0
-; IR-NEXT: store i8* [[DEVPTR]], i8** %polly_launch_1_param_0
-; IR-NEXT: [[DATA:%.*]] = bitcast i8** %polly_launch_1_param_0 to i8*
-; IR-NEXT: store i8* [[DATA]], i8** [[SLOT]]
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @kernel_params_only_some_arrays(float* %A, float* %B) {
-entry:
- br label %for.cond
-
-for.cond: ; preds = %for.inc, %entry
- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
- %exitcond1 = icmp ne i64 %i.0, 32
- br i1 %exitcond1, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds float, float* %A, i64 %i.0
- %tmp = load float, float* %arrayidx, align 4
- %add = fadd float %tmp, 4.200000e+01
- store float %add, float* %arrayidx, align 4
- br label %for.inc
-
-for.inc: ; preds = %for.body
- %inc = add nuw nsw i64 %i.0, 1
- br label %for.cond
-
-for.end: ; preds = %for.cond
- br label %for.cond2
-
-for.cond2: ; preds = %for.inc7, %for.end
- %i1.0 = phi i64 [ 0, %for.end ], [ %inc8, %for.inc7 ]
- %exitcond = icmp ne i64 %i1.0, 32
- br i1 %exitcond, label %for.body4, label %for.end9
-
-for.body4: ; preds = %for.cond2
- %arrayidx5 = getelementptr inbounds float, float* %B, i64 %i1.0
- %tmp2 = load float, float* %arrayidx5, align 4
- %add6 = fadd float %tmp2, 4.200000e+01
- store float %add6, float* %arrayidx5, align 4
- br label %for.inc7
-
-for.inc7: ; preds = %for.body4
- %inc8 = add nuw nsw i64 %i1.0, 1
- br label %for.cond2
-
-for.end9: ; preds = %for.cond2
- ret void
-}
diff --git a/polly/test/GPGPU/kernel-params-scop-parameter.ll b/polly/test/GPGPU/kernel-params-scop-parameter.ll
deleted file mode 100644
index 527492bfd5fb..000000000000
--- a/polly/test/GPGPU/kernel-params-scop-parameter.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=KERNEL-IR %s
-
-; REQUIRES: pollyacc
-
-; void kernel_params_scop_parameter(float A[], long n) {
-; for (long i = 0; i < n; i++)
-; A[i] += 42;
-; }
-
-; KERNEL-IR: define ptx_kernel void @FUNC_kernel_params_scop_parameter_SCOP_0_KERNEL_0(ptr addrspace(1) %MemRef_A, i64 %n)
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @kernel_params_scop_parameter(ptr %A, i64 %n) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb6, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ]
- %tmp = icmp slt i64 %i.0, %n
- br i1 %tmp, label %bb2, label %bb8
-
-bb2: ; preds = %bb1
- %tmp3 = getelementptr inbounds float, ptr %A, i64 %i.0
- %tmp4 = load float, ptr %tmp3, align 4
- %tmp5 = fadd float %tmp4, 4.200000e+01
- store float %tmp5, ptr %tmp3, align 4
- br label %bb6
-
-bb6: ; preds = %bb2
- %tmp7 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb8: ; preds = %bb1
- ret void
-}
diff --git a/polly/test/GPGPU/kernels-names-across-scops-funcs.ll b/polly/test/GPGPU/kernels-names-across-scops-funcs.ll
deleted file mode 100644
index 57fe70ec0d9b..000000000000
--- a/polly/test/GPGPU/kernels-names-across-scops-funcs.ll
+++ /dev/null
@@ -1,124 +0,0 @@
-; RUN: opt %loadPolly -polly-process-unprofitable -polly-codegen-ppcg \
-; RUN: -polly-acc-dump-kernel-ir -disable-output < %s | \
-; RUN: FileCheck -check-prefix=KERNEL %s
-
-; REQUIRES: pollyacc
-
-; KERNEL: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0(ptr addrspace(1) %MemRef_arg1, i32 %arg) #0 {
-; KERNEL: define ptx_kernel void @FUNC_foo_SCOP_1_KERNEL_0(ptr addrspace(1) %MemRef_arg1, i32 %arg) #0 {
-; KERNEL: define ptx_kernel void @FUNC_foo2_SCOP_0_KERNEL_0(ptr addrspace(1) %MemRef_arg1, i32 %arg) #0 {
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-; Function Attrs: nounwind uwtable
-define void @foo(i32 %arg, ptr %arg1) #0 {
-bb:
- br label %bb2
-
-bb2: ; preds = %bb
- %tmp = icmp sgt i32 %arg, 0
- br i1 %tmp, label %bb3, label %bb13
-
-bb3: ; preds = %bb2
- br label %bb4
-
-bb4: ; preds = %bb4, %bb3
- %tmp5 = phi i64 [ 0, %bb3 ], [ %tmp9, %bb4 ]
- %tmp6 = getelementptr inbounds i32, ptr %arg1, i64 %tmp5
- %tmp7 = load i32, ptr %tmp6, align 4, !tbaa !2
- %tmp8 = add nsw i32 %tmp7, 1
- store i32 %tmp8, ptr %tmp6, align 4, !tbaa !2
- %tmp9 = add nuw nsw i64 %tmp5, 1
- %tmp10 = zext i32 %arg to i64
- %tmp11 = icmp ne i64 %tmp9, %tmp10
- br i1 %tmp11, label %bb4, label %bb12
-
-bb12: ; preds = %bb4
- br label %bb13
-
-bb13: ; preds = %bb12, %bb2
- %tmp14 = tail call i64 @clock() #3
- %tmp15 = icmp eq i64 %tmp14, 0
- br i1 %tmp15, label %bb16, label %bb29
-
-bb16: ; preds = %bb13
- %tmp17 = icmp sgt i32 %arg, 0
- br i1 %tmp17, label %bb18, label %bb28
-
-bb18: ; preds = %bb16
- br label %bb19
-
-bb19: ; preds = %bb19, %bb18
- %tmp20 = phi i64 [ 0, %bb18 ], [ %tmp24, %bb19 ]
- %tmp21 = getelementptr inbounds i32, ptr %arg1, i64 %tmp20
- %tmp22 = load i32, ptr %tmp21, align 4, !tbaa !2
- %tmp23 = add nsw i32 %tmp22, 1
- store i32 %tmp23, ptr %tmp21, align 4, !tbaa !2
- %tmp24 = add nuw nsw i64 %tmp20, 1
- %tmp25 = zext i32 %arg to i64
- %tmp26 = icmp ne i64 %tmp24, %tmp25
- br i1 %tmp26, label %bb19, label %bb27
-
-bb27: ; preds = %bb19
- br label %bb28
-
-bb28: ; preds = %bb27, %bb16
- br label %bb29
-
-bb29: ; preds = %bb28, %bb13
- ret void
-}
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1
-
-; Function Attrs: nounwind
-declare i64 @clock() #2
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1
-
-; Function Attrs: nounwind uwtable
-define void @foo2(i32 %arg, ptr %arg1) #0 {
-bb:
- br label %bb2
-
-bb2: ; preds = %bb
- %tmp = icmp sgt i32 %arg, 0
- br i1 %tmp, label %bb3, label %bb13
-
-bb3: ; preds = %bb2
- br label %bb4
-
-bb4: ; preds = %bb4, %bb3
- %tmp5 = phi i64 [ 0, %bb3 ], [ %tmp9, %bb4 ]
- %tmp6 = getelementptr inbounds i32, ptr %arg1, i64 %tmp5
- %tmp7 = load i32, ptr %tmp6, align 4, !tbaa !2
- %tmp8 = add nsw i32 %tmp7, 1
- store i32 %tmp8, ptr %tmp6, align 4, !tbaa !2
- %tmp9 = add nuw nsw i64 %tmp5, 1
- %tmp10 = zext i32 %arg to i64
- %tmp11 = icmp ne i64 %tmp9, %tmp10
- br i1 %tmp11, label %bb4, label %bb12
-
-bb12: ; preds = %bb4
- br label %bb13
-
-bb13: ; preds = %bb12, %bb2
- ret void
-}
-
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { argmemonly nounwind }
-attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{!"clang version 5.0.0"}
-!2 = !{!3, !3, i64 0}
-!3 = !{!"int", !4, i64 0}
-!4 = !{!"omnipotent char", !5, i64 0}
-!5 = !{!"Simple C/C++ TBAA"}
diff --git a/polly/test/GPGPU/libdevice-functions-copied-into-kernel.ll b/polly/test/GPGPU/libdevice-functions-copied-into-kernel.ll
deleted file mode 100644
index 0f8405dad7e8..000000000000
--- a/polly/test/GPGPU/libdevice-functions-copied-into-kernel.ll
+++ /dev/null
@@ -1,89 +0,0 @@
-; RUN: opt %loadPolly -polly-acc-libdevice=%S/Inputs/libdevice-functions-copied-into-kernel_libdevice.ll -polly-print-scops -disable-output < %s | FileCheck %s --check-prefix=SCOP
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir -polly-acc-libdevice=%S/Inputs/libdevice-functions-copied-into-kernel_libdevice.ll -disable-output < %s | FileCheck %s --check-prefix=KERNEL-IR
-; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s \
-; RUN: -polly-acc-libdevice=%S/Inputs/libdevice-functions-copied-into-kernel_libdevice.ll \
-; RUN: | FileCheck %s --check-prefix=HOST-IR
-
-; Test that we do recognise and codegen a kernel that has functions that can
-; be mapped to NVIDIA's libdevice
-
-; REQUIRES: pollyacc
-
-; Check that we model the kernel as a scop.
-; SCOP: Function: f
-; SCOP-NEXT: Region: %entry.split---%for.end
-
-; Check that the intrinsic call is present in the kernel IR.
-; KERNEL-IR: %p_expf = tail call float @__nv_expf(float %A.arr.i.val_p_scalar_)
-; KERNEL-IR: %p_cosf = tail call float @__nv_cosf(float %p_expf)
-; KERNEL-IR: %p_logf = tail call float @__nv_logf(float %p_cosf)
-
-; Powi and exp cannot be lowered directly. Rather, we expect them to be
-; lowered by libdevice.
-; KERNEL-IR: %p_powi = tail call float @__nv_powif(float %p_logf, i32 2)
-; KERNEL-IR: %p_exp = tail call float @__nv_expf(float %p_powi)
-
-; Check that kernel launch is generated in host IR.
-; The declare would not be generated unless a call to a kernel exists.
-; HOST-IR: declare void @polly_launchKernel(ptr, i32, i32, i32, i32, i32, ptr)
-
-
-; void f(float *A, float *B, int N) {
-; for(int i = 0; i < N; i++) {
-; float tmp0 = A[i];
-; float expf = expf(tmp0);
-; cosf = cosf(expf);
-; logf = logf(cosf);
-; powi = powi(logf, 2);
-; exp = exp(powi);
-; B[i] = exp;
-; }
-; }
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @f(ptr %A, ptr %B, i32 %N) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- %cmp1 = icmp sgt i32 %N, 0
- br i1 %cmp1, label %for.body.lr.ph, label %for.end
-
-for.body.lr.ph: ; preds = %entry.split
- br label %for.body
-
-for.body: ; preds = %for.body.lr.ph, %for.body
- %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
- %A.arr.i = getelementptr inbounds float, ptr %A, i64 %indvars.iv
- %A.arr.i.val = load float, ptr %A.arr.i, align 4
- ; Call to intrinsics that should be part of the kernel.
- %expf = tail call float @expf(float %A.arr.i.val)
- %cosf = tail call float @cosf(float %expf)
- %logf = tail call float @logf(float %cosf)
- %powi = tail call float @llvm.powi.f32.i32(float %logf, i32 2)
- %exp = tail call float @llvm.exp.f32(float %powi)
- %B.arr.i = getelementptr inbounds float, ptr %B, i64 %indvars.iv
- store float %exp, ptr %B.arr.i, align 4
-
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %wide.trip.count = zext i32 %N to i64
- %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
- br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge: ; preds = %for.body
- br label %for.end
-
-for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
- ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @expf(float) #0
-declare float @cosf(float) #0
-declare float @logf(float) #0
-declare float @llvm.powi.f32.i32(float, i32) #0
-declare float @llvm.exp.f32(float) #0
-
-attributes #0 = { nounwind readnone }
-
diff --git a/polly/test/GPGPU/live-range-reordering-with-privatization.ll b/polly/test/GPGPU/live-range-reordering-with-privatization.ll
deleted file mode 100644
index 3b047fd557ff..000000000000
--- a/polly/test/GPGPU/live-range-reordering-with-privatization.ll
+++ /dev/null
@@ -1,78 +0,0 @@
- ; RUN: opt %loadPolly -polly-use-llvm-names -polly-scops \
-; RUN: -polly-invariant-load-hoisting -polly-codegen-ppcg \
-; RUN: -polly-acc-dump-code -disable-output \
-; RUN: < %s | FileCheck %s -check-prefix=CODE
-
-; RUN: opt %loadPolly -polly-use-llvm-names -polly-scops \
-; RUN: -polly-invariant-load-hoisting -polly-codegen-ppcg \
-; RUN: -polly-acc-dump-kernel-ir -disable-output \
-; RUN: < %s | FileCheck %s -check-prefix=KERNELIR
-
-; REQUIRES: pollyacc
-
-; void f(const int *end, int *arr, const int *control, const int *readarr) {
-; for (int i = 0; i < *end; i++) {
-; int t = 0;
-; if (*control > 3) {
-; t += readarr[i];
-; }
-; arr[i] = t;
-; }
-; }
-
-; This test case tests the ability to infer that `t` is local to each loop
-; iteration, and can therefore be privatized.
-
-; CODE: # kernel0
-; CODE-NEXT: for (int c0 = 0; c0 <= (tmp - 32 * b0 - 1) / 1048576; c0 += 1)
-; CODE-NEXT: if (tmp >= 32 * b0 + t0 + 1048576 * c0 + 1) {
-; CODE-NEXT: Stmt_for_body_last(32 * b0 + t0 + 1048576 * c0);
-; CODE-NEXT: if (tmp1 >= 4)
-; CODE-NEXT: Stmt_if_then(32 * b0 + t0 + 1048576 * c0);
-; CODE-NEXT: Stmt_if_end(32 * b0 + t0 + 1048576 * c0);
-; CODE-NEXT: }
-
-; KERNELIR: %private_array = alloca i32
-
-target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
-target triple = "i386-apple-macosx10.12.0"
-
-define void @f(ptr %end, ptr %arr, ptr %control, ptr %readarr) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- %tmp3 = load i32, ptr %end, align 4
- %cmp4 = icmp sgt i32 %tmp3, 0
- br i1 %cmp4, label %for.body.lr.ph, label %for.end
-
-for.body.lr.ph: ; preds = %entry.split
- br label %for.body
-
-for.body: ; preds = %for.body.lr.ph, %if.end
- %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
- %tmp1 = load i32, ptr %control, align 4
- %cmp1 = icmp sgt i32 %tmp1, 3
- br i1 %cmp1, label %if.then, label %if.end
-
-if.then: ; preds = %for.body
- %arrayidx = getelementptr inbounds i32, ptr %readarr, i32 %i.05
- %tmp2 = load i32, ptr %arrayidx, align 4
- br label %if.end
-
-if.end: ; preds = %if.then, %for.body
- %t.0 = phi i32 [ %tmp2, %if.then ], [ 0, %for.body ]
- %arrayidx2 = getelementptr inbounds i32, ptr %arr, i32 %i.05
- store i32 %t.0, ptr %arrayidx2, align 4
- %inc = add nuw nsw i32 %i.05, 1
- %tmp = load i32, ptr %end, align 4
- %cmp = icmp slt i32 %inc, %tmp
- br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge: ; preds = %if.end
- br label %for.end
-
-for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
- ret void
-}
-
diff --git a/polly/test/GPGPU/loops-outside-scop.ll b/polly/test/GPGPU/loops-outside-scop.ll
deleted file mode 100644
index 36b3a706338a..000000000000
--- a/polly/test/GPGPU/loops-outside-scop.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=SCOP
-
-; There is no FileCheck because we want to make sure that this doesn't crash.
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-fail-on-verify-module-failure \
-; RUN: -disable-output < %s
-
-; REQUIRES: pollyacc
-
-; Due to the existence of the `fence` call, we can only detect the inner loop
-; and not the outer loop. PPCGCodeGeneration had not implemented this case.
-; The fix was to pull the implementation from `IslNodeBuilder`.
-
-; Make sure that we only capture the inner loop
-; SCOP: Function: f
-; SCOP-NEXT: Region: %for2.body---%for2.body.fence
-; SCOP-NEXT: Max Loop Depth: 1
-
-target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-declare void @fn_to_fence(ptr %val)
-
-; void f(int *arr, bool shouldcont) {
-; for(int i = 0; ; i++) {
-; for(int j = 0; j < 10; j++) {
-; arr[j] = i;
-; }
-; fence(arr);
-; if (!shouldcont) break;
-; }
-; }
-
-
-; Function Attrs: nounwind uwtable
-define void @f(ptr %arr, i1 %shouldcont) #1 {
-entry:
- br label %for.init
-
-for.init: ; preds = %for.end, %entry.split
- %i = phi i32 [ %i.next, %for.end ], [ 0, %entry ]
- br label %for2.body
-
-for2.body: ; preds = %"65", %"64"
- %j = phi i32 [ %j.next, %for2.body ], [ 0, %for.init ]
- %j.sext = sext i32 %j to i64
- %arr.slot = getelementptr i32, ptr %arr, i64 %j.sext
- store i32 %i, ptr %arr.slot, align 4
- %exitcond = icmp eq i32 %j, 10
- %j.next = add i32 %j, 1
- br i1 %exitcond, label %for2.body.fence, label %for2.body
-
-for2.body.fence: ; preds = %"65"
- call void @fn_to_fence(ptr %arr) #2
- br i1 %shouldcont, label %for.end, label %exit
-for.end: ; preds = %"69"
- %i.next = add i32 %i, 1
- br label %for.init
-
-exit: ; preds = %"69"
- ret void
-
-}
-
-
-attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind uwtable }
-attributes #2 = { nounwind }
diff --git a/polly/test/GPGPU/managed-memory-rewrite-alloca.ll b/polly/test/GPGPU/managed-memory-rewrite-alloca.ll
deleted file mode 100644
index 6dbd87db5eb5..000000000000
--- a/polly/test/GPGPU/managed-memory-rewrite-alloca.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s --check-prefix=SCOP
-
-; RUN: opt %loadPolly -S -polly-process-unprofitable -polly-acc-mincompute=0 \
-; RUN: -polly-codegen-ppcg -polly-acc-codegen-managed-memory \
-; RUN: -polly-acc-rewrite-managed-memory -polly-acc-rewrite-allocas < %s | FileCheck %s --check-prefix=HOST-IR
-
-; REQUIRES: pollyacc
-
-; SCOP: Function: f
-; SCOP-NEXT: Region: %for.body---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-; SCOP: i32 MemRef_arr[*];
-
-; Check that the stack allocation of %arr is rewritten away in the host IR.
-; HOST-IR-NOT: %arr = alloca [100 x i32]
-
-source_filename = "test.c"
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.12.0"
-
-
-define void @f() {
-entry:
- %arr = alloca [100 x i32]
- br label %entry.split
-
-entry.split: ; preds = %entry
- br label %for.body
-
-for.body: ; preds = %entry.split, %for.body
- %indvars.iv1 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds [100 x i32], ptr %arr, i64 0, i64 %indvars.iv1
- store i32 42, ptr %arrayidx, align 4, !tbaa !3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 100
- br i1 %exitcond, label %for.end, label %for.body
-
-for.end: ; preds = %for.body
- ret void
-}
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #0
-
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #0
-
-attributes #0 = { argmemonly nounwind }
-
-!llvm.module.flags = !{!0, !1}
-!llvm.ident = !{!2}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{i32 7, !"PIC Level", i32 2}
-!2 = !{!"clang version 6.0.0"}
-!3 = !{!4, !4, i64 0}
-!4 = !{!"int", !5, i64 0}
-!5 = !{!"omnipotent char", !6, i64 0}
-!6 = !{!"Simple C/C++ TBAA"}
diff --git a/polly/test/GPGPU/managed-memory-rewrite-malloc-free-inside-constexpr.ll b/polly/test/GPGPU/managed-memory-rewrite-malloc-free-inside-constexpr.ll
deleted file mode 100644
index 946da40919ec..000000000000
--- a/polly/test/GPGPU/managed-memory-rewrite-malloc-free-inside-constexpr.ll
+++ /dev/null
@@ -1,93 +0,0 @@
-; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s --check-prefix=SCOP
-
-; RUN: opt %loadPolly -polly-codegen-ppcg \
-; RUN: -S -polly-acc-codegen-managed-memory \
-; RUN: -polly-acc-rewrite-managed-memory < %s | FileCheck %s --check-prefix=HOST-IR
-;
-; REQUIRES: pollyacc
-;
-; Check that we can correctly rewrite `malloc` to `polly_mallocManaged`, and
-; `free` to `polly_freeManaged` with the `polly-acc-rewrite-managed-memory`
-; pass, even inside `constantExpr`. This is necessary because a cookie cutter
-; Inst->replaceUsesOfWith(...) call does not actually work, because this does
-; not replace the instruction within a ConstantExpr.
-;
-; #include <memory.h>
-;
-; static const int N = 100;
-; int* f(int *ToFree) {
-; free(ToFree);
-; int *A = (int *)malloc(sizeof(int) * N);
-; for(int i = 0; i < N; i++) {
-; A[i] = 42;
-; }
-; return A;
-;
-; }
-
-; SCOP: Function: f
-; SCOP-NEXT: Region: %for.body---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-
-; SCOP: Arrays {
-; SCOP-NEXT: i32 MemRef_tmp[*]; // Element size 4
-; SCOP-NEXT: }
-
-; // Check that polly_mallocManaged is declared and used correctly.
-; HOST-IR: declare ptr @polly_mallocManaged(i64)
-
-; // Check that polly_freeManaged is declared and used correctly.
-; HOST-IR call void @polly_freeManaged(i8* %toFree)
-; HOST-IR: declare void @polly_freeManaged(ptr)
-
-; // Check that we remove the original malloc,free
-; HOST-IR-NOT: declare ptr @malloc(i64)
-; HOST-IR-NOT: declare void @free(ptr)
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.12.0"
-
-define ptr @f(ptr %toFree) {
-entry:
- ; Free inside bitcast
- call void @free (ptr %toFree)
- br label %entry.split
-
-entry.split: ; preds = %entry
- ; malloc inside bitcast.
- %tmp = call ptr @malloc (i64 400)
- br label %for.body
-
-for.body: ; preds = %entry.split, %for.body
- %indvars.iv1 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %tmp, i64 %indvars.iv1
- store i32 42, ptr %arrayidx, align 4, !tbaa !3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 100
- br i1 %exitcond, label %for.end, label %for.body
-
-for.end: ; preds = %for.body
- ret ptr %tmp
-}
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #0
-
-declare ptr @malloc(i64)
-declare void @free(ptr)
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #0
-
-attributes #0 = { argmemonly nounwind }
-
-!llvm.module.flags = !{!0, !1}
-!llvm.ident = !{!2}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{i32 7, !"PIC Level", i32 2}
-!2 = !{!"clang version 6.0.0"}
-!3 = !{!4, !4, i64 0}
-!4 = !{!"int", !5, i64 0}
-!5 = !{!"omnipotent char", !6, i64 0}
-!6 = !{!"Simple C/C++ TBAA"}
diff --git a/polly/test/GPGPU/managed-memory-rewrite-malloc-free.ll b/polly/test/GPGPU/managed-memory-rewrite-malloc-free.ll
deleted file mode 100644
index 8e456127b127..000000000000
--- a/polly/test/GPGPU/managed-memory-rewrite-malloc-free.ll
+++ /dev/null
@@ -1,91 +0,0 @@
-; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s --check-prefix=SCOP
-
-; RUN: opt %loadPolly -polly-codegen-ppcg \
-; RUN: -S -polly-acc-codegen-managed-memory \
-; RUN: -polly-acc-rewrite-managed-memory < %s | FileCheck %s --check-prefix=HOST-IR
-;
-; REQUIRES: pollyacc
-;
-; Check that we can correctly rewrite `malloc` to `polly_mallocManaged`, and
-; `free` to `polly_freeManaged` with the `polly-acc-rewrite-managed-memory`
-; pass.
-;
-; #include <memory.h>
-;
-; static const int N = 100;
-; int* f(int *ToFree) {
-; free(ToFree);
-; int *A = (int *)malloc(sizeof(int) * N);
-; for(int i = 0; i < N; i++) {
-; A[i] = 42;
-; }
-; return A;
-;
-; }
-
-; SCOP: Function: f
-; SCOP-NEXT: Region: %for.body---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-
-; SCOP: Arrays {
-; SCOP-NEXT: i32 MemRef_call[*]; // Element size 4
-; SCOP-NEXT: }
-
-; // Check that polly_mallocManaged is declared and used correctly.
-; HOST-IR: %call = tail call ptr @polly_mallocManaged(i64 400)
-; HOST-IR: declare ptr @polly_mallocManaged(i64)
-
-; // Check that polly_freeManaged is declared and used correctly.
-; HOST-IR %toFreeBitcast = bitcast i32* %toFree to i8*
-; HOST-IR call void @polly_freeManaged(i8* %toFreeBitcast)
-; HOST-IR: declare void @polly_freeManaged(ptr)
-
-; // Check that we remove the original malloc,free
-; HOST-IR-NOT: declare ptr @malloc(i64)
-; HOST-IR-NOT: declare void @free(ptr)
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.12.0"
-
-define ptr @f(ptr %toFree) {
-entry:
- call void @free(ptr %toFree)
- br label %entry.split
-
-entry.split: ; preds = %entry
- %call = tail call ptr @malloc(i64 400)
- br label %for.body
-
-for.body: ; preds = %entry.split, %for.body
- %indvars.iv1 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %call, i64 %indvars.iv1
- store i32 42, ptr %arrayidx, align 4, !tbaa !3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 100
- br i1 %exitcond, label %for.end, label %for.body
-
-for.end: ; preds = %for.body
- ret ptr %call
-}
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #0
-
-declare ptr @malloc(i64)
-declare void @free(ptr)
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #0
-
-attributes #0 = { argmemonly nounwind }
-
-!llvm.module.flags = !{!0, !1}
-!llvm.ident = !{!2}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{i32 7, !"PIC Level", i32 2}
-!2 = !{!"clang version 6.0.0"}
-!3 = !{!4, !4, i64 0}
-!4 = !{!"int", !5, i64 0}
-!5 = !{!"omnipotent char", !6, i64 0}
-!6 = !{!"Simple C/C++ TBAA"}
diff --git a/polly/test/GPGPU/memory-only-referenced-from-access.ll b/polly/test/GPGPU/memory-only-referenced-from-access.ll
deleted file mode 100644
index b3828950324a..000000000000
--- a/polly/test/GPGPU/memory-only-referenced-from-access.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -polly-invariant-load-hoisting -polly-ignore-aliasing \
-; RUN: -polly-process-unprofitable -polly-ignore-parameter-bounds \
-; RUN: -polly-acc-fail-on-verify-module-failure \
-; RUN: -polly-acc-codegen-managed-memory \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck %s
-
-; REQUIRES: pollyacc
-
-; Verify that we correctly generate a kernel even if certain invariant load
-; hoisted parameters appear only in memory accesses, but not domain elements.
-
-; CHECK: @FUNC_quux_SCOP_0_KERNEL_0(ptr addrspace(1) %MemRef_tmp4, i32 %tmp3, i32 %tmp, i32 %tmp31, i32 %tmp2)
-
-target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-%struct.hoge = type { ptr, i64, i64, [1 x %struct.widget] }
-%struct.widget = type { i64, i64, i64 }
-
-@global = external unnamed_addr global %struct.hoge, align 32
-
-define void @quux(ptr noalias %arg, ptr noalias %arg1) {
-bb:
- %tmp = load i32, ptr %arg, align 4
- %tmp2 = sext i32 %tmp to i64
- %tmp3 = load i32, ptr %arg1, align 4
- %tmp4 = load ptr, ptr @global, align 32
- br label %bb5
-
-bb5: ; preds = %bb5, %bb
- %tmp6 = phi i32 [ %tmp11, %bb5 ], [ 0, %bb ]
- %tmp7 = sext i32 %tmp6 to i64
- %tmp8 = sub nsw i64 %tmp7, %tmp2
- %tmp9 = getelementptr [0 x double], ptr %tmp4, i64 0, i64 %tmp8
- store double undef, ptr %tmp9, align 8
- %tmp10 = icmp eq i32 %tmp6, %tmp3
- %tmp11 = add i32 %tmp6, 1
- br i1 %tmp10, label %bb12, label %bb5
-
-bb12: ; preds = %bb5
- ret void
-}
diff --git a/polly/test/GPGPU/mostly-sequential.ll b/polly/test/GPGPU/mostly-sequential.ll
deleted file mode 100644
index c42c24482a38..000000000000
--- a/polly/test/GPGPU/mostly-sequential.ll
+++ /dev/null
@@ -1,105 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; REQUIRES: pollyacc
-
-; void foo(float A[]) {
-; for (long i = 0; i < 128; i++)
-; A[i] += i;
-;
-; for (long i = 0; i < 128; i++)
-; for (long j = 0; j < 128; j++)
-; A[42] += i + j;
-; }
-
-; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (128) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(4);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: {
-; CODE-NEXT: dim3 k1_dimBlock;
-; CODE-NEXT: dim3 k1_dimGrid;
-; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_A);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (128) * sizeof(float), cudaMemcpyDeviceToHost));
-; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_A));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_bb4(32 * b0 + t0);
-
-; CODE: # kernel1
-; CODE-NEXT: for (int c0 = 0; c0 <= 127; c0 += 1)
-; CODE-NEXT: for (int c1 = 0; c1 <= 127; c1 += 1)
-; CODE-NEXT: Stmt_bb14(c0, c1);
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo(ptr %A) {
-bb:
- br label %bb3
-
-bb3: ; preds = %bb8, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp9, %bb8 ]
- %exitcond2 = icmp ne i64 %i.0, 128
- br i1 %exitcond2, label %bb4, label %bb10
-
-bb4: ; preds = %bb3
- %tmp = sitofp i64 %i.0 to float
- %tmp5 = getelementptr inbounds float, ptr %A, i64 %i.0
- %tmp6 = load float, ptr %tmp5, align 4
- %tmp7 = fadd float %tmp6, %tmp
- store float %tmp7, ptr %tmp5, align 4
- br label %bb8
-
-bb8: ; preds = %bb4
- %tmp9 = add nuw nsw i64 %i.0, 1
- br label %bb3
-
-bb10: ; preds = %bb3
- br label %bb11
-
-bb11: ; preds = %bb23, %bb10
- %i1.0 = phi i64 [ 0, %bb10 ], [ %tmp24, %bb23 ]
- %exitcond1 = icmp ne i64 %i1.0, 128
- br i1 %exitcond1, label %bb12, label %bb25
-
-bb12: ; preds = %bb11
- br label %bb13
-
-bb13: ; preds = %bb20, %bb12
- %j.0 = phi i64 [ 0, %bb12 ], [ %tmp21, %bb20 ]
- %exitcond = icmp ne i64 %j.0, 128
- br i1 %exitcond, label %bb14, label %bb22
-
-bb14: ; preds = %bb13
- %tmp15 = add nuw nsw i64 %i1.0, %j.0
- %tmp16 = sitofp i64 %tmp15 to float
- %tmp17 = getelementptr inbounds float, ptr %A, i64 42
- %tmp18 = load float, ptr %tmp17, align 4
- %tmp19 = fadd float %tmp18, %tmp16
- store float %tmp19, ptr %tmp17, align 4
- br label %bb20
-
-bb20: ; preds = %bb14
- %tmp21 = add nuw nsw i64 %j.0, 1
- br label %bb13
-
-bb22: ; preds = %bb13
- br label %bb23
-
-bb23: ; preds = %bb22
- %tmp24 = add nuw nsw i64 %i1.0, 1
- br label %bb11
-
-bb25: ; preds = %bb11
- ret void
-}
diff --git a/polly/test/GPGPU/non-read-only-scalars.ll b/polly/test/GPGPU/non-read-only-scalars.ll
deleted file mode 100644
index 1ce6e0991ebb..000000000000
--- a/polly/test/GPGPU/non-read-only-scalars.ll
+++ /dev/null
@@ -1,168 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck %s -check-prefix=KERNEL-IR
-;
-; REQUIRES: pollyacc
-;
-; #include <stdio.h>
-;
-; float foo(float A[]) {
-; float sum = 0;
-;
-; for (long i = 0; i < 32; i++)
-; A[i] = i;
-;
-; for (long i = 0; i < 32; i++)
-; A[i] += i;
-;
-; for (long i = 0; i < 32; i++)
-; sum += A[i];
-;
-; return sum;
-; }
-;
-; int main() {
-; float A[32];
-; float sum = foo(A);
-; printf("%f\n", sum);
-; }
-
-; CODE: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(1);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: {
-; CODE-NEXT: dim3 k1_dimBlock;
-; CODE-NEXT: dim3 k1_dimGrid;
-; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_sum_0__phi);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: {
-; CODE-NEXT: dim3 k2_dimBlock;
-; CODE-NEXT: dim3 k2_dimGrid;
-; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_sum_0__phi, dev_MemRef_sum_0);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (32) * sizeof(float), cudaMemcpyDeviceToHost));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(&MemRef_sum_0, dev_MemRef_sum_0, sizeof(float), cudaMemcpyDeviceToHost));
-; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_A));
-; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_sum_0__phi));
-; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_sum_0));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: {
-; CODE-NEXT: Stmt_bb4(t0);
-; CODE-NEXT: Stmt_bb10(t0);
-; CODE-NEXT: }
-
-; CODE: # kernel1
-; CODE-NEXT: Stmt_bb17();
-
-; CODE: # kernel2
-; TODO-NEXT: {
-; TODO-NEXT: read();
-; TODO-NEXT: for (int c0 = 0; c0 <= 32; c0 += 1) {
-; TODO-NEXT: Stmt_bb18(c0);
-; TODO-NEXT: if (c0 <= 31)
-; TODO-NEXT: Stmt_bb20(c0);
-; TODO-NEXT: }
-; TODO-NEXT: write();
-; TODO-NEXT: }
-
-
-; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_1(ptr addrspace(1) %MemRef_sum_0__phi)
-; KERNEL-IR: store float 0.000000e+00, ptr %sum.0.phiops
-; KERNEL-IR: [[REGA:%.+]] = addrspacecast ptr addrspace(1) %MemRef_sum_0__phi to ptr
-; KERNEL-IR: [[REGB:%.+]] = load float, ptr %sum.0.phiops
-; KERNEL-IR: store float [[REGB]], ptr [[REGA]]
-
-; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_2(ptr addrspace(1) %MemRef_A, ptr addrspace(1) %MemRef_sum_0__phi, ptr addrspace(1) %MemRef_sum_0)
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1
-
-define float @foo(ptr %A) {
-bb:
- br label %bb3
-
-bb3: ; preds = %bb6, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ]
- %exitcond2 = icmp ne i64 %i.0, 32
- br i1 %exitcond2, label %bb4, label %bb8
-
-bb4: ; preds = %bb3
- %tmp = sitofp i64 %i.0 to float
- %tmp5 = getelementptr inbounds float, ptr %A, i64 %i.0
- store float %tmp, ptr %tmp5, align 4
- br label %bb6
-
-bb6: ; preds = %bb4
- %tmp7 = add nuw nsw i64 %i.0, 1
- br label %bb3
-
-bb8: ; preds = %bb3
- br label %bb9
-
-bb9: ; preds = %bb15, %bb8
- %i1.0 = phi i64 [ 0, %bb8 ], [ %tmp16, %bb15 ]
- %exitcond1 = icmp ne i64 %i1.0, 32
- br i1 %exitcond1, label %bb10, label %bb17
-
-bb10: ; preds = %bb9
- %tmp11 = sitofp i64 %i1.0 to float
- %tmp12 = getelementptr inbounds float, ptr %A, i64 %i1.0
- %tmp13 = load float, ptr %tmp12, align 4
- %tmp14 = fadd float %tmp13, %tmp11
- store float %tmp14, ptr %tmp12, align 4
- br label %bb15
-
-bb15: ; preds = %bb10
- %tmp16 = add nuw nsw i64 %i1.0, 1
- br label %bb9
-
-bb17: ; preds = %bb9
- br label %bb18
-
-bb18: ; preds = %bb20, %bb17
- %sum.0 = phi float [ 0.000000e+00, %bb17 ], [ %tmp23, %bb20 ]
- %i2.0 = phi i64 [ 0, %bb17 ], [ %tmp24, %bb20 ]
- %exitcond = icmp ne i64 %i2.0, 32
- br i1 %exitcond, label %bb19, label %bb25
-
-bb19: ; preds = %bb18
- br label %bb20
-
-bb20: ; preds = %bb19
- %tmp21 = getelementptr inbounds float, ptr %A, i64 %i2.0
- %tmp22 = load float, ptr %tmp21, align 4
- %tmp23 = fadd float %sum.0, %tmp22
- %tmp24 = add nuw nsw i64 %i2.0, 1
- br label %bb18
-
-bb25: ; preds = %bb18
- %sum.0.lcssa = phi float [ %sum.0, %bb18 ]
- ret float %sum.0.lcssa
-}
-
-define i32 @main() {
-bb:
- %A = alloca [32 x float], align 16
- %tmp1 = call float @foo(ptr %A)
- %tmp2 = fpext float %tmp1 to double
- %tmp3 = call i32 (ptr, ...) @printf(ptr @.str, double %tmp2) #2
- ret i32 0
-}
-
-declare i32 @printf(ptr, ...) #1
-
diff --git a/polly/test/GPGPU/non-zero-array-offset.ll b/polly/test/GPGPU/non-zero-array-offset.ll
deleted file mode 100644
index f18f6828a47f..000000000000
--- a/polly/test/GPGPU/non-zero-array-offset.ll
+++ /dev/null
@@ -1,116 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
-; RUN: FileCheck %s -check-prefix=IR
-;
-; REQUIRES: pollyacc
-
-; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (16) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (8) * sizeof(float), cudaMemcpyHostToDevice));
-
-; CODE: dim3 k0_dimBlock(8);
-; CODE-NEXT: dim3 k0_dimGrid(1);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_B);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: {
-; CODE-NEXT: dim3 k1_dimBlock(8);
-; CODE-NEXT: dim3 k1_dimGrid(1);
-; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_A);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_B, dev_MemRef_B, (16) * sizeof(float), cudaMemcpyDeviceToHost));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (8) * sizeof(float), cudaMemcpyDeviceToHost));
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_bb3(t0);
-
-; CODE: # kernel1
-; CODE-NEXT: Stmt_bb11(t0);
-
-; IR: %p_dev_array_MemRef_B = call ptr @polly_allocateMemoryForDevice(i64 32)
-; IR-NEXT: %p_dev_array_MemRef_A = call ptr @polly_allocateMemoryForDevice(i64 32)
-; IR-NEXT: [[REG0:%.+]] = getelementptr float, ptr %B, i64 8
-; IR-NEXT: call void @polly_copyFromHostToDevice(ptr [[REG0]], ptr %p_dev_array_MemRef_B, i64 32)
-
-; IR: [[REGA:%.+]] = call ptr @polly_getDevicePtr(ptr %p_dev_array_MemRef_B)
-; IR-NEXT: [[REGC:%.+]] = getelementptr float, ptr [[REGA]], i64 -8
-
-; void foo(float A[], float B[]) {
-; for (long i = 0; i < 8; i++)
-; B[i + 8] *= 4;
-;
-; for (long i = 0; i < 8; i++)
-; A[i] *= 12;
-; }
-;
-; #ifdef OUTPUT
-; int main() {
-; float A[16];
-;
-; for (long i = 0; i < 16; i++) {
-; __sync_synchronize();
-; A[i] = i;
-; }
-;
-; foo(A, A);
-;
-; float sum = 0;
-; for (long i = 0; i < 16; i++) {
-; __sync_synchronize();
-; sum += A[i];
-; }
-;
-; printf("%f\n", sum);
-; }
-; #endif
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo(ptr %A, ptr %B) {
-bb:
- br label %bb2
-
-bb2: ; preds = %bb7, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ]
- %exitcond1 = icmp ne i64 %i.0, 8
- br i1 %exitcond1, label %bb3, label %bb9
-
-bb3: ; preds = %bb2
- %tmp = add nuw nsw i64 %i.0, 8
- %tmp4 = getelementptr inbounds float, ptr %B, i64 %tmp
- %tmp5 = load float, ptr %tmp4, align 4
- %tmp6 = fmul float %tmp5, 4.000000e+00
- store float %tmp6, ptr %tmp4, align 4
- br label %bb7
-
-bb7: ; preds = %bb3
- %tmp8 = add nuw nsw i64 %i.0, 1
- br label %bb2
-
-bb9: ; preds = %bb2
- br label %bb10
-
-bb10: ; preds = %bb15, %bb9
- %i1.0 = phi i64 [ 0, %bb9 ], [ %tmp16, %bb15 ]
- %exitcond = icmp ne i64 %i1.0, 8
- br i1 %exitcond, label %bb11, label %bb17
-
-bb11: ; preds = %bb10
- %tmp12 = getelementptr inbounds float, ptr %A, i64 %i1.0
- %tmp13 = load float, ptr %tmp12, align 4
- %tmp14 = fmul float %tmp13, 1.200000e+01
- store float %tmp14, ptr %tmp12, align 4
- br label %bb15
-
-bb15: ; preds = %bb11
- %tmp16 = add nuw nsw i64 %i1.0, 1
- br label %bb10
-
-bb17: ; preds = %bb10
- ret void
-}
diff --git a/polly/test/GPGPU/only-part-of-array-modified.ll b/polly/test/GPGPU/only-part-of-array-modified.ll
deleted file mode 100644
index abc380badfb6..000000000000
--- a/polly/test/GPGPU/only-part-of-array-modified.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-;
-; REQUIRES: pollyacc
-;
-; void foo(float A[], float B[]) {
-; for (long i = 0; i < 1024; i++)
-; A[2 * i] = B[i];
-; }
-
-; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (1024) * sizeof(i32), cudaMemcpyHostToDevice));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (2047) * sizeof(i32), cudaMemcpyHostToDevice));
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo(ptr %A, ptr %B) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb8, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp9, %bb8 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb10
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds float, ptr %B, i64 %i.0
- %tmp4 = load i32, ptr %tmp, align 4
- %tmp5 = shl nsw i64 %i.0, 1
- %tmp6 = getelementptr inbounds float, ptr %A, i64 %tmp5
- store i32 %tmp4, ptr %tmp6, align 4
- br label %bb8
-
-bb8: ; preds = %bb2
- %tmp9 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb10: ; preds = %bb1
- ret void
-}
diff --git a/polly/test/GPGPU/parametric-loop-bound.ll b/polly/test/GPGPU/parametric-loop-bound.ll
deleted file mode 100644
index e436bd663a4a..000000000000
--- a/polly/test/GPGPU/parametric-loop-bound.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg \
-; RUN: -S < %s | \
-; RUN: FileCheck -check-prefix=IR %s
-
-; REQUIRES: pollyacc
-
-; void foo(long A[], long n) {
-; for (long i = 0; i < n; i++)
-; A[i] += 100;
-; }
-
-; CODE: if (n >= 1) {
-; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (n) * sizeof(i64), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(n >= 1048545 ? 32768 : (n + 31) / 32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, n);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (n) * sizeof(i64), cudaMemcpyDeviceToHost));
-; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_A));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: for (int c0 = 0; c0 <= (n - 32 * b0 - 1) / 1048576; c0 += 1)
-; CODE-NEXT: if (n >= 32 * b0 + t0 + 1048576 * c0 + 1)
-; CODE-NEXT: Stmt_bb2(32 * b0 + t0 + 1048576 * c0);
-
-; IR: store i64 %n, ptr %polly_launch_0_param_1
-; IR-NEXT: [[REGA:%.+]] = getelementptr [2 x ptr], ptr %polly_launch_0_params, i64 0, i64 1
-; IR-NEXT: store ptr %polly_launch_0_param_1, ptr [[REGA]]
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo(ptr %A, i64 %n) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb6, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ]
- %tmp = icmp slt i64 %i.0, %n
- br i1 %tmp, label %bb2, label %bb8
-
-bb2: ; preds = %bb1
- %tmp3 = getelementptr inbounds i64, ptr %A, i64 %i.0
- %tmp4 = load i64, ptr %tmp3, align 8
- %tmp5 = add nsw i64 %tmp4, 100
- store i64 %tmp5, ptr %tmp3, align 8
- br label %bb6
-
-bb6: ; preds = %bb2
- %tmp7 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb8: ; preds = %bb1
- ret void
-}
diff --git a/polly/test/GPGPU/partial_writes.ll b/polly/test/GPGPU/partial_writes.ll
deleted file mode 100644
index c3df624df7ac..000000000000
--- a/polly/test/GPGPU/partial_writes.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: opt %loadPolly -polly-import-jscop -polly-codegen-ppcg -polly-stmt-granularity=bb -S < %s \
-; RUN: | FileCheck %s
-
-; REQUIRES: pollyacc
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; CHECK: polly_launchKernel
-
-; Function Attrs: nounwind uwtable
-define void @partial_writes() {
-bb:
- %tmp = tail call ptr @wibble() #2
- br label %bb2
-
-bb2: ; preds = %bb11, %bb
- %tmp3 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
- %tmp4 = getelementptr inbounds [1200 x double], ptr %tmp, i64 0, i64 %tmp3
- %tmp5 = load double, ptr %tmp4, align 8, !tbaa !1
- br label %bb6
-
-bb6: ; preds = %bb6, %bb2
- %tmp7 = phi double [ undef, %bb2 ], [ undef, %bb6 ]
- %tmp8 = phi i64 [ 0, %bb2 ], [ %tmp9, %bb6 ]
- store double undef, ptr %tmp4, align 8, !tbaa !1
- %tmp9 = add nuw nsw i64 %tmp8, 1
- %tmp10 = icmp eq i64 %tmp9, 900
- br i1 %tmp10, label %bb11, label %bb6
-
-bb11: ; preds = %bb6
- %tmp12 = add nuw nsw i64 %tmp3, 1
- %tmp13 = icmp eq i64 %tmp12, 1200
- br i1 %tmp13, label %bb14, label %bb2
-
-bb14: ; preds = %bb11
- ret void
-}
-
-declare ptr @wibble()
-
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 6.0.0 (trunk 309912) (llvm/trunk 309933)"}
-!1 = !{!2, !2, i64 0}
-!2 = !{!"double", !3, i64 0}
-!3 = !{!"omnipotent char", !4, i64 0}
-!4 = !{!"Simple C/C++ TBAA"}
diff --git a/polly/test/GPGPU/partial_writes___%bb2---%bb14.jscop b/polly/test/GPGPU/partial_writes___%bb2---%bb14.jscop
deleted file mode 100644
index d5b537ee1f05..000000000000
--- a/polly/test/GPGPU/partial_writes___%bb2---%bb14.jscop
+++ /dev/null
@@ -1,47 +0,0 @@
-{
- "arrays" : [
- {
- "name" : "MemRef_tmp",
- "sizes" : [ "*" ],
- "type" : "double"
- }
- ],
- "context" : "{ : }",
- "name" : "%bb2---%bb14",
- "statements" : [
- {
- "accesses" : [
- {
- "kind" : "read",
- "relation" : "{ Stmt_bb2[i0] -> MemRef_tmp[i0] }"
- },
- {
- "kind" : "write",
- "relation" : "{ Stmt_bb2[i0] -> MemRef_tmp[i0] }"
- }
- ],
- "domain" : "{ Stmt_bb2[i0] : 0 <= i0 <= 1199 }",
- "name" : "Stmt_bb2",
- "schedule" : "{ Stmt_bb2[i0] -> [i0, 0, 0] }"
- },
- {
- "accesses" : [
- {
- "kind" : "write",
- "relation" : "{ Stmt_bb6[i0, i1] -> MemRef_tmp[i0] : i1 <= 898 }"
- },
- {
- "kind" : "read",
- "relation" : "{ Stmt_bb6[i0, i1] -> MemRef_tmp[i0] }"
- },
- {
- "kind" : "write",
- "relation" : "{ Stmt_bb6[i0, i1] -> MemRef_tmp[i0] }"
- }
- ],
- "domain" : "{ Stmt_bb6[i0, i1] : 0 <= i0 <= 1199 and 0 <= i1 <= 899 }",
- "name" : "Stmt_bb6",
- "schedule" : "{ Stmt_bb6[i0, i1] -> [i0, 1, i1] }"
- }
- ]
-}
diff --git a/polly/test/GPGPU/phi-nodes-in-kernel.ll b/polly/test/GPGPU/phi-nodes-in-kernel.ll
deleted file mode 100644
index acb1f2c4e0e2..000000000000
--- a/polly/test/GPGPU/phi-nodes-in-kernel.ll
+++ /dev/null
@@ -1,86 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg -S < %s | \
-; RUN: FileCheck %s -check-prefix=IR
-
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck %s -check-prefix=KERNEL-IR
-
-; REQUIRES: pollyacc
-
-; Approximate C source:
-; void kernel_dynprog(int c[50]) {
-; int iter = 0;
-; int outl = 0;
-;
-; while(1) {
-; for(int indvar = 1 ; indvar <= 49; indvar++) {
-; c[indvar] = undef;
-; }
-; add78 = c[49] + outl;
-; inc80 = iter + 1;
-;
-; if (true) break;
-;
-; outl = add78;
-; iter = inc80;
-; }
-;}
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; CODE: cudaCheckReturn(cudaMalloc((void **) &dev_MemRef_c, (50) * sizeof(i32)));
-
-; CODE: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(2);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_c);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_c, dev_MemRef_c, (50) * sizeof(i32), cudaMemcpyDeviceToHost));
-; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_c));
-
-; CODE: # kernel0
-; CODE-NEXT: if (32 * b0 + t0 <= 48)
-; CODE-NEXT: Stmt_for_body17(0, 32 * b0 + t0);
-
-; IR-LABEL: call void @polly_freeKernel
-; IR: [[REGC:%.+]] = bitcast i32* %{{[0-9]+}} to i8*
-; IR-NEXT: call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_c, i8* [[REGC]], i64 196)
-
-; KERNEL-IR: define ptx_kernel void @FUNC_kernel_dynprog_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_c) #0 {
-; KERNEL-IR: %polly.access.MemRef_c = getelementptr i32, i32 addrspace(1)* %polly.access.cast.MemRef_c, i64 %9
-; KERNEL-IR-NEXT: store i32 422, i32 addrspace(1)* %polly.access.MemRef_c, align 4
-
-define void @kernel_dynprog([50 x i32]* %c) {
-entry:
- %arrayidx77 = getelementptr inbounds [50 x i32], [50 x i32]* %c, i64 0, i64 49
- br label %for.cond1.preheader
-
-for.cond1.preheader: ; preds = %for.cond15.for.cond12.loopexit_crit_edge, %entry
- %out_l.055 = phi i32 [ 0, %entry ], [ %add78, %for.cond15.for.cond12.loopexit_crit_edge ]
- %iter.054 = phi i32 [ 0, %entry ], [ %inc80, %for.cond15.for.cond12.loopexit_crit_edge ]
- br label %for.body17
-
-for.cond15.for.cond12.loopexit_crit_edge: ; preds = %for.body17
- %tmp = load i32, i32* %arrayidx77, align 4
- %add78 = add nsw i32 %tmp, %out_l.055
- %inc80 = add nuw nsw i32 %iter.054, 1
- br i1 false, label %for.cond1.preheader, label %for.end81
-
-for.body17: ; preds = %for.body17, %for.cond1.preheader
- %indvars.iv71 = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next72, %for.body17 ]
- %arrayidx69 = getelementptr inbounds [50 x i32], [50 x i32]* %c, i64 0, i64 %indvars.iv71
- store i32 422, i32* %arrayidx69, align 4
- %indvars.iv.next72 = add nuw nsw i64 %indvars.iv71, 1
- %lftr.wideiv74 = trunc i64 %indvars.iv.next72 to i32
- %exitcond75 = icmp ne i32 %lftr.wideiv74, 50
- br i1 %exitcond75, label %for.body17, label %for.cond15.for.cond12.loopexit_crit_edge
-
-for.end81: ; preds = %for.cond15.for.cond12.loopexit_crit_edge
- ret void
-}
diff --git a/polly/test/GPGPU/private-memory.ll b/polly/test/GPGPU/private-memory.ll
deleted file mode 100644
index d4ba9fa19b39..000000000000
--- a/polly/test/GPGPU/private-memory.ll
+++ /dev/null
@@ -1,82 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -polly-acc-use-private \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg \
-; RUN: -polly-acc-use-private \
-; RUN: -disable-output -polly-acc-dump-kernel-ir < %s | \
-; RUN: FileCheck -check-prefix=KERNEL %s
-
-; REQUIRES: pollyacc
-
-; void add(float *A) {
-; for (long i = 0; i < 32; i++)
-; for (long j = 0; j < 10; j++)
-; A[i] += 1;
-; }
-
-; CODE: # kernel0
-; CODE: {
-; CODE: read(t0);
-; CODE: for (int c3 = 0; c3 <= 9; c3 += 1)
-; CODE: Stmt_bb5(t0, c3);
-; CODE: write(t0);
-; CODE: }
-
-; KERNEL: %private_array = alloca [1 x float]
-
-; KERNEL: %polly.access.cast.private_array = bitcast [1 x float]* %private_array to float*
-; KERNEL-NEXT: %polly.access.private_array = getelementptr float, float* %polly.access.cast.private_array, i64 0
-; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
-; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %t0
-; KERNEL-NEXT: %shared.read = load float, float addrspace(1)* %polly.access.MemRef_A
-; KERNEL-NEXT: store float %shared.read, float* %polly.access.private_array
-
-; KERNEL: %polly.access.cast.private_array5 = bitcast [1 x float]* %private_array to float*
-; KERNEL-NEXT: %polly.access.private_array6 = getelementptr float, float* %polly.access.cast.private_array5, i64 0
-; KERNEL-NEXT: %polly.access.cast.MemRef_A7 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
-; KERNEL-NEXT: %polly.access.MemRef_A8 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A7, i64 %t0
-; KERNEL-NEXT: %shared.write = load float, float* %polly.access.private_array6
-; KERNEL-NEXT: store float %shared.write, float addrspace(1)* %polly.access.MemRef_A8
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @add(float* %A) {
-bb:
- br label %bb2
-
-bb2: ; preds = %bb11, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
- %exitcond1 = icmp ne i64 %i.0, 32
- br i1 %exitcond1, label %bb3, label %bb13
-
-bb3: ; preds = %bb2
- br label %bb4
-
-bb4: ; preds = %bb8, %bb3
- %j.0 = phi i64 [ 0, %bb3 ], [ %tmp9, %bb8 ]
- %exitcond = icmp ne i64 %j.0, 10
- br i1 %exitcond, label %bb5, label %bb10
-
-bb5: ; preds = %bb4
- %tmp = getelementptr inbounds float, float* %A, i64 %i.0
- %tmp6 = load float, float* %tmp, align 4
- %tmp7 = fadd float %tmp6, 1.000000e+00
- store float %tmp7, float* %tmp, align 4
- br label %bb8
-
-bb8: ; preds = %bb5
- %tmp9 = add nuw nsw i64 %j.0, 1
- br label %bb4
-
-bb10: ; preds = %bb4
- br label %bb11
-
-bb11: ; preds = %bb10
- %tmp12 = add nuw nsw i64 %i.0, 1
- br label %bb2
-
-bb13: ; preds = %bb2
- ret void
-}
diff --git a/polly/test/GPGPU/privatization-simple.ll b/polly/test/GPGPU/privatization-simple.ll
deleted file mode 100644
index c715b8e77b67..000000000000
--- a/polly/test/GPGPU/privatization-simple.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=SCOP
-; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s -check-prefix=HOST-IR
-
-; REQUIRES: pollyacc
-
-; SCOP: Function: f
-; SCOP-NEXT: Region: %for.body---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-
-; Check that kernel launch is generated in host IR.
-; the declare would not be generated unless a call to a kernel exists.
-; HOST-IR: declare void @polly_launchKernel(ptr, i32, i32, i32, i32, i32, ptr)
-
-; void f(int A[], int B[], int control, int C[]) {
-; int x;
-; #pragma scop
-; for(int i = 0; i < 1000; i ++) {
-; x = 0;
-; if(control) x = C[i];
-; B[i] = x * A[i];
-;
-; }
-; #pragma endscop
-; }
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @f(ptr %A, ptr %B, i32 %control, ptr %C) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- br label %for.body
-
-for.body: ; preds = %entry.split, %if.end
- %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %if.end ]
- %tobool = icmp eq i32 %control, 0
- br i1 %tobool, label %if.end, label %if.then
-
-if.then: ; preds = %for.body
- %arrayidx = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
- %tmp4 = load i32, ptr %arrayidx, align 4
- br label %if.end
-
-if.end: ; preds = %for.body, %if.then
- %x.0 = phi i32 [ %tmp4, %if.then ], [ 0, %for.body ]
- %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
- %tmp8 = load i32, ptr %arrayidx2, align 4
- %mul = mul nsw i32 %tmp8, %x.0
- %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
- store i32 %mul, ptr %arrayidx4, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp ne i64 %indvars.iv.next, 1000
- br i1 %exitcond, label %for.body, label %for.end
-
-for.end: ; preds = %if.end
- ret void
-}
diff --git a/polly/test/GPGPU/privatization.ll b/polly/test/GPGPU/privatization.ll
deleted file mode 100644
index fbb291575146..000000000000
--- a/polly/test/GPGPU/privatization.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=SCOP
-; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s -check-prefix=HOST-IR
-
-; REQUIRES: pollyacc
-
-; SCOP: Function: checkPrivatization
-; SCOP-NEXT: Region: %for.body---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-
-
-; Check that kernel launch is generated in host IR.
-; the declare would not be generated unless a call to a kernel exists.
-; HOST-IR: declare void @polly_launchKernel(ptr, i32, i32, i32, i32, i32, ptr)
-
-;
-;
-; void checkPrivatization(int A[], int B[], int C[], int control) {
-; int x;
-; #pragma scop
-; for (int i = 0; i < 1000; i++) {
-; x = 0;
-; if (control)
-; x += C[i];
-;
-; B[i] = x * A[i];
-; }
-; #pragma endscop
-; }
-;
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @checkPrivatization(ptr %A, ptr %B, ptr %C, i32 %control) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- br label %for.body
-
-for.body: ; preds = %entry.split, %if.end
- %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %if.end ]
- %tobool = icmp eq i32 %control, 0
- br i1 %tobool, label %if.end, label %if.then
-
-if.then: ; preds = %for.body
- %arrayidx = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
- %tmp4 = load i32, ptr %arrayidx, align 4
- br label %if.end
-
-if.end: ; preds = %for.body, %if.then
- %x.0 = phi i32 [ %tmp4, %if.then ], [ 0, %for.body ]
- %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
- %tmp9 = load i32, ptr %arrayidx2, align 4
- %mul = mul nsw i32 %tmp9, %x.0
- %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
- store i32 %mul, ptr %arrayidx4, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp ne i64 %indvars.iv.next, 1000
- br i1 %exitcond, label %for.body, label %for.end
-
-for.end: ; preds = %if.end
- ret void
-}
diff --git a/polly/test/GPGPU/region-stmt.ll b/polly/test/GPGPU/region-stmt.ll
deleted file mode 100644
index 8e392fb30062..000000000000
--- a/polly/test/GPGPU/region-stmt.ll
+++ /dev/null
@@ -1,81 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
-; RUN: FileCheck %s -check-prefix=IR
-
-; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (128) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (128) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(4);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_B);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_B, dev_MemRef_B, (128) * sizeof(float), cudaMemcpyDeviceToHost));
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_for_body__TO__if_end(32 * b0 + t0);
-
-; IR: @polly_initContext
-
-; KERNEL-IR: kernel_0
-
-; REQUIRES: pollyacc
-
-; void foo(float A[], float B[]) {
-; for (long i = 0; i < 128; i++)
-; if (A[i] == 42)
-; B[i] += 2 * i;
-; else
-; B[i] += 4 * i;
-; }
-;
-source_filename = "/tmp/test.c"
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo(ptr %A, ptr %B) {
-entry:
- br label %for.cond
-
-for.cond: ; preds = %for.inc, %entry
- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
- %exitcond = icmp ne i64 %i.0, 128
- br i1 %exitcond, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds float, ptr %A, i64 %i.0
- %tmp = load float, ptr %arrayidx, align 4
- %cmp1 = fcmp oeq float %tmp, 4.200000e+01
- br i1 %cmp1, label %if.then, label %if.else
-
-if.then: ; preds = %for.body
- %mul = shl nsw i64 %i.0, 1
- %conv = sitofp i64 %mul to float
- %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %i.0
- %tmp1 = load float, ptr %arrayidx2, align 4
- %add = fadd float %tmp1, %conv
- store float %add, ptr %arrayidx2, align 4
- br label %if.end
-
-if.else: ; preds = %for.body
- %mul3 = shl nsw i64 %i.0, 2
- %conv4 = sitofp i64 %mul3 to float
- %arrayidx5 = getelementptr inbounds float, ptr %B, i64 %i.0
- %tmp2 = load float, ptr %arrayidx5, align 4
- %add6 = fadd float %tmp2, %conv4
- store float %add6, ptr %arrayidx5, align 4
- br label %if.end
-
-if.end: ; preds = %if.else, %if.then
- br label %for.inc
-
-for.inc: ; preds = %if.end
- %inc = add nuw nsw i64 %i.0, 1
- br label %for.cond
-
-for.end: ; preds = %for.cond
- ret void
-}
diff --git a/polly/test/GPGPU/remove-dead-instructions-in-stmt-2.ll b/polly/test/GPGPU/remove-dead-instructions-in-stmt-2.ll
deleted file mode 100644
index 326236cf92fd..000000000000
--- a/polly/test/GPGPU/remove-dead-instructions-in-stmt-2.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck %s -check-prefix=KERNEL-IR
-
-; REQUIRES: pollyacc
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; KERNEL-IR: store i32 0, ptr addrspace(1) %polly.access.MemRef_sum_c, align 4
-; KERNEL-IR-NEXT: br label %polly.merge
-
-define void @kernel_dynprog(ptr %sum_c) {
-entry:
- br label %for.cond1.preheader
-
-for.cond1.preheader: ; preds = %entry
- br label %for.body3
-
-for.cond1.loopexit: ; preds = %for.end
- %indvars.iv.next49 = add nuw nsw i64 %indvars.iv48, 1
- %exitcond57 = icmp ne i64 %indvars.iv.next56, 49
- br i1 %exitcond57, label %for.body3, label %for.inc55
-
-for.body3: ; preds = %for.cond1.loopexit, %for.cond1.preheader
- %indvars.iv55 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next56, %for.cond1.loopexit ]
- %indvars.iv48 = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next49, %for.cond1.loopexit ]
- %indvars.iv.next56 = add nuw nsw i64 %indvars.iv55, 1
- %arrayidx10 = getelementptr inbounds [50 x [50 x i32]], ptr %sum_c, i64 %indvars.iv55, i64 %indvars.iv48, i64 %indvars.iv55
- store i32 0, ptr %arrayidx10, align 4
- %cmp1334 = icmp slt i64 %indvars.iv.next56, %indvars.iv48
- br label %for.end
-
-for.end: ; preds = %for.body3
- br label %for.cond1.loopexit
-
-for.inc55: ; preds = %for.cond1.loopexit
- ret void
-}
diff --git a/polly/test/GPGPU/remove-dead-instructions-in-stmt.ll b/polly/test/GPGPU/remove-dead-instructions-in-stmt.ll
deleted file mode 100644
index 2024f006c53a..000000000000
--- a/polly/test/GPGPU/remove-dead-instructions-in-stmt.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck %s -check-prefix=KERNEL-IR
-
-; REQUIRES: pollyacc
-
-; Ensure that no dead instructions are emitted between the store and the
-; branch instruction of the ScopStmt. At some point, our dead-code-elimination
-; did not remove code that was inserted to compute the old (unused) branch
-; condition. This code referred to CPU registers and consequently resulted
-; in invalid bitcode.
-
-; KERNEL-IR: store i32 0, ptr addrspace(1) %polly.access.MemRef_sum_c, align 4
-; KERNEL-IR-NEXT: br label %polly.merge
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @kernel_dynprog(ptr %sum_c) {
-entry:
- br label %for.cond1.preheader
-
-for.cond1.preheader: ; preds = %entry
- br label %for.body3
-
-for.cond4.for.cond1.loopexit_crit_edge: ; preds = %for.end
- br label %for.cond1.loopexit
-
-for.cond1.loopexit: ; preds = %for.cond4.for.cond1.loopexit_crit_edge
- br i1 undef, label %for.body3, label %for.inc55
-
-for.body3: ; preds = %for.cond1.loopexit, %for.cond1.preheader
- %indvars.iv55 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next56, %for.cond1.loopexit ]
- %indvars.iv.next56 = add nuw nsw i64 %indvars.iv55, 1
- br label %for.body6
-
-for.body6: ; preds = %for.end, %for.body3
- %indvars.iv50 = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next51, %for.end ]
- %arrayidx10 = getelementptr inbounds [50 x [50 x i32]], ptr %sum_c, i64 %indvars.iv55, i64 %indvars.iv50, i64 %indvars.iv55
- store i32 0, ptr %arrayidx10, align 4
- %cmp1334 = icmp slt i64 %indvars.iv.next56, %indvars.iv50
- br i1 %cmp1334, label %for.body14.lr.ph, label %for.end
-
-for.body14.lr.ph: ; preds = %for.body6
- br label %for.body14
-
-for.body14: ; preds = %for.body14, %for.body14.lr.ph
- %arrayidx32 = getelementptr inbounds [50 x [50 x i32]], ptr %sum_c, i64 %indvars.iv55, i64 %indvars.iv50, i64 0
- br i1 false, label %for.body14, label %for.cond12.for.end_crit_edge
-
-for.cond12.for.end_crit_edge: ; preds = %for.body14
- br label %for.end
-
-for.end: ; preds = %for.cond12.for.end_crit_edge, %for.body6
- %indvars.iv.next51 = add nuw nsw i64 %indvars.iv50, 1
- %lftr.wideiv53 = trunc i64 %indvars.iv.next51 to i32
- %exitcond54 = icmp ne i32 %lftr.wideiv53, 50
- br i1 %exitcond54, label %for.body6, label %for.cond4.for.cond1.loopexit_crit_edge
-
-for.inc55: ; preds = %for.cond1.loopexit
- unreachable
-}
diff --git a/polly/test/GPGPU/run-time-check.ll b/polly/test/GPGPU/run-time-check.ll
deleted file mode 100644
index 3b04c3e01593..000000000000
--- a/polly/test/GPGPU/run-time-check.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
-; RUN: FileCheck %s -check-prefix=IR
-;
-; REQUIRES: pollyacc
-;
-; void foo(long n, float A[][32]) {
-; for (long i = 0; i < n; i++)
-; for (long j = 0; j < n; j++)
-; A[i][j] += A[i + 1][j + 1];
-; }
-
-; IR: %tmp = icmp slt i64 %i.0, %n
-; IR-NEXT: br i1 %tmp, label %bb2, label %polly.merge_new_and_old
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo(i64 %n, ptr %A) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb15, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp16, %bb15 ]
- %tmp = icmp slt i64 %i.0, %n
- br i1 %tmp, label %bb2, label %bb17
-
-bb2: ; preds = %bb1
- br label %bb3
-
-bb3: ; preds = %bb12, %bb2
- %j.0 = phi i64 [ 0, %bb2 ], [ %tmp13, %bb12 ]
- %exitcond = icmp ne i64 %j.0, %n
- br i1 %exitcond, label %bb4, label %bb14
-
-bb4: ; preds = %bb3
- %tmp5 = add nuw nsw i64 %j.0, 1
- %tmp6 = add nuw nsw i64 %i.0, 1
- %tmp7 = getelementptr inbounds [32 x float], ptr %A, i64 %tmp6, i64 %tmp5
- %tmp8 = load float, ptr %tmp7, align 4
- %tmp9 = getelementptr inbounds [32 x float], ptr %A, i64 %i.0, i64 %j.0
- %tmp10 = load float, ptr %tmp9, align 4
- %tmp11 = fadd float %tmp10, %tmp8
- store float %tmp11, ptr %tmp9, align 4
- br label %bb12
-
-bb12: ; preds = %bb4
- %tmp13 = add nuw nsw i64 %j.0, 1
- br label %bb3
-
-bb14: ; preds = %bb3
- br label %bb15
-
-bb15: ; preds = %bb14
- %tmp16 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb17: ; preds = %bb1
- ret void
-}
diff --git a/polly/test/GPGPU/scalar-param-and-value-32-bit.ll b/polly/test/GPGPU/scalar-param-and-value-32-bit.ll
deleted file mode 100644
index 0313d64e976c..000000000000
--- a/polly/test/GPGPU/scalar-param-and-value-32-bit.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck %s
-
-; REQUIRES: pollyacc, target=nvptx{{.*}}
-;
-; void foo(float A[], int n) {
-; for (long j = 0; j < n; j++)
-; A[j + n] += 42;
-; }
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-; CHECK: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0(ptr addrspace(1) %MemRef_A, i32 %n)
-
-define void @foo(ptr %A, i32 %n) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb9, %bb
- %j.0 = phi i64 [ 0, %bb ], [ %tmp10, %bb9 ]
- %tmp = sext i32 %n to i64
- %tmp2 = icmp slt i64 %j.0, %tmp
- br i1 %tmp2, label %bb3, label %bb11
-
-bb3: ; preds = %bb1
- %tmp4 = sext i32 %n to i64
- %tmp5 = add nsw i64 %j.0, %tmp4
- %tmp6 = getelementptr inbounds float, ptr %A, i64 %tmp5
- %tmp7 = load float, ptr %tmp6, align 4
- %tmp8 = fadd float %tmp7, 4.200000e+01
- store float %tmp8, ptr %tmp6, align 4
- br label %bb9
-
-bb9: ; preds = %bb3
- %tmp10 = add nuw nsw i64 %j.0, 1
- br label %bb1
-
-bb11: ; preds = %bb1
- ret void
-}
diff --git a/polly/test/GPGPU/scalar-param-and-value-use.ll b/polly/test/GPGPU/scalar-param-and-value-use.ll
deleted file mode 100644
index 0301d88e16ac..000000000000
--- a/polly/test/GPGPU/scalar-param-and-value-use.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=IR %s
-
-; REQUIRES: pollyacc, target=nvptx{{.*}}
-
-; void foo(long n, float A[][n]) {
-; for (long i = 0; i < 32; i++)
-; for (long j = 0; j < 32; j++)
-; A[i][j] += A[i + 1][j + 1];
-; }
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-; This test case failed at some point as %n was only available in this kernel
-; when referenced through an isl_id in an isl ast expression, but not when
-; it was referenced from a SCEV or instruction that not part of any loop
-; bound.
-
-; IR: %polly.access.mul.MemRef_A = mul nsw i64 {{.*}}, %n
-
-define void @foo(i64 %n, ptr %A) {
-bb:
- br label %bb2
-
-bb2: ; preds = %bb19, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp20, %bb19 ]
- %exitcond1 = icmp ne i64 %i.0, 32
- br i1 %exitcond1, label %bb3, label %bb21
-
-bb3: ; preds = %bb2
- br label %bb4
-
-bb4: ; preds = %bb16, %bb3
- %j.0 = phi i64 [ 0, %bb3 ], [ %tmp17, %bb16 ]
- %exitcond = icmp ne i64 %j.0, 32
- br i1 %exitcond, label %bb5, label %bb18
-
-bb5: ; preds = %bb4
- %tmp = add nuw nsw i64 %j.0, 1
- %tmp6 = add nuw nsw i64 %i.0, 1
- %tmp7 = mul nsw i64 %tmp6, %n
- %tmp8 = getelementptr inbounds float, ptr %A, i64 %tmp7
- %tmp9 = getelementptr inbounds float, ptr %tmp8, i64 %tmp
- %tmp10 = load float, ptr %tmp9, align 4
- %tmp11 = mul nsw i64 %i.0, %n
- %tmp12 = getelementptr inbounds float, ptr %A, i64 %tmp11
- %tmp13 = getelementptr inbounds float, ptr %tmp12, i64 %j.0
- %tmp14 = load float, ptr %tmp13, align 4
- %tmp15 = fadd float %tmp14, %tmp10
- store float %tmp15, ptr %tmp13, align 4
- br label %bb16
-
-bb16: ; preds = %bb5
- %tmp17 = add nuw nsw i64 %j.0, 1
- br label %bb4
-
-bb18: ; preds = %bb4
- br label %bb19
-
-bb19: ; preds = %bb18
- %tmp20 = add nuw nsw i64 %i.0, 1
- br label %bb2
-
-bb21: ; preds = %bb2
- ret void
-}
diff --git a/polly/test/GPGPU/scalar-parameter-fp128.ll b/polly/test/GPGPU/scalar-parameter-fp128.ll
deleted file mode 100644
index f20a809c7c83..000000000000
--- a/polly/test/GPGPU/scalar-parameter-fp128.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code -disable-output %s
-
-; XFAIL: *
-
-; REQUIRES: pollyacc, target=nvptx{{.*}}
-
-; This fails today with "LowerFormalArguments didn't emit the correct number of values!"
-
-; void foo(fp128 A[], fp128 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @fp128(ptr %A, fp128 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds fp128, ptr %A, i64 %i.0
- %tmp3 = load fp128, ptr %tmp, align 4
- %tmp4 = fadd fp128 %tmp3, %b
- store fp128 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
-
diff --git a/polly/test/GPGPU/scalar-parameter-half.ll b/polly/test/GPGPU/scalar-parameter-half.ll
deleted file mode 100644
index 127096256812..000000000000
--- a/polly/test/GPGPU/scalar-parameter-half.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code -disable-output %s
-
-; REQUIRES: pollyacc, target=nvptx{{.*}}
-
-; void foo(half A[], half b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @half(ptr %A, half %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds half, ptr %A, i64 %i.0
- %tmp3 = load half, ptr %tmp, align 4
- %tmp4 = fadd half %tmp3, %b
- store half %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
-
diff --git a/polly/test/GPGPU/scalar-parameter-i120.ll b/polly/test/GPGPU/scalar-parameter-i120.ll
deleted file mode 100644
index 06fb46dd917e..000000000000
--- a/polly/test/GPGPU/scalar-parameter-i120.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code -disable-output %s
-
-; XFAIL: *
-
-; REQUIRES: pollyacc, target=nvptx{{.*}}
-
-; This fails today with "Promotion is not suitable for scalars of size larger than 64-bits"
-
-; void foo(i120 A[], i120 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @i120(ptr %A, i120 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i120 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i120 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds i120, ptr %A, i120 %i.0
- %tmp3 = load i120, ptr %tmp, align 4
- %tmp4 = add i120 %tmp3, %b
- store i120 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i120 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
-
diff --git a/polly/test/GPGPU/scalar-parameter-i128.ll b/polly/test/GPGPU/scalar-parameter-i128.ll
deleted file mode 100644
index 8e54cf4636d4..000000000000
--- a/polly/test/GPGPU/scalar-parameter-i128.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code -disable-output %s
-
-; REQUIRES: pollyacc, target=nvptx{{.*}}
-
-; void foo(i128 A[], i128 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @i128(ptr %A, i128 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i128 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i128 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds i128, ptr %A, i128 %i.0
- %tmp3 = load i128, ptr %tmp, align 4
- %tmp4 = add i128 %tmp3, %b
- store i128 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i128 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
diff --git a/polly/test/GPGPU/scalar-parameter-i3000.ll b/polly/test/GPGPU/scalar-parameter-i3000.ll
deleted file mode 100644
index 5c36b3fd62cb..000000000000
--- a/polly/test/GPGPU/scalar-parameter-i3000.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code -disable-output %s
-
-; XFAIL: *
-
-; REQUIRES: pollyacc, target=nvptx{{.*}}
-
-; This fails today with "Promotion is not suitable for scalars of size larger than 64-bits"
-
-; void foo(i3000 A[], i3000 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @i3000(ptr %A, i3000 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i3000 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i3000 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds i3000, ptr %A, i3000 %i.0
- %tmp3 = load i3000, ptr %tmp, align 4
- %tmp4 = add i3000 %tmp3, %b
- store i3000 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i3000 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
diff --git a/polly/test/GPGPU/scalar-parameter-i80.ll b/polly/test/GPGPU/scalar-parameter-i80.ll
deleted file mode 100644
index a672cd5c1cdc..000000000000
--- a/polly/test/GPGPU/scalar-parameter-i80.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code -disable-output %s
-
-; XFAIL: *
-
-; REQUIRES: pollyacc, target=nvptx{{.*}}
-
-; This fails today with "Promotion is not suitable for scalars of size larger than 64-bits"
-
-; void foo(i80 A[], i80 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @i80(ptr %A, i80 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i80 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i80 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds i80, ptr %A, i80 %i.0
- %tmp3 = load i80, ptr %tmp, align 4
- %tmp4 = add i80 %tmp3, %b
- store i80 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i80 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
-
diff --git a/polly/test/GPGPU/scalar-parameter-ppc_fp128.ll b/polly/test/GPGPU/scalar-parameter-ppc_fp128.ll
deleted file mode 100644
index 11dfd68ede9b..000000000000
--- a/polly/test/GPGPU/scalar-parameter-ppc_fp128.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code -disable-output %s
-
-; XFAIL: *
-
-; REQUIRES: pollyacc, target=nvptx{{.*}}
-
-; This fails today with "LowerFormalArguments didn't emit the correct number of values!"
-
-; void foo(ppc_fp128 A[], ppc_fp128 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @ppc_fp128(ptr %A, ppc_fp128 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds ppc_fp128, ptr %A, i64 %i.0
- %tmp3 = load ppc_fp128, ptr %tmp, align 4
- %tmp4 = fadd ppc_fp128 %tmp3, %b
- store ppc_fp128 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
diff --git a/polly/test/GPGPU/scalar-parameter-x86_fp80.ll b/polly/test/GPGPU/scalar-parameter-x86_fp80.ll
deleted file mode 100644
index f20a809c7c83..000000000000
--- a/polly/test/GPGPU/scalar-parameter-x86_fp80.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code -disable-output %s
-
-; XFAIL: *
-
-; REQUIRES: pollyacc, target=nvptx{{.*}}
-
-; This fails today with "LowerFormalArguments didn't emit the correct number of values!"
-
-; void foo(fp128 A[], fp128 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @fp128(ptr %A, fp128 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds fp128, ptr %A, i64 %i.0
- %tmp3 = load fp128, ptr %tmp, align 4
- %tmp4 = fadd fp128 %tmp3, %b
- store fp128 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
-
diff --git a/polly/test/GPGPU/scalar-parameter.ll b/polly/test/GPGPU/scalar-parameter.ll
deleted file mode 100644
index e416c93211d5..000000000000
--- a/polly/test/GPGPU/scalar-parameter.ll
+++ /dev/null
@@ -1,411 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg \
-; RUN: -S < %s | \
-; RUN: FileCheck -check-prefix=IR %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg \
-; RUN: -disable-output -polly-acc-dump-kernel-ir < %s | \
-; RUN: FileCheck -check-prefix=KERNEL %s
-
-; XFAIL: *
-
-; REQUIRES: pollyacc, target=nvptx{{.*}}
-
-; This fails today due to extensive output differences from when the test was written.
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-; KERNEL: define ptx_kernel void @kernel_0(ptr %MemRef_A, float %MemRef_b)
-
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, MemRef_b);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(float), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_bb2(32 * b0 + t0);
-
-; void foo(float A[], float b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-define void @float(ptr %A, float %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds float, ptr %A, i64 %i.0
- %tmp3 = load float, ptr %tmp, align 4
- %tmp4 = fadd float %tmp3, %b
- store float %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
-
-; KERNEL: define ptx_kernel void @kernel_0(ptr %MemRef_A, double %MemRef_b)
-; KERNEL-NEXT: entry:
-; KERNEL-NEXT: %b.s2a = alloca double
-; KERNEL-NEXT: store double %MemRef_b, ptr %b.s2a
-
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(double), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, MemRef_b);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(double), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_bb2(32 * b0 + t0);
-
-; void foo(double A[], double b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-define void @double(ptr %A, double %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds double, ptr %A, i64 %i.0
- %tmp3 = load double, ptr %tmp, align 4
- %tmp4 = fadd double %tmp3, %b
- store double %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
-
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(i1), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(i1), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_bb2(32 * b0 + t0);
-
-; void foo(i1 A[], i1 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-define void @i1(ptr %A, i1 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds i1, ptr %A, i64 %i.0
- %tmp3 = load i1, ptr %tmp, align 4
- %tmp4 = add i1 %tmp3, %b
- store i1 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
-
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(i3), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(i3), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_bb2(32 * b0 + t0);
-
-; void foo(i3 A[], i3 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-define void @i3(ptr %A, i3 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds i3, ptr %A, i64 %i.0
- %tmp3 = load i3, ptr %tmp, align 4
- %tmp4 = add i3 %tmp3, %b
- store i3 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
-
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(i8), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(i8), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_bb2(32 * b0 + t0);
-
-; void foo(i8 A[], i8 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-define void @i8(ptr %A, i8 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds i8, ptr %A, i64 %i.0
- %tmp3 = load i8, ptr %tmp, align 4
- %tmp4 = add i8 %tmp3, %b
- store i8 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
-
-; IR-LABEL: @i8
-
-; IR: [[REGA:%.+]] = call ptr @polly_getDevicePtr(ptr %p_dev_array_MemRef_A)
-; IR-NEXT: store ptr [[REGA:%.+]], ptr %polly_launch_0_param_0
-; IR-NEXT: store ptr %polly_launch_0_param_0, ptr %polly_launch_0_params
-; IR-NEXT: store i8 %b, ptr %polly_launch_0_param_1
-; IR-NEXT: [[REGD:%.+]] = getelementptr [2 x ptr], ptr %polly_launch_0_params, i64 0, i64 1
-; IR-NEXT: store ptr %polly_launch_0_param_1, ptr [[REGD]]
-
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(i32), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(i32), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_bb2(32 * b0 + t0);
-
-; void foo(i32 A[], i32 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-define void @i32(ptr %A, i32 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds i32, ptr %A, i64 %i.0
- %tmp3 = load i32, ptr %tmp, align 4
- %tmp4 = add i32 %tmp3, %b
- store i32 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
-
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(i60), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(i60), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_bb2(32 * b0 + t0);
-
-; void foo(i60 A[], i60 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-define void @i60(ptr %A, i60 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds i60, ptr %A, i64 %i.0
- %tmp3 = load i60, ptr %tmp, align 4
- %tmp4 = add i60 %tmp3, %b
- store i60 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
-
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(i64), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(i64), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_bb2(32 * b0 + t0);
-
-; void foo(i64 A[], i64 b) {
-; for (long i = 0; i < 1024; i++)
-; A[i] += b;
-; }
-;
-define void @i64(ptr %A, i64 %b) {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb5, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp6, %bb5 ]
- %exitcond = icmp ne i64 %i.0, 1024
- br i1 %exitcond, label %bb2, label %bb7
-
-bb2: ; preds = %bb1
- %tmp = getelementptr inbounds i64, ptr %A, i64 %i.0
- %tmp3 = load i64, ptr %tmp, align 4
- %tmp4 = add i64 %tmp3, %b
- store i64 %tmp4, ptr %tmp, align 4
- br label %bb5
-
-bb5: ; preds = %bb2
- %tmp6 = add nuw nsw i64 %i.0, 1
- br label %bb1
-
-bb7: ; preds = %bb1
- ret void
-}
diff --git a/polly/test/GPGPU/scalar-writes-in-scop-requires-abort.ll b/polly/test/GPGPU/scalar-writes-in-scop-requires-abort.ll
deleted file mode 100644
index 31110437fdca..000000000000
--- a/polly/test/GPGPU/scalar-writes-in-scop-requires-abort.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; RUN: opt %loadPolly -polly-acc-dump-code -polly-invariant-load-hoisting -polly-print-scops -disable-output < %s | FileCheck %s -check-prefix=SCOP
-
-; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
-; RUN: -polly-acc-dump-code -polly-stmt-granularity=bb \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=CODE
-
-; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
-; RUN: -polly-invariant-load-hoisting -polly-stmt-granularity=bb < %s \
-; RUN: | FileCheck %s -check-prefix=HOST-IR
-
-; REQUIRES: pollyacc
-
-; SCOP: Invariant Accesses: {
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: { Stmt_loop_a[i0] -> MemRef_p[0] };
-; SCOP-NEXT: Execution Context: { : }
-; SCOP-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: {
-; CODE-NEXT: if (32 * b0 + t0 <= 1025) {
-; CODE-NEXT: Stmt_loop(32 * b0 + t0);
-; CODE-NEXT: write(0);
-; CODE-NEXT: }
-; CODE-NEXT: sync0();
-; CODE-NEXT: }
-
-; Check that we generate a correct "always false" branch.
-; HOST-IR: br i1 false, label %polly.start, label %loop.pre_entry_bb
-
-; This test case checks that we generate correct code if PPCGCodeGeneration
-; decides a build is unsuccessful with invariant load hoisting enabled.
-;
-; There is a conditional branch which switches between the original code and
-; the new code. We try to set this conditional branch to branch on false.
-; However, invariant load hoisting changes the structure of the scop, so we
-; need to change the way we *locate* this instruction.
-
-target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
-target triple = "i386-apple-macosx10.12.0"
-
-define void @foo(ptr %A, ptr %p) {
-entry:
- br label %loop
-
-loop:
- %indvar = phi i64 [0, %entry], [%indvar.next, %loop]
- %indvar.next = add i64 %indvar, 1
- %invariant = load float, ptr %p
- %ptr = getelementptr float, ptr %A, i64 %indvar
- store float 42.0, ptr %ptr
- %cmp = icmp sle i64 %indvar, 1024
- br i1 %cmp, label %loop, label %loop2
-
-loop2:
- %indvar2 = phi i64 [0, %loop], [%indvar2.next, %loop2]
- %indvar2f = phi float [%invariant, %loop], [%indvar2f, %loop2]
- %indvar2.next = add i64 %indvar2, 1
- store float %indvar2f, ptr %A
- %cmp2 = icmp sle i64 %indvar2, 1024
- br i1 %cmp2, label %loop2, label %end
-
-end:
- ret void
-}
diff --git a/polly/test/GPGPU/scheduler-timeout.ll b/polly/test/GPGPU/scheduler-timeout.ll
deleted file mode 100644
index 4a49c53d66c7..000000000000
--- a/polly/test/GPGPU/scheduler-timeout.ll
+++ /dev/null
@@ -1,174 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; REQUIRES: pollyacc
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; At some point this test case took forever to schedule, as the isl scheduler
-; seems to have problems if domain constraints appear in the dependences
-; provided to the scheduler.
-
-; /* D := alpha*A*B*C + beta*D */
-; for (i = 0; i < _PB_NI; i++)
-; for (j = 0; j < _PB_NJ; j++)
-; {
-; tmp[i][j] = 0;
-; for (k = 0; k < _PB_NK; ++k)
-; tmp[i][j] += alpha * A[i][k] * B[k][j];
-; }
-; for (i = 0; i < _PB_NI; i++)
-; for (j = 0; j < _PB_NL; j++)
-; {
-; D[i][j] *= beta;
-; for (k = 0; k < _PB_NJ; ++k)
-; D[i][j] += tmp[i][k] * C[k][j];
-; }
-
-; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (4096) * (4096) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (4096) * (4096) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_D, MemRef_D, (4096) * (4096) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_C, MemRef_C, (4096) * (4096) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(16, 32);
-; CODE-NEXT: dim3 k0_dimGrid(128, 128);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_tmp, dev_MemRef_A, MemRef_alpha, dev_MemRef_B);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: {
-; CODE-NEXT: dim3 k1_dimBlock(16, 32);
-; CODE-NEXT: dim3 k1_dimGrid(128, 128);
-; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_tmp, dev_MemRef_D, MemRef_beta, dev_MemRef_C);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_tmp, dev_MemRef_tmp, (4096) * (4096) * sizeof(float), cudaMemcpyDeviceToHost));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_D, dev_MemRef_D, (4096) * (4096) * sizeof(float), cudaMemcpyDeviceToHost));
-
-; CODE: # kernel0
-; CODE-NEXT: for (int c2 = 0; c2 <= 127; c2 += 1)
-; CODE-NEXT: for (int c4 = 0; c4 <= 1; c4 += 1) {
-; CODE-NEXT: if (c2 == 0)
-; CODE-NEXT: Stmt_for_body6(32 * b0 + t0, 32 * b1 + t1 + 16 * c4);
-; CODE-NEXT: for (int c5 = 0; c5 <= 31; c5 += 1)
-; CODE-NEXT: Stmt_for_body11(32 * b0 + t0, 32 * b1 + t1 + 16 * c4, 32 * c2 + c5);
-; CODE-NEXT: }
-
-; CODE: # kernel1
-; CODE-NEXT: for (int c2 = 0; c2 <= 127; c2 += 1)
-; CODE-NEXT: for (int c4 = 0; c4 <= 1; c4 += 1) {
-; CODE-NEXT: if (c2 == 0)
-; CODE-NEXT: Stmt_for_body36(32 * b0 + t0, 32 * b1 + t1 + 16 * c4);
-; CODE-NEXT: for (int c5 = 0; c5 <= 31; c5 += 1)
-; CODE-NEXT: Stmt_for_body44(32 * b0 + t0, 32 * b1 + t1 + 16 * c4, 32 * c2 + c5);
-; CODE-NEXT: }
-
-
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start(i64, ptr nocapture) #0
-
-; Function Attrs: nounwind uwtable
-define internal void @kernel_2mm(i32 %ni, i32 %nj, i32 %nk, i32 %nl, float %alpha, float %beta, ptr %tmp, ptr %A, ptr %B, ptr %C, ptr %D) #1 {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- br label %for.cond4.preheader
-
-for.cond4.preheader: ; preds = %entry.split, %for.inc28
- %indvars.iv19 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next20, %for.inc28 ]
- br label %for.body6
-
-for.cond31.preheader: ; preds = %for.inc28
- br label %for.cond34.preheader
-
-for.body6: ; preds = %for.cond4.preheader, %for.inc25
- %indvars.iv16 = phi i64 [ 0, %for.cond4.preheader ], [ %indvars.iv.next17, %for.inc25 ]
- %arrayidx8 = getelementptr inbounds [4096 x float], ptr %tmp, i64 %indvars.iv19, i64 %indvars.iv16
- store float 0.000000e+00, ptr %arrayidx8, align 4, !tbaa !1
- br label %for.body11
-
-for.body11: ; preds = %for.body6, %for.body11
- %indvars.iv13 = phi i64 [ 0, %for.body6 ], [ %indvars.iv.next14, %for.body11 ]
- %arrayidx15 = getelementptr inbounds [4096 x float], ptr %A, i64 %indvars.iv19, i64 %indvars.iv13
- %tmp22 = load float, ptr %arrayidx15, align 4, !tbaa !1
- %mul = fmul float %tmp22, %alpha
- %arrayidx19 = getelementptr inbounds [4096 x float], ptr %B, i64 %indvars.iv13, i64 %indvars.iv16
- %tmp23 = load float, ptr %arrayidx19, align 4, !tbaa !1
- %mul20 = fmul float %mul, %tmp23
- %arrayidx24 = getelementptr inbounds [4096 x float], ptr %tmp, i64 %indvars.iv19, i64 %indvars.iv16
- %tmp24 = load float, ptr %arrayidx24, align 4, !tbaa !1
- %add = fadd float %tmp24, %mul20
- store float %add, ptr %arrayidx24, align 4, !tbaa !1
- %indvars.iv.next14 = add nuw nsw i64 %indvars.iv13, 1
- %exitcond15 = icmp ne i64 %indvars.iv.next14, 4096
- br i1 %exitcond15, label %for.body11, label %for.inc25
-
-for.inc25: ; preds = %for.body11
- %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1
- %exitcond18 = icmp ne i64 %indvars.iv.next17, 4096
- br i1 %exitcond18, label %for.body6, label %for.inc28
-
-for.inc28: ; preds = %for.inc25
- %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
- %exitcond21 = icmp ne i64 %indvars.iv.next20, 4096
- br i1 %exitcond21, label %for.cond4.preheader, label %for.cond31.preheader
-
-for.cond34.preheader: ; preds = %for.cond31.preheader, %for.inc65
- %indvars.iv10 = phi i64 [ 0, %for.cond31.preheader ], [ %indvars.iv.next11, %for.inc65 ]
- br label %for.body36
-
-for.body36: ; preds = %for.cond34.preheader, %for.inc62
- %indvars.iv7 = phi i64 [ 0, %for.cond34.preheader ], [ %indvars.iv.next8, %for.inc62 ]
- %arrayidx40 = getelementptr inbounds [4096 x float], ptr %D, i64 %indvars.iv10, i64 %indvars.iv7
- %tmp25 = load float, ptr %arrayidx40, align 4, !tbaa !1
- %mul41 = fmul float %tmp25, %beta
- store float %mul41, ptr %arrayidx40, align 4, !tbaa !1
- br label %for.body44
-
-for.body44: ; preds = %for.body36, %for.body44
- %indvars.iv = phi i64 [ 0, %for.body36 ], [ %indvars.iv.next, %for.body44 ]
- %arrayidx48 = getelementptr inbounds [4096 x float], ptr %tmp, i64 %indvars.iv10, i64 %indvars.iv
- %tmp26 = load float, ptr %arrayidx48, align 4, !tbaa !1
- %arrayidx52 = getelementptr inbounds [4096 x float], ptr %C, i64 %indvars.iv, i64 %indvars.iv7
- %tmp27 = load float, ptr %arrayidx52, align 4, !tbaa !1
- %mul53 = fmul float %tmp26, %tmp27
- %arrayidx57 = getelementptr inbounds [4096 x float], ptr %D, i64 %indvars.iv10, i64 %indvars.iv7
- %tmp28 = load float, ptr %arrayidx57, align 4, !tbaa !1
- %add58 = fadd float %tmp28, %mul53
- store float %add58, ptr %arrayidx57, align 4, !tbaa !1
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp ne i64 %indvars.iv.next, 4096
- br i1 %exitcond, label %for.body44, label %for.inc62
-
-for.inc62: ; preds = %for.body44
- %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
- %exitcond9 = icmp ne i64 %indvars.iv.next8, 4096
- br i1 %exitcond9, label %for.body36, label %for.inc65
-
-for.inc65: ; preds = %for.inc62
- %indvars.iv.next11 = add nuw nsw i64 %indvars.iv10, 1
- %exitcond12 = icmp ne i64 %indvars.iv.next11, 4096
- br i1 %exitcond12, label %for.cond34.preheader, label %for.end67
-
-for.end67: ; preds = %for.inc65
- ret void
-}
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end(i64, ptr nocapture) #0
-
-attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 3.9.0 (trunk 275267) (llvm/trunk 275268)"}
-!1 = !{!2, !2, i64 0}
-!2 = !{!"float", !3, i64 0}
-!3 = !{!"omnipotent char", !4, i64 0}
-!4 = !{!"Simple C/C++ TBAA"}
diff --git a/polly/test/GPGPU/shared-memory-scalar.ll b/polly/test/GPGPU/shared-memory-scalar.ll
deleted file mode 100644
index cd2b1705a388..000000000000
--- a/polly/test/GPGPU/shared-memory-scalar.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -polly-acc-use-shared \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; REQUIRES: pollyacc
-
-; void add(float *A, float alpha) {
-; for (long i = 0; i < 32; i++)
-; for (long j = 0; j < 10; j++)
-; A[i] += alpha;
-; }
-
-; CODE: read(t0);
-; CODE-NEXT: sync0();
-; CODE-NEXT: for (int c3 = 0; c3 <= 9; c3 += 1)
-; CODE-NEXT: Stmt_bb5(t0, c3);
-; CODE-NEXT: sync1();
-; CODE-NEXT: write(t0);
-
-; This test case was intended to test code generation for scalars stored
-; in shared memory. However, after properly marking the scalar as read-only
-; the scalar is not stored any more in shared memory. We still leave this
-; test case as documentation if we ever forget to mark scalars as read-only.
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @add(ptr %A, float %alpha) {
-bb:
- br label %bb2
-
-bb2: ; preds = %bb11, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
- %exitcond1 = icmp ne i64 %i.0, 32
- br i1 %exitcond1, label %bb3, label %bb13
-
-bb3: ; preds = %bb2
- br label %bb4
-
-bb4: ; preds = %bb8, %bb3
- %j.0 = phi i64 [ 0, %bb3 ], [ %tmp9, %bb8 ]
- %exitcond = icmp ne i64 %j.0, 10
- br i1 %exitcond, label %bb5, label %bb10
-
-bb5: ; preds = %bb4
- %tmp = getelementptr inbounds float, ptr %A, i64 %i.0
- %tmp6 = load float, ptr %tmp, align 4
- %tmp7 = fadd float %tmp6, %alpha
- store float %tmp7, ptr %tmp, align 4
- br label %bb8
-
-bb8: ; preds = %bb5
- %tmp9 = add nuw nsw i64 %j.0, 1
- br label %bb4
-
-bb10: ; preds = %bb4
- br label %bb11
-
-bb11: ; preds = %bb10
- %tmp12 = add nuw nsw i64 %i.0, 1
- br label %bb2
-
-bb13: ; preds = %bb2
- ret void
-}
diff --git a/polly/test/GPGPU/shared-memory-two-dimensional.ll b/polly/test/GPGPU/shared-memory-two-dimensional.ll
deleted file mode 100644
index 6ee51650295f..000000000000
--- a/polly/test/GPGPU/shared-memory-two-dimensional.ll
+++ /dev/null
@@ -1,103 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -polly-acc-use-shared \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg \
-; RUN: -polly-acc-use-shared \
-; RUN: -disable-output -polly-acc-dump-kernel-ir < %s | \
-; RUN: FileCheck -check-prefix=KERNEL %s
-
-; REQUIRES: pollyacc
-
-; void foo(float A[], float b[][8]) {
-; for (long i = 0; i < 32; i++)
-; for (long j = 0; j < 16; j++)
-; for (long k = 0; k < 8; k++)
-; A[i] += j * k * b[j][k];
-; }
-
-
-; CODE: # kernel0
-; CODE-NEXT: {
-; CODE-NEXT: if (t0 <= 7)
-; CODE-NEXT: for (int c0 = 0; c0 <= 15; c0 += 1)
-; CODE-NEXT: read(c0, t0);
-; CODE-NEXT: read(t0);
-; CODE-NEXT: sync0();
-; CODE-NEXT: for (int c3 = 0; c3 <= 15; c3 += 1)
-; CODE-NEXT: for (int c4 = 0; c4 <= 7; c4 += 1)
-; CODE-NEXT: Stmt_bb8(t0, c3, c4);
-; CODE-NEXT: sync1();
-; CODE-NEXT: write(t0);
-; CODE-NEXT: }
-
-; KERNEL: @shared_MemRef_b = internal addrspace(3) global [16 x [8 x float]] zeroinitializer, align 4
-
-; KERNEL: %polly.access.mul.MemRef_b = mul nsw i64 %polly.indvar, 8
-; KERNEL-NEXT: %polly.access.add.MemRef_b = add nsw i64 %polly.access.mul.MemRef_b, %t0
-; KERNEL-NEXT: %polly.access.MemRef_b = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_b, i64 %polly.access.add.MemRef_b
-; KERNEL-NEXT: %shared.read = load float, float addrspace(1)* %polly.access.MemRef_b
-; KERNEL-NEXT: store float %shared.read, float addrspace(3)* %polly.access.shared_MemRef_b
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo(float* %A, [8 x float]* %b) {
-bb:
- br label %bb3
-
-bb3: ; preds = %bb22, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp23, %bb22 ]
- %exitcond2 = icmp ne i64 %i.0, 32
- br i1 %exitcond2, label %bb4, label %bb24
-
-bb4: ; preds = %bb3
- br label %bb5
-
-bb5: ; preds = %bb19, %bb4
- %j.0 = phi i64 [ 0, %bb4 ], [ %tmp20, %bb19 ]
- %exitcond1 = icmp ne i64 %j.0, 16
- br i1 %exitcond1, label %bb6, label %bb21
-
-bb6: ; preds = %bb5
- br label %bb7
-
-bb7: ; preds = %bb16, %bb6
- %k.0 = phi i64 [ 0, %bb6 ], [ %tmp17, %bb16 ]
- %exitcond = icmp ne i64 %k.0, 8
- br i1 %exitcond, label %bb8, label %bb18
-
-bb8: ; preds = %bb7
- %tmp = mul nuw nsw i64 %j.0, %k.0
- %tmp9 = sitofp i64 %tmp to float
- %tmp10 = getelementptr inbounds [8 x float], [8 x float]* %b, i64 %j.0, i64 %k.0
- %tmp11 = load float, float* %tmp10, align 4
- %tmp12 = fmul float %tmp9, %tmp11
- %tmp13 = getelementptr inbounds float, float* %A, i64 %i.0
- %tmp14 = load float, float* %tmp13, align 4
- %tmp15 = fadd float %tmp14, %tmp12
- store float %tmp15, float* %tmp13, align 4
- br label %bb16
-
-bb16: ; preds = %bb8
- %tmp17 = add nuw nsw i64 %k.0, 1
- br label %bb7
-
-bb18: ; preds = %bb7
- br label %bb19
-
-bb19: ; preds = %bb18
- %tmp20 = add nuw nsw i64 %j.0, 1
- br label %bb5
-
-bb21: ; preds = %bb5
- br label %bb22
-
-bb22: ; preds = %bb21
- %tmp23 = add nuw nsw i64 %i.0, 1
- br label %bb3
-
-bb24: ; preds = %bb3
- ret void
-}
diff --git a/polly/test/GPGPU/shared-memory.ll b/polly/test/GPGPU/shared-memory.ll
deleted file mode 100644
index 920db0d37127..000000000000
--- a/polly/test/GPGPU/shared-memory.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -polly-acc-use-shared \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg \
-; RUN: -polly-acc-use-shared \
-; RUN: -disable-output -polly-acc-dump-kernel-ir < %s | \
-; RUN: FileCheck -check-prefix=KERNEL %s
-
-; REQUIRES: pollyacc
-
-; void add(float *A) {
-; for (long i = 0; i < 32; i++)
-; for (long j = 0; j < 10; j++)
-; A[i] += 1;
-; }
-
-; CODE: # kernel0
-; CODE: {
-; CODE: read(t0);
-; CODE: sync0();
-; CODE: for (int c3 = 0; c3 <= 9; c3 += 1)
-; CODE: Stmt_bb5(t0, c3);
-; CODE: sync1();
-; CODE: write(t0);
-; CODE: }
-
-; KERNEL: @shared_MemRef_A = internal addrspace(3) global [32 x float] zeroinitializer, align 4
-
-; KERNEL: %polly.access.shared_MemRef_A = getelementptr float, float addrspace(3)* getelementptr inbounds ([32 x float], [32 x float] addrspace(3)* @shared_MemRef_A, i32 0, i32 0), i64 %t0
-; KERNEL-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
-; KERNEL-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %t0
-; KERNEL-NEXT: %shared.read = load float, float addrspace(1)* %polly.access.MemRef_A
-; KERNEL-NEXT: store float %shared.read, float addrspace(3)* %polly.access.shared_MemRef_A
-
-; KERNEL: %polly.access.shared_MemRef_A3 = getelementptr float, float addrspace(3)* getelementptr inbounds ([32 x float], [32 x float] addrspace(3)* @shared_MemRef_A, i32 0, i32 0), i64 %t0
-; KERNEL-NEXT: %polly.access.cast.MemRef_A4 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
-; KERNEL-NEXT: %polly.access.MemRef_A5 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A4, i64 %t0
-; KERNEL-NEXT: %shared.write = load float, float addrspace(3)* %polly.access.shared_MemRef_A3
-; KERNEL-NEXT: store float %shared.write, float addrspace(1)* %polly.access.MemRef_A5
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @add(float* %A) {
-bb:
- br label %bb2
-
-bb2: ; preds = %bb11, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
- %exitcond1 = icmp ne i64 %i.0, 32
- br i1 %exitcond1, label %bb3, label %bb13
-
-bb3: ; preds = %bb2
- br label %bb4
-
-bb4: ; preds = %bb8, %bb3
- %j.0 = phi i64 [ 0, %bb3 ], [ %tmp9, %bb8 ]
- %exitcond = icmp ne i64 %j.0, 10
- br i1 %exitcond, label %bb5, label %bb10
-
-bb5: ; preds = %bb4
- %tmp = getelementptr inbounds float, float* %A, i64 %i.0
- %tmp6 = load float, float* %tmp, align 4
- %tmp7 = fadd float %tmp6, 1.000000e+00
- store float %tmp7, float* %tmp, align 4
- br label %bb8
-
-bb8: ; preds = %bb5
- %tmp9 = add nuw nsw i64 %j.0, 1
- br label %bb4
-
-bb10: ; preds = %bb4
- br label %bb11
-
-bb11: ; preds = %bb10
- %tmp12 = add nuw nsw i64 %i.0, 1
- br label %bb2
-
-bb13: ; preds = %bb2
- ret void
-}
diff --git a/polly/test/GPGPU/simple-managed-memory-rewrite.ll b/polly/test/GPGPU/simple-managed-memory-rewrite.ll
deleted file mode 100644
index d8c5b320e2b0..000000000000
--- a/polly/test/GPGPU/simple-managed-memory-rewrite.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s --check-prefix=SCOP
-
-; RUN: opt %loadPolly -S -polly-process-unprofitable -polly-acc-mincompute=0 \
-; RUN: -polly-codegen-ppcg -polly-acc-codegen-managed-memory \
-; RUN: -polly-acc-rewrite-managed-memory < %s | FileCheck %s --check-prefix=HOST-IR
-
-; REQUIRES: pollyacc
-
-; SCOP: Function: f
-; SCOP-NEXT: Region: %for.body---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-; SCOP: i32 MemRef_A[*];
-
-; Check that we generate a constructor call for @A.toptr
-; HOST-IR: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr {{.*}}, ptr @A.toptr }]
-
-; Check that we generate a constructor
-; 4 bytes * 100 = 400
-; HOST-IR: define void {{.*}}constructor() {
-; HOST-IR-NEXT: entry:
-; HOST-IR-NEXT: %mem.raw = call ptr @polly_mallocManaged(i64 400)
-; HOST-IR-NEXT: store ptr %mem.raw, ptr @A.toptr
-; HOST-IR-NEXT: ret void
-; HOST-IR-NEXT: }
-
-; HOST-IR-NOT: @A
-
-source_filename = "test.c"
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.12.0"
-
-@A = internal global [100 x i32] zeroinitializer, align 16
-
-define void @f() {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- br label %for.body
-
-for.body: ; preds = %entry.split, %for.body
- %indvars.iv1 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds [100 x i32], ptr @A, i64 0, i64 %indvars.iv1
- store i32 42, ptr %arrayidx, align 4, !tbaa !3
- %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 100
- br i1 %exitcond, label %for.end, label %for.body
-
-for.end: ; preds = %for.body
- ret void
-}
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #0
-
-
-; Function Attrs: argmemonly nounwind
-declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #0
-
-attributes #0 = { argmemonly nounwind }
-
-!llvm.module.flags = !{!0, !1}
-!llvm.ident = !{!2}
-
-!0 = !{i32 1, !"wchar_size", i32 4}
-!1 = !{i32 7, !"PIC Level", i32 2}
-!2 = !{!"clang version 6.0.0"}
-!3 = !{!4, !4, i64 0}
-!4 = !{!"int", !5, i64 0}
-!5 = !{!"omnipotent char", !6, i64 0}
-!6 = !{!"Simple C/C++ TBAA"}
diff --git a/polly/test/GPGPU/size-cast.ll b/polly/test/GPGPU/size-cast.ll
deleted file mode 100644
index 5e2c85de4251..000000000000
--- a/polly/test/GPGPU/size-cast.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
-; RUN: FileCheck %s -check-prefix=IR
-
-; REQUIRES: pollyacc
-
-; This test case ensures that we properly sign-extend the types we are using.
-
-; CODE: if (arg >= 1 && arg1 == 0) {
-; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_arg3, MemRef_arg3, (arg) * sizeof(double), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(arg >= 1048545 ? 32768 : (arg + 31) / 32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_arg3, dev_MemRef_arg2, arg, arg1);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_arg2, dev_MemRef_arg2, (arg) * sizeof(double), cudaMemcpyDeviceToHost));
-; CODE-NEXT cudaCheckReturn(cudaFree(dev_MemRef_arg3));
-; CODE-NEXT cudaCheckReturn(cudaFree(dev_MemRef_arg2));
-
-; CODE: # kernel0
-; CODE-NEXT: for (int c0 = 0; c0 <= (arg - 32 * b0 - 1) / 1048576; c0 += 1)
-; CODE-NEXT: if (arg >= 32 * b0 + t0 + 1048576 * c0 + 1)
-; CODE-NEXT: Stmt_bb6(0, 32 * b0 + t0 + 1048576 * c0);
-
-; IR-LABEL: call ptr @polly_initContextCUDA()
-; IR: sext i32 %arg to i64
-; IR-NEXT: mul i64
-; IR-NEXT: @polly_allocateMemoryForDevice
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @hoge(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3) {
-bb:
- br label %bb4
-
-bb4: ; preds = %bb13, %bb
- br label %bb6
-
-bb5: ; preds = %bb13
- ret void
-
-bb6: ; preds = %bb6, %bb4
- %tmp = phi i64 [ 0, %bb4 ], [ %tmp10, %bb6 ]
- %tmp7 = getelementptr inbounds double, ptr %arg3, i64 %tmp
- %tmp8 = load double, ptr %tmp7, align 8
- %tmp9 = getelementptr inbounds [1000 x double], ptr %arg2, i64 0, i64 %tmp
- store double %tmp8, ptr %tmp9, align 8
- %tmp10 = add nuw nsw i64 %tmp, 1
- %tmp11 = zext i32 %arg to i64
- %tmp12 = icmp ne i64 %tmp10, %tmp11
- br i1 %tmp12, label %bb6, label %bb13
-
-bb13: ; preds = %bb6
- %tmp14 = zext i32 %arg1 to i64
- %tmp15 = icmp ne i64 0, %tmp14
- br i1 %tmp15, label %bb4, label %bb5
-}
diff --git a/polly/test/GPGPU/spir-codegen.ll b/polly/test/GPGPU/spir-codegen.ll
deleted file mode 100644
index 3715e1ec4427..000000000000
--- a/polly/test/GPGPU/spir-codegen.ll
+++ /dev/null
@@ -1,118 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen-ppcg \
-; RUN: -polly-gpu-arch=spir32 \
-; RUN: -polly-acc-dump-kernel-ir -polly-process-unprofitable -disable-output < %s | \
-; RUN: FileCheck %s
-
-; REQUIRES: pollyacc
-
-; CHECK: target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
-; CHECK-NEXT: target triple = "spir-unknown-unknown"
-
-; CHECK-LABEL: define spir_kernel void @FUNC_double_parallel_loop_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_A) #0 !kernel_arg_addr_space !0 !kernel_arg_name !1 !kernel_arg_access_qual !1 !kernel_arg_type !1 !kernel_arg_type_qual !1 !kernel_arg_base_type !1 {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: %0 = call i32 @__gen_ocl_get_group_id0()
-; CHECK-NEXT: %__gen_ocl_get_group_id0 = zext i32 %0 to i64
-; CHECK-NEXT: %1 = call i32 @__gen_ocl_get_group_id1()
-; CHECK-NEXT: %__gen_ocl_get_group_id1 = zext i32 %1 to i64
-; CHECK-NEXT: %2 = call i32 @__gen_ocl_get_local_id0()
-; CHECK-NEXT: %__gen_ocl_get_local_id0 = zext i32 %2 to i64
-; CHECK-NEXT: %3 = call i32 @__gen_ocl_get_local_id1()
-; CHECK-NEXT: %__gen_ocl_get_local_id1 = zext i32 %3 to i64
-; CHECK-NEXT: br label %polly.loop_preheader
-
-; CHECK-LABEL: polly.loop_exit: ; preds = %polly.stmt.bb5
-; CHECK-NEXT: ret void
-
-; CHECK-LABEL: polly.loop_header: ; preds = %polly.stmt.bb5, %polly.loop_preheader
-; CHECK-NEXT: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.bb5 ]
-; CHECK-NEXT: %4 = mul nsw i64 32, %__gen_ocl_get_group_id0
-; CHECK-NEXT: %5 = add nsw i64 %4, %__gen_ocl_get_local_id0
-; CHECK-NEXT: %6 = mul nsw i64 32, %__gen_ocl_get_group_id1
-; CHECK-NEXT: %7 = add nsw i64 %6, %__gen_ocl_get_local_id1
-; CHECK-NEXT: %8 = mul nsw i64 16, %polly.indvar
-; CHECK-NEXT: %9 = add nsw i64 %7, %8
-; CHECK-NEXT: br label %polly.stmt.bb5
-
-; CHECK-LABEL: polly.stmt.bb5: ; preds = %polly.loop_header
-; CHECK-NEXT: %10 = mul i64 %5, %9
-; CHECK-NEXT: %p_tmp6 = sitofp i64 %10 to float
-; CHECK-NEXT: %polly.access.cast.MemRef_A = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
-; CHECK-NEXT: %11 = mul nsw i64 32, %__gen_ocl_get_group_id0
-; CHECK-NEXT: %12 = add nsw i64 %11, %__gen_ocl_get_local_id0
-; CHECK-NEXT: %polly.access.mul.MemRef_A = mul nsw i64 %12, 1024
-; CHECK-NEXT: %13 = mul nsw i64 32, %__gen_ocl_get_group_id1
-; CHECK-NEXT: %14 = add nsw i64 %13, %__gen_ocl_get_local_id1
-; CHECK-NEXT: %15 = mul nsw i64 16, %polly.indvar
-; CHECK-NEXT: %16 = add nsw i64 %14, %15
-; CHECK-NEXT: %polly.access.add.MemRef_A = add nsw i64 %polly.access.mul.MemRef_A, %16
-; CHECK-NEXT: %polly.access.MemRef_A = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A, i64 %polly.access.add.MemRef_A
-; CHECK-NEXT: %tmp8_p_scalar_ = load float, float addrspace(1)* %polly.access.MemRef_A, align 4
-; CHECK-NEXT: %p_tmp9 = fadd float %tmp8_p_scalar_, %p_tmp6
-; CHECK-NEXT: %polly.access.cast.MemRef_A1 = bitcast i8 addrspace(1)* %MemRef_A to float addrspace(1)*
-; CHECK-NEXT: %17 = mul nsw i64 32, %__gen_ocl_get_group_id0
-; CHECK-NEXT: %18 = add nsw i64 %17, %__gen_ocl_get_local_id0
-; CHECK-NEXT: %polly.access.mul.MemRef_A2 = mul nsw i64 %18, 1024
-; CHECK-NEXT: %19 = mul nsw i64 32, %__gen_ocl_get_group_id1
-; CHECK-NEXT: %20 = add nsw i64 %19, %__gen_ocl_get_local_id1
-; CHECK-NEXT: %21 = mul nsw i64 16, %polly.indvar
-; CHECK-NEXT: %22 = add nsw i64 %20, %21
-; CHECK-NEXT: %polly.access.add.MemRef_A3 = add nsw i64 %polly.access.mul.MemRef_A2, %22
-; CHECK-NEXT: %polly.access.MemRef_A4 = getelementptr float, float addrspace(1)* %polly.access.cast.MemRef_A1, i64 %polly.access.add.MemRef_A3
-; CHECK-NEXT: store float %p_tmp9, float addrspace(1)* %polly.access.MemRef_A4, align 4
-; CHECK-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1
-; CHECK-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 1
-; CHECK-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
-
-; CHECK-LABEL: polly.loop_preheader: ; preds = %entry
-; CHECK-NEXT: br label %polly.loop_header
-
-; CHECK: attributes #0 = { "polly.skip.fn" }
-
-; void double_parallel_loop(float A[][1024]) {
-; for (long i = 0; i < 1024; i++)
-; for (long j = 0; j < 1024; j++)
-; A[i][j] += i * j;
-; }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @double_parallel_loop([1024 x float]* %A) {
-bb:
- br label %bb2
-
-bb2: ; preds = %bb13, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp14, %bb13 ]
- %exitcond1 = icmp ne i64 %i.0, 1024
- br i1 %exitcond1, label %bb3, label %bb15
-
-bb3: ; preds = %bb2
- br label %bb4
-
-bb4: ; preds = %bb10, %bb3
- %j.0 = phi i64 [ 0, %bb3 ], [ %tmp11, %bb10 ]
- %exitcond = icmp ne i64 %j.0, 1024
- br i1 %exitcond, label %bb5, label %bb12
-
-bb5: ; preds = %bb4
- %tmp = mul nuw nsw i64 %i.0, %j.0
- %tmp6 = sitofp i64 %tmp to float
- %tmp7 = getelementptr inbounds [1024 x float], [1024 x float]* %A, i64 %i.0, i64 %j.0
- %tmp8 = load float, float* %tmp7, align 4
- %tmp9 = fadd float %tmp8, %tmp6
- store float %tmp9, float* %tmp7, align 4
- br label %bb10
-
-bb10: ; preds = %bb5
- %tmp11 = add nuw nsw i64 %j.0, 1
- br label %bb4
-
-bb12: ; preds = %bb4
- br label %bb13
-
-bb13: ; preds = %bb12
- %tmp14 = add nuw nsw i64 %i.0, 1
- br label %bb2
-
-bb15: ; preds = %bb2
- ret void
-}
diff --git a/polly/test/GPGPU/spir-typesize.ll b/polly/test/GPGPU/spir-typesize.ll
deleted file mode 100644
index fce17c54e6e9..000000000000
--- a/polly/test/GPGPU/spir-typesize.ll
+++ /dev/null
@@ -1,90 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg \
-; RUN: -polly-gpu-arch=spir64 \
-; RUN: -polly-acc-dump-kernel-ir -polly-process-unprofitable -disable-output < %s | \
-; RUN: FileCheck -check-prefix=I64 %s
-
-; RUN: opt %loadPolly -polly-codegen-ppcg \
-; RUN: -polly-gpu-arch=spir32 \
-; RUN: -polly-acc-dump-kernel-ir -polly-process-unprofitable -disable-output < %s | \
-; RUN: FileCheck -check-prefix=I32 %s
-
-; REQUIRES: pollyacc
-
-; This test case checks whether the openCl runtime functions (get_local_id/get_group_id) return the right types for 32 and 64bit devices.
-
-; I32: target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
-; I32-NEXT: target triple = "spir-unknown-unknown"
-
-; I32-LABEL: define spir_kernel void @FUNC_double_parallel_loop_SCOP_0_KERNEL_0(ptr addrspace(1) %MemRef_A) #0 !kernel_arg_addr_space !0 !kernel_arg_name !1 !kernel_arg_access_qual !1 !kernel_arg_type !1 !kernel_arg_type_qual !1 !kernel_arg_base_type !1 {
-; I32-NEXT: entry:
-; I32-NEXT: %0 = call i32 @__gen_ocl_get_group_id0()
-; I32-NEXT: %__gen_ocl_get_group_id0 = zext i32 %0 to i64
-; I32-NEXT: %1 = call i32 @__gen_ocl_get_group_id1()
-; I32-NEXT: %__gen_ocl_get_group_id1 = zext i32 %1 to i64
-; I32-NEXT: %2 = call i32 @__gen_ocl_get_local_id0()
-; I32-NEXT: %__gen_ocl_get_local_id0 = zext i32 %2 to i64
-; I32-NEXT: %3 = call i32 @__gen_ocl_get_local_id1()
-; I32-NEXT: %__gen_ocl_get_local_id1 = zext i32 %3 to i64
-; I32-NEXT: br label %polly.loop_preheader
-
-; I64: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
-; I64-next: target triple = "spir64-unknown-unknown"
-
-; I64-LABEL: define spir_kernel void @FUNC_double_parallel_loop_SCOP_0_KERNEL_0(ptr addrspace(1) %MemRef_A) #0 !kernel_arg_addr_space !0 !kernel_arg_name !1 !kernel_arg_access_qual !1 !kernel_arg_type !1 !kernel_arg_type_qual !1 !kernel_arg_base_type !1 {
-; I64-NEXT: entry:
-; I64-NEXT: %0 = call i64 @__gen_ocl_get_group_id0()
-; I64-NEXT: %1 = call i64 @__gen_ocl_get_group_id1()
-; I64-NEXT: %2 = call i64 @__gen_ocl_get_local_id0()
-; I64-NEXT: %3 = call i64 @__gen_ocl_get_local_id1()
-; I64-NEXT: br label %polly.loop_preheader
-
-
-; void double_parallel_loop(float A[][1024]) {
-; for (long i = 0; i < 1024; i++)
-; for (long j = 0; j < 1024; j++)
-; A[i][j] += i * j;
-; }
-;
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @double_parallel_loop(ptr %A) {
-bb:
- br label %bb2
-
-bb2: ; preds = %bb13, %bb
- %i.0 = phi i64 [ 0, %bb ], [ %tmp14, %bb13 ]
- %exitcond1 = icmp ne i64 %i.0, 1024
- br i1 %exitcond1, label %bb3, label %bb15
-
-bb3: ; preds = %bb2
- br label %bb4
-
-bb4: ; preds = %bb10, %bb3
- %j.0 = phi i64 [ 0, %bb3 ], [ %tmp11, %bb10 ]
- %exitcond = icmp ne i64 %j.0, 1024
- br i1 %exitcond, label %bb5, label %bb12
-
-bb5: ; preds = %bb4
- %tmp = mul nuw nsw i64 %i.0, %j.0
- %tmp6 = sitofp i64 %tmp to float
- %tmp7 = getelementptr inbounds [1024 x float], ptr %A, i64 %i.0, i64 %j.0
- %tmp8 = load float, ptr %tmp7, align 4
- %tmp9 = fadd float %tmp8, %tmp6
- store float %tmp9, ptr %tmp7, align 4
- br label %bb10
-
-bb10: ; preds = %bb5
- %tmp11 = add nuw nsw i64 %j.0, 1
- br label %bb4
-
-bb12: ; preds = %bb4
- br label %bb13
-
-bb13: ; preds = %bb12
- %tmp14 = add nuw nsw i64 %i.0, 1
- br label %bb2
-
-bb15: ; preds = %bb2
- ret void
-}
diff --git a/polly/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll b/polly/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll
deleted file mode 100644
index 6fd14cbfbcd1..000000000000
--- a/polly/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll
+++ /dev/null
@@ -1,82 +0,0 @@
-; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s --check-prefix=SCOP
-; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s
-
-; Check that we do not create a kernel if there is an
-; unknown function call in a candidate kernel.
-
-; Check that we model the kernel as a scop.
-; SCOP: Function: f
-; SCOP-NEXT: Region: %entry.split---%for.end13
-
-; If a kernel were generated, then this code would have been part of the kernel
-; and not the `.ll` file that is generated.
-; CHECK: %conv = fpext float %0 to double
-; CHECK-NEXT: %1 = tail call double @extern.fn(double %conv)
-; CHECK-NEXT: %conv6 = fptrunc double %1 to float
-
-; REQUIRES: pollyacc
-
-; static const int N = 1000;
-; void f(float A[N][N], int n, float B[N][N]) {
-; for(int i = 0; i < n; i++) {
-; for(int j = 0; j < n; j++) {
-; B[i][j] = extern_fn(A[i][j], 3);
-; }
-;
-; }
-; }
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.11.0"
-
-define void @f(ptr %A, i32 %n, ptr %B) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- %cmp3 = icmp sgt i32 %n, 0
- br i1 %cmp3, label %for.cond1.preheader.lr.ph, label %for.end13
-
-for.cond1.preheader.lr.ph: ; preds = %entry.split
- br label %for.cond1.preheader
-
-for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc11
- %indvars.iv5 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next6, %for.inc11 ]
- %cmp21 = icmp sgt i32 %n, 0
- br i1 %cmp21, label %for.body3.lr.ph, label %for.inc11
-
-for.body3.lr.ph: ; preds = %for.cond1.preheader
- br label %for.body3
-
-for.body3: ; preds = %for.body3.lr.ph, %for.body3
- %indvars.iv = phi i64 [ 0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
- %arrayidx5 = getelementptr inbounds [1000 x float], ptr %A, i64 %indvars.iv5, i64 %indvars.iv
- %0 = load float, ptr %arrayidx5, align 4
- %conv = fpext float %0 to double
- %1 = tail call double @extern.fn(double %conv)
- %conv6 = fptrunc double %1 to float
- %arrayidx10 = getelementptr inbounds [1000 x float], ptr %B, i64 %indvars.iv5, i64 %indvars.iv
- store float %conv6, ptr %arrayidx10, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %wide.trip.count = zext i32 %n to i64
- %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
- br i1 %exitcond, label %for.body3, label %for.cond1.for.inc11_crit_edge
-
-for.cond1.for.inc11_crit_edge: ; preds = %for.body3
- br label %for.inc11
-
-for.inc11: ; preds = %for.cond1.for.inc11_crit_edge, %for.cond1.preheader
- %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
- %wide.trip.count7 = zext i32 %n to i64
- %exitcond8 = icmp ne i64 %indvars.iv.next6, %wide.trip.count7
- br i1 %exitcond8, label %for.cond1.preheader, label %for.cond.for.end13_crit_edge
-
-for.cond.for.end13_crit_edge: ; preds = %for.inc11
- br label %for.end13
-
-for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry.split
- ret void
-}
-
-declare double @extern.fn(double) #0
-attributes #0 = { readnone }
diff --git a/polly/test/GPGPU/untouched-arrays.ll b/polly/test/GPGPU/untouched-arrays.ll
deleted file mode 100644
index 5c7e0c7b543b..000000000000
--- a/polly/test/GPGPU/untouched-arrays.ll
+++ /dev/null
@@ -1,270 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
-; RUN: -disable-output < %s | \
-; RUN: FileCheck -check-prefix=CODE %s
-
-; REQUIRES: pollyacc
-
-; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_global_1, MemRef_global_1, (142) * sizeof(i32), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(10);
-; CODE-NEXT: dim3 k0_dimGrid(1);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_global_1);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_global_1, dev_MemRef_global_1, (142) * sizeof(i32), cudaMemcpyDeviceToHost));
-; CODE: cudaCheckReturn(cudaFree(dev_MemRef_global_1));
-; CODE-NEXT: }
-
-; CODE: # kernel0
-; CODE-NEXT: Stmt_bb33(t0, 0);
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-%struct.hoge = type { [23 x i16], [22 x i16], [14 x i16], [13 x i16] }
-
-@global = external global [9 x %struct.hoge], align 16
-@global.1 = external global [9 x [152 x i32]], align 16
-
-; Function Attrs: nounwind uwtable
-define void @widget() #0 {
-bb:
- br label %bb1
-
-bb1: ; preds = %bb1, %bb
- br i1 undef, label %bb1, label %bb2
-
-bb2: ; preds = %bb2, %bb1
- br i1 undef, label %bb2, label %bb3
-
-bb3: ; preds = %bb3, %bb2
- br i1 undef, label %bb3, label %bb4
-
-bb4: ; preds = %bb4, %bb3
- br i1 undef, label %bb4, label %bb5
-
-bb5: ; preds = %bb5, %bb4
- br i1 undef, label %bb5, label %bb6
-
-bb6: ; preds = %bb6, %bb5
- br i1 undef, label %bb6, label %bb7
-
-bb7: ; preds = %bb7, %bb6
- br i1 undef, label %bb7, label %bb8
-
-bb8: ; preds = %bb8, %bb7
- br i1 undef, label %bb8, label %bb9
-
-bb9: ; preds = %bb8
- br label %bb10
-
-bb10: ; preds = %bb12, %bb9
- br label %bb11
-
-bb11: ; preds = %bb11, %bb10
- br i1 undef, label %bb11, label %bb12
-
-bb12: ; preds = %bb11
- br i1 undef, label %bb10, label %bb13
-
-bb13: ; preds = %bb18, %bb12
- br i1 undef, label %bb16, label %bb14
-
-bb14: ; preds = %bb16, %bb13
- br i1 undef, label %bb15, label %bb18
-
-bb15: ; preds = %bb14
- br label %bb17
-
-bb16: ; preds = %bb16, %bb13
- br i1 undef, label %bb16, label %bb14
-
-bb17: ; preds = %bb17, %bb15
- br i1 undef, label %bb17, label %bb18
-
-bb18: ; preds = %bb17, %bb14
- br i1 undef, label %bb13, label %bb19
-
-bb19: ; preds = %bb25, %bb18
- br label %bb20
-
-bb20: ; preds = %bb24, %bb19
- br i1 undef, label %bb21, label %bb24
-
-bb21: ; preds = %bb20
- br i1 undef, label %bb23, label %bb22
-
-bb22: ; preds = %bb21
- br label %bb24
-
-bb23: ; preds = %bb21
- br label %bb24
-
-bb24: ; preds = %bb23, %bb22, %bb20
- br i1 undef, label %bb20, label %bb25
-
-bb25: ; preds = %bb24
- br i1 undef, label %bb19, label %bb26
-
-bb26: ; preds = %bb56, %bb25
- %tmp = phi ptr [ undef, %bb56 ], [ getelementptr inbounds ([9 x [152 x i32]], ptr @global.1, i64 0, i64 0, i64 32), %bb25 ]
- br label %bb27
-
-bb27: ; preds = %bb27, %bb26
- br i1 undef, label %bb27, label %bb28
-
-bb28: ; preds = %bb27
- br label %bb30
-
-bb30: ; preds = %bb38, %bb28
- %tmp31 = phi i32 [ 3, %bb28 ], [ %tmp40, %bb38 ]
- %tmp32 = phi ptr [ %tmp, %bb28 ], [ %tmp39, %bb38 ]
- br label %bb33
-
-bb33: ; preds = %bb33, %bb30
- %tmp34 = phi i32 [ 0, %bb30 ], [ %tmp37, %bb33 ]
- %tmp35 = phi ptr [ %tmp32, %bb30 ], [ undef, %bb33 ]
- %tmp36 = getelementptr inbounds i32, ptr %tmp35, i64 1
- store i32 undef, ptr %tmp36, align 4, !tbaa !1
- %tmp37 = add nuw nsw i32 %tmp34, 1
- br i1 false, label %bb33, label %bb38
-
-bb38: ; preds = %bb33
- %tmp39 = getelementptr i32, ptr %tmp32, i64 12
- %tmp40 = add nuw nsw i32 %tmp31, 1
- %tmp41 = icmp ne i32 %tmp40, 13
- br i1 %tmp41, label %bb30, label %bb42
-
-bb42: ; preds = %bb38
- %tmp43 = getelementptr inbounds [9 x %struct.hoge], ptr @global, i64 0, i64 0, i32 3, i64 0
- br label %bb44
-
-bb44: ; preds = %bb51, %bb42
- %tmp45 = phi i32 [ 0, %bb42 ], [ %tmp52, %bb51 ]
- %tmp46 = phi ptr [ %tmp43, %bb42 ], [ undef, %bb51 ]
- %tmp47 = load i16, ptr %tmp46, align 2, !tbaa !5
- br label %bb48
-
-bb48: ; preds = %bb48, %bb44
- %tmp49 = phi i32 [ 0, %bb44 ], [ %tmp50, %bb48 ]
- %tmp50 = add nuw nsw i32 %tmp49, 1
- br i1 false, label %bb48, label %bb51
-
-bb51: ; preds = %bb48
- %tmp52 = add nuw nsw i32 %tmp45, 1
- %tmp53 = icmp ne i32 %tmp52, 13
- br i1 %tmp53, label %bb44, label %bb54
-
-bb54: ; preds = %bb51
- br label %bb55
-
-bb55: ; preds = %bb55, %bb54
- br i1 undef, label %bb55, label %bb56
-
-bb56: ; preds = %bb55
- br i1 undef, label %bb26, label %bb57
-
-bb57: ; preds = %bb60, %bb56
- br label %bb58
-
-bb58: ; preds = %bb58, %bb57
- br i1 undef, label %bb58, label %bb59
-
-bb59: ; preds = %bb59, %bb58
- br i1 undef, label %bb59, label %bb60
-
-bb60: ; preds = %bb59
- br i1 undef, label %bb57, label %bb61
-
-bb61: ; preds = %bb65, %bb60
- br label %bb62
-
-bb62: ; preds = %bb64, %bb61
- br label %bb63
-
-bb63: ; preds = %bb63, %bb62
- br i1 undef, label %bb63, label %bb64
-
-bb64: ; preds = %bb63
- br i1 undef, label %bb62, label %bb65
-
-bb65: ; preds = %bb64
- br i1 undef, label %bb61, label %bb66
-
-bb66: ; preds = %bb70, %bb65
- br label %bb67
-
-bb67: ; preds = %bb69, %bb66
- br label %bb68
-
-bb68: ; preds = %bb68, %bb67
- br i1 undef, label %bb68, label %bb69
-
-bb69: ; preds = %bb68
- br i1 undef, label %bb67, label %bb70
-
-bb70: ; preds = %bb69
- br i1 undef, label %bb66, label %bb71
-
-bb71: ; preds = %bb73, %bb70
- br label %bb72
-
-bb72: ; preds = %bb72, %bb71
- br i1 undef, label %bb72, label %bb73
-
-bb73: ; preds = %bb72
- br i1 undef, label %bb71, label %bb74
-
-bb74: ; preds = %bb80, %bb73
- br label %bb75
-
-bb75: ; preds = %bb79, %bb74
- br label %bb76
-
-bb76: ; preds = %bb78, %bb75
- br label %bb77
-
-bb77: ; preds = %bb77, %bb76
- br i1 undef, label %bb77, label %bb78
-
-bb78: ; preds = %bb77
- br i1 undef, label %bb76, label %bb79
-
-bb79: ; preds = %bb78
- br i1 undef, label %bb75, label %bb80
-
-bb80: ; preds = %bb79
- br i1 undef, label %bb74, label %bb81
-
-bb81: ; preds = %bb85, %bb80
- br label %bb82
-
-bb82: ; preds = %bb84, %bb81
- br label %bb83
-
-bb83: ; preds = %bb83, %bb82
- br i1 undef, label %bb83, label %bb84
-
-bb84: ; preds = %bb83
- br i1 undef, label %bb82, label %bb85
-
-bb85: ; preds = %bb84
- br i1 undef, label %bb81, label %bb86
-
-bb86: ; preds = %bb85
- ret void
-}
-
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 4.0.0"}
-!1 = !{!2, !2, i64 0}
-!2 = !{!"int", !3, i64 0}
-!3 = !{!"omnipotent char", !4, i64 0}
-!4 = !{!"Simple C/C++ TBAA"}
-!5 = !{!6, !6, i64 0}
-!6 = !{!"short", !3, i64 0}
diff --git a/polly/test/Unit/lit.site.cfg.in b/polly/test/Unit/lit.site.cfg.in
index 2aeaf197f06c..a93b8b7a527b 100644
--- a/polly/test/Unit/lit.site.cfg.in
+++ b/polly/test/Unit/lit.site.cfg.in
@@ -11,7 +11,6 @@ config.polly_obj_root = "@POLLY_BINARY_DIR@"
config.polly_lib_dir = "@POLLY_LIB_DIR@"
config.shlibdir = "@SHLIBDIR@"
config.target_triple = "@LLVM_TARGET_TRIPLE@"
-config.enable_gpgpu_codegen = "@GPU_CODEGEN@"
config.llvm_polly_link_into_tools = "@LLVM_POLLY_LINK_INTO_TOOLS@"
config.has_unittests = @POLLY_GTEST_AVAIL@
diff --git a/polly/test/lit.cfg b/polly/test/lit.cfg
index 41e3a589c61e..0943507ebe50 100644
--- a/polly/test/lit.cfg
+++ b/polly/test/lit.cfg
@@ -70,6 +70,4 @@ except OSError:
print("Could not find llvm-config in " + config.llvm_tools_dir)
exit(42)
-if re.search(r'NVPTX', llvm_config_cmd.stdout.read().decode('ascii')):
- config.available_features.add('nvptx-registered-target')
llvm_config_cmd.wait()
diff --git a/polly/test/lit.site.cfg.in b/polly/test/lit.site.cfg.in
index 4aed9875c3fb..b44061260834 100644
--- a/polly/test/lit.site.cfg.in
+++ b/polly/test/lit.site.cfg.in
@@ -7,7 +7,6 @@ config.llvm_libs_dir = lit_config.substitute("@LLVM_LIBS_DIR@")
config.polly_obj_root = "@POLLY_BINARY_DIR@"
config.polly_lib_dir = "@POLLY_LIB_DIR@"
config.target_triple = "@LLVM_TARGET_TRIPLE@"
-config.enable_gpgpu_codegen = "@GPU_CODEGEN@"
config.llvm_polly_link_into_tools = "@LLVM_POLLY_LINK_INTO_TOOLS@"
config.targets_to_build = "@TARGETS_TO_BUILD@"
config.extra_paths = "@POLLY_TEST_EXTRA_PATHS@".split(";")
@@ -50,9 +49,6 @@ else:
config.substitutions.append(('%loadNPMPolly', commonOpts ))
-if config.enable_gpgpu_codegen == 'TRUE' :
- config.available_features.add('pollyacc')
-
import lit.llvm
lit.llvm.initialize(lit_config, config)