diff options
Diffstat (limited to 'polly/test/GPGPU/non-read-only-scalars.ll')
-rw-r--r-- | polly/test/GPGPU/non-read-only-scalars.ll | 168 |
1 files changed, 0 insertions, 168 deletions
diff --git a/polly/test/GPGPU/non-read-only-scalars.ll b/polly/test/GPGPU/non-read-only-scalars.ll deleted file mode 100644 index 1ce6e0991ebb..000000000000 --- a/polly/test/GPGPU/non-read-only-scalars.ll +++ /dev/null @@ -1,168 +0,0 @@ -; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \ -; RUN: -disable-output < %s | \ -; RUN: FileCheck -check-prefix=CODE %s - -; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \ -; RUN: -disable-output < %s | \ -; RUN: FileCheck %s -check-prefix=KERNEL-IR -; -; REQUIRES: pollyacc -; -; #include <stdio.h> -; -; float foo(float A[]) { -; float sum = 0; -; -; for (long i = 0; i < 32; i++) -; A[i] = i; -; -; for (long i = 0; i < 32; i++) -; A[i] += i; -; -; for (long i = 0; i < 32; i++) -; sum += A[i]; -; -; return sum; -; } -; -; int main() { -; float A[32]; -; float sum = foo(A); -; printf("%f\n", sum); -; } - -; CODE: dim3 k0_dimBlock(32); -; CODE-NEXT: dim3 k0_dimGrid(1); -; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A); -; CODE-NEXT: cudaCheckKernel(); -; CODE-NEXT: } - -; CODE: { -; CODE-NEXT: dim3 k1_dimBlock; -; CODE-NEXT: dim3 k1_dimGrid; -; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_sum_0__phi); -; CODE-NEXT: cudaCheckKernel(); -; CODE-NEXT: } - -; CODE: { -; CODE-NEXT: dim3 k2_dimBlock; -; CODE-NEXT: dim3 k2_dimGrid; -; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_sum_0__phi, dev_MemRef_sum_0); -; CODE-NEXT: cudaCheckKernel(); -; CODE-NEXT: } - -; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (32) * sizeof(float), cudaMemcpyDeviceToHost)); -; CODE-NEXT: cudaCheckReturn(cudaMemcpy(&MemRef_sum_0, dev_MemRef_sum_0, sizeof(float), cudaMemcpyDeviceToHost)); -; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_A)); -; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_sum_0__phi)); -; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_sum_0)); -; CODE-NEXT: } - -; CODE: # kernel0 -; CODE-NEXT: { -; CODE-NEXT: Stmt_bb4(t0); -; CODE-NEXT: Stmt_bb10(t0); -; CODE-NEXT: } - -; CODE: # kernel1 -; CODE-NEXT: Stmt_bb17(); - -; CODE: # kernel2 -; TODO-NEXT: { -; TODO-NEXT: read(); -; TODO-NEXT: for (int c0 = 0; c0 <= 32; c0 += 1) { -; TODO-NEXT: Stmt_bb18(c0); -; TODO-NEXT: if (c0 <= 31) -; TODO-NEXT: Stmt_bb20(c0); -; TODO-NEXT: } -; TODO-NEXT: write(); -; TODO-NEXT: } - - -; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_1(ptr addrspace(1) %MemRef_sum_0__phi) -; KERNEL-IR: store float 0.000000e+00, ptr %sum.0.phiops -; KERNEL-IR: [[REGA:%.+]] = addrspacecast ptr addrspace(1) %MemRef_sum_0__phi to ptr -; KERNEL-IR: [[REGB:%.+]] = load float, ptr %sum.0.phiops -; KERNEL-IR: store float [[REGB]], ptr [[REGA]] - -; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_2(ptr addrspace(1) %MemRef_A, ptr addrspace(1) %MemRef_sum_0__phi, ptr addrspace(1) %MemRef_sum_0) - - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 - -define float @foo(ptr %A) { -bb: - br label %bb3 - -bb3: ; preds = %bb6, %bb - %i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ] - %exitcond2 = icmp ne i64 %i.0, 32 - br i1 %exitcond2, label %bb4, label %bb8 - -bb4: ; preds = %bb3 - %tmp = sitofp i64 %i.0 to float - %tmp5 = getelementptr inbounds float, ptr %A, i64 %i.0 - store float %tmp, ptr %tmp5, align 4 - br label %bb6 - -bb6: ; preds = %bb4 - %tmp7 = add nuw nsw i64 %i.0, 1 - br label %bb3 - -bb8: ; preds = %bb3 - br label %bb9 - -bb9: ; preds = %bb15, %bb8 - %i1.0 = phi i64 [ 0, %bb8 ], [ %tmp16, %bb15 ] - %exitcond1 = icmp ne i64 %i1.0, 32 - br i1 %exitcond1, label %bb10, label %bb17 - -bb10: ; preds = %bb9 - %tmp11 = sitofp i64 %i1.0 to float - %tmp12 = getelementptr inbounds float, ptr %A, i64 %i1.0 - %tmp13 = load float, ptr %tmp12, align 4 - %tmp14 = fadd float %tmp13, %tmp11 - store float %tmp14, ptr %tmp12, align 4 - br label %bb15 - -bb15: ; preds = %bb10 - %tmp16 = add nuw nsw i64 %i1.0, 1 - br label %bb9 - -bb17: ; preds = %bb9 - br label %bb18 - -bb18: ; preds = %bb20, %bb17 - %sum.0 = phi float [ 0.000000e+00, %bb17 ], [ %tmp23, %bb20 ] - %i2.0 = phi i64 [ 0, %bb17 ], [ %tmp24, %bb20 ] - %exitcond = icmp ne i64 %i2.0, 32 - br i1 %exitcond, label %bb19, label %bb25 - -bb19: ; preds = %bb18 - br label %bb20 - -bb20: ; preds = %bb19 - %tmp21 = getelementptr inbounds float, ptr %A, i64 %i2.0 - %tmp22 = load float, ptr %tmp21, align 4 - %tmp23 = fadd float %sum.0, %tmp22 - %tmp24 = add nuw nsw i64 %i2.0, 1 - br label %bb18 - -bb25: ; preds = %bb18 - %sum.0.lcssa = phi float [ %sum.0, %bb18 ] - ret float %sum.0.lcssa -} - -define i32 @main() { -bb: - %A = alloca [32 x float], align 16 - %tmp1 = call float @foo(ptr %A) - %tmp2 = fpext float %tmp1 to double - %tmp3 = call i32 (ptr, ...) @printf(ptr @.str, double %tmp2) #2 - ret i32 0 -} - -declare i32 @printf(ptr, ...) #1 - |