diff options
author | Florian Hahn <flo@fhahn.com> | 2022-01-04 14:34:38 +0000 |
---|---|---|
committer | Florian Hahn <flo@fhahn.com> | 2022-01-04 15:20:35 +0000 |
commit | d8276208be763ba5b70e9b422034e77764a8649f (patch) | |
tree | 96009ee5eeb357743a212f92c7606d9dfac9ee5b | |
parent | 10bb837feb22ad70dc4acc3d2cdb5be7f45d1c21 (diff) | |
download | llvm-d8276208be763ba5b70e9b422034e77764a8649f.tar.gz |
[LAA] Remove overeager assertion for aggregate types.
0a00d64 turned an early exit here into an assertion, but the assertion
can be triggered, as PR52920 shows.
The later code is agnostic to the accessed type, so just drop the
assert. The patch also adds tests for LAA directly and
loop-load-elimination to show the behavior is sane.
-rw-r--r-- | llvm/lib/Analysis/LoopAccessAnalysis.cpp | 1 | ||||
-rw-r--r-- | llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll | 141 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll | 40 |
3 files changed, 181 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 6444518dc70c..b8b1b5ad53c9 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1055,7 +1055,6 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, bool ShouldCheckWrap) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); - assert(!AccessTy->isAggregateType() && "Bad stride - Not a pointer to a scalar type"); if (isa<ScalableVectorType>(AccessTy)) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll new file mode 100644 index 000000000000..547372b83bc9 --- /dev/null +++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll @@ -0,0 +1,141 @@ +; RUN: opt -S -disable-output -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' %s 2>&1 | FileCheck %s + +; +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; A forwarding in the presence of symbolic strides. +define void @single_stride(i32* noalias %A, i32* noalias %B, i64 %N, i64 %stride) { +; CHECK-LABEL: Loop access info in function 'single_stride': +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Backward: +; CHECK-NEXT: %load = load i32, i32* %gep.A, align 4 -> +; CHECK-NEXT: store i32 %add, i32* %gep.A.next, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Equal predicate: %stride == 1 +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds i32, i32* %A, i64 %mul: +; CHECK-NEXT: {%A,+,(4 * %stride)}<%loop> +; CHECK-NEXT: --> {%A,+,4}<%loop> +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %mul = mul i64 %iv, %stride + %gep.A = getelementptr inbounds i32, i32* %A, i64 %mul + %load = load i32, i32* %gep.A, align 4 + %gep.B = getelementptr inbounds i32, i32* %B, i64 %iv + %load_1 = load i32, i32* %gep.B, align 4 + %add = add i32 %load_1, %load + %iv.next = add nuw nsw i64 %iv, 1 + %gep.A.next = getelementptr inbounds i32, i32* %A, i64 %iv.next + store i32 %add, i32* %gep.A.next, align 4 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +; Similar to @single_stride, but with struct types. +define void @single_stride_struct({ i32, i8 }* noalias %A, { i32, i8 }* noalias %B, i64 %N, i64 %stride) { +; CHECK-LABEL: Loop access info in function 'single_stride_struct': +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Backward: +; CHECK-NEXT: %load = load { i32, i8 }, { i32, i8 }* %gep.A, align 4 -> +; CHECK-NEXT: store { i32, i8 } %ins, { i32, i8 }* %gep.A.next, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Equal predicate: %stride == 1 +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %mul: +; CHECK-NEXT: {%A,+,(8 * %stride)}<%loop> +; CHECK-NEXT: --> {%A,+,8}<%loop> +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %mul = mul i64 %iv, %stride + %gep.A = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %mul + %load = load { i32, i8 }, { i32, i8 }* %gep.A, align 4 + %gep.B = getelementptr inbounds { i32, i8 }, { i32, i8 }* %B, i64 %iv + %load_1 = load { i32, i8 }, { i32, i8 }* %gep.B, align 4 + %v1 = extractvalue { i32, i8 } %load, 0 + %v2 = extractvalue { i32, i8} %load_1, 0 + %add = add i32 %v1, %v2 + %ins = insertvalue { i32, i8 } undef, i32 %add, 0 + %iv.next = add nuw nsw i64 %iv, 1 + %gep.A.next = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %iv.next + store { i32, i8 } %ins, { i32, i8 }* %gep.A.next, align 4 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; A loop with two symbolic strides. +define void @two_strides(i32* noalias %A, i32* noalias %B, i64 %N, i64 %stride.1, i64 %stride.2) { +; CHECK-LABEL: Loop access info in function 'two_strides': +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Backward: +; CHECK-NEXT: %load = load i32, i32* %gep.A, align 4 -> +; CHECK-NEXT: store i32 %add, i32* %gep.A.next, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Equal predicate: %stride.2 == 1 +; CHECK-NEXT: Equal predicate: %stride.1 == 1 +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds i32, i32* %A, i64 %mul: +; CHECK-NEXT: {%A,+,(4 * %stride.1)}<%loop> +; CHECK-NEXT: --> {%A,+,4}<%loop> +; CHECK-NEXT: [PSE] %gep.A.next = getelementptr inbounds i32, i32* %A, i64 %mul.2: +; CHECK-NEXT: {((4 * %stride.2) + %A),+,(4 * %stride.2)}<%loop> +; CHECK-NEXT: --> {(4 + %A),+,4}<%loop> +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %mul = mul i64 %iv, %stride.1 + %gep.A = getelementptr inbounds i32, i32* %A, i64 %mul + %load = load i32, i32* %gep.A, align 4 + %gep.B = getelementptr inbounds i32, i32* %B, i64 %iv + %load_1 = load i32, i32* %gep.B, align 4 + %add = add i32 %load_1, %load + %iv.next = add nuw nsw i64 %iv, 1 + %mul.2 = mul i64 %iv.next, %stride.2 + %gep.A.next = getelementptr inbounds i32, i32* %A, i64 %mul.2 + store i32 %add, i32* %gep.A.next, align 4 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll b/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll index 7a2d1b6c7e3c..dce61157aae1 100644 --- a/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll +++ b/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll @@ -50,6 +50,46 @@ for.end: ; preds = %for.body ret void } +; Similar to @f(), but with a struct type. 
+; ALL-LABEL: @f_struct( +define void @f_struct({ i32, i8 } * noalias nocapture %A, { i32, i8 }* noalias nocapture readonly %B, i64 %N, + i64 %stride) { + +; ONE_STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1 + +entry: +; NO_ONE_STRIDE_SPEC-NOT: %load_initial = load { i32, i8 }, { i32, i8 }* %A +; ONE_STRIDE_SPEC: %load_initial = load { i32, i8 }, { i32, i8 }* %A + br label %for.body + +for.body: ; preds = %for.body, %entry +; NO_ONE_STRIDE_SPEC-NOT: %store_forwarded = phi { i32, i8 } [ %load_initial, {{.*}} ], [ %ins, %for.body ] +; ONE_STRIDE_SPEC: %store_forwarded = phi { i32, i8 } [ %load_initial, {{.*}} ], [ %ins, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %mul = mul i64 %indvars.iv, %stride + %arrayidx = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %mul + %load = load { i32, i8 }, { i32, i8 }* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds { i32, i8 }, { i32, i8 }* %B, i64 %indvars.iv + %load_1 = load { i32, i8 }, { i32, i8 }* %arrayidx2, align 4 + +; NO_ONE_STRIDE_SPEC-NOT: %v1 = extractvalue { i32, i8 } %store_forwarded +; ONE_STRIDE_SPEC: %v1 = extractvalue { i32, i8 } %store_forwarded +; ONE_STRIDE_SPEC: %add = add i32 %v1, %v2 + + %v1 = extractvalue { i32, i8 } %load, 0 + %v2 = extractvalue { i32, i8} %load_1, 0 + %add = add i32 %v1, %v2 + %ins = insertvalue { i32, i8 } undef, i32 %add, 0 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %arrayidx_next = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %indvars.iv.next + store { i32, i8 } %ins, { i32, i8 }* %arrayidx_next, align 4 + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + ; With two symbolic strides: ; ; for (unsigned i = 0; i < 100; i++) |