diff options
author | Florian Hahn <flo@fhahn.com> | 2022-01-04 14:34:38 +0000 |
---|---|---|
committer | Florian Hahn <flo@fhahn.com> | 2022-01-04 15:20:35 +0000 |
commit | d8276208be763ba5b70e9b422034e77764a8649f (patch) | |
tree | 96009ee5eeb357743a212f92c7606d9dfac9ee5b | |
parent | 10bb837feb22ad70dc4acc3d2cdb5be7f45d1c21 (diff) | |
download | llvm-d8276208be763ba5b70e9b422034e77764a8649f.tar.gz |
[LAA] Remove overeager assertion for aggregate types.
0a00d64 turned an early exit here into an assertion, but the assertion
can be triggered, as PR52920 shows.
The later code is agnostic to the accessed type, so just drop the
assert. The patch also adds tests for LAA directly and
loop-load-elimination to show the behavior is sane.
-rw-r--r-- | llvm/lib/Analysis/LoopAccessAnalysis.cpp | 1 | ||||
-rw-r--r-- | llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll | 141 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll | 40 |
3 files changed, 181 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 6444518dc70c..b8b1b5ad53c9 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1055,7 +1055,6 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, bool ShouldCheckWrap) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); - assert(!AccessTy->isAggregateType() && "Bad stride - Not a pointer to a scalar type"); if (isa<ScalableVectorType>(AccessTy)) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll new file mode 100644 index 000000000000..547372b83bc9 --- /dev/null +++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll @@ -0,0 +1,141 @@ +; RUN: opt -S -disable-output -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' %s 2>&1 | FileCheck %s + +; +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; A forwarding in the presence of symbolic strides. +define void @single_stride(i32* noalias %A, i32* noalias %B, i64 %N, i64 %stride) { +; CHECK-LABEL: Loop access info in function 'single_stride': +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Backward: +; CHECK-NEXT: %load = load i32, i32* %gep.A, align 4 -> +; CHECK-NEXT: store i32 %add, i32* %gep.A.next, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Equal predicate: %stride == 1 +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds i32, i32* %A, i64 %mul: +; CHECK-NEXT: {%A,+,(4 * %stride)}<%loop> +; CHECK-NEXT: --> {%A,+,4}<%loop> +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %mul = mul i64 %iv, %stride + %gep.A = getelementptr inbounds i32, i32* %A, i64 %mul + %load = load i32, i32* %gep.A, align 4 + %gep.B = getelementptr inbounds i32, i32* %B, i64 %iv + %load_1 = load i32, i32* %gep.B, align 4 + %add = add i32 %load_1, %load + %iv.next = add nuw nsw i64 %iv, 1 + %gep.A.next = getelementptr inbounds i32, i32* %A, i64 %iv.next + store i32 %add, i32* %gep.A.next, align 4 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +; Similar to @single_stride, but with struct types. +define void @single_stride_struct({ i32, i8 }* noalias %A, { i32, i8 }* noalias %B, i64 %N, i64 %stride) { +; CHECK-LABEL: Loop access info in function 'single_stride_struct': +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Backward: +; CHECK-NEXT: %load = load { i32, i8 }, { i32, i8 }* %gep.A, align 4 -> +; CHECK-NEXT: store { i32, i8 } %ins, { i32, i8 }* %gep.A.next, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Equal predicate: %stride == 1 +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %mul: +; CHECK-NEXT: {%A,+,(8 * %stride)}<%loop> +; CHECK-NEXT: --> {%A,+,8}<%loop> +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %mul = mul i64 %iv, %stride + %gep.A = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %mul + %load = load { i32, i8 }, { i32, i8 }* %gep.A, align 4 + %gep.B = getelementptr inbounds { i32, i8 }, { i32, i8 }* %B, i64 %iv + %load_1 = load { i32, i8 }, { i32, i8 }* %gep.B, align 4 + %v1 = extractvalue { i32, i8 } %load, 0 + %v2 = extractvalue { i32, i8} %load_1, 0 + %add = add i32 %v1, %v2 + %ins = insertvalue { i32, i8 } undef, i32 %add, 0 + %iv.next = add nuw nsw i64 %iv, 1 + %gep.A.next = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %iv.next + store { i32, i8 } %ins, { i32, i8 }* %gep.A.next, align 4 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; A loop with two symbolic strides. +define void @two_strides(i32* noalias %A, i32* noalias %B, i64 %N, i64 %stride.1, i64 %stride.2) { +; CHECK-LABEL: Loop access info in function 'two_strides': +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Backward: +; CHECK-NEXT: %load = load i32, i32* %gep.A, align 4 -> +; CHECK-NEXT: store i32 %add, i32* %gep.A.next, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Equal predicate: %stride.2 == 1 +; CHECK-NEXT: Equal predicate: %stride.1 == 1 +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds i32, i32* %A, i64 %mul: +; CHECK-NEXT: {%A,+,(4 * %stride.1)}<%loop> +; CHECK-NEXT: --> {%A,+,4}<%loop> +; CHECK-NEXT: [PSE] %gep.A.next = getelementptr inbounds i32, i32* %A, i64 %mul.2: +; CHECK-NEXT: {((4 * %stride.2) + %A),+,(4 * %stride.2)}<%loop> +; CHECK-NEXT: --> {(4 + %A),+,4}<%loop> +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %mul = mul i64 %iv, %stride.1 + %gep.A = getelementptr inbounds i32, i32* %A, i64 %mul + %load = load i32, i32* %gep.A, align 4 + %gep.B = getelementptr inbounds i32, i32* %B, i64 %iv + %load_1 = load i32, i32* %gep.B, align 4 + %add = add i32 %load_1, %load + %iv.next = add nuw nsw i64 %iv, 1 + %mul.2 = mul i64 %iv.next, %stride.2 + %gep.A.next = getelementptr inbounds i32, i32* %A, i64 %mul.2 + store i32 %add, i32* %gep.A.next, align 4 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll b/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll index 7a2d1b6c7e3c..dce61157aae1 100644 --- a/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll +++ b/llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll @@ -50,6 +50,46 @@ for.end: ; preds = %for.body ret void } +; Similar to @f(), but with a struct type. 
+; ALL-LABEL: @f_struct( +define void @f_struct({ i32, i8 } * noalias nocapture %A, { i32, i8 }* noalias nocapture readonly %B, i64 %N, + i64 %stride) { + +; ONE_STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1 + +entry: +; NO_ONE_STRIDE_SPEC-NOT: %load_initial = load { i32, i8 }, { i32, i8 }* %A +; ONE_STRIDE_SPEC: %load_initial = load { i32, i8 }, { i32, i8 }* %A + br label %for.body + +for.body: ; preds = %for.body, %entry +; NO_ONE_STRIDE_SPEC-NOT: %store_forwarded = phi { i32, i8 } [ %load_initial, {{.*}} ], [ %ins, %for.body ] +; ONE_STRIDE_SPEC: %store_forwarded = phi { i32, i8 } [ %load_initial, {{.*}} ], [ %ins, %for.body ] + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %mul = mul i64 %indvars.iv, %stride + %arrayidx = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %mul + %load = load { i32, i8 }, { i32, i8 }* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds { i32, i8 }, { i32, i8 }* %B, i64 %indvars.iv + %load_1 = load { i32, i8 }, { i32, i8 }* %arrayidx2, align 4 + +; NO_ONE_STRIDE_SPEC-NOT: %v1 = extractvalue { i32, i8 } %store_forwarded +; ONE_STRIDE_SPEC: %v1 = extractvalue { i32, i8 } %store_forwarded +; ONE_STRIDE_SPEC: %add = add i32 %v1, %v2 + + %v1 = extractvalue { i32, i8 } %load, 0 + %v2 = extractvalue { i32, i8} %load_1, 0 + %add = add i32 %v1, %v2 + %ins = insertvalue { i32, i8 } undef, i32 %add, 0 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %arrayidx_next = getelementptr inbounds { i32, i8 }, { i32, i8 }* %A, i64 %indvars.iv.next + store { i32, i8 } %ins, { i32, i8 }* %arrayidx_next, align 4 + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + ; With two symbolic strides: ; ; for (unsigned i = 0; i < 100; i++) |