summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: LemonBoy <thatlemon@gmail.com> 2021-03-17 16:59:55 +0100
committer: Tom Stellard <tstellar@redhat.com> 2021-03-31 12:10:37 -0700
commit: 31001be371e8f2c74470e727e54503fb2aabec8b (patch)
tree: 93ea8be722a9712ea312b2f9a9c77749193a27cb
parent: 9ae9ab1ca34384e07b751c16645e22a0b953b08b (diff)
download: llvmorg-12.0.0-rc4.tar.gz
[LoopVectorize] Refine hasIrregularType predicate (tag: llvmorg-12.0.0-rc4)
The `hasIrregularType` predicate checks whether an array of N values of type Ty is "bitcast-compatible" with a <N x Ty> vector.

The previous check returned invalid results in some cases where there is padding between the array elements: e.g. a 4-element array of u7 values is considered compatible with <4 x u7>, even though the vector only loads/stores 28 bits instead of 32.

The problem causes LLVM to generate incorrect code for some targets: for AArch64 the vector loads/stores are lowered in terms of ubfx/bfi, effectively losing the top (N * padding) bits.

Reviewed By: lebedev.ri
Differential Revision: https://reviews.llvm.org/D97465
(cherry picked from commit 4f024938e4c932feba4d28573ec4522106f8d879)
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 22
-rw-r--r-- llvm/test/Transforms/LoopVectorize/irregular_type.ll 27
2 files changed, 34 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d36e078444bc..b456a97aa4ec 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -372,19 +372,11 @@ static Type *getMemInstValueType(Value *I) {
/// A helper function that returns true if the given type is irregular. The
/// type is irregular if its allocated size doesn't equal the store size of an
-/// element of the corresponding vector type at the given vectorization factor.
-static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) {
- // Determine if an array of VF elements of type Ty is "bitcast compatible"
- // with a <VF x Ty> vector.
- if (VF.isVector()) {
- auto *VectorTy = VectorType::get(Ty, VF);
- return TypeSize::get(VF.getKnownMinValue() *
- DL.getTypeAllocSize(Ty).getFixedValue(),
- VF.isScalable()) != DL.getTypeStoreSize(VectorTy);
- }
-
- // If the vectorization factor is one, we just check if an array of type Ty
- // requires padding between elements.
+/// element of the corresponding vector type.
+static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
+ // Determine if an array of N elements of type Ty is "bitcast compatible"
+ // with a <N x Ty> vector.
+ // This is only true if there is no padding between the array elements.
return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
}
@@ -5212,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
auto *ScalarTy = getMemInstValueType(I);
- if (hasIrregularType(ScalarTy, DL, VF))
+ if (hasIrregularType(ScalarTy, DL))
return false;
// Check if masking is required.
@@ -5259,7 +5251,7 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
- if (hasIrregularType(ScalarTy, DL, VF))
+ if (hasIrregularType(ScalarTy, DL))
return false;
return true;
diff --git a/llvm/test/Transforms/LoopVectorize/irregular_type.ll b/llvm/test/Transforms/LoopVectorize/irregular_type.ll
new file mode 100644
index 000000000000..167a1a101e6f
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/irregular_type.ll
@@ -0,0 +1,27 @@
+; RUN: opt %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+; Ensure the array loads/stores are not optimized into vector operations when
+; the element type has padding bits.
+
+; CHECK: foo
+; CHECK: vector.body
+; CHECK-NOT: load <4 x i7>
+; CHECK-NOT: store <4 x i7>
+; CHECK: for.body
+define void @foo(i7* %a, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i7, i7* %a, i64 %indvars.iv
+ %0 = load i7, i7* %arrayidx, align 1
+ %sub = add nuw nsw i7 %0, 0
+ store i7 %sub, i7* %arrayidx, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, %n
+ br i1 %cmp, label %for.exit, label %for.body
+
+for.exit:
+ ret void
+}