summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2023-03-24 10:34:25 -0400
committer: Tom Stellard <tstellar@redhat.com> 2023-04-04 11:32:51 -0700
commit: 86b0c6e4050261823f07a3fbfd272bea0cda778e (patch)
tree: 348b08174ec9aeed05a7bcfa08bd8ae8d36d0748
parent: b27338656d7db712ba6f37295b26f181ab2e043d (diff)
download: llvm-86b0c6e4050261823f07a3fbfd272bea0cda778e.tar.gz
[SelectionDAG] Correctly reduce BV to shuffle with zero on big endian
This DAG combine is correct on little-endian targets but incorrect on big-endian targets. Add big-endian handling to correct it.
Differential Revision: https://reviews.llvm.org/D146460
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7
-rw-r--r-- llvm/test/CodeGen/PowerPC/pr61315.ll | 133
2 files changed, 136 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index eed3d820c120..d9cde609e599 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21361,10 +21361,9 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
// the source vector. The high bits map to zero. We will use a zero vector
// as the 2nd source operand of the shuffle, so use the 1st element of
// that vector (mask value is number-of-elements) for the high bits.
- if (i % ZextRatio == 0)
- ShufMask[i] = Extract.getConstantOperandVal(1);
- else
- ShufMask[i] = NumMaskElts;
+ int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
+ ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
+ : NumMaskElts;
}
// Undef elements of the build vector remain undef because we initialize
diff --git a/llvm/test/CodeGen/PowerPC/pr61315.ll b/llvm/test/CodeGen/PowerPC/pr61315.ll
new file mode 100644
index 000000000000..de65945f963a
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr61315.ll
@@ -0,0 +1,133 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s
+define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 {
+; CHECK: .LCPI0_0:
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-LABEL: ConvertExtractedMaskBitsToVect:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-NEXT: xxlxor v4, v4, v4
+; CHECK-NEXT: xxlxor v3, v3, v3
+; CHECK-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: addis r3, r2, .LCPI0_1@toc@ha
+; CHECK-NEXT: addi r3, r3, .LCPI0_1@toc@l
+; CHECK-NEXT: xxperm v4, v2, vs0
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: xxland v2, v4, vs0
+; CHECK-NEXT: vcmpequb v2, v2, v3
+; CHECK-NEXT: xxlnor v2, v2, v2
+; CHECK-NEXT: blr
+ %a4 = extractelement <16 x i8> %0, i64 7
+ %a5 = zext i8 %a4 to i16
+ %a6 = insertelement <8 x i16> poison, i16 %a5, i64 0
+ %a7 = bitcast <8 x i16> %a6 to <16 x i8>
+ %a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %a9 = and <16 x i8> %a8, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+ %a10 = icmp eq <16 x i8> %a9, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+ %a11 = sext <16 x i1> %a10 to <16 x i8>
+ ret <16 x i8> %a11
+}
+
+define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 {
+; CHECK: .LCPI1_0:
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-LABEL: ConvertExtractedMaskBitsToVect2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-NEXT: xxlxor v4, v4, v4
+; CHECK-NEXT: xxlxor v3, v3, v3
+; CHECK-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: addis r3, r2, .LCPI1_1@toc@ha
+; CHECK-NEXT: addi r3, r3, .LCPI1_1@toc@l
+; CHECK-NEXT: xxperm v4, v2, vs0
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: xxland v2, v4, vs0
+; CHECK-NEXT: vcmpequb v2, v2, v3
+; CHECK-NEXT: xxlnor v2, v2, v2
+; CHECK-NEXT: blr
+ %a4 = extractelement <16 x i8> %0, i64 7
+ %a5 = zext i8 %a4 to i32
+ %a6 = insertelement <4 x i32> poison, i32 %a5, i64 0
+ %a7 = bitcast <4 x i32> %a6 to <16 x i8>
+ %a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %a9 = and <16 x i8> %a8, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+ %a10 = icmp eq <16 x i8> %a9, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+ %a11 = sext <16 x i1> %a10 to <16 x i8>
+ ret <16 x i8> %a11
+}
+
+define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 {
+; CHECK: .LCPI2_0:
+; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 6 # 0x6
+; CHECK-NEXT: .byte 7 # 0x7
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-NEXT: .byte 16 # 0x10
+; CHECK-LABEL: ConvertExtractedMaskBitsToVect3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-NEXT: xxlxor v4, v4, v4
+; CHECK-NEXT: xxlxor v3, v3, v3
+; CHECK-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: addis r3, r2, .LCPI2_1@toc@ha
+; CHECK-NEXT: addi r3, r3, .LCPI2_1@toc@l
+; CHECK-NEXT: xxperm v4, v2, vs0
+; CHECK-NEXT: lxv vs0, 0(r3)
+; CHECK-NEXT: xxland v2, v4, vs0
+; CHECK-NEXT: vcmpequb v2, v2, v3
+; CHECK-NEXT: xxlnor v2, v2, v2
+; CHECK-NEXT: blr
+ %a4 = extractelement <8 x i16> %0, i64 3
+ %a5 = zext i16 %a4 to i32
+ %a6 = insertelement <4 x i32> poison, i32 %a5, i64 0
+ %a7 = bitcast <4 x i32> %a6 to <16 x i8>
+ %a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %a9 = and <16 x i8> %a8, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+ %a10 = icmp eq <16 x i8> %a9, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
+ %a11 = sext <16 x i1> %a10 to <16 x i8>
+ ret <16 x i8> %a11
+}