author     Craig Topper <craig.topper@sifive.com>  2023-01-24 15:26:42 -0800
committer  Craig Topper <craig.topper@sifive.com>  2023-01-24 17:16:16 -0800
commit     b7166e252448187f69fe7838b77cfaa1e6982e36
tree       b17fe3642c763cc62bad1d1572979a0f0ee82f70
parent     002b190d3798339910ec3fe6a1e467391b235492
[RISCV] Combine extract_vector_elt followed by VFMV_S_F_VL.
If we're extracting an element and inserting it into an undef vector with the same number of elements, we can use the original vector directly. This pattern occurs around reductions that have been cascaded together. This can be generalized to wider/narrower vectors by using insert_subvector/extract_subvector, but we don't have lit tests for that case currently. We could also support a non-undef destination vector by using a slide or vmv.v.v.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D142264
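To make the triggering pattern concrete, here is a sketch of the IR behind the vreduce_ord_fadd_v64f32 case in the fixed-vector lit test updated below (the test file itself is authoritative). The backend splits the 64-element ordered reduction into two cascaded vfredosum.vs over 32-element halves; previously the scalar accumulator was hopped between them through vfmv.f.s and vfmv.s.f, and with this combine the first reduction's result register seeds the second directly.

declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>)

define float @vreduce_ord_fadd_v64f32(ptr %x, float %s) {
  %v = load <64 x float>, ptr %x
  ; Ordered (non-reassoc) fadd reduction; legalization splits this into two
  ; 32-element vfredosum.vs, the second seeded by element 0 of the first's
  ; result vector, which is exactly the extract_vector_elt + VFMV_S_F_VL
  ; pair the new combine folds away.
  %red = call float @llvm.vector.reduce.fadd.v64f32(float %s, <64 x float> %v)
  ret float %red
}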
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp                  | 19
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll | 14
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll    | 51
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll             | 64
4 files changed, 71 insertions(+), 77 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7118c2df0375..a8720d070acb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10562,6 +10562,25 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
    break;
  }
+  case RISCVISD::VFMV_S_F_VL: {
+    SDValue Src = N->getOperand(1);
+    // Try to remove vector->scalar->vector if the scalar->vector is inserting
+    // into an undef vector.
+    // TODO: Could use a vslide or vmv.v.v for non-undef.
+    if (N->getOperand(0).isUndef() &&
+        Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        isNullConstant(Src.getOperand(1)) &&
+        Src.getOperand(0).getValueType().isScalableVector()) {
+      EVT VT = N->getValueType(0);
+      EVT SrcVT = Src.getOperand(0).getValueType();
+      assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
+      // Widths match, just return the original vector.
+      if (SrcVT == VT)
+        return Src.getOperand(0);
+      // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
+    }
+    break;
+  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(0);
    switch (IntNo) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
index 8aa3a811d4c6..3872f20d8a4a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
@@ -133,17 +133,14 @@ define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32
; CHECK-NEXT: vfmv.s.f v25, fa0
; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t
-; CHECK-NEXT: vfmv.f.s ft0, v25
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, ft0
; CHECK-NEXT: addi a1, a0, -32
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%r = call reassoc float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl)
ret float %r
@@ -164,17 +161,14 @@ define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m,
; CHECK-NEXT: vfmv.s.f v25, fa0
; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t
-; CHECK-NEXT: vfmv.f.s ft0, v25
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, ft0
; CHECK-NEXT: addi a1, a0, -32
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%r = call float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl)
ret float %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index d6e4d879def1..6b1b51f90846 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -238,15 +238,13 @@ define half @vreduce_fadd_v128f16(ptr %x, half %s) {
define half @vreduce_ord_fadd_v128f16(ptr %x, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_v128f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 64
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v16, (a1)
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v24
-; CHECK-NEXT: vfmv.f.s ft0, v8
-; CHECK-NEXT: vfmv.s.f v8, ft0
; CHECK-NEXT: vfredosum.vs v8, v16, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -670,15 +668,13 @@ define float @vreduce_fadd_v64f32(ptr %x, float %s) {
define float @vreduce_ord_fadd_v64f32(ptr %x, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_v64f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v16, (a1)
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v24
-; CHECK-NEXT: vfmv.f.s ft0, v8
-; CHECK-NEXT: vfmv.s.f v8, ft0
; CHECK-NEXT: vfredosum.vs v8, v16, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -715,20 +711,15 @@ define float @vreduce_ord_fwadd_v64f32(ptr %x, float %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v16, v8, a0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwredosum.vs v16, v16, v24
-; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.f.s ft0, v16
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, ft0
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vfwredosum.vs v8, v8, v16
+; CHECK-NEXT: vfwredosum.vs v8, v8, v24
+; CHECK-NEXT: vfwredosum.vs v8, v16, v8
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -1084,14 +1075,12 @@ define double @vreduce_fadd_v32f64(ptr %x, double %s) {
define double @vreduce_ord_fadd_v32f64(ptr %x, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_v32f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 128
-; CHECK-NEXT: vle64.v v16, (a0)
+; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v24
-; CHECK-NEXT: vfmv.f.s ft0, v8
-; CHECK-NEXT: vfmv.s.f v8, ft0
; CHECK-NEXT: vfredosum.vs v8, v16, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -1126,18 +1115,14 @@ define double @vreduce_ord_fwadd_v32f64(ptr %x, double %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v16, 16
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v24, fa0
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vfwredosum.vs v16, v16, v24
-; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.f.s ft0, v16
-; CHECK-NEXT: vfmv.s.f v16, ft0
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vfwredosum.vs v8, v8, v16
+; CHECK-NEXT: vfwredosum.vs v8, v8, v24
+; CHECK-NEXT: vfwredosum.vs v8, v16, v8
; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
index 9c9150f86e5d..155a36f58e3b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
@@ -93,30 +93,28 @@ declare half @llvm.vp.reduce.fadd.nxv64f16(half, <vscale x 64 x half>, <vscale x
define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_nxv64f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a1, a2, 1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 1
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v0, a1
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: sub a1, a0, a2
-; CHECK-NEXT: sltu a3, a0, a1
-; CHECK-NEXT: addi a3, a3, -1
-; CHECK-NEXT: and a1, a3, a1
+; CHECK-NEXT: vslidedown.vx v24, v0, a2
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: vfmv.s.f v25, fa0
-; CHECK-NEXT: bltu a0, a2, .LBB6_2
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: bltu a0, a1, .LBB6_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB6_2:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, tu, ma
; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t
-; CHECK-NEXT: vfmv.f.s ft0, v25
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, ft0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
+; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%r = call reassoc half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
ret half %r
@@ -125,30 +123,28 @@ define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x
define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_nxv64f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a1, a2, 1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 1
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v0, a1
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: sub a1, a0, a2
-; CHECK-NEXT: sltu a3, a0, a1
-; CHECK-NEXT: addi a3, a3, -1
-; CHECK-NEXT: and a1, a3, a1
+; CHECK-NEXT: vslidedown.vx v24, v0, a2
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: vfmv.s.f v25, fa0
-; CHECK-NEXT: bltu a0, a2, .LBB7_2
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: bltu a0, a1, .LBB7_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB7_2:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, tu, ma
; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t
-; CHECK-NEXT: vfmv.f.s ft0, v25
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, ft0
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
+; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v25
; CHECK-NEXT: ret
%r = call half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
ret half %r