diff options
author | Craig Topper <craig.topper@sifive.com> | 2023-01-24 15:26:42 -0800 |
---|---|---|
committer | Craig Topper <craig.topper@sifive.com> | 2023-01-24 17:16:16 -0800 |
commit | b7166e252448187f69fe7838b77cfaa1e6982e36 (patch) | |
tree | b17fe3642c763cc62bad1d1572979a0f0ee82f70 | |
parent | 002b190d3798339910ec3fe6a1e467391b235492 (diff) | |
download | llvm-b7166e252448187f69fe7838b77cfaa1e6982e36.tar.gz |
[RISCV] Combine extract_vector_elt followed by VFMV_S_F_VL.
If we're extracting an element and inserting into an undef vector
with the same number of elements, we can use the original vector.
This pattern occurs around reductions that have been cascaded
together.
This can be generalized to wider/narrower vectors by using
insert_subvector/extract_subvector, but we don't have lit tests
for that case currently.
We can also support a non-undef destination vector in the future by using a slide or vmv.v.v.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D142264
4 files changed, 71 insertions, 77 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7118c2df0375..a8720d070acb 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10562,6 +10562,25 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } + case RISCVISD::VFMV_S_F_VL: { + SDValue Src = N->getOperand(1); + // Try to remove vector->scalar->vector if the scalar->vector is inserting + // into an undef vector. + // TODO: Could use a vslide or vmv.v.v for non-undef. + if (N->getOperand(0).isUndef() && + Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isNullConstant(Src.getOperand(1)) && + Src.getOperand(0).getValueType().isScalableVector()) { + EVT VT = N->getValueType(0); + EVT SrcVT = Src.getOperand(0).getValueType(); + assert(SrcVT.getVectorElementType() == VT.getVectorElementType()); + // Widths match, just return the original vector. + if (SrcVT == VT) + return Src.getOperand(0); + // TODO: Use insert_subvector/extract_subvector to change widen/narrow? 
+ } + break; + } case ISD::INTRINSIC_WO_CHAIN: { unsigned IntNo = N->getConstantOperandVal(0); switch (IntNo) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll index 8aa3a811d4c6..3872f20d8a4a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll @@ -133,17 +133,14 @@ define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 ; CHECK-NEXT: vfmv.s.f v25, fa0 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: addi a1, a0, -32 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = call reassoc float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl) ret float %r @@ -164,17 +161,14 @@ define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, ; CHECK-NEXT: vfmv.s.f v25, fa0 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: addi a1, a0, -32 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = call float 
@llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl) ret float %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index d6e4d879def1..6b1b51f90846 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -238,15 +238,13 @@ define half @vreduce_fadd_v128f16(ptr %x, half %s) { define half @vreduce_ord_fadd_v128f16(ptr %x, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_v128f16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vle16.v v16, (a1) ; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v24 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: vfredosum.vs v8, v16, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -670,15 +668,13 @@ define float @vreduce_fadd_v64f32(ptr %x, float %s) { define float @vreduce_ord_fadd_v64f32(ptr %x, float %s) { ; CHECK-LABEL: vreduce_ord_fadd_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vle32.v v16, (a1) ; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v24 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: vfredosum.vs v8, v16, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -715,20 +711,15 @@ define float @vreduce_ord_fwadd_v64f32(ptr %x, float %s) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e16, 
m8, ta, ma -; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v16, a0 +; CHECK-NEXT: vslidedown.vx v16, v8, a0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfwredosum.vs v16, v16, v24 -; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma -; CHECK-NEXT: vfmv.f.s ft0, v16 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v16, ft0 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vfwredosum.vs v8, v8, v16 +; CHECK-NEXT: vfwredosum.vs v8, v8, v24 +; CHECK-NEXT: vfwredosum.vs v8, v16, v8 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1084,14 +1075,12 @@ define double @vreduce_fadd_v32f64(ptr %x, double %s) { define double @vreduce_ord_fadd_v32f64(ptr %x, double %s) { ; CHECK-LABEL: vreduce_ord_fadd_v32f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v16, (a0) +; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vfredosum.vs v8, v8, v24 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: vfredosum.vs v8, v16, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1126,18 +1115,14 @@ define double @vreduce_ord_fwadd_v32f64(ptr %x, double %s) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v16, 16 +; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vfwredosum.vs v16, 
v16, v24 -; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma -; CHECK-NEXT: vfmv.f.s ft0, v16 -; CHECK-NEXT: vfmv.s.f v16, ft0 -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vfwredosum.vs v8, v8, v16 +; CHECK-NEXT: vfwredosum.vs v8, v8, v24 +; CHECK-NEXT: vfwredosum.vs v8, v16, v8 ; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll index 9c9150f86e5d..155a36f58e3b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -93,30 +93,28 @@ declare half @llvm.vp.reduce.fadd.nxv64f16(half, <vscale x 64 x half>, <vscale x define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_nxv64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a1, a2, 1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v0, a1 -; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: sub a1, a0, a2 -; CHECK-NEXT: sltu a3, a0, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: vslidedown.vx v24, v0, a2 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: bltu a0, a2, .LBB6_2 +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: bltu a0, a1, .LBB6_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, m8, tu, ma ; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v8, ft0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: sltu a0, a0, 
a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfredusum.vs v8, v16, v8, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = call reassoc half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl) ret half %r @@ -125,30 +123,28 @@ define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_nxv64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a1, a2, 1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v0, a1 -; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: sub a1, a0, a2 -; CHECK-NEXT: sltu a3, a0, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: vslidedown.vx v24, v0, a2 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: vfmv.s.f v25, fa0 -; CHECK-NEXT: bltu a0, a2, .LBB7_2 +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: bltu a0, a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a2, e16, m8, tu, ma ; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v8, ft0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfredosum.vs v8, v16, v8, v0.t -; 
CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %r = call half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl) ret half %r |