diff options
author | Tom Stellard <tstellar@redhat.com> | 2020-05-28 21:09:13 +0000 |
---|---|---|
committer | Tom Stellard <tstellar@redhat.com> | 2020-06-22 15:03:26 -0700 |
commit | 1abba52044ddc7d79a985fc9ec0734db54ebe6b3 (patch) | |
tree | 240e81119a3e73a5dd14ef882b689540e3b25f86 | |
parent | 3428405fc4ec18b566d64bb1478acd631ccd40f4 (diff) | |
download | llvm-1abba52044ddc7d79a985fc9ec0734db54ebe6b3.tar.gz |
[PowerPC] Add missing handling for half precision
The fix for PR39865 took care of some of the handling for half precision,
but it missed a number of issues that still exist. This patch fixes the
remaining issues that cause crashes in the PPC back end.
Fixes: https://bugs.llvm.org/show_bug.cgi?id=45776
Differential Revision: https://reviews.llvm.org/D79283
(cherry picked from commit 1a493b0fa556a07c728862c3c3f70bfd8683bef0)
-rw-r--r-- | llvm/include/llvm/Target/TargetSelectionDAG.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 23 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrVSX.td | 17 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll | 88 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll | 142 |
6 files changed, 104 insertions, 177 deletions
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 1700c6c4640d..46ad5a619770 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -959,6 +959,10 @@ def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = 1; let MemoryVT = i32; } +def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = f16; +} def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = 1; let MemoryVT = f32; @@ -1094,6 +1098,11 @@ def truncstorei32 : PatFrag<(ops node:$val, node:$ptr), let IsStore = 1; let MemoryVT = i32; } +def truncstoref16 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = f16; +} def truncstoref32 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = 1; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 00f59bba52e8..e61d44b5f968 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -167,6 +167,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } + if (Subtarget.isISA3_0()) { + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal); + setTruncStoreAction(MVT::f64, MVT::f16, Legal); + setTruncStoreAction(MVT::f32, MVT::f16, Legal); + } else { + // No extending loads from f16 or HW conversions back and forth. 
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + } + setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PowerPC has pre-inc load and store's. @@ -10361,6 +10378,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::FP_EXTEND && "Should only be called for ISD::FP_EXTEND"); + // FIXME: handle extends from half precision float vectors on P9. // We only want to custom lower an extend from v2f32 to v2f64. if (Op.getValueType() != MVT::v2f64 || Op.getOperand(0).getValueType() != MVT::v2f32) @@ -10574,6 +10592,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: // Don't handle bitcast here. return; + case ISD::FP_EXTEND: + SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG); + if (Lowered) + Results.push_back(Lowered); + return; } } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index e0c381827b87..2e1485373d19 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -637,7 +637,7 @@ namespace llvm { /// then the VPERM for the shuffle. All in all a very slow sequence. 
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override { - if (VT.getScalarSizeInBits() % 8 == 0) + if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index be6b30ffa08b..95e5ff6b130d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3343,6 +3343,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)), (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>; + // Load/convert and convert/store patterns for f16. + def : Pat<(f64 (extloadf16 xoaddr:$src)), + (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>; + def : Pat<(truncstoref16 f64:$src, xoaddr:$dst), + (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>; + def : Pat<(f32 (extloadf16 xoaddr:$src)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>; + def : Pat<(truncstoref16 f32:$src, xoaddr:$dst), + (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>; + def : Pat<(f64 (f16_to_fp i32:$A)), + (f64 (XSCVHPDP (MTVSRWZ $A)))>; + def : Pat<(f32 (f16_to_fp i32:$A)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>; + def : Pat<(i32 (fp_to_f16 f32:$A)), + (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>; + def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>; + let Predicates = [IsBigEndian, HasP9Vector] in { // Scalar stores of i8 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll index af695c58f1b1..783ea3a11cce 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll @@ -11,46 +11,34 @@ define void @test_liwzx1(<1 x float>* %A, <1 x 
float>* %B, <1 x float>* %C) { ; P9LE-LABEL: test_liwzx1: ; P9LE: # %bb.0: -; P9LE-NEXT: lfiwzx f0, 0, r3 -; P9LE-NEXT: lfiwzx f1, 0, r4 -; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 -; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 -; P9LE-NEXT: xvaddsp vs0, vs0, vs1 -; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2 -; P9LE-NEXT: stfiwx f0, 0, r5 +; P9LE-NEXT: lfs f0, 0(r3) +; P9LE-NEXT: lfs f1, 0(r4) +; P9LE-NEXT: xsaddsp f0, f0, f1 +; P9LE-NEXT: stfs f0, 0(r5) ; P9LE-NEXT: blr ; ; P9BE-LABEL: test_liwzx1: ; P9BE: # %bb.0: -; P9BE-NEXT: lfiwzx f0, 0, r3 -; P9BE-NEXT: lfiwzx f1, 0, r4 -; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 -; P9BE-NEXT: xvaddsp vs0, vs0, vs1 -; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P9BE-NEXT: stfiwx f0, 0, r5 +; P9BE-NEXT: lfs f0, 0(r3) +; P9BE-NEXT: lfs f1, 0(r4) +; P9BE-NEXT: xsaddsp f0, f0, f1 +; P9BE-NEXT: stfs f0, 0(r5) ; P9BE-NEXT: blr ; ; P8LE-LABEL: test_liwzx1: ; P8LE: # %bb.0: -; P8LE-NEXT: lfiwzx f0, 0, r3 -; P8LE-NEXT: lfiwzx f1, 0, r4 -; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 -; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 -; P8LE-NEXT: xvaddsp vs0, vs0, vs1 -; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2 -; P8LE-NEXT: stfiwx f0, 0, r5 +; P8LE-NEXT: lfsx f0, 0, r3 +; P8LE-NEXT: lfsx f1, 0, r4 +; P8LE-NEXT: xsaddsp f0, f0, f1 +; P8LE-NEXT: stfsx f0, 0, r5 ; P8LE-NEXT: blr ; ; P8BE-LABEL: test_liwzx1: ; P8BE: # %bb.0: -; P8BE-NEXT: lfiwzx f0, 0, r3 -; P8BE-NEXT: lfiwzx f1, 0, r4 -; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 -; P8BE-NEXT: xvaddsp vs0, vs0, vs1 -; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P8BE-NEXT: stfiwx f0, 0, r5 +; P8BE-NEXT: lfsx f0, 0, r3 +; P8BE-NEXT: lfsx f1, 0, r4 +; P8BE-NEXT: xsaddsp f0, f0, f1 +; P8BE-NEXT: stfsx f0, 0, r5 ; P8BE-NEXT: blr @@ -65,50 +53,38 @@ define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { ; P9LE-LABEL: test_liwzx2: ; P9LE: # %bb.0: -; P9LE-NEXT: lfiwzx f0, 0, r3 -; 
P9LE-NEXT: lfiwzx f1, 0, r4 -; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 -; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 -; P9LE-NEXT: xvsubsp vs0, vs0, vs1 -; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P9LE-NEXT: lfs f0, 0(r3) ; P9LE-NEXT: mr r3, r5 -; P9LE-NEXT: stfiwx f0, 0, r5 +; P9LE-NEXT: lfs f1, 0(r4) +; P9LE-NEXT: xssubsp f0, f0, f1 +; P9LE-NEXT: stfs f0, 0(r5) ; P9LE-NEXT: blr ; ; P9BE-LABEL: test_liwzx2: ; P9BE: # %bb.0: -; P9BE-NEXT: lfiwzx f0, 0, r3 -; P9BE-NEXT: lfiwzx f1, 0, r4 -; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 -; P9BE-NEXT: xvsubsp vs0, vs0, vs1 -; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P9BE-NEXT: lfs f0, 0(r3) ; P9BE-NEXT: mr r3, r5 -; P9BE-NEXT: stfiwx f0, 0, r5 +; P9BE-NEXT: lfs f1, 0(r4) +; P9BE-NEXT: xssubsp f0, f0, f1 +; P9BE-NEXT: stfs f0, 0(r5) ; P9BE-NEXT: blr ; ; P8LE-LABEL: test_liwzx2: ; P8LE: # %bb.0: -; P8LE-NEXT: lfiwzx f0, 0, r3 -; P8LE-NEXT: lfiwzx f1, 0, r4 +; P8LE-NEXT: lfsx f0, 0, r3 +; P8LE-NEXT: lfsx f1, 0, r4 ; P8LE-NEXT: mr r3, r5 -; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 -; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 -; P8LE-NEXT: xvsubsp vs0, vs0, vs1 -; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2 -; P8LE-NEXT: stfiwx f0, 0, r5 +; P8LE-NEXT: xssubsp f0, f0, f1 +; P8LE-NEXT: stfsx f0, 0, r5 ; P8LE-NEXT: blr ; ; P8BE-LABEL: test_liwzx2: ; P8BE: # %bb.0: -; P8BE-NEXT: lfiwzx f0, 0, r3 -; P8BE-NEXT: lfiwzx f1, 0, r4 +; P8BE-NEXT: lfsx f0, 0, r3 +; P8BE-NEXT: lfsx f1, 0, r4 ; P8BE-NEXT: mr r3, r5 -; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 -; P8BE-NEXT: xvsubsp vs0, vs0, vs1 -; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P8BE-NEXT: stfiwx f0, 0, r5 +; P8BE-NEXT: xssubsp f0, f0, f1 +; P8BE-NEXT: stfsx f0, 0, r5 ; P8BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 253e74cf0bf3..b3ce655a99f4 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ 
b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -9,9 +9,7 @@ define <1 x float> @constrained_vector_fdiv_v1f32() #0 { ; PC64LE-NEXT: addis 4, 2, .LCPI0_1@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI0_0@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI0_1@toc@l(4) -; PC64LE-NEXT: xsdivsp 0, 1, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xsdivsp 1, 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fdiv_v1f32: @@ -20,9 +18,7 @@ define <1 x float> @constrained_vector_fdiv_v1f32() #0 { ; PC64LE9-NEXT: lfs 0, .LCPI0_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI0_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI0_1@toc@l(3) -; PC64LE9-NEXT: xsdivsp 0, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xsdivsp 1, 1, 0 ; PC64LE9-NEXT: blr entry: %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32( @@ -232,8 +228,6 @@ define <1 x float> @constrained_vector_frem_v1f32() #0 { ; PC64LE-NEXT: lfs 2, .LCPI5_1@toc@l(4) ; PC64LE-NEXT: bl fmodf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -250,8 +244,6 @@ define <1 x float> @constrained_vector_frem_v1f32() #0 { ; PC64LE9-NEXT: lfs 2, .LCPI5_1@toc@l(3) ; PC64LE9-NEXT: bl fmodf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -645,9 +637,7 @@ define <1 x float> @constrained_vector_fmul_v1f32() #0 { ; PC64LE-NEXT: addis 4, 2, .LCPI10_1@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI10_0@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI10_1@toc@l(4) -; PC64LE-NEXT: xsmulsp 0, 1, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xsmulsp 1, 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fmul_v1f32: @@ -656,9 +646,7 @@ define <1 x float> @constrained_vector_fmul_v1f32() #0 { ; 
PC64LE9-NEXT: lfs 0, .LCPI10_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI10_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI10_1@toc@l(3) -; PC64LE9-NEXT: xsmulsp 0, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xsmulsp 1, 1, 0 ; PC64LE9-NEXT: blr entry: %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32( @@ -865,9 +853,7 @@ define <1 x float> @constrained_vector_fadd_v1f32() #0 { ; PC64LE-NEXT: addis 4, 2, .LCPI15_1@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI15_0@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI15_1@toc@l(4) -; PC64LE-NEXT: xsaddsp 0, 1, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xsaddsp 1, 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fadd_v1f32: @@ -876,9 +862,7 @@ define <1 x float> @constrained_vector_fadd_v1f32() #0 { ; PC64LE9-NEXT: lfs 0, .LCPI15_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI15_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI15_1@toc@l(3) -; PC64LE9-NEXT: xsaddsp 0, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xsaddsp 1, 1, 0 ; PC64LE9-NEXT: blr entry: %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32( @@ -1081,9 +1065,7 @@ define <1 x float> @constrained_vector_fsub_v1f32() #0 { ; PC64LE-NEXT: addis 4, 2, .LCPI20_1@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI20_0@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI20_1@toc@l(4) -; PC64LE-NEXT: xssubsp 0, 1, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xssubsp 1, 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fsub_v1f32: @@ -1092,9 +1074,7 @@ define <1 x float> @constrained_vector_fsub_v1f32() #0 { ; PC64LE9-NEXT: lfs 0, .LCPI20_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI20_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI20_1@toc@l(3) -; PC64LE9-NEXT: xssubsp 0, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xssubsp 1, 1, 0 ; PC64LE9-NEXT: blr entry: %sub = call <1 x float> 
@llvm.experimental.constrained.fsub.v1f32( @@ -1295,18 +1275,14 @@ define <1 x float> @constrained_vector_sqrt_v1f32() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI25_0@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI25_0@toc@l(3) -; PC64LE-NEXT: xssqrtsp 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xssqrtsp 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_sqrt_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI25_0@toc@ha ; PC64LE9-NEXT: lfs 0, .LCPI25_0@toc@l(3) -; PC64LE9-NEXT: xssqrtsp 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xssqrtsp 1, 0 ; PC64LE9-NEXT: blr entry: %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32( @@ -1481,8 +1457,6 @@ define <1 x float> @constrained_vector_pow_v1f32() #0 { ; PC64LE-NEXT: lfs 2, .LCPI30_1@toc@l(4) ; PC64LE-NEXT: bl powf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -1499,8 +1473,6 @@ define <1 x float> @constrained_vector_pow_v1f32() #0 { ; PC64LE9-NEXT: lfs 2, .LCPI30_1@toc@l(3) ; PC64LE9-NEXT: bl powf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -1899,8 +1871,6 @@ define <1 x float> @constrained_vector_powi_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI35_0@toc@l(3) ; PC64LE-NEXT: bl __powisf2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -1916,8 +1886,6 @@ define <1 x float> @constrained_vector_powi_v1f32() #0 { ; PC64LE9-NEXT: li 4, 3 ; PC64LE9-NEXT: bl __powisf2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; 
PC64LE9-NEXT: mtlr 0 @@ -2284,8 +2252,6 @@ define <1 x float> @constrained_vector_sin_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI40_0@toc@l(3) ; PC64LE-NEXT: bl sinf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2300,8 +2266,6 @@ define <1 x float> @constrained_vector_sin_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI40_0@toc@l(3) ; PC64LE9-NEXT: bl sinf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2637,8 +2601,6 @@ define <1 x float> @constrained_vector_cos_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI45_0@toc@l(3) ; PC64LE-NEXT: bl cosf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2653,8 +2615,6 @@ define <1 x float> @constrained_vector_cos_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI45_0@toc@l(3) ; PC64LE9-NEXT: bl cosf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2990,8 +2950,6 @@ define <1 x float> @constrained_vector_exp_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI50_0@toc@l(3) ; PC64LE-NEXT: bl expf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3006,8 +2964,6 @@ define <1 x float> @constrained_vector_exp_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI50_0@toc@l(3) ; PC64LE9-NEXT: bl expf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3343,8 +3299,6 @@ define <1 x float> @constrained_vector_exp2_v1f32() #0 { ; PC64LE-NEXT: lfs 1, 
.LCPI55_0@toc@l(3) ; PC64LE-NEXT: bl exp2f ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3359,8 +3313,6 @@ define <1 x float> @constrained_vector_exp2_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI55_0@toc@l(3) ; PC64LE9-NEXT: bl exp2f ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3696,8 +3648,6 @@ define <1 x float> @constrained_vector_log_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI60_0@toc@l(3) ; PC64LE-NEXT: bl logf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3712,8 +3662,6 @@ define <1 x float> @constrained_vector_log_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI60_0@toc@l(3) ; PC64LE9-NEXT: bl logf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4049,8 +3997,6 @@ define <1 x float> @constrained_vector_log10_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI65_0@toc@l(3) ; PC64LE-NEXT: bl log10f ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4065,8 +4011,6 @@ define <1 x float> @constrained_vector_log10_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI65_0@toc@l(3) ; PC64LE9-NEXT: bl log10f ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4402,8 +4346,6 @@ define <1 x float> @constrained_vector_log2_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI70_0@toc@l(3) ; PC64LE-NEXT: bl log2f ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 
1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4418,8 +4360,6 @@ define <1 x float> @constrained_vector_log2_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI70_0@toc@l(3) ; PC64LE9-NEXT: bl log2f ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4755,8 +4695,6 @@ define <1 x float> @constrained_vector_rint_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI75_0@toc@l(3) ; PC64LE-NEXT: bl rintf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4771,8 +4709,6 @@ define <1 x float> @constrained_vector_rint_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI75_0@toc@l(3) ; PC64LE9-NEXT: bl rintf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5108,8 +5044,6 @@ define <1 x float> @constrained_vector_nearbyint_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI80_0@toc@l(3) ; PC64LE-NEXT: bl nearbyintf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -5124,8 +5058,6 @@ define <1 x float> @constrained_vector_nearbyint_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI80_0@toc@l(3) ; PC64LE9-NEXT: bl nearbyintf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5463,8 +5395,6 @@ define <1 x float> @constrained_vector_maxnum_v1f32() #0 { ; PC64LE-NEXT: lfs 2, .LCPI85_1@toc@l(4) ; PC64LE-NEXT: bl fmaxf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -5481,8 +5411,6 @@ 
define <1 x float> @constrained_vector_maxnum_v1f32() #0 { ; PC64LE9-NEXT: lfs 2, .LCPI85_1@toc@l(3) ; PC64LE9-NEXT: bl fmaxf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5872,8 +5800,6 @@ define <1 x float> @constrained_vector_minnum_v1f32() #0 { ; PC64LE-NEXT: lfs 2, .LCPI90_1@toc@l(4) ; PC64LE-NEXT: bl fminf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -5890,8 +5816,6 @@ define <1 x float> @constrained_vector_minnum_v1f32() #0 { ; PC64LE9-NEXT: lfs 2, .LCPI90_1@toc@l(3) ; PC64LE9-NEXT: bl fminf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -6274,18 +6198,14 @@ define <1 x float> @constrained_vector_fptrunc_v1f64() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI95_0@toc@ha ; PC64LE-NEXT: lfd 0, .LCPI95_0@toc@l(3) -; PC64LE-NEXT: frsp 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: frsp 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fptrunc_v1f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI95_0@toc@ha ; PC64LE9-NEXT: lfd 0, .LCPI95_0@toc@l(3) -; PC64LE9-NEXT: frsp 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: frsp 1, 0 ; PC64LE9-NEXT: blr entry: %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64( @@ -6437,15 +6357,13 @@ define <1 x double> @constrained_vector_fpext_v1f32() #0 { ; PC64LE-LABEL: constrained_vector_fpext_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI99_0@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI99_0@toc@l(3) -; PC64LE-NEXT: xxspltd 34, 0, 0 +; PC64LE-NEXT: lfs 1, .LCPI99_0@toc@l(3) ; PC64LE-NEXT: blr ; ; 
PC64LE9-LABEL: constrained_vector_fpext_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI99_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI99_0@toc@l(3) -; PC64LE9-NEXT: xxspltd 34, 0, 0 +; PC64LE9-NEXT: lfs 1, .LCPI99_0@toc@l(3) ; PC64LE9-NEXT: blr entry: %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32( @@ -6548,18 +6466,14 @@ define <1 x float> @constrained_vector_ceil_v1f32() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI103_0@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI103_0@toc@l(3) -; PC64LE-NEXT: xsrdpip 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xsrdpip 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_ceil_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI103_0@toc@ha ; PC64LE9-NEXT: lfs 0, .LCPI103_0@toc@l(3) -; PC64LE9-NEXT: xsrdpip 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xsrdpip 1, 0 ; PC64LE9-NEXT: blr entry: %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32( @@ -6688,18 +6602,14 @@ define <1 x float> @constrained_vector_floor_v1f32() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI107_0@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI107_0@toc@l(3) -; PC64LE-NEXT: xsrdpim 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xsrdpim 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_floor_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI107_0@toc@ha ; PC64LE9-NEXT: lfs 0, .LCPI107_0@toc@l(3) -; PC64LE9-NEXT: xsrdpim 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xsrdpim 1, 0 ; PC64LE9-NEXT: blr entry: %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32( @@ -6829,18 +6739,14 @@ define <1 x float> @constrained_vector_round_v1f32() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI111_0@toc@ha ; PC64LE-NEXT: lfs 0, 
.LCPI111_0@toc@l(3) -; PC64LE-NEXT: xsrdpi 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xsrdpi 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_round_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI111_0@toc@ha ; PC64LE9-NEXT: lfs 0, .LCPI111_0@toc@l(3) -; PC64LE9-NEXT: xsrdpi 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xsrdpi 1, 0 ; PC64LE9-NEXT: blr entry: %round = call <1 x float> @llvm.experimental.constrained.round.v1f32( @@ -6970,18 +6876,14 @@ define <1 x float> @constrained_vector_trunc_v1f32() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI115_0@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI115_0@toc@l(3) -; PC64LE-NEXT: xsrdpiz 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xsrdpiz 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_trunc_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI115_0@toc@ha ; PC64LE9-NEXT: lfs 0, .LCPI115_0@toc@l(3) -; PC64LE9-NEXT: xsrdpiz 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xsrdpiz 1, 0 ; PC64LE9-NEXT: blr entry: %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32( |