summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Qiu Chaofan <qiucofan@cn.ibm.com> 2022-05-18 15:55:02 +0800
committer: Tom Stellard <tstellar@redhat.com> 2022-06-02 13:55:14 -0700
commit: 09ec80e16f475bd1f719bf82aade8cc8b4974187 (patch)
tree: 88522c626989e52911829d50ab07c436a283de75
parent: b950bd2ce7ff79b203b2acba02e1c468836989ae (diff)
download: llvm-09ec80e16f475bd1f719bf82aade8cc8b4974187.tar.gz
[PowerPC] Treat llvm.fmuladd intrinsic as using CTR
This fixes bug 55463, similar to D78668. This is a temporary fix since we
will switch to post-isel CTR loop determination in the future.

Reviewed By: dim, shchenz

Differential Revision: https://reviews.llvm.org/D125746

(cherry picked from commit d9d15af7873fe16d7a0dde4def30f40fa9901777)
-rw-r--r-- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp 4
-rw-r--r-- llvm/test/CodeGen/PowerPC/pr55463.ll 136
2 files changed, 137 insertions, 3 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index cc5738a5d7b6..48be7e0860df 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -491,15 +491,13 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::experimental_constrained_sin:
case Intrinsic::experimental_constrained_cos:
return true;
- // There is no corresponding FMA instruction for PPC double double.
- // Thus, we need to disable CTR loop generation for this type.
- case Intrinsic::fmuladd:
case Intrinsic::copysign:
if (CI->getArgOperand(0)->getType()->getScalarType()->
isPPC_FP128Ty())
return true;
else
continue; // ISD::FCOPYSIGN is never a library call.
+ case Intrinsic::fmuladd:
case Intrinsic::fma: Opcode = ISD::FMA; break;
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
diff --git a/llvm/test/CodeGen/PowerPC/pr55463.ll b/llvm/test/CodeGen/PowerPC/pr55463.ll
new file mode 100644
index 000000000000..63d4170da2d5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr55463.ll
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpcspe -verify-machineinstrs < %s | FileCheck %s
+
+define void @baz() #0 {
+; CHECK-LABEL: baz:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: stw 0, 4(1)
+; CHECK-NEXT: stwu 1, -48(1)
+; CHECK-NEXT: li 3, .LCPI0_0@l
+; CHECK-NEXT: li 5, .LCPI0_1@l
+; CHECK-NEXT: lis 4, .LCPI0_0@ha
+; CHECK-NEXT: lis 6, .LCPI0_1@ha
+; CHECK-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill
+; CHECK-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill
+; CHECK-NEXT: evlddx 30, 4, 3
+; CHECK-NEXT: # implicit-def: $r3
+; CHECK-NEXT: evlddx 29, 6, 5
+; CHECK-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill
+; CHECK-NEXT: # implicit-def: $r28
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: # %bb1
+; CHECK-NEXT: #
+; CHECK-NEXT: efdcfsi 8, 3
+; CHECK-NEXT: mr 4, 30
+; CHECK-NEXT: mr 6, 29
+; CHECK-NEXT: evmergehi 3, 30, 30
+; CHECK-NEXT: evmergehi 5, 29, 29
+; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3
+; CHECK-NEXT: # kill: def $r5 killed $r5 killed $s5
+; CHECK-NEXT: evmergehi 7, 8, 8
+; CHECK-NEXT: # kill: def $r8 killed $r8 killed $s8
+; CHECK-NEXT: # kill: def $r7 killed $r7 killed $s7
+; CHECK-NEXT: bl fma
+; CHECK-NEXT: evmergelo 3, 3, 4
+; CHECK-NEXT: addi 5, 28, 1
+; CHECK-NEXT: cmplw 5, 28
+; CHECK-NEXT: mr 28, 5
+; CHECK-NEXT: efdctsiz 3, 3
+; CHECK-NEXT: bge 0, .LBB0_1
+; CHECK-NEXT: # %bb.2: # %bb8
+; CHECK-NEXT: bl wibble
+; CHECK-NEXT: evldd 30, 32(1) # 8-byte Folded Reload
+; CHECK-NEXT: evldd 29, 24(1) # 8-byte Folded Reload
+; CHECK-NEXT: evldd 28, 16(1) # 8-byte Folded Reload
+; CHECK-NEXT: lwz 0, 52(1)
+; CHECK-NEXT: addi 1, 1, 48
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
+bb:
+ br label %bb1
+
+bb1:
+ %tmp = phi i32 [ %tmp6, %bb1 ], [ undef, %bb ]
+ %tmp2 = phi i32 [ %tmp3, %bb1 ], [ undef, %bb ]
+ %tmp3 = add nsw i32 %tmp2, 1
+ %tmp4 = sitofp i32 %tmp to double
+ %tmp5 = tail call double @llvm.fmuladd.f64(double 0.000000e+00, double -0.000000e+00, double %tmp4)
+ %tmp6 = fptosi double %tmp5 to i32
+ %tmp7 = icmp eq i32 %tmp2, 0
+ br i1 %tmp7, label %bb8, label %bb1
+
+bb8:
+ call void @wibble(i32 %tmp6)
+ ret void
+}
+
+define void @wombat() #0 {
+; CHECK-LABEL: wombat:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: stw 0, 4(1)
+; CHECK-NEXT: stwu 1, -48(1)
+; CHECK-NEXT: li 3, .LCPI1_0@l
+; CHECK-NEXT: li 5, .LCPI1_1@l
+; CHECK-NEXT: lis 4, .LCPI1_0@ha
+; CHECK-NEXT: lis 6, .LCPI1_1@ha
+; CHECK-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill
+; CHECK-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill
+; CHECK-NEXT: evlddx 30, 4, 3
+; CHECK-NEXT: # implicit-def: $r3
+; CHECK-NEXT: evlddx 29, 6, 5
+; CHECK-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill
+; CHECK-NEXT: # implicit-def: $r28
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB1_1: # %bb1
+; CHECK-NEXT: #
+; CHECK-NEXT: efdcfsi 8, 3
+; CHECK-NEXT: mr 4, 30
+; CHECK-NEXT: mr 6, 29
+; CHECK-NEXT: evmergehi 3, 30, 30
+; CHECK-NEXT: evmergehi 5, 29, 29
+; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3
+; CHECK-NEXT: # kill: def $r5 killed $r5 killed $s5
+; CHECK-NEXT: evmergehi 7, 8, 8
+; CHECK-NEXT: # kill: def $r8 killed $r8 killed $s8
+; CHECK-NEXT: # kill: def $r7 killed $r7 killed $s7
+; CHECK-NEXT: bl fma
+; CHECK-NEXT: evmergelo 3, 3, 4
+; CHECK-NEXT: addi 5, 28, 1
+; CHECK-NEXT: cmplw 5, 28
+; CHECK-NEXT: mr 28, 5
+; CHECK-NEXT: efdctsiz 3, 3
+; CHECK-NEXT: bge 0, .LBB1_1
+; CHECK-NEXT: # %bb.2: # %bb8
+; CHECK-NEXT: bl wibble
+; CHECK-NEXT: evldd 30, 32(1) # 8-byte Folded Reload
+; CHECK-NEXT: evldd 29, 24(1) # 8-byte Folded Reload
+; CHECK-NEXT: evldd 28, 16(1) # 8-byte Folded Reload
+; CHECK-NEXT: lwz 0, 52(1)
+; CHECK-NEXT: addi 1, 1, 48
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
+bb:
+ br label %bb1
+
+bb1:
+ %tmp = phi i32 [ %tmp6, %bb1 ], [ undef, %bb ]
+ %tmp2 = phi i32 [ %tmp3, %bb1 ], [ undef, %bb ]
+ %tmp3 = add nsw i32 %tmp2, 1
+ %tmp4 = sitofp i32 %tmp to double
+ %tmp5 = tail call double @llvm.fma.f64(double 0.000000e+00, double -0.000000e+00, double %tmp4)
+ %tmp6 = fptosi double %tmp5 to i32
+ %tmp7 = icmp eq i32 %tmp2, 0
+ br i1 %tmp7, label %bb8, label %bb1
+
+bb8:
+ call void @wibble(i32 %tmp6)
+ ret void
+}
+
+declare void @wibble(i32)
+declare double @llvm.fmuladd.f64(double, double, double)
+declare double @llvm.fma.f64(double, double, double)
+
+attributes #0 = { nounwind }