summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBill Schmidt <wschmidt@linux.vnet.ibm.com>2014-08-04 23:37:33 +0000
committerBill Schmidt <wschmidt@linux.vnet.ibm.com>2014-08-04 23:37:33 +0000
commit39f807fc9fda99b284e9c2b17912161f015e87b6 (patch)
tree49fd4a08fad058ed96f3b4999d4e2f216195fa7f
parentea5288432e358e1e01e5c369b2277536cf101e07 (diff)
downloadllvm-39f807fc9fda99b284e9c2b17912161f015e87b6.tar.gz
Merging r214714:
------------------------------------------------------------------------ r214714 | uweigand | 2014-08-04 08:13:57 -0500 (Mon, 04 Aug 2014) | 19 lines [PowerPC] Fix and improve vector comparisons This patch refactors code generation of vector comparisons. This fixes a wrong code-gen bug for ISD::SETGE for floating-point types, and improves generated code for vector comparisons in general. Specifically, the patch moves all logic deciding how to implement vector comparisons into getVCmpInst, which gets two extra boolean outputs indicating to its caller whether its needs to swap the input operands and/or negate the result of the comparison. Apart from implementing these two modifications as directed by getVCmpInst, there is no need to ever implement vector comparisons in any other manner; in particular, there is never a need to perform two separate comparisons (e.g. one for equal and one for greater-than, as code used to do before this patch). Reviewed by Bill Schmidt. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_35@214817 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp251
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp10
-rw-r--r--test/CodeGen/PowerPC/vec_cmp.ll108
3 files changed, 180 insertions, 189 deletions
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index a9e146281b87..536c2824fb7a 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -663,94 +663,105 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC,
- bool HasVSX) {
- switch (CC) {
- case ISD::SETEQ:
- case ISD::SETUEQ:
- case ISD::SETNE:
- case ISD::SETUNE:
- if (VecVT == MVT::v16i8)
- return PPC::VCMPEQUB;
- else if (VecVT == MVT::v8i16)
- return PPC::VCMPEQUH;
- else if (VecVT == MVT::v4i32)
- return PPC::VCMPEQUW;
- // v4f32 != v4f32 could be translate to unordered not equal
- else if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPEQDP;
- break;
- case ISD::SETLT:
- case ISD::SETGT:
- case ISD::SETLE:
- case ISD::SETGE:
- if (VecVT == MVT::v16i8)
- return PPC::VCMPGTSB;
- else if (VecVT == MVT::v8i16)
- return PPC::VCMPGTSH;
- else if (VecVT == MVT::v4i32)
- return PPC::VCMPGTSW;
- else if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPGTDP;
- break;
- case ISD::SETULT:
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETULE:
- if (VecVT == MVT::v16i8)
- return PPC::VCMPGTUB;
- else if (VecVT == MVT::v8i16)
- return PPC::VCMPGTUH;
- else if (VecVT == MVT::v4i32)
- return PPC::VCMPGTUW;
- break;
- case ISD::SETOEQ:
- if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPEQDP;
- break;
- case ISD::SETOLT:
- case ISD::SETOGT:
- case ISD::SETOLE:
- if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPGTDP;
- break;
- case ISD::SETOGE:
- if (VecVT == MVT::v4f32)
- return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
- else if (VecVT == MVT::v2f64)
- return PPC::XVCMPGEDP;
- break;
- default:
- break;
- }
- llvm_unreachable("Invalid integer vector compare condition");
-}
+static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
+ bool HasVSX, bool &Swap, bool &Negate) {
+ Swap = false;
+ Negate = false;
-// getVCmpEQInst: return the equal compare instruction for the specified vector
-// type. Since this is for altivec specific code, only support the altivec
-// types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT, bool HasVSX) {
- switch (VecVT) {
- case MVT::v16i8:
- return PPC::VCMPEQUB;
- case MVT::v8i16:
- return PPC::VCMPEQUH;
- case MVT::v4i32:
- return PPC::VCMPEQUW;
- case MVT::v4f32:
- return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
- case MVT::v2f64:
- return PPC::XVCMPEQDP;
- default:
- llvm_unreachable("Invalid integer vector compare condition");
+ if (VecVT.isFloatingPoint()) {
+ /* Handle some cases by swapping input operands. */
+ switch (CC) {
+ case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
+ case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+ case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
+ case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
+ case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+ case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
+ default: break;
+ }
+ /* Handle some cases by negating the result. */
+ switch (CC) {
+ case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+ case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
+ case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
+ case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
+ default: break;
+ }
+ /* We have instructions implementing the remaining cases. */
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPEQDP;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGTDP;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGEDP;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid floating-point vector compare condition");
+ } else {
+ /* Handle some cases by swapping input operands. */
+ switch (CC) {
+ case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
+ case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+ case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+ case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
+ default: break;
+ }
+ /* Handle some cases by negating the result. */
+ switch (CC) {
+ case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+ case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
+ case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
+ case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
+ default: break;
+ }
+ /* We have instructions implementing the remaining cases. */
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETUEQ:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPEQUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPEQUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPEQUW;
+ break;
+ case ISD::SETGT:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTSB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTSH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTSW;
+ break;
+ case ISD::SETUGT:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTUW;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid integer vector compare condition");
}
}
@@ -842,60 +853,20 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
EVT VecVT = LHS.getValueType();
- MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
- unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget->hasVSX());
-
- switch (CC) {
- case ISD::SETEQ:
- case ISD::SETOEQ:
- case ISD::SETUEQ:
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
- case ISD::SETNE:
- case ISD::SETONE:
- case ISD::SETUNE: {
- SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
- PPC::VNOR,
- VecVT, VCmp, VCmp);
- }
- case ISD::SETLT:
- case ISD::SETOLT:
- case ISD::SETULT:
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS);
- case ISD::SETGT:
- case ISD::SETOGT:
- case ISD::SETUGT:
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
- case ISD::SETGE:
- case ISD::SETOGE:
- case ISD::SETUGE: {
- // Small optimization: Altivec provides a 'Vector Compare Greater Than
- // or Equal To' instruction (vcmpgefp), so in this case there is no
- // need for extra logic for the equal compare.
- if (VecVT.getSimpleVT().isFloatingPoint()) {
- return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
- } else {
- SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
- SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
- PPC::VOR,
- VecVT, VCmpGT, VCmpEQ);
- }
- }
- case ISD::SETLE:
- case ISD::SETOLE:
- case ISD::SETULE: {
- SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
- SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
- PPC::VOR,
- VecVT, VCmpLE, VCmpEQ);
- }
- default:
- llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
+ bool Swap, Negate;
+ unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
+ PPCSubTarget->hasVSX(), Swap, Negate);
+ if (Swap)
+ std::swap(LHS, RHS);
+
+ if (Negate) {
+ SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
+ return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
+ PPC::VNOR,
+ VecVT, VCmp, VCmp);
}
+
+ return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
}
if (PPCSubTarget->useCRBits())
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index d596bda49fd0..634d902e5666 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -526,11 +526,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// Altivec does not contain unordered floating-point compare instructions
setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
-
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
@@ -561,11 +556,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// Share the Altivec comparison restrictions.
setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
- setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
- setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
- setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
-
setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll
index 2733089fcb10..516b2dd58b99 100644
--- a/test/CodeGen/PowerPC/vec_cmp.ll
+++ b/test/CodeGen/PowerPC/vec_cmp.ll
@@ -63,9 +63,8 @@ entry:
ret <16 x i8> %sext
}
; CHECK-LABEL: v16si8_cmp_le:
-; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsb [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK: vcmpgtsb [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <16 x i8> @v16ui8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
@@ -74,9 +73,8 @@ entry:
ret <16 x i8> %sext
}
; CHECK-LABEL: v16ui8_cmp_le:
-; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtub [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK: vcmpgtub [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <16 x i8> @v16si8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
@@ -121,9 +119,8 @@ entry:
ret <16 x i8> %sext
}
; CHECK-LABEL: v16si8_cmp_ge:
-; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsb [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK: vcmpgtsb [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <16 x i8> @v16ui8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
@@ -132,9 +129,8 @@ entry:
ret <16 x i8> %sext
}
; CHECK-LABEL: v16ui8_cmp_ge:
-; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtub [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK: vcmpgtub [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <32 x i8> @v32si8_cmp(<32 x i8> %x, <32 x i8> %y) nounwind readnone {
@@ -193,9 +189,8 @@ entry:
ret <8 x i16> %sext
}
; CHECK-LABEL: v8si16_cmp_le:
-; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsh [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK: vcmpgtsh [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <8 x i16> @v8ui16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
@@ -204,9 +199,8 @@ entry:
ret <8 x i16> %sext
}
; CHECK-LABEL: v8ui16_cmp_le:
-; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtuh [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK: vcmpgtuh [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <8 x i16> @v8si16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
@@ -251,9 +245,8 @@ entry:
ret <8 x i16> %sext
}
; CHECK-LABEL: v8si16_cmp_ge:
-; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsh [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK: vcmpgtsh [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <8 x i16> @v8ui16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
@@ -262,9 +255,8 @@ entry:
ret <8 x i16> %sext
}
; CHECK-LABEL: v8ui16_cmp_ge:
-; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtuh [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK: vcmpgtuh [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <16 x i16> @v16si16_cmp(<16 x i16> %x, <16 x i16> %y) nounwind readnone {
@@ -326,9 +318,8 @@ entry:
ret <4 x i32> %sext
}
; CHECK-LABEL: v4si32_cmp_le:
-; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsw [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK: vcmpgtsw [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <4 x i32> @v4ui32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
@@ -337,9 +328,8 @@ entry:
ret <4 x i32> %sext
}
; CHECK-LABEL: v4ui32_cmp_le:
-; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtuw [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK: vcmpgtuw [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <4 x i32> @v4si32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
@@ -384,9 +374,8 @@ entry:
ret <4 x i32> %sext
}
; CHECK-LABEL: v4si32_cmp_ge:
-; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsw [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK: vcmpgtsw [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <4 x i32> @v4ui32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
@@ -395,9 +384,8 @@ entry:
ret <4 x i32> %sext
}
; CHECK-LABEL: v4ui32_cmp_ge:
-; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtuw [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK: vcmpgtuw [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <8 x i32> @v8si32_cmp(<8 x i32> %x, <8 x i32> %y) nounwind readnone {
@@ -480,9 +468,7 @@ entry:
ret <4 x float> %0
}
; CHECK-LABEL: v4f32_cmp_le:
-; CHECK: vcmpeqfp [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtfp [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK: vcmpgefp 2, 3, 2
define <4 x float> @v4f32_cmp_lt(<4 x float> %x, <4 x float> %y) nounwind readnone {
entry:
@@ -514,6 +500,50 @@ entry:
; CHECK-LABEL: v4f32_cmp_gt:
; CHECK: vcmpgtfp 2, 2, 3
+define <4 x float> @v4f32_cmp_ule(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+ %cmp = fcmp ule <4 x float> %x, %y
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %0 = bitcast <4 x i32> %sext to <4 x float>
+ ret <4 x float> %0
+}
+; CHECK-LABEL: v4f32_cmp_ule:
+; CHECK: vcmpgtfp [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
+
+define <4 x float> @v4f32_cmp_ult(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+ %cmp = fcmp ult <4 x float> %x, %y
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %0 = bitcast <4 x i32> %sext to <4 x float>
+ ret <4 x float> %0
+}
+; CHECK-LABEL: v4f32_cmp_ult:
+; CHECK: vcmpgefp [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
+
+define <4 x float> @v4f32_cmp_uge(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+ %cmp = fcmp uge <4 x float> %x, %y
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %0 = bitcast <4 x i32> %sext to <4 x float>
+ ret <4 x float> %0
+}
+; CHECK-LABEL: v4f32_cmp_uge:
+; CHECK: vcmpgtfp [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
+
+define <4 x float> @v4f32_cmp_ugt(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+ %cmp = fcmp ugt <4 x float> %x, %y
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %0 = bitcast <4 x i32> %sext to <4 x float>
+ ret <4 x float> %0
+}
+; CHECK-LABEL: v4f32_cmp_ugt:
+; CHECK: vcmpgefp [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
+
define <8 x float> @v8f32_cmp(<8 x float> %x, <8 x float> %y) nounwind readnone {
entry: