-rw-r--r--   llvm/lib/Target/X86/X86ISelLowering.cpp   29
-rw-r--r--   llvm/test/CodeGen/X86/setcc-lowering.ll   11
-rw-r--r--   llvm/test/CodeGen/X86/vshift-6.ll          18

3 files changed, 38 insertions, 20 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 64c91cb62c8a..fb0ec198dd45 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46647,6 +46647,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // If this is SSE1 only convert to FOR to avoid scalarization.
  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
@@ -46663,7 +46664,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
  SmallVector<APInt, 2> SrcPartials;
  if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps, &SrcPartials) &&
      SrcOps.size() == 1) {
-    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
    EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
    SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
@@ -46724,11 +46724,36 @@
    }
  }

-  // Attempt to recursively combine an OR of shuffles.
  if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
+    // Attempt to recursively combine an OR of shuffles.
    SDValue Op(N, 0);
    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
      return Res;
+
+    // If either operand is a constant mask, then only the elements that aren't
+    // allones are actually demanded by the other operand.
+    auto SimplifyUndemandedElts = [&](SDValue Op, SDValue OtherOp) {
+      APInt UndefElts;
+      SmallVector<APInt> EltBits;
+      int NumElts = VT.getVectorNumElements();
+      int EltSizeInBits = VT.getScalarSizeInBits();
+      if (!getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits))
+        return false;
+
+      APInt DemandedElts = APInt::getZero(NumElts);
+      for (int I = 0; I != NumElts; ++I)
+        if (!EltBits[I].isAllOnes())
+          DemandedElts.setBit(I);
+
+      APInt KnownUndef, KnownZero;
+      return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, KnownUndef,
+                                            KnownZero, DCI);
+    };
+    if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) {
+      if (N->getOpcode() != ISD::DELETED_NODE)
+        DCI.AddToWorklist(N);
+      return SDValue(N, 0);
+    }
  }

  // We should fold "masked merge" patterns when `andn` is not available.
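The lane selection added above boils down to: for (or X, C) with a per-lane constant C, any lane where C is all-ones already produces all-ones regardless of X, so that lane of X is never demanded and X can be simplified against the remaining lanes only. Below is a minimal standalone sketch of just that lane computation, outside the SelectionDAG/APInt machinery; the helper name computeDemandedLanes and the fixed byte-sized lanes are illustrative assumptions, not LLVM API.

#include <array>
#include <bitset>
#include <cstdint>
#include <iostream>

// For (or X, C) with a constant byte-per-lane mask C, a lane of X is only
// demanded when the matching lane of C is not all-ones: an all-ones lane of
// C forces that lane of the OR result to all-ones regardless of X.
template <size_t NumElts>
std::bitset<NumElts>
computeDemandedLanes(const std::array<uint8_t, NumElts> &MaskC) {
  std::bitset<NumElts> Demanded;
  for (size_t I = 0; I != NumElts; ++I)
    Demanded[I] = MaskC[I] != 0xFF;
  return Demanded;
}

int main() {
  // Mirrors the vshift-6.ll constant below: every byte is 255 except lane 7,
  // so only lane 7 of the other OR operand is demanded.
  std::array<uint8_t, 16> MaskC{255, 255, 255, 255, 255, 255, 255, 0,
                                255, 255, 255, 255, 255, 255, 255, 255};
  std::cout << computeDemandedLanes(MaskC) << '\n';
  // Prints 0000000010000000 (bitset prints lane 15 first; only lane 7 is set).
}

In the patch itself, that lane set is what gets passed to TLI.SimplifyDemandedVectorElts as DemandedElts, letting the other OR operand be simplified to just those lanes; the test diffs below show the resulting codegen.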
diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll
index fae9602eaaaf..f740c5925090 100644
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -9,12 +9,11 @@
define <8 x i16> @pr25080(<8 x i32> %a) {
; AVX-LABEL: pr25080:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vshift-6.ll b/llvm/test/CodeGen/X86/vshift-6.ll
index e66d91340164..fab8e9be7d0a 100644
--- a/llvm/test/CodeGen/X86/vshift-6.ll
+++ b/llvm/test/CodeGen/X86/vshift-6.ll
@@ -30,12 +30,9 @@ define <16 x i8> @do_not_crash(i8*, i32*, i64*, i32, i64, i8) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movb %al, (%ecx)
-; X86-NEXT: movd %eax, %xmm0
-; X86-NEXT: psllq $56, %xmm0
-; X86-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; X86-NEXT: movdqa %xmm2, %xmm1
-; X86-NEXT: pandn %xmm0, %xmm1
-; X86-NEXT: por %xmm2, %xmm1
+; X86-NEXT: movd %eax, %xmm1
+; X86-NEXT: psllq $56, %xmm1
+; X86-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT: pcmpeqd %xmm3, %xmm3
; X86-NEXT: psllw $5, %xmm1
; X86-NEXT: pxor %xmm2, %xmm2
@@ -65,12 +62,9 @@ define <16 x i8> @do_not_crash(i8*, i32*, i64*, i32, i64, i8) {
; X64-LABEL: do_not_crash:
; X64: # %bb.0: # %entry
; X64-NEXT: movb %r9b, (%rdi)
-; X64-NEXT: movd %r9d, %xmm0
-; X64-NEXT: psllq $56, %xmm0
-; X64-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; X64-NEXT: movdqa %xmm2, %xmm1
-; X64-NEXT: pandn %xmm0, %xmm1
-; X64-NEXT: por %xmm2, %xmm1
+; X64-NEXT: movd %r9d, %xmm1
+; X64-NEXT: psllq $56, %xmm1
+; X64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT: pcmpeqd %xmm2, %xmm2
; X64-NEXT: psllw $5, %xmm1
; X64-NEXT: pxor %xmm3, %xmm3
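The vshift-6.ll change is the end-to-end effect: once only the non-all-ones lane of the por constant is demanded, the pandn feeding it is redundant, and (~M & X) | M reduces to X | M, i.e. a single por against the constant pool. Below is a small SSE2 check of that equivalence, not taken from the patch: the byte mask matches the one in the test, while the value chosen for X and the variable names are arbitrary illustrations.

#include <emmintrin.h>
#include <cassert>

int main() {
  // Constant mask from the test: all-ones bytes except byte lane 7.
  const __m128i M = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, 0,
                                  -1, -1, -1, -1, -1, -1, -1, -1);
  // Arbitrary stand-in for the shifted value (the psllq $56 result).
  const __m128i X = _mm_set1_epi8(0x5A);

  // Before: (~M & X) | M  -- the old movdqa/pandn/por sequence.
  __m128i Before = _mm_or_si128(_mm_andnot_si128(M, X), M);
  // After:  X | M         -- the single por now emitted.
  __m128i After = _mm_or_si128(X, M);

  // The two forms agree byte-for-byte.
  assert(_mm_movemask_epi8(_mm_cmpeq_epi8(Before, After)) == 0xFFFF);
  return 0;
}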