summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2021-04-11 20:06:53 +0100
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2021-04-11 20:07:09 +0100
commit 231b87618bb61b24674d060721f7004057da9336 (patch)
tree a5ec1071f2809445cb2a77b3c562ead63025dc57
parent ea8dd3ee2eb457a8c3975e1f64caa7a58169e02e (diff)
download llvm-231b87618bb61b24674d060721f7004057da9336.tar.gz
[X86][AVX512] Fold not(kmov(x)) -> kmov(not(x)) and not(widen_subvector(x)) -> widen_subvector(not(x))
Improve AVX512 mask inversion: rG38c799bce801 exposed some missing opportunities to move scalar not() back onto the boolvector types for folding with setcc, etc.
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 22
-rw-r--r-- llvm/test/CodeGen/X86/movmsk-cmp.ll 31
-rw-r--r-- llvm/test/CodeGen/X86/vector-reduce-and-bool.ll 45
3 files changed, 48 insertions, 50 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0214745f88f3..4ecd9f86322b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46988,6 +46988,28 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
return RV;
+ // Fold not(iX bitcast(vXi1)) -> (iX bitcast(not(vec))) for legal boolvecs.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (llvm::isAllOnesConstant(N1) && N0.getOpcode() == ISD::BITCAST &&
+ N0.getOperand(0).getValueType().isVector() &&
+ N0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
+ TLI.isTypeLegal(N0.getOperand(0).getValueType()) && N0.hasOneUse()) {
+ return DAG.getBitcast(VT, DAG.getNOT(SDLoc(N), N0.getOperand(0),
+ N0.getOperand(0).getValueType()));
+ }
+
+ // Handle AVX512 mask widening.
+ // Fold not(insert_subvector(undef,sub)) -> insert_subvector(undef,not(sub))
+ if (ISD::isBuildVectorAllOnes(N1.getNode()) && VT.isVector() &&
+ VT.getVectorElementType() == MVT::i1 &&
+ N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.getOperand(0).isUndef() &&
+ TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
+ return DAG.getNode(
+ ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getNOT(SDLoc(N), N0.getOperand(1), N0.getOperand(1).getValueType()),
+ N0.getOperand(2));
+ }
+
// Fold xor(zext(xor(x,c1)),c2) -> xor(zext(x),xor(zext(c1),c2))
// Fold xor(truncate(xor(x,c1)),c2) -> xor(truncate(x),xor(truncate(c1),c2))
// TODO: Under what circumstances could this be performed in DAGCombine?
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 3d0b7cd50fcf..30aa8cacd3cd 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -1017,9 +1017,8 @@ define i1 @allzeros_v2i64_not(<2 x i64> %a0) {
; KNL-LABEL: allzeros_v2i64_not:
; KNL: # %bb.0:
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: notb %al
; KNL-NEXT: testb $3, %al
; KNL-NEXT: setne %al
; KNL-NEXT: vzeroupper
@@ -1838,9 +1837,8 @@ define i1 @allones_v4i32_and1(<4 x i32> %arg) {
; KNL-LABEL: allones_v4i32_and1:
; KNL: # %bb.0:
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
+; KNL-NEXT: vptestnmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: notb %al
; KNL-NEXT: testb $15, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
@@ -2154,9 +2152,8 @@ define i1 @allones_v2i64_and1(<2 x i64> %arg) {
; KNL: # %bb.0:
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1]
-; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
+; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: notb %al
; KNL-NEXT: testb $3, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
@@ -2252,9 +2249,8 @@ define i1 @allones_v4i64_and1(<4 x i64> %arg) {
; KNL-LABEL: allones_v4i64_and1:
; KNL: # %bb.0:
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
+; KNL-NEXT: vptestnmq {{.*}}(%rip){1to8}, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: notb %al
; KNL-NEXT: testb $15, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
@@ -3159,9 +3155,8 @@ define i1 @allones_v4i32_and4(<4 x i32> %arg) {
; KNL-LABEL: allones_v4i32_and4:
; KNL: # %bb.0:
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
+; KNL-NEXT: vptestnmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: notb %al
; KNL-NEXT: testb $15, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
@@ -3475,9 +3470,8 @@ define i1 @allones_v2i64_and4(<2 x i64> %arg) {
; KNL: # %bb.0:
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4]
-; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0
+; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: notb %al
; KNL-NEXT: testb $3, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
@@ -3573,9 +3567,8 @@ define i1 @allones_v4i64_and4(<4 x i64> %arg) {
; KNL-LABEL: allones_v4i64_and4:
; KNL: # %bb.0:
; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0
+; KNL-NEXT: vptestnmq {{.*}}(%rip){1to8}, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: notb %al
; KNL-NEXT: testb $15, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
@@ -4018,9 +4011,8 @@ define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
; KNL: # %bb.0:
; KNL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
-; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: notb %al
; KNL-NEXT: testb $-109, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
@@ -4029,8 +4021,8 @@ define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SKX-LABEL: movmsk_v8i16:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
+; SKX-NEXT: knotb %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: notb %al
; SKX-NEXT: testb $-109, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq
@@ -4136,9 +4128,8 @@ define i1 @movmsk_and_v2i64(<2 x i64> %x, <2 x i64> %y) {
; KNL: # %bb.0:
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL-NEXT: vpcmpneqq %zmm1, %zmm0, %k0
+; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: notb %al
; KNL-NEXT: testb $3, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
@@ -4278,8 +4269,8 @@ define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) {
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k0
+; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: notb %al
; KNL-NEXT: testb $3, %al
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 4e0410f97346..50aad826e730 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -31,9 +31,8 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) {
; AVX512F-LABEL: trunc_v2i64_v2i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0
-; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: notb %al
; AVX512F-NEXT: testb $3, %al
; AVX512F-NEXT: sete %al
; AVX512F-NEXT: vzeroupper
@@ -42,9 +41,8 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) {
; AVX512BW-LABEL: trunc_v2i64_v2i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0
-; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: testb $3, %al
; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
@@ -53,9 +51,8 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) {
; AVX512VL-LABEL: trunc_v2i64_v2i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0
-; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0
+; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: testb $3, %al
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: retq
@@ -84,9 +81,8 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
; AVX512F-LABEL: trunc_v4i32_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: notb %al
; AVX512F-NEXT: testb $15, %al
; AVX512F-NEXT: sete %al
; AVX512F-NEXT: vzeroupper
@@ -95,9 +91,8 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
; AVX512BW-LABEL: trunc_v4i32_v4i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: testb $15, %al
; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
@@ -106,9 +101,8 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
; AVX512VL-LABEL: trunc_v4i32_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0
+; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: testb $15, %al
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: retq
@@ -244,9 +238,8 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
; AVX512F-LABEL: trunc_v4i64_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllq $63, %ymm0, %ymm0
-; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: notb %al
; AVX512F-NEXT: testb $15, %al
; AVX512F-NEXT: sete %al
; AVX512F-NEXT: vzeroupper
@@ -255,9 +248,8 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
; AVX512BW-LABEL: trunc_v4i64_v4i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllq $63, %ymm0, %ymm0
-; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: testb $15, %al
; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
@@ -266,9 +258,8 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
; AVX512VL-LABEL: trunc_v4i64_v4i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0
-; AVX512VL-NEXT: vptestmq %ymm0, %ymm0, %k0
+; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: notb %al
; AVX512VL-NEXT: testb $15, %al
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
@@ -875,9 +866,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
; AVX512F-LABEL: icmp_v2i64_v2i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: notb %al
; AVX512F-NEXT: testb $3, %al
; AVX512F-NEXT: sete %al
; AVX512F-NEXT: vzeroupper
@@ -886,9 +876,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
; AVX512BW-LABEL: icmp_v2i64_v2i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: testb $3, %al
; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
@@ -931,9 +920,8 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
; AVX512F-LABEL: icmp_v4i32_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: notb %al
; AVX512F-NEXT: testb $15, %al
; AVX512F-NEXT: sete %al
; AVX512F-NEXT: vzeroupper
@@ -942,9 +930,8 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
; AVX512BW-LABEL: icmp_v4i32_v4i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: testb $15, %al
; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
@@ -1110,9 +1097,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
; AVX512F-LABEL: icmp_v4i64_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: notb %al
; AVX512F-NEXT: testb $15, %al
; AVX512F-NEXT: sete %al
; AVX512F-NEXT: vzeroupper
@@ -1121,9 +1107,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
; AVX512BW-LABEL: icmp_v4i64_v4i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: notb %al
; AVX512BW-NEXT: testb $15, %al
; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper