summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2016-10-30 06:56:16 +0000
committerCraig Topper <craig.topper@gmail.com>2016-10-30 06:56:16 +0000
commitb7781a95fd24913b90d3fada20d339debb8b11d4 (patch)
tree1054f6d5fb61231d65d4afa56f53cb15f489f6d4
parent72f9ed1807181ae500122e74e57bb2beaf4ab6de (diff)
downloadllvm-b7781a95fd24913b90d3fada20d339debb8b11d4.tar.gz
[X86] Use intrinsics table for PMADDUBSW and PMADDWD so that we can use the legacy intrinsics to select EVEX encoded instructions when available.
This removes a couple tablegen classes that become unused after this change. Another class gained an additional parameter to allow PMADDUBSW to specify a different result type from its input type. llvm-svn: 285515
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp6
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td133
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h4
3 files changed, 57 insertions, 86 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 54bc9b81946f..0acbe9da46f1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1099,7 +1099,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PINSRDrr, X86::PINSRDrm, 0 },
{ X86::PINSRQrr, X86::PINSRQrm, 0 },
{ X86::PINSRWrri, X86::PINSRWrmi, 0 },
- { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 },
+ { X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 },
{ X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
{ X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
{ X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
@@ -1397,7 +1397,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPINSRDrr, X86::VPINSRDrm, 0 },
{ X86::VPINSRQrr, X86::VPINSRQrm, 0 },
{ X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
- { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, 0 },
+ { X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 },
{ X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
{ X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
{ X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
@@ -1557,7 +1557,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 },
{ X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 },
{ X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
- { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, 0 },
+ { X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 },
{ X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
{ X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
{ X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 7ced35a56a85..2a2539e473c6 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3970,47 +3970,6 @@ def SSE_PMADD : OpndItins<
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
- RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop,
- OpndItins itins,
- bit IsCommutable = 0,
- bit Is2Addr = 1> {
- let isCommutable = IsCommutable in
- def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>,
- Sched<[itins.Sched]>;
- def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
- itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
-}
-
-multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
- Intrinsic IntId256, OpndItins itins,
- bit IsCommutable = 0> {
-let Predicates = [HasAVX] in
- defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128,
- VR128, loadv2i64, i128mem, itins,
- IsCommutable, 0>, VEX_4V;
-
-let Constraints = "$src1 = $dst" in
- defm NAME : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64,
- i128mem, itins, IsCommutable, 1>;
-
-let Predicates = [HasAVX2] in
- defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256,
- VR256, loadv4i64, i256mem, itins,
- IsCommutable, 0>, VEX_4V, VEX_L;
-}
-
/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType DstVT, ValueType SrcVT, RegisterClass RC,
@@ -4086,9 +4045,17 @@ defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8,
defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
-// Intrinsic forms
-defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
- int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
+defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
+ loadv2i64, i128mem, SSE_PMADD, 0>, VEX_4V;
+
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
+defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
+ VR256, loadv4i64, i128mem, SSE_PMADD,
+ 0>, VEX_4V, VEX_L;
+let Constraints = "$src1 = $dst" in
+defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
+ memopv2i64, i128mem, SSE_PMADD>;
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
@@ -5529,16 +5496,16 @@ def SSE_PMULHRSW : OpndItins<
/// SS3I_binop_rm - Simple SSSE3 bin op
multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, OpndItins itins,
- bit Is2Addr = 1> {
+ ValueType DstVT, ValueType OpVT, RegisterClass RC,
+ PatFrag memop_frag, X86MemOperand x86memop,
+ OpndItins itins, bit Is2Addr = 1> {
let isCommutable = 1 in
def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))], itins.rr>,
Sched<[itins.Sched]>;
def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
@@ -5546,7 +5513,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1,
+ (DstVT (OpNode (OpVT RC:$src1),
(bitconvert (memop_frag addr:$src2)))))], itins.rm>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
@@ -5593,27 +5560,30 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
- defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
- loadv2i64, i128mem,
+ defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
+ VR128, loadv2i64, i128mem,
SSE_PSHUFB, 0>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, VR128,
- loadv2i64, i128mem,
+defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
+ VR128, loadv2i64, i128mem,
SSE_PMULHRSW, 0>, VEX_4V;
+defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
+ v16i8, VR128, loadv2i64, i128mem,
+ SSE_PMADD, 0>, VEX_4V;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
+ defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
loadv2i64, i128mem,
SSE_PHADDSUBW, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
+ defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
loadv2i64, i128mem,
SSE_PHADDSUBD, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
+ defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
loadv2i64, i128mem,
SSE_PHADDSUBW, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
+ defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
loadv2i64, i128mem,
SSE_PHADDSUBD, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
@@ -5631,35 +5601,35 @@ let isCommutable = 0 in {
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
- int_x86_ssse3_pmadd_ub_sw_128,
- SSE_PMADD, loadv2i64, 0>, VEX_4V;
}
}
let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
- defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
- loadv4i64, i256mem,
+ defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
+ VR256, loadv4i64, i256mem,
SSE_PSHUFB, 0>, VEX_4V, VEX_L;
}
-defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, VR256,
- loadv4i64, i256mem,
+defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
+ VR256, loadv4i64, i256mem,
SSE_PMULHRSW, 0>, VEX_4V, VEX_L;
+defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
+ v32i8, VR256, loadv4i64, i256mem,
+ SSE_PMADD, 0>, VEX_4V, VEX_L;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
- defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
- loadv4i64, i256mem,
+ defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
+ VR256, loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
- defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
+ defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
- defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
- loadv4i64, i256mem,
+ defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
+ VR256, loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
- defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
+ defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNBY : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
@@ -5674,22 +5644,19 @@ let isCommutable = 0 in {
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw,
WriteVecALU>, VEX_4V, VEX_L;
- defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
- int_x86_avx2_pmadd_ub_sw,
- WriteVecIMul>, VEX_4V, VEX_L;
}
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
+ defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
memopv2i64, i128mem, SSE_PHADDSUBW>;
- defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128,
+ defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
memopv2i64, i128mem, SSE_PHADDSUBD>;
- defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128,
+ defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
memopv2i64, i128mem, SSE_PHADDSUBW>;
- defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
+ defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
memopv2i64, i128mem, SSE_PHADDSUBD>;
defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
SSE_PSIGN, memopv2i64>;
@@ -5697,7 +5664,7 @@ let isCommutable = 0 in {
SSE_PSIGN, memopv2i64>;
defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
SSE_PSIGN, memopv2i64>;
- defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128,
+ defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
memopv2i64, i128mem, SSE_PSHUFB>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128,
@@ -5705,12 +5672,12 @@ let isCommutable = 0 in {
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128,
SSE_PHADDSUBSW, memopv2i64>;
- defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
- int_x86_ssse3_pmadd_ub_sw_128,
- SSE_PMADD, memopv2i64>;
+ defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
+ v16i8, VR128, memopv2i64, i128mem,
+ SSE_PMADD>;
}
-defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, VR128,
- memopv2i64, i128mem, SSE_PMULHRSW>;
+defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
+ VR128, memopv2i64, i128mem, SSE_PMULHRSW>;
}
//===---------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 6eda95e1a4f8..a2fffe89df68 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -289,6 +289,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(avx2_pmadd_ub_sw, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(avx2_pmovmskb, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx2_pmul_hr_sw, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
@@ -1760,6 +1762,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
@@ -1808,6 +1811,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(ssse3_phsub_w_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),