diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2013-09-01 14:24:41 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2013-09-01 14:24:41 +0000 |
commit | 6adcd58d3c58a8eeb21bc1bfe399c7b03592f273 (patch) | |
tree | 24d3b376df1fff639da8ea5df7288b1c7a1d3750 /lib | |
parent | 5510728d28bb1ee04abc32da3d21b7df12948053 (diff) | |
download | llvm-6adcd58d3c58a8eeb21bc1bfe399c7b03592f273.tar.gz |
AVX-512: Added GATHER and SCATTER instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189729 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 225 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 63 |
2 files changed, 285 insertions, 3 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 73c4a1cabf51..739c1448cdfc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1445,6 +1445,7 @@ void X86TargetLowering::resetOperationActions() { // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. @@ -11623,7 +11624,87 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { } } -static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { +static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, + SDValue Base, SDValue Index, + SDValue ScaleOp, SDValue Chain, + const X86Subtarget * Subtarget) { + SDLoc dl(Op); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp); + assert(C && "Invalid scale type"); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); + SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl); + EVT MaskVT = MVT::getVectorVT(MVT::i1, + Index.getValueType().getVectorNumElements()); + SDValue MaskInReg = DAG.getConstant(~0, MaskVT); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); + SDValue Disp = DAG.getTargetConstant(0, MVT::i32); + SDValue Segment = DAG.getRegister(0, MVT::i32); + SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain}; + SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); + SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; + return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl); +} + +static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, + SDValue Src, SDValue Mask, SDValue Base, + SDValue Index, SDValue ScaleOp, SDValue Chain, + const X86Subtarget * Subtarget) { + SDLoc dl(Op); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp); + assert(C && "Invalid scale type"); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); + EVT MaskVT = MVT::getVectorVT(MVT::i1, + Index.getValueType().getVectorNumElements()); + SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); + SDValue Disp = DAG.getTargetConstant(0, MVT::i32); + SDValue Segment = DAG.getRegister(0, MVT::i32); + if (Src.getOpcode() == ISD::UNDEF) + Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl); + SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain}; + SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); + SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; + return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl); +} + +static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, + SDValue Src, SDValue Base, SDValue Index, + SDValue ScaleOp, SDValue Chain) { + SDLoc dl(Op); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp); + assert(C && "Invalid scale type"); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); + SDValue Disp = DAG.getTargetConstant(0, MVT::i32); + SDValue Segment = DAG.getRegister(0, MVT::i32); + EVT MaskVT = MVT::getVectorVT(MVT::i1, + Index.getValueType().getVectorNumElements()); + SDValue MaskInReg = DAG.getConstant(~0, MaskVT); + SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); + SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain}; + SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); + return SDValue(Res, 1); +} + +static SDValue getMScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, + SDValue Src, SDValue Mask, SDValue Base, + SDValue Index, SDValue ScaleOp, SDValue Chain) { + SDLoc dl(Op); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp); + assert(C && "Invalid scale type"); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); + SDValue Disp = DAG.getTargetConstant(0, MVT::i32); + SDValue Segment = DAG.getRegister(0, MVT::i32); + EVT MaskVT = MVT::getVectorVT(MVT::i1, + Index.getValueType().getVectorNumElements()); + SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); + SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); + SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain}; + SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); + return SDValue(Res, 1); +} + +static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDLoc dl(Op); unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); switch (IntNo) { @@ -11658,7 +11739,144 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, SDValue(Result.getNode(), 2)); } - + //int_gather(index, base, scale); + case Intrinsic::x86_avx512_gather_qpd_512: + case Intrinsic::x86_avx512_gather_qps_512: + case Intrinsic::x86_avx512_gather_dpd_512: + case Intrinsic::x86_avx512_gather_qpi_512: + case Intrinsic::x86_avx512_gather_qpq_512: + case Intrinsic::x86_avx512_gather_dpq_512: + case Intrinsic::x86_avx512_gather_dps_512: + case Intrinsic::x86_avx512_gather_dpi_512: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::x86_avx512_gather_qps_512: Opc = X86::VGATHERQPSZrm; break; + case Intrinsic::x86_avx512_gather_qpd_512: Opc = X86::VGATHERQPDZrm; break; + case Intrinsic::x86_avx512_gather_dpd_512: Opc = X86::VGATHERDPDZrm; break; + case Intrinsic::x86_avx512_gather_dps_512: Opc = X86::VGATHERDPSZrm; break; + case Intrinsic::x86_avx512_gather_qpi_512: Opc = X86::VPGATHERQDZrm; break; + case Intrinsic::x86_avx512_gather_qpq_512: Opc = X86::VPGATHERQQZrm; break; + case Intrinsic::x86_avx512_gather_dpi_512: Opc = X86::VPGATHERDDZrm; break; + case Intrinsic::x86_avx512_gather_dpq_512: Opc = X86::VPGATHERDQZrm; break; + } + SDValue Chain = Op.getOperand(0); + SDValue Index = Op.getOperand(2); + SDValue Base = Op.getOperand(3); + SDValue Scale = Op.getOperand(4); + return getGatherNode(Opc, Op, DAG, Base, Index, Scale, Chain, Subtarget); + } + //int_gather_mask(v1, mask, index, base, scale); + case Intrinsic::x86_avx512_gather_qps_mask_512: + case Intrinsic::x86_avx512_gather_qpd_mask_512: + case Intrinsic::x86_avx512_gather_dpd_mask_512: + case Intrinsic::x86_avx512_gather_dps_mask_512: + case Intrinsic::x86_avx512_gather_qpi_mask_512: + case Intrinsic::x86_avx512_gather_qpq_mask_512: + case Intrinsic::x86_avx512_gather_dpi_mask_512: + case Intrinsic::x86_avx512_gather_dpq_mask_512: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::x86_avx512_gather_qps_mask_512: + Opc = X86::VGATHERQPSZrm; break; + case Intrinsic::x86_avx512_gather_qpd_mask_512: + Opc = X86::VGATHERQPDZrm; break; + case Intrinsic::x86_avx512_gather_dpd_mask_512: + Opc = X86::VGATHERDPDZrm; break; + case Intrinsic::x86_avx512_gather_dps_mask_512: + Opc = X86::VGATHERDPSZrm; break; + case Intrinsic::x86_avx512_gather_qpi_mask_512: + Opc = X86::VPGATHERQDZrm; break; + case Intrinsic::x86_avx512_gather_qpq_mask_512: + Opc = X86::VPGATHERQQZrm; break; + case Intrinsic::x86_avx512_gather_dpi_mask_512: + Opc = X86::VPGATHERDDZrm; break; + case Intrinsic::x86_avx512_gather_dpq_mask_512: + Opc = X86::VPGATHERDQZrm; break; + } + SDValue Chain = Op.getOperand(0); + SDValue Src = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + SDValue Index = Op.getOperand(4); + SDValue Base = Op.getOperand(5); + SDValue Scale = Op.getOperand(6); + return getMGatherNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain, + Subtarget); + } + //int_scatter(base, index, v1, scale); + case Intrinsic::x86_avx512_scatter_qpd_512: + case Intrinsic::x86_avx512_scatter_qps_512: + case Intrinsic::x86_avx512_scatter_dpd_512: + case Intrinsic::x86_avx512_scatter_qpi_512: + case Intrinsic::x86_avx512_scatter_qpq_512: + case Intrinsic::x86_avx512_scatter_dpq_512: + case Intrinsic::x86_avx512_scatter_dps_512: + case Intrinsic::x86_avx512_scatter_dpi_512: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::x86_avx512_scatter_qpd_512: + Opc = X86::VSCATTERQPDZmr; break; + case Intrinsic::x86_avx512_scatter_qps_512: + Opc = X86::VSCATTERQPSZmr; break; + case Intrinsic::x86_avx512_scatter_dpd_512: + Opc = X86::VSCATTERDPDZmr; break; + case Intrinsic::x86_avx512_scatter_dps_512: + Opc = X86::VSCATTERDPSZmr; break; + case Intrinsic::x86_avx512_scatter_qpi_512: + Opc = X86::VPSCATTERQDZmr; break; + case Intrinsic::x86_avx512_scatter_qpq_512: + Opc = X86::VPSCATTERQQZmr; break; + case Intrinsic::x86_avx512_scatter_dpq_512: + Opc = X86::VPSCATTERDQZmr; break; + case Intrinsic::x86_avx512_scatter_dpi_512: + Opc = X86::VPSCATTERDDZmr; break; + } + SDValue Chain = Op.getOperand(0); + SDValue Base = Op.getOperand(2); + SDValue Index = Op.getOperand(3); + SDValue Src = Op.getOperand(4); + SDValue Scale = Op.getOperand(5); + return getScatterNode(Opc, Op, DAG, Src, Base, Index, Scale, Chain); + } + //int_scatter_mask(base, mask, index, v1, scale); + case Intrinsic::x86_avx512_scatter_qps_mask_512: + case Intrinsic::x86_avx512_scatter_qpd_mask_512: + case Intrinsic::x86_avx512_scatter_dpd_mask_512: + case Intrinsic::x86_avx512_scatter_dps_mask_512: + case Intrinsic::x86_avx512_scatter_qpi_mask_512: + case Intrinsic::x86_avx512_scatter_qpq_mask_512: + case Intrinsic::x86_avx512_scatter_dpi_mask_512: + case Intrinsic::x86_avx512_scatter_dpq_mask_512: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::x86_avx512_scatter_qpd_mask_512: + Opc = X86::VSCATTERQPDZmr; break; + case Intrinsic::x86_avx512_scatter_qps_mask_512: + Opc = X86::VSCATTERQPSZmr; break; + case Intrinsic::x86_avx512_scatter_dpd_mask_512: + Opc = X86::VSCATTERDPDZmr; break; + case Intrinsic::x86_avx512_scatter_dps_mask_512: + Opc = X86::VSCATTERDPSZmr; break; + case Intrinsic::x86_avx512_scatter_qpi_mask_512: + Opc = X86::VPSCATTERQDZmr; break; + case Intrinsic::x86_avx512_scatter_qpq_mask_512: + Opc = X86::VPSCATTERQQZmr; break; + case Intrinsic::x86_avx512_scatter_dpq_mask_512: + Opc = X86::VPSCATTERDQZmr; break; + case Intrinsic::x86_avx512_scatter_dpi_mask_512: + Opc = X86::VPSCATTERDDZmr; break; + } + SDValue Chain = Op.getOperand(0); + SDValue Base = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + SDValue Index = Op.getOperand(4); + SDValue Src = Op.getOperand(5); + SDValue Scale = Op.getOperand(6); + return getMScatterNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain); + } // XTEST intrinsics. case Intrinsic::x86_xtest: { SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other); @@ -13093,7 +13311,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::FRAME_TO_ARGS_OFFSET: diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 6b2f1608ca40..ea3a4e174166 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2923,6 +2923,69 @@ defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext, EVEX_CD8<32, CD8VH>; //===----------------------------------------------------------------------===// +// GATHER - SCATTER Operations + +multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand memop> { +let mayLoad = 1, + Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in + def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb), + (ins RC:$src1, KRC:$mask, memop:$src2), + !strconcat(OpcodeStr, + "\t{$src2, ${dst}{${mask}}|${dst}{${mask}}, $src2}"), + []>, EVEX, EVEX_K; +} +defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC, + RegisterClass RC, X86MemOperand memop> { +let mayStore = 1, Constraints = "$mask = $mask_wb" in + def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb), + (ins memop:$dst, KRC:$mask, RC:$src2), + !strconcat(OpcodeStr, + "\t{$src2, ${dst}{${mask}}|${dst}{${mask}}, $src2}"), + []>, EVEX, EVEX_K; +} + +defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>, + EVEX_V512, EVEX_CD8<32, CD8VT1>; + +//===----------------------------------------------------------------------===// // VSHUFPS - VSHUFPD Operations multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop, |