diff options
Diffstat (limited to 'lib/Target/AMDGPU')
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 2 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUFrameLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 19 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 57 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelLowering.h | 5 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUInstrInfo.h | 2 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPULibCalls.cpp | 6 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPULibFunc.cpp | 29 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPULibFunc.h | 20 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 | ||||
-rw-r--r-- | lib/Target/AMDGPU/SIISelLowering.cpp | 62 | ||||
-rw-r--r-- | lib/Target/AMDGPU/SIMachineFunctionInfo.h | 8 | ||||
-rw-r--r-- | lib/Target/AMDGPU/VOP3Instructions.td | 2 |
15 files changed, 137 insertions, 87 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 156f7bc6512b..b17b67167666 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -400,7 +400,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) { return false; FastMathFlags FMF = FPOp->getFastMathFlags(); - bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() || + bool UnsafeDiv = HasUnsafeFPMath || FMF.isFast() || FMF.allowReciprocal(); // With UnsafeDiv node will be optimized to just rcp and mul. diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h index 2329fffd5212..91fe921bfeec 100644 --- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h +++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h @@ -15,7 +15,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index c313e4a04ef8..f04efd71fa05 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -204,6 +204,7 @@ private: void SelectADD_SUB_I64(SDNode *N); void SelectUADDO_USUBO(SDNode *N); void SelectDIV_SCALE(SDNode *N); + void SelectMAD_64_32(SDNode *N); void SelectFMA_W_CHAIN(SDNode *N); void SelectFMUL_W_CHAIN(SDNode *N); @@ -594,6 +595,11 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SelectDIV_SCALE(N); return; } + case AMDGPUISD::MAD_I64_I32: + case AMDGPUISD::MAD_U64_U32: { + SelectMAD_64_32(N); + return; + } case ISD::CopyToReg: { const SITargetLowering& Lowering = *static_cast<const SITargetLowering*>(getTargetLowering()); @@ -814,6 +820,19 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); } +// We need to handle this here because tablegen doesn't support matching +// instructions with multiple outputs. +void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { + SDLoc SL(N); + bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32; + unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32; + + SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + Clamp }; + CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); +} + bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, unsigned OffsetBits) const { if ((OffsetBits == 16 && !isUInt<16>(Offset)) || diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index fe2c9337721b..d502b77447d4 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -128,27 +128,20 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32); } -bool AMDGPUTargetLowering::isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op) -{ - assert(Op.getOpcode() == ISD::OR); - - SDValue N0 = Op->getOperand(0); - SDValue N1 = Op->getOperand(1); - EVT VT = N0.getValueType(); - - if (VT.isInteger() && !VT.isVector()) { - KnownBits LHSKnown, RHSKnown; - DAG.computeKnownBits(N0, LHSKnown); +unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) { + KnownBits Known; + EVT VT = Op.getValueType(); + DAG.computeKnownBits(Op, Known); - if (LHSKnown.Zero.getBoolValue()) { - DAG.computeKnownBits(N1, RHSKnown); + return VT.getSizeInBits() - Known.countMinLeadingZeros(); +} - if (!(~RHSKnown.Zero & ~LHSKnown.Zero)) - return true; - } - } +unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); - return false; + // In order for this to be a signed 24-bit value, bit 23, must + // be a sign bit. + return VT.getSizeInBits() - DAG.ComputeNumSignBits(Op); } AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, @@ -2615,21 +2608,14 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, //===----------------------------------------------------------------------===// static bool isU24(SDValue Op, SelectionDAG &DAG) { - KnownBits Known; - EVT VT = Op.getValueType(); - DAG.computeKnownBits(Op, Known); - - return (VT.getSizeInBits() - Known.countMinLeadingZeros()) <= 24; + return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24; } static bool isI24(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - - // In order for this to be a signed 24-bit value, bit 23, must - // be a sign bit. return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated // as unsigned 24-bit values. - (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24; + AMDGPUTargetLowering::numBitsSigned(Op, DAG) < 24; } static bool simplifyI24(SDNode *Node24, unsigned OpIdx, @@ -2914,21 +2900,6 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0)); return DAG.getZExtOrTrunc(Shl, SL, VT); } - case ISD::OR: - if (!isOrEquivalentToAdd(DAG, LHS)) - break; - LLVM_FALLTHROUGH; - case ISD::ADD: { - // shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1) - if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(LHS->getOperand(1))) { - SDValue Shl = DAG.getNode(ISD::SHL, SL, VT, LHS->getOperand(0), - SDValue(RHS, 0)); - SDValue C2V = DAG.getConstant(C2->getAPIntValue() << RHSVal, - SDLoc(C2), VT); - return DAG.getNode(LHS->getOpcode(), SL, VT, Shl, C2V); - } - break; - } } if (VT != MVT::i64) @@ -3946,6 +3917,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(MUL_LOHI_I24) NODE_NAME_CASE(MAD_U24) NODE_NAME_CASE(MAD_I24) + NODE_NAME_CASE(MAD_I64_I32) + NODE_NAME_CASE(MAD_U64_U32) NODE_NAME_CASE(TEXTURE_FETCH) NODE_NAME_CASE(EXPORT) NODE_NAME_CASE(EXPORT_DONE) diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index cdb15186f86e..ba35aeb90ed3 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -35,7 +35,8 @@ private: SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL, unsigned Opc) const; public: - static bool isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op); + static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG); + static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG); protected: const AMDGPUSubtarget *Subtarget; @@ -379,6 +380,8 @@ enum NodeType : unsigned { MULHI_I24, MAD_U24, MAD_I24, + MAD_U64_U32, + MAD_I64_I32, MUL_LOHI_I24, MUL_LOHI_U24, TEXTURE_FETCH, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 41cc7d7093ec..f1a42b42f1f1 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -18,7 +18,7 @@ #include "AMDGPU.h" #include "Utils/AMDGPUBaseInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "AMDGPUGenInstrInfo.inc" diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp index e7e54750fe66..714c60a7446b 100644 --- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -487,7 +487,7 @@ bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName, bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const { if (auto Op = dyn_cast<FPMathOperator>(CI)) - if (Op->hasUnsafeAlgebra()) + if (Op->isFast()) return true; const Function *F = CI->getParent()->getParent(); Attribute Attr = F->getFnAttribute("unsafe-fp-math"); @@ -1337,7 +1337,8 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, // for OpenCL 2.0 we have only generic implementation of sincos // function. AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo); - nf.getLeads()[0].PtrKind = AMDGPULibFunc::GENERIC; + const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M); + nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS.FLAT_ADDRESS); Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf)); if (!Fsincos) return false; @@ -1350,7 +1351,6 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, // The allocaInst allocates the memory in private address space. This need // to be bitcasted to point to the address space of cos pointer type. // In OpenCL 2.0 this is generic, while in 1.2 that is private. - const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M); if (PTy->getPointerAddressSpace() != AS.PRIVATE_ADDRESS) P = B.CreateAddrSpaceCast(Alloc, PTy); CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P); diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/lib/Target/AMDGPU/AMDGPULibFunc.cpp index 919e8c1e13c4..4671273d61f9 100644 --- a/lib/Target/AMDGPU/AMDGPULibFunc.cpp +++ b/lib/Target/AMDGPU/AMDGPULibFunc.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "AMDGPU.h" #include "AMDGPULibFunc.h" #include <llvm/ADT/SmallString.h> #include <llvm/ADT/SmallVector.h> @@ -458,13 +459,16 @@ AMDGPULibFunc::Param ParamIterator::getNextParam() { P.ArgType = AMDGPULibFunc::I32; break; - case E_CONSTPTR_SWAPGL: - switch (P.PtrKind & AMDGPULibFunc::ADDR_SPACE) { - case AMDGPULibFunc::GLOBAL: P.PtrKind = AMDGPULibFunc::LOCAL; break; - case AMDGPULibFunc::LOCAL: P.PtrKind = AMDGPULibFunc::GLOBAL; break; + case E_CONSTPTR_SWAPGL: { + unsigned AS = AMDGPULibFunc::getAddrSpaceFromEPtrKind(P.PtrKind); + switch (AS) { + case AMDGPUAS::GLOBAL_ADDRESS: AS = AMDGPUAS::LOCAL_ADDRESS; break; + case AMDGPUAS::LOCAL_ADDRESS: AS = AMDGPUAS::GLOBAL_ADDRESS; break; } + P.PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS); P.PtrKind |= AMDGPULibFunc::CONST; break; + } default: llvm_unreachable("Unhandeled param rule"); } @@ -590,19 +594,14 @@ bool ItaniumParamParser::parseItaniumParam(StringRef& param, if (eatTerm(param, 'P')) { if (eatTerm(param, 'K')) res.PtrKind |= AMDGPULibFunc::CONST; if (eatTerm(param, 'V')) res.PtrKind |= AMDGPULibFunc::VOLATILE; + unsigned AS; if (!eatTerm(param, "U3AS")) { - res.PtrKind |= AMDGPULibFunc::PRIVATE; + AS = 0; } else { - switch(param.front()) { - case '1': res.PtrKind |= AMDGPULibFunc::GLOBAL; break; - case '2': res.PtrKind |= AMDGPULibFunc::READONLY;break; - case '3': res.PtrKind |= AMDGPULibFunc::LOCAL; break; - case '4': res.PtrKind |= AMDGPULibFunc::GENERIC; break; - case '5': res.PtrKind |= AMDGPULibFunc::OTHER; break; - default: return false; - } + AS = param.front() - '0'; drop_front(param, 1); } + res.PtrKind |= AMDGPULibFuncBase::getEPtrKindFromAddrSpace(AS); } else { res.PtrKind = AMDGPULibFunc::BYVALUE; } @@ -837,7 +836,9 @@ public: os << 'P'; if (p.PtrKind & AMDGPULibFunc::CONST) os << 'K'; if (p.PtrKind & AMDGPULibFunc::VOLATILE) os << 'V'; - int AS = UseAddrSpace ? (p.PtrKind & AMDGPULibFunc::ADDR_SPACE)-1 : 0; + unsigned AS = UseAddrSpace + ? AMDGPULibFuncBase::getAddrSpaceFromEPtrKind(p.PtrKind) + : 0; if (AS != 0) os << "U3AS" << AS; Ptr = p; p.PtrKind = 0; diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.h b/lib/Target/AMDGPU/AMDGPULibFunc.h index 8f4297b4a491..5405bc645714 100644 --- a/lib/Target/AMDGPU/AMDGPULibFunc.h +++ b/lib/Target/AMDGPU/AMDGPULibFunc.h @@ -283,14 +283,7 @@ public: enum EPtrKind { BYVALUE = 0, - PRIVATE, - GLOBAL, - READONLY, - LOCAL, - GENERIC, - OTHER, - - ADDR_SPACE = 0xF, + ADDR_SPACE = 0xF, // Address space takes value 0x1 ~ 0xF. CONST = 0x10, VOLATILE = 0x20 }; @@ -315,6 +308,17 @@ public: static bool isMangled(EFuncId Id) { return static_cast<unsigned>(Id) <= static_cast<unsigned>(EI_LAST_MANGLED); } + + static unsigned getEPtrKindFromAddrSpace(unsigned AS) { + assert(((AS + 1) & ~ADDR_SPACE) == 0); + return AS + 1; + } + + static unsigned getAddrSpaceFromEPtrKind(unsigned Kind) { + Kind = Kind & ADDR_SPACE; + assert(Kind >= 1); + return Kind - 1; + } }; class AMDGPULibFuncImpl : public AMDGPULibFuncBase { diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 9fc9592bdc57..83122281d2b2 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -23,7 +23,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include <algorithm> using namespace llvm; diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 56a5fa634b55..6ee529c8549b 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -462,6 +462,10 @@ public: return isAmdHsaOS() || isMesaKernel(MF); } + bool hasMad64_32() const { + return getGeneration() >= SEA_ISLANDS; + } + bool hasFminFmaxLegacy() const { return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; } diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index f7ecdea77047..14f26f787ab1 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -245,6 +245,10 @@ GCNMinRegSchedRegistry("gcn-minreg", static StringRef computeDataLayout(const Triple &TT) { if (TT.getArch() == Triple::r600) { // 32-bit pointers. + if (TT.getEnvironmentName() == "amdgiz" || + TT.getEnvironmentName() == "amdgizcl") + return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; } diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index d729dcc439ef..d1120f5e330c 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2134,12 +2134,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, } if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee)) { - // FIXME: Remove this hack for function pointer types. - const GlobalValue *GV = GA->getGlobal(); - assert(Callee.getValueType() == MVT::i32); - Callee = DAG.getGlobalAddress(GV, DL, MVT::i64, GA->getOffset(), - false, GA->getTargetFlags()); + // FIXME: Remove this hack for function pointer types after removing + // support of old address space mapping. In the new address space + // mapping the pointer in default address space is 64 bit, therefore + // does not need this hack. + if (Callee.getValueType() == MVT::i32) { + const GlobalValue *GV = GA->getGlobal(); + Callee = DAG.getGlobalAddress(GV, DL, MVT::i64, GA->getOffset(), false, + GA->getTargetFlags()); + } } + assert(Callee.getValueType() == MVT::i64); const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); @@ -5957,18 +5962,57 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, return 0; } +static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL, + EVT VT, + SDValue N0, SDValue N1, SDValue N2, + bool Signed) { + unsigned MadOpc = Signed ? AMDGPUISD::MAD_I64_I32 : AMDGPUISD::MAD_U64_U32; + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i1); + SDValue Mad = DAG.getNode(MadOpc, SL, VTs, N0, N1, N2); + return DAG.getNode(ISD::TRUNCATE, SL, VT, Mad); +} + SDValue SITargetLowering::performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); - - if (VT != MVT::i32) - return SDValue(); - SDLoc SL(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + if ((LHS.getOpcode() == ISD::MUL || RHS.getOpcode() == ISD::MUL) + && Subtarget->hasMad64_32() && + !VT.isVector() && VT.getScalarSizeInBits() > 32 && + VT.getScalarSizeInBits() <= 64) { + if (LHS.getOpcode() != ISD::MUL) + std::swap(LHS, RHS); + + SDValue MulLHS = LHS.getOperand(0); + SDValue MulRHS = LHS.getOperand(1); + SDValue AddRHS = RHS; + + // TODO: Maybe restrict if SGPR inputs. + if (numBitsUnsigned(MulLHS, DAG) <= 32 && + numBitsUnsigned(MulRHS, DAG) <= 32) { + MulLHS = DAG.getZExtOrTrunc(MulLHS, SL, MVT::i32); + MulRHS = DAG.getZExtOrTrunc(MulRHS, SL, MVT::i32); + AddRHS = DAG.getZExtOrTrunc(AddRHS, SL, MVT::i64); + return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, false); + } + + if (numBitsSigned(MulLHS, DAG) < 32 && numBitsSigned(MulRHS, DAG) < 32) { + MulLHS = DAG.getSExtOrTrunc(MulLHS, SL, MVT::i32); + MulRHS = DAG.getSExtOrTrunc(MulRHS, SL, MVT::i32); + AddRHS = DAG.getSExtOrTrunc(AddRHS, SL, MVT::i64); + return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, true); + } + + return SDValue(); + } + + if (VT != MVT::i32) + return SDValue(); + // add x, zext (setcc) => addcarry x, 0, setcc // add x, sext (setcc) => subcarry x, 0, setcc unsigned Opc = LHS.getOpcode(); diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index ade909cc84e3..5dde72910ee3 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -14,15 +14,15 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H -#include "AMDGPUMachineFunction.h" #include "AMDGPUArgumentUsageInfo.h" +#include "AMDGPUMachineFunction.h" #include "SIRegisterInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include <array> @@ -87,9 +87,6 @@ public: /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. class SIMachineFunctionInfo final : public AMDGPUMachineFunction { - // FIXME: This should be removed and getPreloadedValue moved here. - friend class SIRegisterInfo; - unsigned TIDReg = AMDGPU::NoRegister; // Registers that may be reserved for spilling purposes. These may be the same @@ -143,7 +140,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { private: unsigned LDSWaveSpillSize = 0; - unsigned ScratchOffsetReg; unsigned NumUserSGPRs = 0; unsigned NumSystemSGPRs = 0; diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index aa041aab51c8..666b80107dc6 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -399,8 +399,10 @@ def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_ } // End Constraints = "@earlyclobber $vdst" let isCommutable = 1 in { +let SchedRW = [WriteDouble, WriteSALU] in { def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; +} // End SchedRW = [WriteDouble, WriteSALU] } // End isCommutable = 1 } // End SubtargetPredicate = isCIVI |