author     David L. Jones <dlj@google.com>  2017-11-15 01:40:05 +0000
committer  David L. Jones <dlj@google.com>  2017-11-15 01:40:05 +0000
commit     d5c2cca72463233df77a065f201db31b140eb44d (patch)
tree       3f9a978131033302a58b7db7db1ecf2a4622bad2 /lib/Target/AMDGPU
parent     ce7676b8db6bac096dad4c4ad62e9e6bb8aa1064 (diff)
parent     dcf64df89bc6d775e266ebd6b0134d135f47a35b (diff)
Creating branches/google/testing and tags/google/testing/2017-11-14 from r317716
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/testing@318248 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/AMDGPU')
-rw-r--r--  lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUFrameLowering.h | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 19
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 57
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.h | 5
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstrInfo.h | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPULibCalls.cpp | 6
-rw-r--r--  lib/Target/AMDGPU/AMDGPULibFunc.cpp | 29
-rw-r--r--  lib/Target/AMDGPU/AMDGPULibFunc.h | 20
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.h | 4
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp | 62
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.h | 8
-rw-r--r--  lib/Target/AMDGPU/VOP3Instructions.td | 2
15 files changed, 137 insertions, 87 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 156f7bc6512b..b17b67167666 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -400,7 +400,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
return false;
FastMathFlags FMF = FPOp->getFastMathFlags();
- bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
+ bool UnsafeDiv = HasUnsafeFPMath || FMF.isFast() ||
FMF.allowReciprocal();
// With UnsafeDiv node will be optimized to just rcp and mul.
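
For context (illustrative, not part of this commit): unsafeAlgebra() was renamed to isFast() on FastMathFlags, and the check above reads those flags off the FPMathOperator wrapping the fdiv. A minimal standalone sketch of the same query, assuming post-rename LLVM headers; divIsRelaxed is a hypothetical helper name:

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Operator.h"

    // Returns true when the fdiv may be relaxed to rcp+mul, mirroring the
    // UnsafeDiv condition in AMDGPUCodeGenPrepare::visitFDiv above.
    static bool divIsRelaxed(const llvm::BinaryOperator &FDiv, bool HasUnsafeFPMath) {
      const auto *FPOp = llvm::cast<llvm::FPMathOperator>(&FDiv);
      llvm::FastMathFlags FMF = FPOp->getFastMathFlags();
      return HasUnsafeFPMath || FMF.isFast() || FMF.allowReciprocal();
    }
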
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
index 2329fffd5212..91fe921bfeec 100644
--- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
@@ -15,7 +15,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index c313e4a04ef8..f04efd71fa05 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -204,6 +204,7 @@ private:
void SelectADD_SUB_I64(SDNode *N);
void SelectUADDO_USUBO(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
+ void SelectMAD_64_32(SDNode *N);
void SelectFMA_W_CHAIN(SDNode *N);
void SelectFMUL_W_CHAIN(SDNode *N);
@@ -594,6 +595,11 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectDIV_SCALE(N);
return;
}
+ case AMDGPUISD::MAD_I64_I32:
+ case AMDGPUISD::MAD_U64_U32: {
+ SelectMAD_64_32(N);
+ return;
+ }
case ISD::CopyToReg: {
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
@@ -814,6 +820,19 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
+// We need to handle this here because tablegen doesn't support matching
+// instructions with multiple outputs.
+void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
+ SDLoc SL(N);
+ bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
+ unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
+
+ SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ Clamp };
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+}
+
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
unsigned OffsetBits) const {
if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index fe2c9337721b..d502b77447d4 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -128,27 +128,20 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
}
-bool AMDGPUTargetLowering::isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op)
-{
- assert(Op.getOpcode() == ISD::OR);
-
- SDValue N0 = Op->getOperand(0);
- SDValue N1 = Op->getOperand(1);
- EVT VT = N0.getValueType();
-
- if (VT.isInteger() && !VT.isVector()) {
- KnownBits LHSKnown, RHSKnown;
- DAG.computeKnownBits(N0, LHSKnown);
+unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
+ KnownBits Known;
+ EVT VT = Op.getValueType();
+ DAG.computeKnownBits(Op, Known);
- if (LHSKnown.Zero.getBoolValue()) {
- DAG.computeKnownBits(N1, RHSKnown);
+ return VT.getSizeInBits() - Known.countMinLeadingZeros();
+}
- if (!(~RHSKnown.Zero & ~LHSKnown.Zero))
- return true;
- }
- }
+unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
- return false;
+ // In order for this to be a signed 24-bit value, bit 23, must
+ // be a sign bit.
+ return VT.getSizeInBits() - DAG.ComputeNumSignBits(Op);
}
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
@@ -2615,21 +2608,14 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
//===----------------------------------------------------------------------===//
static bool isU24(SDValue Op, SelectionDAG &DAG) {
- KnownBits Known;
- EVT VT = Op.getValueType();
- DAG.computeKnownBits(Op, Known);
-
- return (VT.getSizeInBits() - Known.countMinLeadingZeros()) <= 24;
+ return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24;
}
static bool isI24(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
-
- // In order for this to be a signed 24-bit value, bit 23, must
- // be a sign bit.
return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
// as unsigned 24-bit values.
- (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24;
+ AMDGPUTargetLowering::numBitsSigned(Op, DAG) < 24;
}
static bool simplifyI24(SDNode *Node24, unsigned OpIdx,
@@ -2914,21 +2900,6 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
return DAG.getZExtOrTrunc(Shl, SL, VT);
}
- case ISD::OR:
- if (!isOrEquivalentToAdd(DAG, LHS))
- break;
- LLVM_FALLTHROUGH;
- case ISD::ADD: {
- // shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1)
- if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(LHS->getOperand(1))) {
- SDValue Shl = DAG.getNode(ISD::SHL, SL, VT, LHS->getOperand(0),
- SDValue(RHS, 0));
- SDValue C2V = DAG.getConstant(C2->getAPIntValue() << RHSVal,
- SDLoc(C2), VT);
- return DAG.getNode(LHS->getOpcode(), SL, VT, Shl, C2V);
- }
- break;
- }
}
if (VT != MVT::i64)
@@ -3946,6 +3917,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MUL_LOHI_I24)
NODE_NAME_CASE(MAD_U24)
NODE_NAME_CASE(MAD_I24)
+ NODE_NAME_CASE(MAD_I64_I32)
+ NODE_NAME_CASE(MAD_U64_U32)
NODE_NAME_CASE(TEXTURE_FETCH)
NODE_NAME_CASE(EXPORT)
NODE_NAME_CASE(EXPORT_DONE)
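
A worked illustration of the new helpers (not part of this commit): numBitsSigned is the value's bit width minus the number of known sign-bit copies, so isI24 accepts anything ComputeNumSignBits proves fits in fewer than 24 signed bits. The standalone sketch below redoes the same arithmetic on plain integers; numBitsSignedScalar is a hypothetical name:

    #include <cassert>
    #include <cstdint>

    // Signed bit-width of V: 32 minus the redundant sign-bit count
    // (__builtin_clrsb is a GCC/Clang builtin).
    static unsigned numBitsSignedScalar(int32_t V) {
      unsigned SignBits = __builtin_clrsb(V) + 1;
      return 32 - SignBits;
    }

    int main() {
      assert(numBitsSignedScalar(-1) == 0);        // every bit is a sign copy
      assert(numBitsSignedScalar(0x7FFF) == 15);   // fits in 16 signed bits
      assert(numBitsSignedScalar(0x7FFF) < 24);    // so an isI24-style check passes
    }
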
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index cdb15186f86e..ba35aeb90ed3 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -35,7 +35,8 @@ private:
SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL, unsigned Opc) const;
public:
- static bool isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op);
+ static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG);
+ static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
protected:
const AMDGPUSubtarget *Subtarget;
@@ -379,6 +380,8 @@ enum NodeType : unsigned {
MULHI_I24,
MAD_U24,
MAD_I24,
+ MAD_U64_U32,
+ MAD_I64_I32,
MUL_LOHI_I24,
MUL_LOHI_U24,
TEXTURE_FETCH,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 41cc7d7093ec..f1a42b42f1f1 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -18,7 +18,7 @@
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"
diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index e7e54750fe66..714c60a7446b 100644
--- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -487,7 +487,7 @@ bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
if (auto Op = dyn_cast<FPMathOperator>(CI))
- if (Op->hasUnsafeAlgebra())
+ if (Op->isFast())
return true;
const Function *F = CI->getParent()->getParent();
Attribute Attr = F->getFnAttribute("unsafe-fp-math");
@@ -1337,7 +1337,8 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
// for OpenCL 2.0 we have only generic implementation of sincos
// function.
AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
- nf.getLeads()[0].PtrKind = AMDGPULibFunc::GENERIC;
+ const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M);
+ nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS.FLAT_ADDRESS);
Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
if (!Fsincos) return false;
@@ -1350,7 +1351,6 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
// The allocaInst allocates the memory in private address space. This need
// to be bitcasted to point to the address space of cos pointer type.
// In OpenCL 2.0 this is generic, while in 1.2 that is private.
- const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M);
if (PTy->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
P = B.CreateAddrSpaceCast(Alloc, PTy);
CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index 919e8c1e13c4..4671273d61f9 100644
--- a/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
#include <llvm/ADT/SmallString.h>
#include <llvm/ADT/SmallVector.h>
@@ -458,13 +459,16 @@ AMDGPULibFunc::Param ParamIterator::getNextParam() {
P.ArgType = AMDGPULibFunc::I32;
break;
- case E_CONSTPTR_SWAPGL:
- switch (P.PtrKind & AMDGPULibFunc::ADDR_SPACE) {
- case AMDGPULibFunc::GLOBAL: P.PtrKind = AMDGPULibFunc::LOCAL; break;
- case AMDGPULibFunc::LOCAL: P.PtrKind = AMDGPULibFunc::GLOBAL; break;
+ case E_CONSTPTR_SWAPGL: {
+ unsigned AS = AMDGPULibFunc::getAddrSpaceFromEPtrKind(P.PtrKind);
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS: AS = AMDGPUAS::LOCAL_ADDRESS; break;
+ case AMDGPUAS::LOCAL_ADDRESS: AS = AMDGPUAS::GLOBAL_ADDRESS; break;
}
+ P.PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS);
P.PtrKind |= AMDGPULibFunc::CONST;
break;
+ }
default: llvm_unreachable("Unhandeled param rule");
}
@@ -590,19 +594,14 @@ bool ItaniumParamParser::parseItaniumParam(StringRef& param,
if (eatTerm(param, 'P')) {
if (eatTerm(param, 'K')) res.PtrKind |= AMDGPULibFunc::CONST;
if (eatTerm(param, 'V')) res.PtrKind |= AMDGPULibFunc::VOLATILE;
+ unsigned AS;
if (!eatTerm(param, "U3AS")) {
- res.PtrKind |= AMDGPULibFunc::PRIVATE;
+ AS = 0;
} else {
- switch(param.front()) {
- case '1': res.PtrKind |= AMDGPULibFunc::GLOBAL; break;
- case '2': res.PtrKind |= AMDGPULibFunc::READONLY;break;
- case '3': res.PtrKind |= AMDGPULibFunc::LOCAL; break;
- case '4': res.PtrKind |= AMDGPULibFunc::GENERIC; break;
- case '5': res.PtrKind |= AMDGPULibFunc::OTHER; break;
- default: return false;
- }
+ AS = param.front() - '0';
drop_front(param, 1);
}
+ res.PtrKind |= AMDGPULibFuncBase::getEPtrKindFromAddrSpace(AS);
} else {
res.PtrKind = AMDGPULibFunc::BYVALUE;
}
@@ -837,7 +836,9 @@ public:
os << 'P';
if (p.PtrKind & AMDGPULibFunc::CONST) os << 'K';
if (p.PtrKind & AMDGPULibFunc::VOLATILE) os << 'V';
- int AS = UseAddrSpace ? (p.PtrKind & AMDGPULibFunc::ADDR_SPACE)-1 : 0;
+ unsigned AS = UseAddrSpace
+ ? AMDGPULibFuncBase::getAddrSpaceFromEPtrKind(p.PtrKind)
+ : 0;
if (AS != 0) os << "U3AS" << AS;
Ptr = p;
p.PtrKind = 0;
diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.h b/lib/Target/AMDGPU/AMDGPULibFunc.h
index 8f4297b4a491..5405bc645714 100644
--- a/lib/Target/AMDGPU/AMDGPULibFunc.h
+++ b/lib/Target/AMDGPU/AMDGPULibFunc.h
@@ -283,14 +283,7 @@ public:
enum EPtrKind {
BYVALUE = 0,
- PRIVATE,
- GLOBAL,
- READONLY,
- LOCAL,
- GENERIC,
- OTHER,
-
- ADDR_SPACE = 0xF,
+ ADDR_SPACE = 0xF, // Address space takes value 0x1 ~ 0xF.
CONST = 0x10,
VOLATILE = 0x20
};
@@ -315,6 +308,17 @@ public:
static bool isMangled(EFuncId Id) {
return static_cast<unsigned>(Id) <= static_cast<unsigned>(EI_LAST_MANGLED);
}
+
+ static unsigned getEPtrKindFromAddrSpace(unsigned AS) {
+ assert(((AS + 1) & ~ADDR_SPACE) == 0);
+ return AS + 1;
+ }
+
+ static unsigned getAddrSpaceFromEPtrKind(unsigned Kind) {
+ Kind = Kind & ADDR_SPACE;
+ assert(Kind >= 1);
+ return Kind - 1;
+ }
};
class AMDGPULibFuncImpl : public AMDGPULibFuncBase {
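
For illustration (not part of this commit): the new encoding biases the address space by one so that 0 can keep meaning BYVALUE, and the two helpers are exact inverses over address spaces 0..14. A standalone sketch with hypothetical local names:

    #include <cassert>

    constexpr unsigned ADDR_SPACE_MASK = 0xF;

    static unsigned toEPtrKind(unsigned AS) {
      assert(((AS + 1) & ~ADDR_SPACE_MASK) == 0 && "address space must fit 0..14");
      return AS + 1;
    }

    static unsigned toAddrSpace(unsigned Kind) {
      Kind &= ADDR_SPACE_MASK;
      assert(Kind >= 1 && "kind 0 is BYVALUE, not an address space");
      return Kind - 1;
    }

    int main() {
      for (unsigned AS = 0; AS <= 14; ++AS)
        assert(toAddrSpace(toEPtrKind(AS)) == AS);  // the +1/-1 bias round-trips
    }
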
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 9fc9592bdc57..83122281d2b2 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -23,7 +23,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 56a5fa634b55..6ee529c8549b 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -462,6 +462,10 @@ public:
return isAmdHsaOS() || isMesaKernel(MF);
}
+ bool hasMad64_32() const {
+ return getGeneration() >= SEA_ISLANDS;
+ }
+
bool hasFminFmaxLegacy() const {
return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f7ecdea77047..14f26f787ab1 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -245,6 +245,10 @@ GCNMinRegSchedRegistry("gcn-minreg",
static StringRef computeDataLayout(const Triple &TT) {
if (TT.getArch() == Triple::r600) {
// 32-bit pointers.
+ if (TT.getEnvironmentName() == "amdgiz" ||
+ TT.getEnvironmentName() == "amdgizcl")
+ return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+ "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
}
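
For context (illustrative, not part of this commit): the amdgiz/amdgizcl environments select the new address-space mapping, and the only difference in the r600 layout string is the trailing "-A5", i.e. allocas live in address space 5. A hedged sketch of a triple that would take the new branch; the exact triple spelling is an assumption:

    #include "llvm/ADT/Triple.h"

    // With this triple, computeDataLayout() above returns the 32-bit r600
    // layout ending in "...-n32:64-A5" rather than "...-n32:64".
    llvm::Triple TT("r600-unknown-unknown-amdgiz");
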
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index d729dcc439ef..d1120f5e330c 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2134,12 +2134,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // FIXME: Remove this hack for function pointer types.
- const GlobalValue *GV = GA->getGlobal();
- assert(Callee.getValueType() == MVT::i32);
- Callee = DAG.getGlobalAddress(GV, DL, MVT::i64, GA->getOffset(),
- false, GA->getTargetFlags());
+ // FIXME: Remove this hack for function pointer types after removing
+ // support of old address space mapping. In the new address space
+ // mapping the pointer in default address space is 64 bit, therefore
+ // does not need this hack.
+ if (Callee.getValueType() == MVT::i32) {
+ const GlobalValue *GV = GA->getGlobal();
+ Callee = DAG.getGlobalAddress(GV, DL, MVT::i64, GA->getOffset(), false,
+ GA->getTargetFlags());
+ }
}
+ assert(Callee.getValueType() == MVT::i64);
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
@@ -5957,18 +5962,57 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
return 0;
}
+static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL,
+ EVT VT,
+ SDValue N0, SDValue N1, SDValue N2,
+ bool Signed) {
+ unsigned MadOpc = Signed ? AMDGPUISD::MAD_I64_I32 : AMDGPUISD::MAD_U64_U32;
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i1);
+ SDValue Mad = DAG.getNode(MadOpc, SL, VTs, N0, N1, N2);
+ return DAG.getNode(ISD::TRUNCATE, SL, VT, Mad);
+}
+
SDValue SITargetLowering::performAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
-
- if (VT != MVT::i32)
- return SDValue();
-
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ if ((LHS.getOpcode() == ISD::MUL || RHS.getOpcode() == ISD::MUL)
+ && Subtarget->hasMad64_32() &&
+ !VT.isVector() && VT.getScalarSizeInBits() > 32 &&
+ VT.getScalarSizeInBits() <= 64) {
+ if (LHS.getOpcode() != ISD::MUL)
+ std::swap(LHS, RHS);
+
+ SDValue MulLHS = LHS.getOperand(0);
+ SDValue MulRHS = LHS.getOperand(1);
+ SDValue AddRHS = RHS;
+
+ // TODO: Maybe restrict if SGPR inputs.
+ if (numBitsUnsigned(MulLHS, DAG) <= 32 &&
+ numBitsUnsigned(MulRHS, DAG) <= 32) {
+ MulLHS = DAG.getZExtOrTrunc(MulLHS, SL, MVT::i32);
+ MulRHS = DAG.getZExtOrTrunc(MulRHS, SL, MVT::i32);
+ AddRHS = DAG.getZExtOrTrunc(AddRHS, SL, MVT::i64);
+ return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, false);
+ }
+
+ if (numBitsSigned(MulLHS, DAG) < 32 && numBitsSigned(MulRHS, DAG) < 32) {
+ MulLHS = DAG.getSExtOrTrunc(MulLHS, SL, MVT::i32);
+ MulRHS = DAG.getSExtOrTrunc(MulRHS, SL, MVT::i32);
+ AddRHS = DAG.getSExtOrTrunc(AddRHS, SL, MVT::i64);
+ return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, true);
+ }
+
+ return SDValue();
+ }
+
+ if (VT != MVT::i32)
+ return SDValue();
+
// add x, zext (setcc) => addcarry x, 0, setcc
// add x, sext (setcc) => subcarry x, 0, setcc
unsigned Opc = LHS.getOpcode();
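
As an illustration of what the new combine targets (not part of this commit): a 32 x 32 -> 64-bit multiply feeding a 64-bit add, which on subtargets reporting hasMad64_32() can now select a single v_mad_u64_u32 or v_mad_i64_i32. A source-level sketch of the kind of pattern that qualifies:

    #include <cstdint>

    // Operands are zero-extended 32-bit values, so numBitsUnsigned <= 32 and
    // the unsigned path fires.
    uint64_t mad_u(uint32_t a, uint32_t b, uint64_t c) {
      return (uint64_t)a * b + c;   // -> v_mad_u64_u32
    }

    // Operands are sign-extended 32-bit values, so numBitsSigned < 32 and the
    // signed path fires.
    int64_t mad_s(int32_t a, int32_t b, int64_t c) {
      return (int64_t)a * b + c;    // -> v_mad_i64_i32
    }
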
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ade909cc84e3..5dde72910ee3 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -14,15 +14,15 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
-#include "AMDGPUMachineFunction.h"
#include "AMDGPUArgumentUsageInfo.h"
+#include "AMDGPUMachineFunction.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
@@ -87,9 +87,6 @@ public:
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
- // FIXME: This should be removed and getPreloadedValue moved here.
- friend class SIRegisterInfo;
-
unsigned TIDReg = AMDGPU::NoRegister;
// Registers that may be reserved for spilling purposes. These may be the same
@@ -143,7 +140,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
private:
unsigned LDSWaveSpillSize = 0;
- unsigned ScratchOffsetReg;
unsigned NumUserSGPRs = 0;
unsigned NumSystemSGPRs = 0;
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index aa041aab51c8..666b80107dc6 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -399,8 +399,10 @@ def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_
} // End Constraints = "@earlyclobber $vdst"
let isCommutable = 1 in {
+let SchedRW = [WriteDouble, WriteSALU] in {
def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
+} // End SchedRW = [WriteDouble, WriteSALU]
} // End isCommutable = 1
} // End SubtargetPredicate = isCIVI