summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp15
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h6
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp531
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.h10
-rw-r--r--llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll3
-rw-r--r--llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll12
-rw-r--r--llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll259
-rw-r--r--llvm/test/CodeGen/AMDGPU/sgpr-spill.mir1185
-rw-r--r--llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll8
-rw-r--r--llvm/test/CodeGen/AMDGPU/spill-m0.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll1
-rw-r--r--llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll70
-rw-r--r--llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir80
14 files changed, 1587 insertions, 611 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 3a956f3e7cd9..d0fd59a4e9c9 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1237,16 +1237,8 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
if (!allStackObjectsAreDead(MFI)) {
assert(RS && "RegScavenger required if spilling");
- if (FuncInfo->isEntryFunction()) {
- int ScavengeFI = MFI.CreateFixedObject(
- TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
- RS->addScavengingFrameIndex(ScavengeFI);
- } else {
- int ScavengeFI = MFI.CreateStackObject(
- TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
- TRI->getSpillAlign(AMDGPU::SGPR_32RegClass), false);
- RS->addScavengingFrameIndex(ScavengeFI);
- }
+ // Add an emergency spill slot
+ RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index a86f720026af..dcb740bbf216 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -440,6 +440,21 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
}
}
+int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
+ const SIRegisterInfo &TRI) {
+ if (ScavengeFI)
+ return *ScavengeFI;
+ if (isEntryFunction()) {
+ ScavengeFI = MFI.CreateFixedObject(
+ TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
+ } else {
+ ScavengeFI = MFI.CreateStackObject(
+ TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
+ TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
+ }
+ return *ScavengeFI;
+}
+
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
return AMDGPU::SGPR0 + NumUserSGPRs;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 3009c33c8b35..cede63bcc979 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -480,6 +480,10 @@ private:
// VGPRs used for AGPR spills.
SmallVector<MCPhysReg, 32> SpillVGPR;
+ // Emergency stack slot. Sometimes, we create this before finalizing the stack
+ // frame, so save it here and add it to the RegScavenger later.
+ Optional<int> ScavengeFI;
+
public: // FIXME
/// If this is set, an SGPR used for save/restore of the register used for the
/// frame pointer.
@@ -536,6 +540,8 @@ public:
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
void removeDeadFrameIndices(MachineFrameInfo &MFI);
+ int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
+
bool hasCalculatedTID() const { return TIDReg != 0; };
Register getTIDReg() const { return TIDReg; };
void setTIDReg(Register Reg) { TIDReg = Reg; }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 55c1ce3da47f..caafc0c8d443 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -43,6 +43,231 @@ std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
+namespace llvm {
+
+// A temporary struct to spill SGPRs.
+// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
+// just v_writelane and v_readlane.
+//
+// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
+// is saved to scratch (or the other way around for loads).
+// For this, a VGPR is required where the needed lanes can be clobbered. The
+// RegScavenger can provide a VGPR where currently active lanes can be
+// clobbered, but we still need to save inactive lanes.
+// The high-level steps are:
+// - Try to scavenge SGPR(s) to save exec
+// - Try to scavenge VGPR
+// - Save needed, all or inactive lanes of a TmpVGPR
+// - Spill/Restore SGPRs using TmpVGPR
+// - Restore TmpVGPR
+//
+// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
+// cannot scavenge temporary SGPRs to save exec, we use the following code:
+// buffer_store_dword TmpVGPR ; only if active lanes need to be saved
+// s_not exec, exec
+// buffer_store_dword TmpVGPR ; save inactive lanes
+// s_not exec, exec
+struct SGPRSpillBuilder {
+ struct PerVGPRData {
+ unsigned PerVGPR;
+ unsigned NumVGPRs;
+ int64_t VGPRLanes;
+ };
+
+ // The SGPR to save
+ Register SuperReg;
+ MachineBasicBlock::iterator MI;
+ ArrayRef<int16_t> SplitParts;
+ unsigned NumSubRegs;
+ bool IsKill;
+ const DebugLoc &DL;
+
+ /* When spilling to stack */
+ // The SGPRs are written into this VGPR, which is then written to scratch
+ // (or vice versa for loads).
+ Register TmpVGPR = AMDGPU::NoRegister;
+ // Temporary spill slot to save TmpVGPR to.
+ int TmpVGPRIndex = 0;
+ // If TmpVGPR is live before the spill or if it is scavenged.
+ bool TmpVGPRLive = false;
+ // Scavenged SGPR to save EXEC.
+ Register SavedExecReg = AMDGPU::NoRegister;
+ // Stack index to write the SGPRs to.
+ int Index;
+ unsigned EltSize = 4;
+
+ RegScavenger &RS;
+ MachineBasicBlock &MBB;
+ MachineFunction &MF;
+ SIMachineFunctionInfo &MFI;
+ const SIInstrInfo &TII;
+ const SIRegisterInfo &TRI;
+ bool IsWave32;
+ Register ExecReg;
+ unsigned MovOpc;
+ unsigned NotOpc;
+
+ SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
+ bool IsWave32, MachineBasicBlock::iterator MI, int Index,
+ RegScavenger &RS)
+ : SuperReg(MI->getOperand(0).getReg()), MI(MI),
+ IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index),
+ RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()),
+ MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
+ IsWave32(IsWave32) {
+ const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
+ SplitParts = TRI.getRegSplitParts(RC, EltSize);
+ NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
+
+ if (IsWave32) {
+ ExecReg = AMDGPU::EXEC_LO;
+ MovOpc = AMDGPU::S_MOV_B32;
+ NotOpc = AMDGPU::S_NOT_B32;
+ } else {
+ ExecReg = AMDGPU::EXEC;
+ MovOpc = AMDGPU::S_MOV_B64;
+ NotOpc = AMDGPU::S_NOT_B64;
+ }
+
+ assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
+ assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
+ SuperReg != AMDGPU::EXEC && "exec should never spill");
+ }
+
+ PerVGPRData getPerVGPRData() {
+ PerVGPRData Data;
+ Data.PerVGPR = IsWave32 ? 32 : 64;
+ Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
+ Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
+ return Data;
+ }
+
+ // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
+ // free.
+ // Writes these instructions if an SGPR can be scavenged:
+ // s_mov_b64 s[6:7], exec ; Save exec
+ // s_mov_b64 exec, 3 ; Wanted lanemask
+ // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot
+ //
+ // Writes these instructions if no SGPR can be scavenged:
+ // buffer_store_dword v0 ; Only if no free VGPR was found
+ // s_not_b64 exec, exec
+ // buffer_store_dword v0 ; Save inactive lanes
+ // ; exec stays inverted, it is flipped back in
+ // ; restore.
+ void prepare() {
+ // Scavenged temporary VGPR to use. It must be scavenged once for any number
+ // of spilled subregs.
+ // FIXME: The liveness analysis is limited and does not tell if a register
+ // is in use in lanes that are currently inactive. We can never be sure if
+ // a register as actually in use in another lane, so we need to save all
+ // used lanes of the chosen VGPR.
+ TmpVGPR = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false);
+
+ // Reserve temporary stack slot
+ TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
+ if (TmpVGPR) {
+ // Found a register that is dead in the currently active lanes, we only
+ // need to spill inactive lanes.
+ TmpVGPRLive = false;
+ } else {
+ // Pick v0 because it doesn't make a difference.
+ TmpVGPR = AMDGPU::VGPR0;
+ TmpVGPRLive = true;
+ }
+
+ // Try to scavenge SGPRs to save exec
+ assert(!SavedExecReg && "Exec is already saved, refuse to save again");
+ const TargetRegisterClass &RC =
+ IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
+ RS.setRegUsed(SuperReg);
+ SavedExecReg = RS.scavengeRegister(&RC, MI, 0, false);
+
+ int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
+
+ if (SavedExecReg) {
+ // Set exec to needed lanes
+ BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
+ auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
+ if (!TmpVGPRLive)
+ I.addReg(TmpVGPR, RegState::ImplicitDefine);
+ // Spill needed lanes
+ TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
+ } else {
+ // Spill active lanes
+ if (TmpVGPRLive)
+ TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
+ /*IsKill*/ false);
+ // Spill inactive lanes
+ auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ if (!TmpVGPRLive)
+ I.addReg(TmpVGPR, RegState::ImplicitDefine);
+ TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
+ }
+ }
+
+ // Writes these instructions if an SGPR can be scavenged:
+ // buffer_load_dword v1 ; Write scavenged VGPR to emergency slot
+ // s_waitcnt vmcnt(0) ; If a free VGPR was found
+ // s_mov_b64 exec, s[6:7] ; Save exec
+ //
+ // Writes these instructions if no SGPR can be scavenged:
+ // buffer_load_dword v0 ; Restore inactive lanes
+ // s_waitcnt vmcnt(0) ; If a free VGPR was found
+ // s_not_b64 exec, exec
+ // buffer_load_dword v0 ; Only if no free VGPR was found
+ void restore() {
+ if (SavedExecReg) {
+ // Restore used lanes
+ TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
+ /*IsKill*/ false);
+ // Restore exec
+ auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg)
+ .addReg(SavedExecReg, RegState::Kill);
+ // Add an implicit use of the load so it is not dead.
+ // FIXME This inserts an unnecessary waitcnt
+ if (!TmpVGPRLive) {
+ I.addReg(TmpVGPR, RegState::Implicit);
+ }
+ } else {
+ // Restore inactive lanes
+ TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
+ /*IsKill*/ false);
+ auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ if (!TmpVGPRLive) {
+ I.addReg(TmpVGPR, RegState::Implicit);
+ }
+ // Restore active lanes
+ if (TmpVGPRLive)
+ TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
+ }
+ }
+
+ // Write TmpVGPR to memory or read TmpVGPR from memory.
+ // Either using a single buffer_load/store if exec is set to the needed mask
+ // or using
+ // buffer_load
+ // s_not exec, exec
+ // buffer_load
+ // s_not exec, exec
+ void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
+ if (SavedExecReg) {
+ // Spill needed lanes
+ TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
+ } else {
+ // Spill active lanes
+ TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
+ /*IsKill*/ false);
+ // Spill inactive lanes
+ BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
+ BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ }
+ }
+};
+
+} // namespace llvm
+
SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
: AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
@@ -1039,120 +1264,36 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
}
}
-// Generate a VMEM access which loads or stores the VGPR containing an SGPR
-// spill such that all the lanes set in VGPRLanes are loaded or stored.
-// This generates exec mask manipulation and will use SGPRs available in MI
-// or VGPR lanes in the VGPR to save and restore the exec mask.
-void SIRegisterInfo::buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI,
- int Index, int Offset,
- unsigned EltSize, Register VGPR,
- int64_t VGPRLanes,
- RegScavenger *RS,
- bool IsLoad) const {
- MachineBasicBlock *MBB = MI->getParent();
- MachineFunction *MF = MBB->getParent();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const SIInstrInfo *TII = ST.getInstrInfo();
-
- Register SuperReg = MI->getOperand(0).getReg();
- const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
- ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
- unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
- unsigned FirstPart = Offset * 32;
- unsigned ExecLane = 0;
-
- bool IsKill = MI->getOperand(0).isKill();
- const DebugLoc &DL = MI->getDebugLoc();
-
- // Cannot handle load/store to EXEC
- assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
- SuperReg != AMDGPU::EXEC && "exec should never spill");
-
- // On Wave32 only handle EXEC_LO.
- // On Wave64 only update EXEC_HI if there is sufficent space for a copy.
- bool OnlyExecLo = isWave32 || NumSubRegs == 1 || SuperReg == AMDGPU::EXEC_HI;
-
- unsigned ExecMovOpc = OnlyExecLo ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- Register ExecReg = OnlyExecLo ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- Register SavedExecReg;
-
- // Backup EXEC
- if (OnlyExecLo) {
- SavedExecReg =
- NumSubRegs == 1
- ? SuperReg
- : Register(getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]));
- } else {
- // If src/dst is an odd size it is possible subreg0 is not aligned.
- for (; ExecLane < (NumSubRegs - 1); ++ExecLane) {
- SavedExecReg = getMatchingSuperReg(
- getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]), AMDGPU::sub0,
- &AMDGPU::SReg_64_XEXECRegClass);
- if (SavedExecReg)
- break;
- }
- }
- assert(SavedExecReg);
- BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), SavedExecReg).addReg(ExecReg);
-
- // Setup EXEC
- BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg).addImm(VGPRLanes);
-
+void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
+ int Offset, bool IsLoad,
+ bool IsKill) const {
// Load/store VGPR
- MachineFrameInfo &FrameInfo = MF->getFrameInfo();
+ MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill);
- Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
- ? getBaseRegister()
- : getFrameRegister(*MF);
+ Register FrameReg =
+ FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
+ ? getBaseRegister()
+ : getFrameRegister(SB.MF);
Align Alignment = FrameInfo.getObjectAlign(Index);
- MachinePointerInfo PtrInfo =
- MachinePointerInfo::getFixedStack(*MF, Index);
- MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
+ MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
- EltSize, Alignment);
+ SB.EltSize, Alignment);
if (IsLoad) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
- buildSpillLoadStore(MI, Opc,
- Index,
- VGPR, false,
- FrameReg,
- Offset * EltSize, MMO,
- RS);
+ buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
+ Offset * SB.EltSize, MMO, &SB.RS);
} else {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
- buildSpillLoadStore(MI, Opc, Index, VGPR,
- IsKill, FrameReg,
- Offset * EltSize, MMO, RS);
+ buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
+ Offset * SB.EltSize, MMO, &SB.RS);
// This only ever adds one VGPR spill
- MFI->addToSpilledVGPRs(1);
- }
-
- // Restore EXEC
- BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg)
- .addReg(SavedExecReg, getKillRegState(IsLoad || IsKill));
-
- // Restore clobbered SGPRs
- if (IsLoad) {
- // Nothing to do; register will be overwritten
- } else if (!IsKill) {
- // Restore SGPRs from appropriate VGPR lanes
- if (!OnlyExecLo) {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32),
- getSubReg(SuperReg, SplitParts[FirstPart + ExecLane + 1]))
- .addReg(VGPR)
- .addImm(ExecLane + 1);
- }
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32),
- NumSubRegs == 1 ? SavedExecReg
- : Register(getSubReg(
- SuperReg, SplitParts[FirstPart + ExecLane])))
- .addReg(VGPR, RegState::Kill)
- .addImm(ExecLane);
+ SB.MFI.addToSpilledVGPRs(1);
}
}
@@ -1160,115 +1301,97 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
int Index,
RegScavenger *RS,
bool OnlyToVGPR) const {
- MachineBasicBlock *MBB = MI->getParent();
- MachineFunction *MF = MBB->getParent();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, *RS);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
- = MFI->getSGPRToVGPRSpills(Index);
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
+ SB.MFI.getSGPRToVGPRSpills(Index);
bool SpillToVGPR = !VGPRSpills.empty();
if (OnlyToVGPR && !SpillToVGPR)
return false;
- const SIInstrInfo *TII = ST.getInstrInfo();
-
- Register SuperReg = MI->getOperand(0).getReg();
- bool IsKill = MI->getOperand(0).isKill();
- const DebugLoc &DL = MI->getDebugLoc();
-
- assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
- SuperReg != MFI->getFrameOffsetReg()));
-
- assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
- assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
- SuperReg != AMDGPU::EXEC && "exec should never spill");
-
- unsigned EltSize = 4;
- const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
-
- ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
- unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
+ assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
+ SB.SuperReg != SB.MFI.getFrameOffsetReg()));
if (SpillToVGPR) {
- for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- Register SubReg = NumSubRegs == 1
- ? SuperReg
- : Register(getSubReg(SuperReg, SplitParts[i]));
+ for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
+ Register SubReg =
+ SB.NumSubRegs == 1
+ ? SB.SuperReg
+ : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
- bool UseKill = IsKill && i == NumSubRegs - 1;
+ bool UseKill = SB.IsKill && i == SB.NumSubRegs - 1;
// Mark the "old value of vgpr" input undef only if this is the first sgpr
// spill to this specific vgpr in the first basic block.
- auto MIB =
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
- .addReg(SubReg, getKillRegState(UseKill))
- .addImm(Spill.Lane)
- .addReg(Spill.VGPR);
+ auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+ Spill.VGPR)
+ .addReg(SubReg, getKillRegState(UseKill))
+ .addImm(Spill.Lane)
+ .addReg(Spill.VGPR);
- if (i == 0 && NumSubRegs > 1) {
+ if (i == 0 && SB.NumSubRegs > 1) {
// We may be spilling a super-register which is only partially defined,
// and need to ensure later spills think the value is defined.
- MIB.addReg(SuperReg, RegState::ImplicitDefine);
+ MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
}
- if (NumSubRegs > 1)
- MIB.addReg(SuperReg, getKillRegState(UseKill) | RegState::Implicit);
+ if (SB.NumSubRegs > 1)
+ MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
// FIXME: Since this spills to another register instead of an actual
// frame index, we should delete the frame index when all references to
// it are fixed.
}
} else {
- // Scavenged temporary VGPR to use. It must be scavenged once for any number
- // of spilled subregs.
- Register TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
- RS->setRegUsed(TmpVGPR);
+ SB.prepare();
- // SubReg carries the "Kill" flag when SubReg == SuperReg.
- unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
+ // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
+ unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
- unsigned PerVGPR = 32;
- unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR;
- int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL;
+ // Per VGPR helper data
+ auto PVD = SB.getPerVGPRData();
- for (unsigned Offset = 0; Offset < NumVGPRs; ++Offset) {
+ for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
unsigned TmpVGPRFlags = RegState::Undef;
// Write sub registers into the VGPR
- for (unsigned i = Offset * PerVGPR,
- e = std::min((Offset + 1) * PerVGPR, NumSubRegs);
+ for (unsigned i = Offset * PVD.PerVGPR,
+ e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
i < e; ++i) {
- Register SubReg = NumSubRegs == 1
- ? SuperReg
- : Register(getSubReg(SuperReg, SplitParts[i]));
+ Register SubReg =
+ SB.NumSubRegs == 1
+ ? SB.SuperReg
+ : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
MachineInstrBuilder WriteLane =
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), TmpVGPR)
+ BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+ SB.TmpVGPR)
.addReg(SubReg, SubKillState)
- .addImm(i % PerVGPR)
- .addReg(TmpVGPR, TmpVGPRFlags);
+ .addImm(i % PVD.PerVGPR)
+ .addReg(SB.TmpVGPR, TmpVGPRFlags);
TmpVGPRFlags = 0;
// There could be undef components of a spilled super register.
// TODO: Can we detect this and skip the spill?
- if (NumSubRegs > 1) {
- // The last implicit use of the SuperReg carries the "Kill" flag.
+ if (SB.NumSubRegs > 1) {
+ // The last implicit use of the SB.SuperReg carries the "Kill" flag.
unsigned SuperKillState = 0;
- if (i + 1 == NumSubRegs)
- SuperKillState |= getKillRegState(IsKill);
- WriteLane.addReg(SuperReg, RegState::Implicit | SuperKillState);
+ if (i + 1 == SB.NumSubRegs)
+ SuperKillState |= getKillRegState(SB.IsKill);
+ WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
}
}
// Write out VGPR
- buildSGPRSpillLoadStore(MI, Index, Offset, EltSize, TmpVGPR, VGPRLanes,
- RS, false);
+ SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
}
+
+ SB.restore();
}
MI->eraseFromParent();
- MFI->addToSpilledSGPRs(NumSubRegs);
+ SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
return true;
}
@@ -1276,75 +1399,59 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
int Index,
RegScavenger *RS,
bool OnlyToVGPR) const {
- MachineFunction *MF = MI->getParent()->getParent();
- MachineBasicBlock *MBB = MI->getParent();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, *RS);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
- = MFI->getSGPRToVGPRSpills(Index);
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
+ SB.MFI.getSGPRToVGPRSpills(Index);
bool SpillToVGPR = !VGPRSpills.empty();
if (OnlyToVGPR && !SpillToVGPR)
return false;
- const SIInstrInfo *TII = ST.getInstrInfo();
- const DebugLoc &DL = MI->getDebugLoc();
-
- Register SuperReg = MI->getOperand(0).getReg();
-
- assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
- assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
- SuperReg != AMDGPU::EXEC && "exec should never spill");
-
- unsigned EltSize = 4;
-
- const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
-
- ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
- unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
-
if (SpillToVGPR) {
- for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- Register SubReg = NumSubRegs == 1
- ? SuperReg
- : Register(getSubReg(SuperReg, SplitParts[i]));
+ for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
+ Register SubReg =
+ SB.NumSubRegs == 1
+ ? SB.SuperReg
+ : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
- auto MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
- .addReg(Spill.VGPR)
- .addImm(Spill.Lane);
- if (NumSubRegs > 1 && i == 0)
- MIB.addReg(SuperReg, RegState::ImplicitDefine);
+ auto MIB =
+ BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
+ .addReg(Spill.VGPR)
+ .addImm(Spill.Lane);
+ if (SB.NumSubRegs > 1 && i == 0)
+ MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
}
} else {
- Register TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
- RS->setRegUsed(TmpVGPR);
+ SB.prepare();
- unsigned PerVGPR = 32;
- unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR;
- int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL;
+ // Per VGPR helper data
+ auto PVD = SB.getPerVGPRData();
- for (unsigned Offset = 0; Offset < NumVGPRs; ++Offset) {
+ for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
// Load in VGPR data
- buildSGPRSpillLoadStore(MI, Index, Offset, EltSize, TmpVGPR, VGPRLanes,
- RS, true);
+ SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);
// Unpack lanes
- for (unsigned i = Offset * PerVGPR,
- e = std::min((Offset + 1) * PerVGPR, NumSubRegs);
+ for (unsigned i = Offset * PVD.PerVGPR,
+ e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
i < e; ++i) {
- Register SubReg = NumSubRegs == 1
- ? SuperReg
- : Register(getSubReg(SuperReg, SplitParts[i]));
+ Register SubReg =
+ SB.NumSubRegs == 1
+ ? SB.SuperReg
+ : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
bool LastSubReg = (i + 1 == e);
- auto MIB =
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
- .addReg(TmpVGPR, getKillRegState(LastSubReg))
- .addImm(i);
- if (NumSubRegs > 1 && i == 0)
- MIB.addReg(SuperReg, RegState::ImplicitDefine);
+ auto MIB = BuildMI(SB.MBB, MI, SB.DL,
+ SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
+ .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
+ .addImm(i);
+ if (SB.NumSubRegs > 1 && i == 0)
+ MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
}
}
+
+ SB.restore();
}
MI->eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 96bef205b847..4d7484d1133a 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -22,6 +22,7 @@ namespace llvm {
class GCNSubtarget;
class LiveIntervals;
class RegisterBank;
+struct SGPRSpillBuilder;
class SIMachineFunctionInfo;
class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
@@ -106,10 +107,11 @@ public:
const TargetRegisterClass *getPointerRegClass(
const MachineFunction &MF, unsigned Kind = 0) const override;
- void buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI, int Index,
- int Offset, unsigned EltSize, Register VGPR,
- int64_t VGPRLanes, RegScavenger *RS,
- bool IsLoad) const;
+ void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
+ bool IsLoad, bool IsKill = true) const;
+
+ void buildSGPRSpillLoadStore(SGPRSpillBuilder &SB, int Offset,
+ int64_t VGPRLanes) const;
/// If \p OnlyToVGPR is true, this will only succeed if this
bool spillSGPR(MachineBasicBlock::iterator MI,
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index 4045f53e53e7..e25a1d232fc2 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -112,6 +112,9 @@ endif:
; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
; GCN: s_cmp_lg_u32
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill
; GCN-NEXT: s_cbranch_scc1 [[LOOP]]
diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
index 848512981fc3..d61185cbc426 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -12,17 +12,23 @@ declare hidden void @external_void_func_void() #0
; SPILL-TO-VGPR: v_writelane_b32 v40, s31, 1
; NO-SPILL-TO-VGPR: v_mov_b32_e32 v0, s33
; NO-SPILL-TO-VGPR: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; NO-SPILL-TO-VGPR: s_mov_b64 exec, s[4:5]
+; NO-SPILL-TO-VGPR: s_mov_b64 s[6:7], exec
+; NO-SPILL-TO-VGPR: s_mov_b64 exec, 3
+; NO-SPILL-TO-VGPR: buffer_store_dword v1, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR: v_writelane_b32 v1, s30, 0
; NO-SPILL-TO-VGPR: v_writelane_b32 v1, s31, 1
; NO-SPILL-TO-VGPR: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; NO-SPILL-TO-VGPR: buffer_load_dword v1, off, s[0:3], s33 offset:16
+; NO-SPILL-TO-VGPR: s_waitcnt vmcnt(0)
+; NO-SPILL-TO-VGPR: s_mov_b64 exec, s[6:7]
; GCN: s_swappc_b64 s[30:31], s[4:5]
; SPILL-TO-VGPR: v_readlane_b32 s4, v40, 0
; SPILL-TO-VGPR: v_readlane_b32 s5, v40, 1
-; NO-SPILL-TO-VGPR: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; NO-SPILL-TO-VGPR: v_readlane_b32 s4, v1, 0
-; NO-SPILL-TO-VGPR: v_readlane_b32 s5, v1, 1
+; NO-SPILL-TO-VGPR: v_readlane_b32 s4, v2, 0
+; NO-SPILL-TO-VGPR: v_readlane_b32 s5, v2, 1
; SPILL-TO-VGPR: v_readlane_b32 s33, v40, 2
; NO-SPILL-TO-VGPR: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index dccee0a298a3..02ca4cb9baec 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -761,12 +761,15 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[2:3]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b64 s[4:5], exec
+; GCN-NEXT: s_mov_b64 exec, 3
+; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0
; GCN-NEXT: v_writelane_b32 v0, s2, 0
; GCN-NEXT: v_writelane_b32 v0, s3, 1
-; GCN-NEXT: s_mov_b64 s[2:3], exec
-; GCN-NEXT: s_mov_b64 exec, 3
; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, s[2:3]
+; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s0, s1
@@ -842,13 +845,16 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32
; GCN-NEXT: v_readlane_b32 s17, v31, 61
; GCN-NEXT: v_readlane_b32 s18, v31, 62
; GCN-NEXT: v_readlane_b32 s19, v31, 63
-; GCN-NEXT: s_mov_b64 s[0:1], exec
+; GCN-NEXT: s_mov_b64 s[2:3], exec
; GCN-NEXT: s_mov_b64 exec, 3
+; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0
; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readlane_b32 s0, v0, 0
; GCN-NEXT: v_readlane_b32 s1, v0, 1
+; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_mov_b64 exec, s[2:3]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[36:51]
; GCN-NEXT: ;;#ASMEND
@@ -887,5 +893,248 @@ ret:
ret void
}
+; Same as @no_vgprs_last_sgpr_spill, but some SGPR spills must go to memory.
+; Additionally, v0 is live throughout the function.
+define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
+; GCN-LABEL: no_vgprs_last_sgpr_spill_live_v0:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s54, -1
+; GCN-NEXT: s_mov_b32 s55, 0xe8f000
+; GCN-NEXT: s_add_u32 s52, s52, s3
+; GCN-NEXT: s_addc_u32 s53, s53, 0
+; GCN-NEXT: s_load_dword s0, s[0:1], 0x9
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v31, s4, 0
+; GCN-NEXT: v_writelane_b32 v31, s5, 1
+; GCN-NEXT: v_writelane_b32 v31, s6, 2
+; GCN-NEXT: v_writelane_b32 v31, s7, 3
+; GCN-NEXT: v_writelane_b32 v31, s8, 4
+; GCN-NEXT: v_writelane_b32 v31, s9, 5
+; GCN-NEXT: v_writelane_b32 v31, s10, 6
+; GCN-NEXT: v_writelane_b32 v31, s11, 7
+; GCN-NEXT: v_writelane_b32 v31, s12, 8
+; GCN-NEXT: v_writelane_b32 v31, s13, 9
+; GCN-NEXT: v_writelane_b32 v31, s14, 10
+; GCN-NEXT: v_writelane_b32 v31, s15, 11
+; GCN-NEXT: v_writelane_b32 v31, s16, 12
+; GCN-NEXT: v_writelane_b32 v31, s17, 13
+; GCN-NEXT: v_writelane_b32 v31, s18, 14
+; GCN-NEXT: v_writelane_b32 v31, s19, 15
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v31, s4, 16
+; GCN-NEXT: v_writelane_b32 v31, s5, 17
+; GCN-NEXT: v_writelane_b32 v31, s6, 18
+; GCN-NEXT: v_writelane_b32 v31, s7, 19
+; GCN-NEXT: v_writelane_b32 v31, s8, 20
+; GCN-NEXT: v_writelane_b32 v31, s9, 21
+; GCN-NEXT: v_writelane_b32 v31, s10, 22
+; GCN-NEXT: v_writelane_b32 v31, s11, 23
+; GCN-NEXT: v_writelane_b32 v31, s12, 24
+; GCN-NEXT: v_writelane_b32 v31, s13, 25
+; GCN-NEXT: v_writelane_b32 v31, s14, 26
+; GCN-NEXT: v_writelane_b32 v31, s15, 27
+; GCN-NEXT: v_writelane_b32 v31, s16, 28
+; GCN-NEXT: v_writelane_b32 v31, s17, 29
+; GCN-NEXT: v_writelane_b32 v31, s18, 30
+; GCN-NEXT: v_writelane_b32 v31, s19, 31
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v31, s4, 32
+; GCN-NEXT: v_writelane_b32 v31, s5, 33
+; GCN-NEXT: v_writelane_b32 v31, s6, 34
+; GCN-NEXT: v_writelane_b32 v31, s7, 35
+; GCN-NEXT: v_writelane_b32 v31, s8, 36
+; GCN-NEXT: v_writelane_b32 v31, s9, 37
+; GCN-NEXT: v_writelane_b32 v31, s10, 38
+; GCN-NEXT: v_writelane_b32 v31, s11, 39
+; GCN-NEXT: v_writelane_b32 v31, s12, 40
+; GCN-NEXT: v_writelane_b32 v31, s13, 41
+; GCN-NEXT: v_writelane_b32 v31, s14, 42
+; GCN-NEXT: v_writelane_b32 v31, s15, 43
+; GCN-NEXT: v_writelane_b32 v31, s16, 44
+; GCN-NEXT: v_writelane_b32 v31, s17, 45
+; GCN-NEXT: v_writelane_b32 v31, s18, 46
+; GCN-NEXT: v_writelane_b32 v31, s19, 47
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_writelane_b32 v31, s4, 48
+; GCN-NEXT: v_writelane_b32 v31, s5, 49
+; GCN-NEXT: v_writelane_b32 v31, s6, 50
+; GCN-NEXT: v_writelane_b32 v31, s7, 51
+; GCN-NEXT: v_writelane_b32 v31, s8, 52
+; GCN-NEXT: v_writelane_b32 v31, s9, 53
+; GCN-NEXT: v_writelane_b32 v31, s10, 54
+; GCN-NEXT: v_writelane_b32 v31, s11, 55
+; GCN-NEXT: v_writelane_b32 v31, s12, 56
+; GCN-NEXT: v_writelane_b32 v31, s13, 57
+; GCN-NEXT: v_writelane_b32 v31, s14, 58
+; GCN-NEXT: v_writelane_b32 v31, s15, 59
+; GCN-NEXT: v_writelane_b32 v31, s16, 60
+; GCN-NEXT: v_writelane_b32 v31, s17, 61
+; GCN-NEXT: v_writelane_b32 v31, s18, 62
+; GCN-NEXT: v_writelane_b32 v31, s19, 63
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def s[2:3]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b64 s[4:5], exec
+; GCN-NEXT: s_mov_b64 exec, 3
+; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0
+; GCN-NEXT: v_writelane_b32 v0, s2, 0
+; GCN-NEXT: v_writelane_b32 v0, s3, 1
+; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
+; GCN-NEXT: s_mov_b32 s1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_lg_u32 s0, s1
+; GCN-NEXT: s_cbranch_scc1 BB3_2
+; GCN-NEXT: ; %bb.1: ; %bb0
+; GCN-NEXT: v_readlane_b32 s36, v31, 32
+; GCN-NEXT: v_readlane_b32 s37, v31, 33
+; GCN-NEXT: v_readlane_b32 s38, v31, 34
+; GCN-NEXT: v_readlane_b32 s39, v31, 35
+; GCN-NEXT: v_readlane_b32 s40, v31, 36
+; GCN-NEXT: v_readlane_b32 s41, v31, 37
+; GCN-NEXT: v_readlane_b32 s42, v31, 38
+; GCN-NEXT: v_readlane_b32 s43, v31, 39
+; GCN-NEXT: v_readlane_b32 s44, v31, 40
+; GCN-NEXT: v_readlane_b32 s45, v31, 41
+; GCN-NEXT: v_readlane_b32 s46, v31, 42
+; GCN-NEXT: v_readlane_b32 s47, v31, 43
+; GCN-NEXT: v_readlane_b32 s48, v31, 44
+; GCN-NEXT: v_readlane_b32 s49, v31, 45
+; GCN-NEXT: v_readlane_b32 s50, v31, 46
+; GCN-NEXT: v_readlane_b32 s51, v31, 47
+; GCN-NEXT: v_readlane_b32 s0, v31, 16
+; GCN-NEXT: v_readlane_b32 s1, v31, 17
+; GCN-NEXT: v_readlane_b32 s2, v31, 18
+; GCN-NEXT: v_readlane_b32 s3, v31, 19
+; GCN-NEXT: v_readlane_b32 s4, v31, 20
+; GCN-NEXT: v_readlane_b32 s5, v31, 21
+; GCN-NEXT: v_readlane_b32 s6, v31, 22
+; GCN-NEXT: v_readlane_b32 s7, v31, 23
+; GCN-NEXT: v_readlane_b32 s8, v31, 24
+; GCN-NEXT: v_readlane_b32 s9, v31, 25
+; GCN-NEXT: v_readlane_b32 s10, v31, 26
+; GCN-NEXT: v_readlane_b32 s11, v31, 27
+; GCN-NEXT: v_readlane_b32 s12, v31, 28
+; GCN-NEXT: v_readlane_b32 s13, v31, 29
+; GCN-NEXT: v_readlane_b32 s14, v31, 30
+; GCN-NEXT: v_readlane_b32 s15, v31, 31
+; GCN-NEXT: v_readlane_b32 s16, v31, 0
+; GCN-NEXT: v_readlane_b32 s17, v31, 1
+; GCN-NEXT: v_readlane_b32 s18, v31, 2
+; GCN-NEXT: v_readlane_b32 s19, v31, 3
+; GCN-NEXT: v_readlane_b32 s20, v31, 4
+; GCN-NEXT: v_readlane_b32 s21, v31, 5
+; GCN-NEXT: v_readlane_b32 s22, v31, 6
+; GCN-NEXT: v_readlane_b32 s23, v31, 7
+; GCN-NEXT: v_readlane_b32 s24, v31, 8
+; GCN-NEXT: v_readlane_b32 s25, v31, 9
+; GCN-NEXT: v_readlane_b32 s26, v31, 10
+; GCN-NEXT: v_readlane_b32 s27, v31, 11
+; GCN-NEXT: v_readlane_b32 s28, v31, 12
+; GCN-NEXT: v_readlane_b32 s29, v31, 13
+; GCN-NEXT: v_readlane_b32 s30, v31, 14
+; GCN-NEXT: v_readlane_b32 s31, v31, 15
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; def v0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use s[16:31]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use s[0:15]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s4, v31, 48
+; GCN-NEXT: v_readlane_b32 s5, v31, 49
+; GCN-NEXT: v_readlane_b32 s6, v31, 50
+; GCN-NEXT: v_readlane_b32 s7, v31, 51
+; GCN-NEXT: v_readlane_b32 s8, v31, 52
+; GCN-NEXT: v_readlane_b32 s9, v31, 53
+; GCN-NEXT: v_readlane_b32 s10, v31, 54
+; GCN-NEXT: v_readlane_b32 s11, v31, 55
+; GCN-NEXT: v_readlane_b32 s12, v31, 56
+; GCN-NEXT: v_readlane_b32 s13, v31, 57
+; GCN-NEXT: v_readlane_b32 s14, v31, 58
+; GCN-NEXT: v_readlane_b32 s15, v31, 59
+; GCN-NEXT: v_readlane_b32 s16, v31, 60
+; GCN-NEXT: v_readlane_b32 s17, v31, 61
+; GCN-NEXT: v_readlane_b32 s18, v31, 62
+; GCN-NEXT: v_readlane_b32 s19, v31, 63
+; GCN-NEXT: s_mov_b64 s[2:3], exec
+; GCN-NEXT: s_mov_b64 exec, 3
+; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0
+; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_readlane_b32 s0, v1, 0
+; GCN-NEXT: v_readlane_b32 s1, v1, 1
+; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_mov_b64 exec, s[2:3]
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use s[36:51]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use s[4:19]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use s[0:1]
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; use v0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: BB3_2: ; %ret
+; GCN-NEXT: s_endpgm
+ call void asm sideeffect "", "~{v[0:7]}" () #0
+ call void asm sideeffect "", "~{v[8:15]}" () #0
+ call void asm sideeffect "", "~{v[16:23]}" () #0
+ call void asm sideeffect "", "~{v[24:27]}"() #0
+ call void asm sideeffect "", "~{v[28:29]}"() #0
+ call void asm sideeffect "", "~{v30}"() #0
+
+ %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+ %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+ %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+ %wide.sgpr3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+ %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
+ %cmp = icmp eq i32 %in, 0
+ br i1 %cmp, label %bb0, label %ret
+
+bb0:
+ %vgpr0 = call i32 asm sideeffect "; def $0", "=v" () #0
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
+ call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr3) #0
+ call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
+ call void asm sideeffect "; use $0", "v"(i32 %vgpr0) #0
+ br label %ret
+
+ret:
+ ret void
+}
+
attributes #0 = { nounwind }
attributes #1 = { nounwind "amdgpu-waves-per-eu"="8,8" }
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
index 9980f6ace85b..6be98620e157 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
@@ -1,183 +1,9 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN64,MUBUF %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN32,MUBUF %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-flat-scratch -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GCN64,FLATSCR %s
-
-
-# CHECK-LABEL: name: check_spill
-
-# FLATSCR: $sgpr33 = S_MOV_B32 0
-# FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc
-# FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc
-
-# S32 with kill
-# CHECK: V_WRITELANE
-# CHECK: $sgpr12 = S_MOV_B32 $exec_lo
-# CHECK: $exec_lo = S_MOV_B32 1
-# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
-# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 4
-# CHECK: $exec_lo = S_MOV_B32 killed $sgpr12
-
-# S32 without kill
-# CHECK: V_WRITELANE
-# CHECK: $sgpr12 = S_MOV_B32 $exec_lo
-# CHECK: $exec_lo = S_MOV_B32 1
-# MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
-# FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr{{[0-9]+}}, $sgpr33, 4
-# CHECK: $sgpr12 = V_READLANE
-
-# S64 with kill
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 3
-# GCN64: $exec = S_MOV_B64 3
-# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
-# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 8
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
-# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
-
-# S64 without kill
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 3
-# GCN64: $exec = S_MOV_B64 3
-# MUBUF: BUFFER_STORE_DWORD_OFFSET $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
-# FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr{{[0-9]+}}, $sgpr33, 8
-# GCN32: $exec_lo = S_MOV_B32 $sgpr12
-# GCN64: $exec = S_MOV_B64 $sgpr12_sgpr13
-# GCN64: $sgpr13 = V_READLANE
-# CHECK: $sgpr12 = V_READLANE
-
-# S96
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 7
-# GCN64: $exec = S_MOV_B64 7
-# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 16
-# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 16
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
-# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
-
-# S128
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 15
-# GCN64: $exec = S_MOV_B64 15
-# MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 28
-# FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr{{[0-9]+}}, $sgpr33, 28
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
-# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
-
-# S160
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 31
-# GCN64: $exec = S_MOV_B64 31
-# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 44
-# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 44
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
-# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
-
-# S256
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 255
-# GCN64: $exec = S_MOV_B64 255
-# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 64
-# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 64
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
-# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
-
-# S512
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 65535
-# GCN64: $exec = S_MOV_B64 65535
-# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 96
-# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 96
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
-# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
-
-# S1024
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# CHECK: V_WRITELANE
-# GCN32: $sgpr64 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 4294967295
-# GCN64: $exec = S_MOV_B64 4294967295
-# MUBUF: BUFFER_STORE_DWORD_OFFSET {{(killed )?}}$vgpr{{[0-9]+}}, ${{(sgpr[0-9_]+)*}}, $sgpr33, 160
-# FLATSCR: SCRATCH_STORE_DWORD_SADDR {{(killed )?}}$vgpr{{[0-9]+}}, $sgpr33, 160
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64
-# GCN64: $exec = S_MOV_B64 killed $sgpr64_sgpr65
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN64-MUBUF %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN32-MUBUF %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-flat-scratch -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN64-FLATSCR %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -filetype=obj -verify-machineinstrs -start-before=prologepilog %s -o /dev/null
+# Check that we do not crash when emitting ISA.
--- |
@@ -228,6 +54,850 @@ body: |
bb.0:
liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7
+ ; GCN32-LABEL: name: check_spill
+ ; GCN32: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11
+ ; GCN32: $sgpr33 = S_MOV_B32 0
+ ; GCN32: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GCN32: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GCN32: $sgpr98 = S_MOV_B32 4294967295, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GCN32: $sgpr99 = S_MOV_B32 834756608, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GCN32: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GCN32: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GCN32: renamable $sgpr12 = IMPLICIT_DEF
+ ; GCN32: INLINEASM &"", 0 /* attdialect */, implicit-def $vgpr0
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
+ ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 -1
+ ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN32: INLINEASM &"", 1 /* sideeffect attdialect */, implicit $vgpr0
+ ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
+ ; GCN32: renamable $sgpr12 = IMPLICIT_DEF
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
+ ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
+ ; GCN32: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
+ ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
+ ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 3, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 -1
+ ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
+ ; GCN32: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
+ ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 3, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
+ ; GCN32: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
+ ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
+ ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 7, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 -1
+ ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
+ ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 15, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 -1
+ ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
+ ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 31, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 -1
+ ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
+ ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 255, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 -1
+ ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
+ ; GCN32: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32: $sgpr12 = S_OR_SAVEEXEC_B32 65535, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 -1
+ ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
+ ; GCN32: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_XOR_B32 $exec_lo, -1, implicit-def $scc
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32: $sgpr64 = S_OR_SAVEEXEC_B32 4294967295, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GCN32: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 -1
+ ; GCN32: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+ ; GCN32: $exec_lo = S_MOV_B32 killed $sgpr64
+ ; FLATSCR-LABEL: name: check_spill
+ ; FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1
+ ; FLATSCR: $sgpr33 = S_MOV_B32 0
+ ; FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc
+ ; FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc
+ ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF
+ ; FLATSCR: INLINEASM &"", 0 /* attdialect */, implicit-def $vgpr0
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
+ ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo
+ ; FLATSCR: $exec_lo = S_MOV_B32 1
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR: $exec_lo = S_MOV_B32 -1
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5)
+ ; FLATSCR: INLINEASM &"", 1 /* sideeffect attdialect */, implicit $vgpr0
+ ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12
+ ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0
+ ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo
+ ; FLATSCR: $exec_lo = S_MOV_B32 1
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12
+ ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
+ ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
+ ; FLATSCR: $exec = S_MOV_B64 3
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 -1
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
+ ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
+ ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
+ ; FLATSCR: $exec = S_MOV_B64 3
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
+ ; FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1
+ ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
+ ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
+ ; FLATSCR: $exec = S_MOV_B64 7
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.2, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 -1
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
+ ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
+ ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
+ ; FLATSCR: $exec = S_MOV_B64 15
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 -1
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
+ ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
+ ; FLATSCR: $exec = S_MOV_B64 31
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.4, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 -1
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
+ ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
+ ; FLATSCR: $exec = S_MOV_B64 255
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.5, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 -1
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
+ ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr20, 8, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr21, 9, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr22, 10, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr23, 11, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr24, 12, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec
+ ; FLATSCR: $exec = S_MOV_B64 65535
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.6, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 -1
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13
+ ; FLATSCR: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; FLATSCR: $sgpr64_sgpr65 = S_MOV_B64 $exec
+ ; FLATSCR: $exec = S_MOV_B64 4294967295
+ ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.7, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 -1
+ ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.8, addrspace 5)
+ ; FLATSCR: $exec = S_MOV_B64 killed $sgpr64_sgpr65
+ ; GCN64-MUBUF-LABEL: name: check_spill
+ ; GCN64-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11
+ ; GCN64-MUBUF: $sgpr33 = S_MOV_B32 0
+ ; GCN64-MUBUF: $sgpr28 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
+ ; GCN64-MUBUF: $sgpr29 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
+ ; GCN64-MUBUF: $sgpr30 = S_MOV_B32 4294967295, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
+ ; GCN64-MUBUF: $sgpr31 = S_MOV_B32 14680064, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
+ ; GCN64-MUBUF: $sgpr28 = S_ADD_U32 $sgpr28, $sgpr11, implicit-def $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
+ ; GCN64-MUBUF: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
+ ; GCN64-MUBUF: renamable $sgpr12 = IMPLICIT_DEF
+ ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr0
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GCN64-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr0
+ ; GCN64-MUBUF: renamable $sgpr12 = IMPLICIT_DEF
+ ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GCN64-MUBUF: $exec = S_MOV_B64 1, implicit-def $vgpr1
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GCN64-MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr1
+ ; GCN64-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
+ ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GCN64-MUBUF: $exec = S_MOV_B64 3, implicit-def $vgpr2
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13
+ ; GCN64-MUBUF: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+ ; GCN64-MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr2
+ ; GCN64-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
+ ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GCN64-MUBUF: $exec = S_MOV_B64 3, implicit-def $vgpr3
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13
+ ; GCN64-MUBUF: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+ ; GCN64-MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr3
+ ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
+ ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GCN64-MUBUF: $exec = S_MOV_B64 7, implicit-def $vgpr4
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $vgpr4 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr4, implicit $sgpr12_sgpr13_sgpr14
+ ; GCN64-MUBUF: $vgpr4 = V_WRITELANE_B32 $sgpr13, 1, $vgpr4, implicit $sgpr12_sgpr13_sgpr14
+ ; GCN64-MUBUF: $vgpr4 = V_WRITELANE_B32 $sgpr14, 2, $vgpr4, implicit killed $sgpr12_sgpr13_sgpr14
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5)
+ ; GCN64-MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr4
+ ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
+ ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GCN64-MUBUF: $exec = S_MOV_B64 15, implicit-def $vgpr5
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN64-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr13, 1, $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN64-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr14, 2, $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN64-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr15, 3, $vgpr5, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
+ ; GCN64-MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr5
+ ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
+ ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GCN64-MUBUF: $exec = S_MOV_B64 31, implicit-def $vgpr6
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN64-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr13, 1, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN64-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr14, 2, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN64-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr15, 3, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN64-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr16, 4, $vgpr6, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
+ ; GCN64-MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr6
+ ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
+ ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GCN64-MUBUF: $exec = S_MOV_B64 255, implicit-def $vgpr7
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr13, 1, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr14, 2, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr15, 3, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr16, 4, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr17, 5, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr18, 6, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr19, 7, $vgpr7, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5)
+ ; GCN64-MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr7
+ ; GCN64-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
+ ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GCN64-MUBUF: $exec = S_MOV_B64 65535, implicit-def $vgpr8
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr13, 1, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr14, 2, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr15, 3, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr16, 4, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr17, 5, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr18, 6, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr19, 7, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr20, 8, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr21, 9, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr22, 10, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr23, 11, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr24, 12, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr25, 13, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr26, 14, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr27, 15, $vgpr8, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5)
+ ; GCN64-MUBUF: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr8
+ ; GCN64-MUBUF: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
+ ; GCN64-MUBUF: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GCN64-MUBUF: $exec = S_MOV_B64 4294967295, implicit-def $vgpr9
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr65, 1, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr66, 2, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr67, 3, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr68, 4, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr69, 5, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr70, 6, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr71, 7, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr72, 8, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr73, 9, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr74, 10, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr75, 11, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr76, 12, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr77, 13, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr78, 14, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr79, 15, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr80, 16, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr81, 17, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr82, 18, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr83, 19, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr84, 20, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr85, 21, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr86, 22, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr87, 23, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr88, 24, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr89, 25, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr90, 26, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr91, 27, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr92, 28, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr93, 29, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr94, 30, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr95, 31, $vgpr9, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5)
+ ; GCN64-MUBUF: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-MUBUF: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr9
+ ; GCN32-MUBUF-LABEL: name: check_spill
+ ; GCN32-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11
+ ; GCN32-MUBUF: $sgpr33 = S_MOV_B32 0
+ ; GCN32-MUBUF: $sgpr96 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GCN32-MUBUF: $sgpr97 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GCN32-MUBUF: $sgpr98 = S_MOV_B32 4294967295, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GCN32-MUBUF: $sgpr99 = S_MOV_B32 834756608, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GCN32-MUBUF: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GCN32-MUBUF: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
+ ; GCN32-MUBUF: renamable $sgpr12 = IMPLICIT_DEF
+ ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GCN32-MUBUF: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr0
+ ; GCN32-MUBUF: renamable $sgpr12 = IMPLICIT_DEF
+ ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 1, implicit-def $vgpr1
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GCN32-MUBUF: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr1
+ ; GCN32-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
+ ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 3, implicit-def $vgpr2
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13
+ ; GCN32-MUBUF: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+ ; GCN32-MUBUF: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr2
+ ; GCN32-MUBUF: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
+ ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 3, implicit-def $vgpr3
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13
+ ; GCN32-MUBUF: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+ ; GCN32-MUBUF: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr3
+ ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
+ ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 7, implicit-def $vgpr4
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $vgpr4 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr4, implicit $sgpr12_sgpr13_sgpr14
+ ; GCN32-MUBUF: $vgpr4 = V_WRITELANE_B32 $sgpr13, 1, $vgpr4, implicit $sgpr12_sgpr13_sgpr14
+ ; GCN32-MUBUF: $vgpr4 = V_WRITELANE_B32 $sgpr14, 2, $vgpr4, implicit killed $sgpr12_sgpr13_sgpr14
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, 0, implicit $exec :: (store 4 into %stack.2, addrspace 5)
+ ; GCN32-MUBUF: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr4
+ ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
+ ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 15, implicit-def $vgpr5
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN32-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr13, 1, $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN32-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr14, 2, $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN32-MUBUF: $vgpr5 = V_WRITELANE_B32 $sgpr15, 3, $vgpr5, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
+ ; GCN32-MUBUF: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr5
+ ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
+ ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 31, implicit-def $vgpr6
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN32-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr13, 1, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN32-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr14, 2, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN32-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr15, 3, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN32-MUBUF: $vgpr6 = V_WRITELANE_B32 $sgpr16, 4, $vgpr6, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
+ ; GCN32-MUBUF: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr6
+ ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
+ ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 255, implicit-def $vgpr7
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr13, 1, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr14, 2, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr15, 3, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr16, 4, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr17, 5, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr18, 6, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32-MUBUF: $vgpr7 = V_WRITELANE_B32 $sgpr19, 7, $vgpr7, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5)
+ ; GCN32-MUBUF: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr7
+ ; GCN32-MUBUF: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
+ ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr8
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr13, 1, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr14, 2, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr15, 3, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr16, 4, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr17, 5, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr18, 6, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr19, 7, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr20, 8, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr21, 9, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr22, 10, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr23, 11, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr24, 12, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr25, 13, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr26, 14, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: $vgpr8 = V_WRITELANE_B32 $sgpr27, 15, $vgpr8, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5)
+ ; GCN32-MUBUF: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr8
+ ; GCN32-MUBUF: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
+ ; GCN32-MUBUF: $sgpr0 = S_MOV_B32 $exec_lo
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr9
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr65, 1, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr66, 2, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr67, 3, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr68, 4, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr69, 5, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr70, 6, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr71, 7, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr72, 8, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr73, 9, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr74, 10, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr75, 11, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr76, 12, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr77, 13, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr78, 14, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr79, 15, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr80, 16, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr81, 17, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr82, 18, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr83, 19, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr84, 20, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr85, 21, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr86, 22, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr87, 23, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr88, 24, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr89, 25, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr90, 26, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr91, 27, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr92, 28, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr93, 29, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr94, 30, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: $vgpr9 = V_WRITELANE_B32 $sgpr95, 31, $vgpr9, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN32-MUBUF: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5)
+ ; GCN32-MUBUF: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN32-MUBUF: $exec_lo = S_MOV_B32 killed $sgpr0, implicit $vgpr9
+ ; GCN64-FLATSCR-LABEL: name: check_spill
+ ; GCN64-FLATSCR: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11, $sgpr0_sgpr1
+ ; GCN64-FLATSCR: $sgpr33 = S_MOV_B32 0
+ ; GCN64-FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc
+ ; GCN64-FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc
+ ; GCN64-FLATSCR: renamable $sgpr12 = IMPLICIT_DEF
+ ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr0
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr0
+ ; GCN64-FLATSCR: renamable $sgpr12 = IMPLICIT_DEF
+ ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 1, implicit-def $vgpr1
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr1
+ ; GCN64-FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
+ ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 3, implicit-def $vgpr2
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13
+ ; GCN64-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr2
+ ; GCN64-FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
+ ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 3, implicit-def $vgpr3
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13
+ ; GCN64-FLATSCR: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.1, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr3
+ ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
+ ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 7, implicit-def $vgpr4
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr4 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr4, implicit $sgpr12_sgpr13_sgpr14
+ ; GCN64-FLATSCR: $vgpr4 = V_WRITELANE_B32 $sgpr13, 1, $vgpr4, implicit $sgpr12_sgpr13_sgpr14
+ ; GCN64-FLATSCR: $vgpr4 = V_WRITELANE_B32 $sgpr14, 2, $vgpr4, implicit killed $sgpr12_sgpr13_sgpr14
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.2, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr4
+ ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
+ ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 15, implicit-def $vgpr5
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr5 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN64-FLATSCR: $vgpr5 = V_WRITELANE_B32 $sgpr13, 1, $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN64-FLATSCR: $vgpr5 = V_WRITELANE_B32 $sgpr14, 2, $vgpr5, implicit $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN64-FLATSCR: $vgpr5 = V_WRITELANE_B32 $sgpr15, 3, $vgpr5, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr5
+ ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
+ ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 31, implicit-def $vgpr6
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr6, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr6 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN64-FLATSCR: $vgpr6 = V_WRITELANE_B32 $sgpr13, 1, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN64-FLATSCR: $vgpr6 = V_WRITELANE_B32 $sgpr14, 2, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN64-FLATSCR: $vgpr6 = V_WRITELANE_B32 $sgpr15, 3, $vgpr6, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN64-FLATSCR: $vgpr6 = V_WRITELANE_B32 $sgpr16, 4, $vgpr6, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr6, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.4, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr6 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr6
+ ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
+ ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 255, implicit-def $vgpr7
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr7, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr13, 1, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr14, 2, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr15, 3, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr16, 4, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr17, 5, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr18, 6, $vgpr7, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-FLATSCR: $vgpr7 = V_WRITELANE_B32 $sgpr19, 7, $vgpr7, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr7, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.5, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr7 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr7
+ ; GCN64-FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
+ ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 65535, implicit-def $vgpr8
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr8, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr13, 1, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr14, 2, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr15, 3, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr16, 4, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr17, 5, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr18, 6, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr19, 7, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr20, 8, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr21, 9, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr22, 10, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr23, 11, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr24, 12, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr25, 13, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr26, 14, $vgpr8, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: $vgpr8 = V_WRITELANE_B32 $sgpr27, 15, $vgpr8, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr8, $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.6, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr8
+ ; GCN64-FLATSCR: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
+ ; GCN64-FLATSCR: $sgpr2_sgpr3 = S_MOV_B64 $exec
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 4294967295, implicit-def $vgpr9
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr9, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr65, 1, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr66, 2, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr67, 3, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr68, 4, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr69, 5, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr70, 6, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr71, 7, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr72, 8, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr73, 9, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr74, 10, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr75, 11, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr76, 12, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr77, 13, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr78, 14, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr79, 15, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr80, 16, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr81, 17, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr82, 18, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr83, 19, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr84, 20, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr85, 21, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr86, 22, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr87, 23, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr88, 24, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr89, 25, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr90, 26, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr91, 27, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr92, 28, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr93, 29, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr94, 30, $vgpr9, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: $vgpr9 = V_WRITELANE_B32 $sgpr95, 31, $vgpr9, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN64-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr9, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.7, addrspace 5)
+ ; GCN64-FLATSCR: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GCN64-FLATSCR: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit $vgpr9
renamable $sgpr12 = IMPLICIT_DEF
SI_SPILL_S32_SAVE killed $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
@@ -258,161 +928,6 @@ body: |
renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
SI_SPILL_S1024_SAVE killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, %stack.7, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
-
-# CHECK-LABEL: name: check_reload
-
-# FLATSCR: $sgpr33 = S_MOV_B32 0
-# FLATSCR: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc
-# FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc
-
-# S32
-# CHECK: $sgpr12 = S_MOV_B32 $exec_lo
-# CHECK: $exec_lo = S_MOV_B32 1
-# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
-# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4
-# CHECK: $exec_lo = S_MOV_B32 killed $sgpr12
-# CHECK: $sgpr12 = V_READLANE
-
-# S64
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 3
-# GCN64: $exec = S_MOV_B64 3
-# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 8
-# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
-# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
-# CHECK: $sgpr12 = V_READLANE
-# CHECK: $sgpr13 = V_READLANE
-
-# S96
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 7
-# GCN64: $exec = S_MOV_B64 7
-# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 16
-# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
-# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
-# CHECK: $sgpr12 = V_READLANE
-# CHECK: $sgpr13 = V_READLANE
-# CHECK: $sgpr14 = V_READLANE
-
-# S128
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 15
-# GCN64: $exec = S_MOV_B64 15
-# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 28
-# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
-# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
-# CHECK: $sgpr12 = V_READLANE
-# CHECK: $sgpr13 = V_READLANE
-# CHECK: $sgpr14 = V_READLANE
-# CHECK: $sgpr15 = V_READLANE
-
-# S160
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 31
-# GCN64: $exec = S_MOV_B64 31
-# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 44
-# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
-# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
-# CHECK: $sgpr12 = V_READLANE
-# CHECK: $sgpr13 = V_READLANE
-# CHECK: $sgpr14 = V_READLANE
-# CHECK: $sgpr15 = V_READLANE
-# CHECK: $sgpr16 = V_READLANE
-
-# S256
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 255
-# GCN64: $exec = S_MOV_B64 255
-# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 64
-# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
-# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
-# CHECK: $sgpr12 = V_READLANE
-# CHECK: $sgpr13 = V_READLANE
-# CHECK: $sgpr14 = V_READLANE
-# CHECK: $sgpr15 = V_READLANE
-# CHECK: $sgpr16 = V_READLANE
-# CHECK: $sgpr17 = V_READLANE
-# CHECK: $sgpr18 = V_READLANE
-# CHECK: $sgpr19 = V_READLANE
-
-# S512
-# GCN32: $sgpr12 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr12_sgpr13 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 65535
-# GCN64: $exec = S_MOV_B64 65535
-# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 96
-# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr12
-# GCN64: $exec = S_MOV_B64 killed $sgpr12_sgpr13
-# CHECK: $sgpr12 = V_READLANE
-# CHECK: $sgpr13 = V_READLANE
-# CHECK: $sgpr14 = V_READLANE
-# CHECK: $sgpr15 = V_READLANE
-# CHECK: $sgpr16 = V_READLANE
-# CHECK: $sgpr17 = V_READLANE
-# CHECK: $sgpr18 = V_READLANE
-# CHECK: $sgpr19 = V_READLANE
-# CHECK: $sgpr20 = V_READLANE
-# CHECK: $sgpr21 = V_READLANE
-# CHECK: $sgpr22 = V_READLANE
-# CHECK: $sgpr23 = V_READLANE
-# CHECK: $sgpr24 = V_READLANE
-# CHECK: $sgpr25 = V_READLANE
-# CHECK: $sgpr26 = V_READLANE
-# CHECK: $sgpr27 = V_READLANE
-
-# S1024
-# GCN32: $sgpr64 = S_MOV_B32 $exec_lo
-# GCN64: $sgpr64_sgpr65 = S_MOV_B64 $exec
-# GCN32: $exec_lo = S_MOV_B32 4294967295
-# GCN64: $exec = S_MOV_B64 4294967295
-# MUBUF: BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 160
-# FLATSCR: SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160
-# GCN32: $exec_lo = S_MOV_B32 killed $sgpr64
-# GCN64: $exec = S_MOV_B64 killed $sgpr64_sgpr65
-# CHECK: $sgpr64 = V_READLANE
-# CHECK: $sgpr65 = V_READLANE
-# CHECK: $sgpr66 = V_READLANE
-# CHECK: $sgpr67 = V_READLANE
-# CHECK: $sgpr68 = V_READLANE
-# CHECK: $sgpr69 = V_READLANE
-# CHECK: $sgpr70 = V_READLANE
-# CHECK: $sgpr71 = V_READLANE
-# CHECK: $sgpr72 = V_READLANE
-# CHECK: $sgpr73 = V_READLANE
-# CHECK: $sgpr74 = V_READLANE
-# CHECK: $sgpr75 = V_READLANE
-# CHECK: $sgpr76 = V_READLANE
-# CHECK: $sgpr77 = V_READLANE
-# CHECK: $sgpr78 = V_READLANE
-# CHECK: $sgpr79 = V_READLANE
-# CHECK: $sgpr80 = V_READLANE
-# CHECK: $sgpr81 = V_READLANE
-# CHECK: $sgpr82 = V_READLANE
-# CHECK: $sgpr83 = V_READLANE
-# CHECK: $sgpr84 = V_READLANE
-# CHECK: $sgpr85 = V_READLANE
-# CHECK: $sgpr86 = V_READLANE
-# CHECK: $sgpr87 = V_READLANE
-# CHECK: $sgpr88 = V_READLANE
-# CHECK: $sgpr89 = V_READLANE
-# CHECK: $sgpr90 = V_READLANE
-# CHECK: $sgpr91 = V_READLANE
-# CHECK: $sgpr92 = V_READLANE
-# CHECK: $sgpr93 = V_READLANE
-# CHECK: $sgpr94 = V_READLANE
-# CHECK: $sgpr95 = V_READLANE
-
---
name: check_reload
tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
index 018a2c8f2e19..cbe66a1a0414 100644
--- a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
@@ -7,14 +7,16 @@
; Make sure we are handling hazards correctly.
; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:4
-; SGPR-NEXT: s_mov_b64 exec, s[0:1]
; SGPR-NEXT: s_waitcnt vmcnt(0)
; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 0
; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 1
; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 2
; SGPR-NEXT: v_readlane_b32 s[[HI:[0-9]+]], [[VHI]], 3
-; SGPR-NEXT: s_nop 4
-; SGPR-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0
+; SGPR-NEXT: buffer_load_dword [[VHI]], off, s[96:99], 0
+; SGPR-NEXT: s_waitcnt vmcnt(0)
+; SGPR-NEXT: s_mov_b64 exec, s[4:5]
+; SGPR-NEXT: s_nop 1
+; SGPR-NEXT: buffer_store_dword v0, off, s[0:3], 0
; ALL: s_endpgm
define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
index 474461d2ae12..91d3f8c98c8d 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
@@ -14,11 +14,11 @@
; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], [[M0_LANE:[0-9]+]]
+; TOVMEM: s_mov_b64 [[COPY_EXEC:s\[[0-9]+:[0-9]+\]]], exec
+; TOVMEM: s_mov_b64 exec, 1
; TOVMEM: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0
-; TOVMEM: s_mov_b32 [[COPY_EXEC_LO:s[0-9]+]], exec_lo
-; TOVMEM: s_mov_b32 exec_lo, 1
; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; 4-byte Folded Spill
-; TOVMEM: s_mov_b32 exec_lo, [[COPY_EXEC_LO]]
+; TOVMEM: s_mov_b64 exec, [[COPY_EXEC]]
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index afccbb4b2b71..543de9227d7f 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -46,6 +46,7 @@ entry:
; CHECK-LABEL: test_limited_sgpr
; GFX6: s_add_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
+; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9:]+}}], s32
; GFX6-NEXT: s_sub_u32 s32, s32, 0x[[OFFSET:[0-9a-f]+]]
; GFX6: NumSgprs: 48
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll b/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll
new file mode 100644
index 000000000000..c4c887b1906a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+
+; Spill an SGPR to scratch without having spare SGPRs available to save exec
+
+define amdgpu_kernel void @test() #1 {
+; GFX10-LABEL: test:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s10, -1
+; GFX10-NEXT: s_mov_b32 s11, 0x31e16000
+; GFX10-NEXT: s_add_u32 s8, s8, s1
+; GFX10-NEXT: s_addc_u32 s9, s9, 0
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ; def s[0:7]
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ; def s[8:12]
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_not_b64 exec, exec
+; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GFX10-NEXT: v_writelane_b32 v0, s8, 0
+; GFX10-NEXT: v_writelane_b32 v0, s9, 1
+; GFX10-NEXT: v_writelane_b32 v0, s10, 2
+; GFX10-NEXT: v_writelane_b32 v0, s11, 3
+; GFX10-NEXT: v_writelane_b32 v0, s12, 4
+; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_not_b64 exec, exec
+; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_not_b64 exec, exec
+; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_not_b64 exec, exec
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ; use s[0:7]
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_mov_b64 s[6:7], exec
+; GFX10-NEXT: s_mov_b64 exec, 31
+; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_readlane_b32 s0, v0, 0
+; GFX10-NEXT: v_readlane_b32 s1, v0, 1
+; GFX10-NEXT: v_readlane_b32 s2, v0, 2
+; GFX10-NEXT: v_readlane_b32 s3, v0, 3
+; GFX10-NEXT: v_readlane_b32 s4, v0, 4
+; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_mov_b64 exec, s[6:7]
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ; use s[0:3]
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_endpgm
+ %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "={s[0:7]}" () #0
+ %wide.sgpr2 = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}" () #0
+ call void asm sideeffect "", "~{v[0:7]}" () #0
+ call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr2) #0
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "amdgpu-num-sgpr"="16" "amdgpu-num-vgpr"="8" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
index dd230752ef4b..5e1d0ee314b3 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
@@ -46,27 +46,31 @@ body: |
; GFX9: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $vcc = IMPLICIT_DEF
+ ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GFX9: $exec = S_MOV_B64 3, implicit-def $vgpr0
+ ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
- ; GFX9: $vcc = S_MOV_B64 $exec
- ; GFX9: $exec = S_MOV_B64 3
- ; GFX9: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
- ; GFX9: $exec = S_MOV_B64 $vcc
- ; GFX9: $vcc_hi = V_READLANE_B32 $vgpr0, 1
- ; GFX9: $vcc_lo = V_READLANE_B32 killed $vgpr0, 0
- ; GFX9: $vcc = IMPLICIT_DEF
- ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
- ; GFX9: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
- ; GFX9: $vcc = S_MOV_B64 $exec
- ; GFX9: $exec = S_MOV_B64 3
; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
- ; GFX9: $exec = S_MOV_B64 killed $vcc
- ; GFX9: $vcc = S_MOV_B64 $exec
- ; GFX9: $exec = S_MOV_B64 3
- ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
- ; GFX9: $exec = S_MOV_B64 killed $vcc
- ; GFX9: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc
- ; GFX9: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1
+ ; GFX9: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GFX9: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr0
+ ; GFX9: $vcc = IMPLICIT_DEF
+ ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GFX9: $exec = S_MOV_B64 3, implicit-def $vgpr1
+ ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GFX9: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc
+ ; GFX9: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc
+ ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GFX9: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GFX9: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr1
+ ; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GFX9: $exec = S_MOV_B64 3, implicit-def $vgpr2
+ ; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; GFX9: $vcc_lo = V_READLANE_B32 $vgpr2, 0, implicit-def $vcc
+ ; GFX9: $vcc_hi = V_READLANE_B32 killed $vgpr2, 1
+ ; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GFX9: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr2
; GFX10-LABEL: name: check_vcc
; GFX10: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9
; GFX10: $sgpr33 = S_MOV_B32 0
@@ -77,27 +81,31 @@ body: |
; GFX10: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr9, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
; GFX10: $vcc = IMPLICIT_DEF
+ ; GFX10: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GFX10: $exec = S_MOV_B64 3, implicit-def $vgpr0
+ ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc
- ; GFX10: $vcc = S_MOV_B64 $exec
- ; GFX10: $exec = S_MOV_B64 3
- ; GFX10: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
- ; GFX10: $exec = S_MOV_B64 $vcc
- ; GFX10: $vcc_hi = V_READLANE_B32 $vgpr0, 1
- ; GFX10: $vcc_lo = V_READLANE_B32 killed $vgpr0, 0
- ; GFX10: $vcc = IMPLICIT_DEF
- ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc
- ; GFX10: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc
- ; GFX10: $vcc = S_MOV_B64 $exec
- ; GFX10: $exec = S_MOV_B64 3
; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
- ; GFX10: $exec = S_MOV_B64 killed $vcc
- ; GFX10: $vcc = S_MOV_B64 $exec
- ; GFX10: $exec = S_MOV_B64 3
- ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
- ; GFX10: $exec = S_MOV_B64 killed $vcc
- ; GFX10: $vcc_lo = V_READLANE_B32 $vgpr0, 0, implicit-def $vcc
- ; GFX10: $vcc_hi = V_READLANE_B32 killed $vgpr0, 1
+ ; GFX10: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GFX10: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr0
+ ; GFX10: $vcc = IMPLICIT_DEF
+ ; GFX10: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GFX10: $exec = S_MOV_B64 3, implicit-def $vgpr1
+ ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GFX10: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc
+ ; GFX10: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc
+ ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+ ; GFX10: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GFX10: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr1
+ ; GFX10: $sgpr0_sgpr1 = S_MOV_B64 $exec
+ ; GFX10: $exec = S_MOV_B64 3, implicit-def $vgpr2
+ ; GFX10: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store 4 into %fixed-stack.0, align 16, addrspace 5)
+ ; GFX10: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+ ; GFX10: $vcc_lo = V_READLANE_B32 $vgpr2, 0, implicit-def $vcc
+ ; GFX10: $vcc_hi = V_READLANE_B32 killed $vgpr2, 1
+ ; GFX10: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load 4 from %fixed-stack.0, align 16, addrspace 5)
+ ; GFX10: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit $vgpr2
$vcc = IMPLICIT_DEF
SI_SPILL_S64_SAVE $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32