summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJessica Paquette <jpaquette@apple.com>2020-12-04 15:51:44 -0800
committerTom Stellard <tstellar@redhat.com>2020-12-15 18:28:58 -0500
commit280e47ea0e837b809be03f2048ac8abc14dbc387 (patch)
treec42f26b214be904e7b7b325642bb6ca4bfffd9df
parent9e16c5bfae6e8d3cbec74376c2e734e3ff4ba11b (diff)
downloadllvm-280e47ea0e837b809be03f2048ac8abc14dbc387.tar.gz
[AArch64][GlobalISel] Narrow 128-bit regs to 64-bit regs in emitTestBit
When we have a 128-bit register, emitTestBit would incorrectly narrow to 32 bits always. If the bit number was > 32, then we would need a TB(N)ZX. This would cause a crash, as we'd have the wrong register class. (PR48379) This generalizes `narrowExtReg` into `moveScalarRegClass`. This also allows us to remove `widenGPRBankRegIfNeeded` entirely, since `selectCopy` correctly handles SUBREG_TO_REG etc. This does create some codegen changes (since `selectCopy` uses the `all` regclass variants). However, I think that these will likely be optimized away, and we can always improve the `selectCopy` code. It looks like we should revisit `selectCopy` at this point, and possibly refactor it into at least one `emit` function. Differential Revision: https://reviews.llvm.org/D92707 (cherry picked from commit 195a7af0abb26915f962462f69c0f17e3835f78b)
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp97
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir5
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir32
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir10
4 files changed, 67 insertions, 77 deletions
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 90bab9603245..7733fe7f7b24 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -289,14 +289,15 @@ private:
getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
bool IsLoadStore = false) const;
- /// Instructions that accept extend modifiers like UXTW expect the register
- /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
- /// subregister copy if necessary. Return either ExtReg, or the result of the
- /// new copy.
- Register narrowExtendRegIfNeeded(Register ExtReg,
- MachineIRBuilder &MIB) const;
- Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
- MachineIRBuilder &MIB) const;
+ /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
+ ///
+ /// \returns Either \p Reg if no change was necessary, or the new register
+ /// created by moving \p Reg.
+ ///
+ /// Note: This uses emitCopy right now.
+ Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
+ MachineIRBuilder &MIB) const;
+
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
@@ -1195,10 +1196,10 @@ MachineInstr *AArch64InstructionSelector::emitTestBit(
// TBNZW work.
bool UseWReg = Bit < 32;
unsigned NecessarySize = UseWReg ? 32 : 64;
- if (Size < NecessarySize)
- TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
- else if (Size > NecessarySize)
- TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
+ if (Size != NecessarySize)
+ TestReg = moveScalarRegClass(
+ TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
+ MIB);
static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
{AArch64::TBZW, AArch64::TBNZW}};
@@ -4984,7 +4985,7 @@ AArch64InstructionSelector::selectExtendedSHL(
// Need a 32-bit wide register here.
MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
- OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
+ OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
}
// We can use the LHS of the GEP as the base, and the LHS of the shift as an
@@ -5156,8 +5157,8 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
// Need a 32-bit wide register.
MachineIRBuilder MIB(*PtrAdd);
- Register ExtReg =
- narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
+ Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
+ AArch64::GPR32RegClass, MIB);
unsigned SignExtend = Ext == AArch64_AM::SXTW;
// Base is LHS, offset is ExtReg.
@@ -5431,67 +5432,21 @@ AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
}
}
-Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
- Register ExtReg, MachineIRBuilder &MIB) const {
+Register AArch64InstructionSelector::moveScalarRegClass(
+ Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
MachineRegisterInfo &MRI = *MIB.getMRI();
- if (MRI.getType(ExtReg).getSizeInBits() == 32)
- return ExtReg;
-
- // Insert a copy to move ExtReg to GPR32.
- Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
+ auto Ty = MRI.getType(Reg);
+ assert(!Ty.isVector() && "Expected scalars only!");
+ if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
+ return Reg;
- // Select the copy into a subregister copy.
+ // Create a copy and immediately select it.
+ // FIXME: We should have an emitCopy function?
+ auto Copy = MIB.buildCopy({&RC}, {Reg});
selectCopy(*Copy, TII, MRI, TRI, RBI);
return Copy.getReg(0);
}
-Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
- Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
- assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");
- MachineRegisterInfo &MRI = *MIB.getMRI();
- unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
- assert(WideSize >= NarrowSize &&
- "WideSize cannot be smaller than NarrowSize!");
-
- // If the sizes match, just return the register.
- //
- // If NarrowSize is an s1, then we can select it to any size, so we'll treat
- // it as a don't care.
- if (NarrowSize == WideSize || NarrowSize == 1)
- return Reg;
-
- // Now check the register classes.
- const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
- const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize);
- const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize);
- assert(OrigRC && "Could not determine narrow RC?");
- assert(WideRC && "Could not determine wide RC?");
-
- // If the sizes differ, but the register classes are the same, there is no
- // need to insert a SUBREG_TO_REG.
- //
- // For example, an s8 that's supposed to be a GPR will be selected to either
- // a GPR32 or a GPR64 register. Note that this assumes that the s8 will
- // always end up on a GPR32.
- if (OrigRC == WideRC)
- return Reg;
-
- // We have two different register classes. Insert a SUBREG_TO_REG.
- unsigned SubReg = 0;
- getSubRegForClass(OrigRC, TRI, SubReg);
- assert(SubReg && "Couldn't determine subregister?");
-
- // Build the SUBREG_TO_REG and return the new, widened register.
- auto SubRegToReg =
- MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
- .addImm(0)
- .addUse(Reg)
- .addImm(SubReg);
- constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
- return SubRegToReg.getReg(0);
-}
-
/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
InstructionSelector::ComplexRendererFns
@@ -5552,7 +5507,7 @@ AArch64InstructionSelector::selectArithExtendedRegister(
// We require a GPR32 here. Narrow the ExtReg if needed using a subregister
// copy.
MachineIRBuilder MIB(*RootDef);
- ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
+ ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
[=](MachineInstrBuilder &MIB) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
index 977bb5a64cf5..9962bd87c175 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
@@ -78,8 +78,9 @@ body: |
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $h0
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
- ; CHECK: %copy:gpr32 = COPY [[SUBREG_TO_REG]]
- ; CHECK: TBNZW %copy, 3, %bb.1
+ ; CHECK: %copy:gpr32all = COPY [[SUBREG_TO_REG]]
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %copy
+ ; CHECK: TBNZW [[COPY]], 3, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
index efb999909ccc..d5902d70842b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
@@ -34,3 +34,35 @@ body: |
bb.1:
RET_ReallyLR
...
+---
+name: no_trunc
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: no_trunc
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $x0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY [[LDRQui]].dsub
+ ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+ ; CHECK: TBNZX [[COPY2]], 33, %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ liveins: $x0
+ %1:gpr(p0) = COPY $x0
+ %3:gpr(s64) = G_CONSTANT i64 8589934592
+ %5:gpr(s64) = G_CONSTANT i64 0
+ %0:fpr(s128) = G_LOAD %1:gpr(p0) :: (load 16)
+ %2:fpr(s64) = G_TRUNC %0:fpr(s128)
+ %8:gpr(s64) = COPY %2:fpr(s64)
+ %4:gpr(s64) = G_AND %8:gpr, %3:gpr
+ %7:gpr(s32) = G_ICMP intpred(ne), %4:gpr(s64), %5:gpr
+ %6:gpr(s1) = G_TRUNC %7:gpr(s32)
+ G_BRCOND %6:gpr(s1), %bb.1
+ bb.1:
+ RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
index 22963c50a2eb..7db671b08224 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
@@ -106,8 +106,9 @@ body: |
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $w0
; CHECK: %reg:gpr32all = COPY $w0
- ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
- ; CHECK: TBZX [[SUBREG_TO_REG]], 33, %bb.1
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, %reg, %subreg.sub_32
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
+ ; CHECK: TBZX [[COPY]], 33, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
@@ -140,8 +141,9 @@ body: |
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: %reg:gpr32 = IMPLICIT_DEF
- ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
- ; CHECK: TBZX [[SUBREG_TO_REG]], 33, %bb.1
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, %reg, %subreg.sub_32
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
+ ; CHECK: TBZX [[COPY]], 33, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR