author     Amara Emerson <amara@apple.com>       2020-11-03 11:17:31 -0800
committer  Amara Emerson <amara@apple.com>       2020-11-03 17:25:14 -0800
commit     393b55380afcd8681db03dfbdea2f27ff3517d25 (patch)
tree       17033cc5565fdadf255412a7553aa78c414f0148
parent     c298824f9caf407aedeb4958467fb2a18025638d (diff)
download   llvm-393b55380afcd8681db03dfbdea2f27ff3517d25.tar.gz
[AArch64][GlobalISel] Add combine for G_EXTRACT_VECTOR_ELT to allow selection of pairwise FADD.
For the <2 x float> case, instead of adding another combine or legalization to get it into a <4 x float> form, I'm just adding a GISel-specific selection pattern to cover it.

Differential Revision: https://reviews.llvm.org/D90699
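To make the target shape concrete, here is a minimal source-level sketch. It is not part of the patch; the file name, function name, and build command are illustrative assumptions, and it presumes an AArch64 target with NEON available.

    // pairwise.cpp - illustrative only, not from this commit.
    // Build (example): clang++ -O2 --target=aarch64-linux-gnu -c pairwise.cpp
    #include <arm_neon.h>

    // Adds lane 0 and lane 1 of a 64-bit <2 x float> vector. This is the
    // scalar extract + extract + fadd shape that the new FADDPv2i32p selection
    // pattern matches directly; the new combine rewrites the shuffle-based
    // form extract(fadd(v, shuffle(v, <1,u>)), 0) into this same shape so it
    // can be selected as a single pairwise FADDP.
    float pairwise_sum(float32x2_t v) {
      return vget_lane_f32(v, 0) + vget_lane_f32(v, 1);
    }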
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Combine.td                                       |  11
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrGISel.td                                    |   7
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp                  |  71
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extractvec-faddp.mir | 188
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/select-faddp.mir                           |  62
5 files changed, 337 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1e0be249e525..560e362b074b 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -94,6 +94,14 @@ def adjust_icmp_imm : GICombineRule <
def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>;
+def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
+def extractvecelt_pairwise_add : GICombineRule<
+ (defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo),
+ (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
+ [{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -110,6 +118,7 @@ def AArch64PostLegalizerCombinerHelper
[copy_prop, erase_undef_store, combines_for_extload,
sext_trunc_sextload,
hoist_logic_op_with_same_opcode_hands,
- and_trivial_mask, xor_of_and_with_same_reg]> {
+ and_trivial_mask, xor_of_and_with_same_reg,
+ extractvecelt_pairwise_add]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 79b563e345a8..1bd9ce25125d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -135,4 +135,9 @@ def : GINodeEquiv<G_TRN1, AArch64trn1>;
def : GINodeEquiv<G_TRN2, AArch64trn2>;
def : GINodeEquiv<G_EXT, AArch64ext>;
def : GINodeEquiv<G_VASHR, AArch64vashr>;
-def : GINodeEquiv<G_VLSHR, AArch64vlshr>;
\ No newline at end of file
+def : GINodeEquiv<G_VLSHR, AArch64vlshr>;
+
+// These are patterns that we only use for GlobalISel via the importer.
+def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
+ (vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
+ (f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>;
\ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 4f3938852a40..17520ded4ba7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -24,8 +24,11 @@
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
@@ -33,6 +36,74 @@
using namespace llvm;
+/// This combine tries to do what performExtractVectorEltCombine does in SDAG.
+/// Rewrite for pairwise fadd pattern
+/// (s32 (g_extract_vector_elt
+/// (g_fadd (vXs32 Other)
+/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
+/// ->
+/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
+/// (g_extract_vector_elt (vXs32 Other) 1)))
+bool matchExtractVecEltPairwiseAdd(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::tuple<unsigned, LLT, Register> &MatchInfo) {
+ Register Src1 = MI.getOperand(1).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto Cst = getConstantVRegValWithLookThrough(Src2, MRI);
+ if (!Cst || Cst->Value != 0)
+ return false;
+ // SDAG also checks for FullFP16, but this looks to be beneficial anyway.
+
+ // Now check for an fadd operation. TODO: expand this for integer add?
+ auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
+ if (!FAddMI)
+ return false;
+
+ // If we add support for integer add, we must restrict these types to just s64.
+ unsigned DstSize = DstTy.getSizeInBits();
+ if (DstSize != 16 && DstSize != 32 && DstSize != 64)
+ return false;
+
+ Register Src1Op1 = FAddMI->getOperand(1).getReg();
+ Register Src1Op2 = FAddMI->getOperand(2).getReg();
+ MachineInstr *Shuffle =
+ getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
+ MachineInstr *Other = MRI.getVRegDef(Src1Op1);
+ if (!Shuffle) {
+ Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
+ Other = MRI.getVRegDef(Src1Op2);
+ }
+
+ // We're looking for a shuffle that moves the second element to index 0.
+ if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
+ Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
+ std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
+ std::get<1>(MatchInfo) = DstTy;
+ std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
+ return true;
+ }
+ return false;
+}
+
+bool applyExtractVecEltPairwiseAdd(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+ std::tuple<unsigned, LLT, Register> &MatchInfo) {
+ unsigned Opc = std::get<0>(MatchInfo);
+ assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
+ // We want to generate two extracts of elements 0 and 1, and add them.
+ LLT Ty = std::get<1>(MatchInfo);
+ Register Src = std::get<2>(MatchInfo);
+ LLT s64 = LLT::scalar(64);
+ B.setInstrAndDebugLoc(MI);
+ auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
+ auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
+ B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
+ MI.eraseFromParent();
+ return true;
+}
+
#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extractvec-faddp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extractvec-faddp.mir
new file mode 100644
index 000000000000..790634563068
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extractvec-faddp.mir
@@ -0,0 +1,188 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: f64_faddp
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: f64_faddp
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C1]](s64)
+ ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[EVEC]], [[EVEC1]]
+ ; CHECK: $d0 = COPY [[FADD]](s64)
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:_(<2 x s64>) = COPY $q0
+ %2:_(<2 x s64>) = G_IMPLICIT_DEF
+ %5:_(s64) = G_CONSTANT i64 0
+ %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
+ %3:_(<2 x s64>) = G_FADD %1, %0
+ %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
+ $d0 = COPY %4(s64)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: f64_faddp_commuted
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: f64_faddp_commuted
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C1]](s64)
+ ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[EVEC]], [[EVEC1]]
+ ; CHECK: $d0 = COPY [[FADD]](s64)
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:_(<2 x s64>) = COPY $q0
+ %2:_(<2 x s64>) = G_IMPLICIT_DEF
+ %5:_(s64) = G_CONSTANT i64 0
+ %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
+ %3:_(<2 x s64>) = G_FADD %0, %1
+ %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
+ $d0 = COPY %4(s64)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: f32_faddp
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$d0' }
+body: |
+ bb.1:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: f32_faddp
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C1]](s64)
+ ; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[EVEC]], [[EVEC1]]
+ ; CHECK: $s0 = COPY [[FADD]](s32)
+ ; CHECK: RET_ReallyLR implicit $s0
+ %0:_(<2 x s32>) = COPY $d0
+ %2:_(<2 x s32>) = G_IMPLICIT_DEF
+ %5:_(s64) = G_CONSTANT i64 0
+ %1:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(1, undef)
+ %3:_(<2 x s32>) = G_FADD %1, %0
+ %4:_(s32) = G_EXTRACT_VECTOR_ELT %3(<2 x s32>), %5(s64)
+ $s0 = COPY %4(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: f32_faddp_commuted
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$d0' }
+body: |
+ bb.1:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: f32_faddp_commuted
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C1]](s64)
+ ; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[EVEC]], [[EVEC1]]
+ ; CHECK: $s0 = COPY [[FADD]](s32)
+ ; CHECK: RET_ReallyLR implicit $s0
+ %0:_(<2 x s32>) = COPY $d0
+ %2:_(<2 x s32>) = G_IMPLICIT_DEF
+ %5:_(s64) = G_CONSTANT i64 0
+ %1:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(1, undef)
+ %3:_(<2 x s32>) = G_FADD %0, %1
+ %4:_(s32) = G_EXTRACT_VECTOR_ELT %3(<2 x s32>), %5(s64)
+ $s0 = COPY %4(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: wrong_extract_idx
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: wrong_extract_idx
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[DEF]], shufflemask(1, undef)
+ ; CHECK: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[SHUF]], [[COPY]]
+ ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[FADD]](<2 x s64>), [[C]](s64)
+ ; CHECK: $d0 = COPY [[EVEC]](s64)
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:_(<2 x s64>) = COPY $q0
+ %2:_(<2 x s64>) = G_IMPLICIT_DEF
+ %5:_(s64) = G_CONSTANT i64 1
+ %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
+ %3:_(<2 x s64>) = G_FADD %1, %0
+ %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
+ $d0 = COPY %4(s64)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: wrong_shuffle_mask
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: wrong_shuffle_mask
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[DEF]], shufflemask(0, undef)
+ ; CHECK: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[SHUF]], [[COPY]]
+ ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[FADD]](<2 x s64>), [[C]](s64)
+ ; CHECK: $d0 = COPY [[EVEC]](s64)
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:_(<2 x s64>) = COPY $q0
+ %2:_(<2 x s64>) = G_IMPLICIT_DEF
+ %5:_(s64) = G_CONSTANT i64 0
+ %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(0, undef)
+ %3:_(<2 x s64>) = G_FADD %1, %0
+ %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
+ $d0 = COPY %4(s64)
+ RET_ReallyLR implicit $d0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-faddp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-faddp.mir
new file mode 100644
index 000000000000..770630851d1b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-faddp.mir
@@ -0,0 +1,62 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -run-pass=instruction-select %s -o - | FileCheck %s
+---
+name: f64_faddp
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: f64_faddp
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK: [[FADDPv2i64p:%[0-9]+]]:fpr64 = FADDPv2i64p [[COPY]]
+ ; CHECK: $d0 = COPY [[FADDPv2i64p]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:fpr(<2 x s64>) = COPY $q0
+ %6:gpr(s64) = G_CONSTANT i64 0
+ %7:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %6(s64)
+ %8:gpr(s64) = G_CONSTANT i64 1
+ %9:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %8(s64)
+ %4:fpr(s64) = G_FADD %7, %9
+ $d0 = COPY %4(s64)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: f32_faddp
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$d0' }
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.1:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: f32_faddp
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[FADDPv2i32p:%[0-9]+]]:fpr32 = FADDPv2i32p [[COPY]]
+ ; CHECK: $s0 = COPY [[FADDPv2i32p]]
+ ; CHECK: RET_ReallyLR implicit $s0
+ %0:fpr(<2 x s32>) = COPY $d0
+ %6:gpr(s64) = G_CONSTANT i64 0
+ %7:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %6(s64)
+ %8:gpr(s64) = G_CONSTANT i64 1
+ %9:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %8(s64)
+ %4:fpr(s32) = G_FADD %7, %9
+ $s0 = COPY %4(s32)
+ RET_ReallyLR implicit $s0
+
+...