diff options
author | Tom Stellard <tstellar@redhat.com> | 2019-05-03 23:50:08 +0000 |
---|---|---|
committer | Tom Stellard <tstellar@redhat.com> | 2019-05-03 23:50:08 +0000 |
commit | 15b37004545f948860fdbe570d6898b40206339b (patch) | |
tree | 1e9d6870cb42a6055e0f57420988f9c530ba6501 | |
parent | 1dd2d3a72e37ffd07b7538068f3967df47f55ce4 (diff) | |
download | llvm-15b37004545f948860fdbe570d6898b40206339b.tar.gz |
Merging r359834:
------------------------------------------------------------------------
r359834 | evandro | 2019-05-02 15:01:39 -0700 (Thu, 02 May 2019) | 3 lines
[AArch64] Update for Exynos
Fix the forwarding of multiplication results for Exynos M4.
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_80@359946 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/AArch64/AArch64SchedExynosM4.td | 36 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64SchedPredExynos.td | 11 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64SchedPredicates.td | 53 |
3 files changed, 18 insertions, 82 deletions
diff --git a/lib/Target/AArch64/AArch64SchedExynosM4.td b/lib/Target/AArch64/AArch64SchedExynosM4.td index 4d892465b3f2..61652b1d8e3d 100644 --- a/lib/Target/AArch64/AArch64SchedExynosM4.td +++ b/lib/Target/AArch64/AArch64SchedExynosM4.td @@ -239,7 +239,6 @@ def M4WriteNEONK : SchedWriteRes<[M4UnitNSHF, M4UnitS0]> { let Latency = 5; let NumMicroOps = 2; } def M4WriteNEONL : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; } -def M4WriteNEONM : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; } def M4WriteNEONN : SchedWriteRes<[M4UnitNMSC, M4UnitNMSC]> { let Latency = 5; let NumMicroOps = 2; } @@ -480,8 +479,6 @@ def M4WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M4WriteNALU1]>, SchedVar<NoSchedPred, [M4WriteZ0]>]>; def M4WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M4WriteZ0]>, SchedVar<NoSchedPred, [M4WriteNALU1]>]>; -def M4WriteMULL : SchedWriteVariant<[SchedVar<ExynosLongVectorUpperPred, [M4WriteNEONM]>, - SchedVar<NoSchedPred, [M4WriteNMUL3]>]>; // Fast forwarding. def M4ReadAESM1 : SchedReadAdvance<+1, [M4WriteNCRY1]>; @@ -489,7 +486,8 @@ def M4ReadFMACM1 : SchedReadAdvance<+1, [M4WriteFMAC4, M4WriteFMAC4H, M4WriteFMAC5]>; def M4ReadNMULM1 : SchedReadAdvance<+1, [M4WriteNMUL3]>; -def M4ReadMULLP2 : SchedReadAdvance<-2, [M4WriteNEONM]>; +def M4ReadNMULP2 : SchedReadAdvance<-2, [M4WriteNMUL3]>; + //===----------------------------------------------------------------------===// // Coarse scheduling model. @@ -662,10 +660,8 @@ def : InstRW<[M4WriteNEONK], (instregex "^FMOVDXHighr")>; def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>; def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>; def : InstRW<[M4WriteNMSC1], (instregex "^FRECPXv1")>; -def : InstRW<[M4WriteFMAC4H, - M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S16")>; -def : InstRW<[M4WriteFMAC4, - M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S(32|64)")>; +def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)S16")>; +def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)S(32|64)")>; // FP load instructions. def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>; @@ -736,14 +732,20 @@ def : InstRW<[M4WriteNALU1], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>; def : InstRW<[M4WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>; def : InstRW<[M4WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>; def : InstRW<[M4WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>; -def : InstRW<[M4WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>; def : InstRW<[M4WriteNMUL3, M4ReadNMULM1], (instregex "^ML[AS]v")>; -def : InstRW<[M4WriteNMUL3], (instregex "^SQRDML[AS]H")>; -def : InstRW<[M4WriteMULL, - M4ReadMULLP2], (instregex "^(S|U|SQD)ML[AS]Lv")>; -def : InstRW<[M4WriteMULL, - M4ReadMULLP2], (instregex "^(S|U|SQD)MULLv")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULM1], (instregex "^(SQR?D)?MULH?v")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULM1], (instregex "^SQRDML[AS]H")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULM1], (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULP2], (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULM1], (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULP2], (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>; def : InstRW<[M4WriteNMUL3], (instregex "^[SU]DOT(lane)?v")>; def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ADALPv")>; def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>; @@ -808,10 +810,8 @@ def : InstRW<[M4WriteNALU1], (instregex "^FMOVv.f(32|64)")>; def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>; def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>; def : InstRW<[M4WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>; -def : InstRW<[M4WriteFMAC4H, - M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f16")>; -def : InstRW<[M4WriteFMAC4, - M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>; +def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)Sv.f16")>; +def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>; def : InstRW<[M4WriteNSHF1], (instregex "^REV(16|32|64)v")>; def : InstRW<[M4WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>; def : InstRW<[M4WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>; diff --git a/lib/Target/AArch64/AArch64SchedPredExynos.td b/lib/Target/AArch64/AArch64SchedPredExynos.td index 48c54230e9d8..316036d89406 100644 --- a/lib/Target/AArch64/AArch64SchedPredExynos.td +++ b/lib/Target/AArch64/AArch64SchedPredExynos.td @@ -103,17 +103,6 @@ def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>; // Identify FP instructions. def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>; -// Identify whether an instruction whose result is a long vector -// operates on the upper half of the input registers. -def ExynosLongVectorUpperFn : TIIPredicate< - "isExynosLongVectorUpper", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - IsLongVectorUpperOp.ValidOpcodes, - MCReturnStatement<TruePred>>], - MCReturnStatement<FalsePred>>>; -def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>; - // Identify 128-bit NEON instructions. def ExynosQFormPred : MCSchedPredicate<CheckQForm>; diff --git a/lib/Target/AArch64/AArch64SchedPredicates.td b/lib/Target/AArch64/AArch64SchedPredicates.td index dbaf11fc95dd..b23572b41b9c 100644 --- a/lib/Target/AArch64/AArch64SchedPredicates.td +++ b/lib/Target/AArch64/AArch64SchedPredicates.td @@ -268,59 +268,6 @@ def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX, def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes, IsStoreRegOffsetOp.ValidOpcodes)>; -// Identify whether an instruction whose result is a long vector -// operates on the upper half of the input registers. -def IsLongVectorUpperOp : CheckOpcode<[FCVTLv8i16, FCVTLv4i32, - FCVTNv8i16, FCVTNv4i32, - FCVTXNv4f32, - PMULLv16i8, PMULLv2i64, - RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32, - RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift, - RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32, - SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64, - SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64, - SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64, - SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64, - SHLLv16i8, SHLLv8i16, SHLLv4i32, - SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift, - SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64, - SMLALv8i16_indexed, SMLALv4i32_indexed, - SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64, - SMLSLv8i16_indexed, SMLSLv4i32_indexed, - SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64, - SMULLv8i16_indexed, SMULLv4i32_indexed, - SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64, - SQDMLALv8i16_indexed, SQDMLALv4i32_indexed, - SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64, - SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed, - SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64, - SQDMULLv8i16_indexed, SQDMULLv4i32_indexed, - SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift, - SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift, - SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift, - SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift, - SQXTNv16i8, SQXTNv8i16, SQXTNv4i32, - SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32, - SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift, - SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64, - SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64, - UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64, - UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64, - UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64, - UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64, - UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64, - UMLALv8i16_indexed, UMLALv4i32_indexed, - UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64, - UMLSLv8i16_indexed, UMLSLv4i32_indexed, - UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64, - UMULLv8i16_indexed, UMULLv4i32_indexed, - UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift, - UQXTNv16i8, UQXTNv8i16, UQXTNv4i32, - USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift, - USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64, - USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64, - XTNv16i8, XTNv8i16, XTNv4i32]>; - // Target predicates. // Identify an instruction that effectively transfers a register to another. |