summaryrefslogtreecommitdiff
path: root/lld
diff options
context:
space:
mode:
authorPeter Smith <peter.smith@arm.com>2023-04-17 18:37:05 +0100
committerPeter Smith <peter.smith@arm.com>2023-04-24 13:48:22 +0100
commitd0cdc5ddd75d6e9005f35ce199ee8fe41b4ae509 (patch)
treecb1addad149f9f7603a67ed1af5633e38484cca0 /lld
parent2bb7e00b098cdbf87b9e2e2f5ec85b661664b709 (diff)
downloadllvm-d0cdc5ddd75d6e9005f35ce199ee8fe41b4ae509.tar.gz
[LLD][ELF][AArch64] Add AArch64 short range thunk support
The AArch64 branch immediate instruction has a 128MiB range. This makes it suitable for use as a short range thunk in the same way as short thunks are implemented in Arm and PPC. This patch adds support for short range thunks to AArch64. Adding short range thunk support should mean that OutputSections can grow to nearly 256 MiB in size without needing long-range indirect branches. Differential Revision: https://reviews.llvm.org/D148701
Diffstat (limited to 'lld')
-rw-r--r--lld/ELF/Thunks.cpp71
-rw-r--r--lld/test/ELF/aarch64-cortex-a53-843419-thunk.s4
-rw-r--r--lld/test/ELF/aarch64-long-thunk-converge.s53
-rw-r--r--lld/test/ELF/aarch64-thunk-reuse2.s12
-rw-r--r--lld/test/ELF/aarch64-thunk-script.s18
-rw-r--r--lld/test/ELF/aarch64-thunk-section-location.s7
6 files changed, 124 insertions, 41 deletions
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index 5bfcf6218084..edcc6a92109b 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -44,21 +44,45 @@ using namespace lld::elf;
namespace {
-// AArch64 long range Thunks
-class AArch64ABSLongThunk final : public Thunk {
+// Base class for AArch64 thunks.
+//
+// An AArch64 thunk may be either short or long. A short thunk is simply a
+// branch (B) instruction, and it may be used to call AArch64 functions when the
+// distance from the thunk to the target is less than 128MB. Long thunks can
+// branch to any virtual address and they are implemented in the derived
+// classes. This class tries to create a short thunk if the target is in range,
+// otherwise it creates a long thunk.
+class AArch64Thunk : public Thunk {
public:
- AArch64ABSLongThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {}
- uint32_t size() override { return 16; }
+ AArch64Thunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {}
+ bool getMayUseShortThunk();
void writeTo(uint8_t *buf) override;
+
+private:
+ bool mayUseShortThunk = true;
+ virtual void writeLong(uint8_t *buf) = 0;
+};
+
+// AArch64 long range Thunks.
+class AArch64ABSLongThunk final : public AArch64Thunk {
+public:
+ AArch64ABSLongThunk(Symbol &dest, int64_t addend)
+ : AArch64Thunk(dest, addend) {}
+ uint32_t size() override { return getMayUseShortThunk() ? 4 : 16; }
void addSymbols(ThunkSection &isec) override;
+
+private:
+ void writeLong(uint8_t *buf) override;
};
-class AArch64ADRPThunk final : public Thunk {
+class AArch64ADRPThunk final : public AArch64Thunk {
public:
- AArch64ADRPThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {}
- uint32_t size() override { return 12; }
- void writeTo(uint8_t *buf) override;
+ AArch64ADRPThunk(Symbol &dest, int64_t addend) : AArch64Thunk(dest, addend) {}
+ uint32_t size() override { return getMayUseShortThunk() ? 4 : 12; }
void addSymbols(ThunkSection &isec) override;
+
+private:
+ void writeLong(uint8_t *buf) override;
};
// Base class for ARM thunks.
@@ -461,14 +485,34 @@ void Thunk::setOffset(uint64_t newOffset) {
offset = newOffset;
}
-// AArch64 long range Thunks
-
+// AArch64 Thunk base class.
static uint64_t getAArch64ThunkDestVA(const Symbol &s, int64_t a) {
uint64_t v = s.isInPlt() ? s.getPltVA() : s.getVA(a);
return v;
}
-void AArch64ABSLongThunk::writeTo(uint8_t *buf) {
+bool AArch64Thunk::getMayUseShortThunk() {
+ if (!mayUseShortThunk)
+ return false;
+ uint64_t s = getAArch64ThunkDestVA(destination, addend);
+ uint64_t p = getThunkTargetSym()->getVA();
+ mayUseShortThunk = llvm::isInt<28>(s - p);
+ return mayUseShortThunk;
+}
+
+void AArch64Thunk::writeTo(uint8_t *buf) {
+ if (!getMayUseShortThunk()) {
+ writeLong(buf);
+ return;
+ }
+ uint64_t s = getAArch64ThunkDestVA(destination, addend);
+ uint64_t p = getThunkTargetSym()->getVA();
+ write32(buf, 0x14000000); // b S
+ target->relocateNoSym(buf, R_AARCH64_CALL26, s - p);
+}
+
+// AArch64 long range Thunks.
+void AArch64ABSLongThunk::writeLong(uint8_t *buf) {
const uint8_t data[] = {
0x50, 0x00, 0x00, 0x58, // ldr x16, L0
0x00, 0x02, 0x1f, 0xd6, // br x16
@@ -484,7 +528,8 @@ void AArch64ABSLongThunk::addSymbols(ThunkSection &isec) {
addSymbol(saver().save("__AArch64AbsLongThunk_" + destination.getName()),
STT_FUNC, 0, isec);
addSymbol("$x", STT_NOTYPE, 0, isec);
- addSymbol("$d", STT_NOTYPE, 8, isec);
+ if (!getMayUseShortThunk())
+ addSymbol("$d", STT_NOTYPE, 8, isec);
}
// This Thunk has a maximum range of 4Gb, this is sufficient for all programs
@@ -492,7 +537,7 @@ void AArch64ABSLongThunk::addSymbols(ThunkSection &isec) {
// clang and gcc do not support the large code model for position independent
// code so it is safe to use this for position independent thunks without
// worrying about the destination being more than 4Gb away.
-void AArch64ADRPThunk::writeTo(uint8_t *buf) {
+void AArch64ADRPThunk::writeLong(uint8_t *buf) {
const uint8_t data[] = {
0x10, 0x00, 0x00, 0x90, // adrp x16, Dest R_AARCH64_ADR_PREL_PG_HI21(Dest)
0x10, 0x02, 0x00, 0x91, // add x16, x16, R_AARCH64_ADD_ABS_LO12_NC(Dest)
diff --git a/lld/test/ELF/aarch64-cortex-a53-843419-thunk.s b/lld/test/ELF/aarch64-cortex-a53-843419-thunk.s
index a12cc2686e22..9e794a5f5675 100644
--- a/lld/test/ELF/aarch64-cortex-a53-843419-thunk.s
+++ b/lld/test/ELF/aarch64-cortex-a53-843419-thunk.s
@@ -2,7 +2,7 @@
// RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o
// RUN: echo "SECTIONS { \
// RUN: .text1 0x10000 : { *(.text.01) *(.text.02) *(.text.03) } \
-// RUN: .text2 0x8010000 : { *(.text.04) } } " > %t.script
+// RUN: .text2 0x10010000 : { *(.text.04) } } " > %t.script
// RUN: ld.lld --script %t.script -fix-cortex-a53-843419 -verbose %t.o -o %t2 \
// RUN: 2>&1 | FileCheck -check-prefix=CHECK-PRINT %s
// RUN: llvm-objdump --no-print-imm-hex --no-show-raw-insn --triple=aarch64-linux-gnu -d %t2 | FileCheck %s
@@ -39,7 +39,7 @@ t3_ff8_ldr:
// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 10FF8 in unpatched output.
// CHECK: 0000000000010ff8 <t3_ff8_ldr>:
-// CHECK-NEXT: adrp x0, 0x8010000
+// CHECK-NEXT: adrp x0, 0x10010000
// CHECK-NEXT: ldr x1, [x1]
// CHECK-NEXT: b 0x11008
// CHECK-NEXT: ret
diff --git a/lld/test/ELF/aarch64-long-thunk-converge.s b/lld/test/ELF/aarch64-long-thunk-converge.s
new file mode 100644
index 000000000000..c96133979226
--- /dev/null
+++ b/lld/test/ELF/aarch64-long-thunk-converge.s
@@ -0,0 +1,53 @@
+// REQUIRES: aarch64
+// RUN: rm -rf %t && split-file %s %t
+// RUN: llvm-mc -triple aarch64-none-elf -filetype=obj -o %t/a.o %t/a.s
+// RUN: ld.lld --shared %t/a.o -T %t/a.t -o %t/a
+// RUN: llvm-objdump --no-show-raw-insn -d --start-address=0x7001004 --stop-address=0x7001010 %t/a | FileCheck %s
+// RUN: llvm-objdump --no-show-raw-insn -d --start-address=0x11001008 --stop-address=0x11001014 %t/a | FileCheck --check-prefix=CHECK2 %s
+// RUN: rm -f %t/a
+/// This test shows that once a long-range thunk has been generated it
+/// cannot be written as a short-range thunk. This prevents oscillations
+/// in size that can prevent convergence.
+/// In pass 0 bl foo requires a long-range thunk to reach foo. The thunk for
+/// bar increases the address of foo so that it can be reached by bl foo with a
+/// single b instruction.
+/// In pass 2 we expect the long-range thunk to remain long.
+
+// CHECK-LABEL: <__AArch64ADRPThunk_>:
+// CHECK-NEXT: 7001004: adrp x16, 0x11001000
+// CHECK-NEXT: add x16, x16, #0x14
+// CHECK-NEXT: br x16
+
+// CHECK2-LABEL: <__AArch64ADRPThunk_>:
+// CHECK2-NEXT: 11001008: adrp x16, 0x9001000
+// CHECK2-NEXT: add x16, x16, #0x10
+// CHECK2-NEXT: br x16
+
+
+//--- a.t
+SECTIONS {
+ .foo 0x1000 : { *(.foo.*) }
+ .bar 0x11001000 : { *(.bar.*) }
+}
+
+//--- a.s
+.section .foo.1,"ax",%progbits,unique,1
+bl bar
+
+.section .foo.2,"ax",%progbits,unique,1
+.space 0x7000000
+
+.section .foo.3,"ax",%progbits,unique,1
+.space 0x2000000
+
+.section .foo.4,"ax",%progbits,unique,1
+foo:
+nop
+
+.section .bar.1,"ax",%progbits,unique,1
+nop
+nop
+.section .bar.2,"ax",%progbits,unique,1
+bar:
+bl foo
+.space 0x8000000
diff --git a/lld/test/ELF/aarch64-thunk-reuse2.s b/lld/test/ELF/aarch64-thunk-reuse2.s
index 5ad1474a29ad..87ebbba0fbfa 100644
--- a/lld/test/ELF/aarch64-thunk-reuse2.s
+++ b/lld/test/ELF/aarch64-thunk-reuse2.s
@@ -10,17 +10,15 @@
# CHECK: <dest>:
# CHECK-NEXT: 10700: ret
# CHECK: <__AArch64ADRPThunk_>:
-# CHECK-NEXT: 10704: adrp x16, 0x10000
-# CHECK-NEXT: add x16, x16, #1792
-# CHECK-NEXT: br x16
+# CHECK-NEXT: 10704: b 0x10700 <dest>
# CHECK-EMPTY:
# CHECK: <__AArch64ADRPThunk_>:
-# CHECK-NEXT: 8010710: adrp x16, 0x10000
+# CHECK-NEXT: 8010708: adrp x16, 0x10000
# CHECK-NEXT: add x16, x16, #1792
-# CHECK-NEXT: br x16
+# CHECK-NEXT: br x16
# CHECK-LABEL: <high>:
-# CHECK-NEXT: 801071c: bl 0x8010710 <__AArch64ADRPThunk_>
-# CHECK-NEXT: b 0x8010710 <__AArch64ADRPThunk_>
+# CHECK-NEXT: 8010714: bl 0x8010708 <__AArch64ADRPThunk_>
+# CHECK-NEXT: b 0x8010708 <__AArch64ADRPThunk_>
.section .text._start, "ax", %progbits
.globl _start
diff --git a/lld/test/ELF/aarch64-thunk-script.s b/lld/test/ELF/aarch64-thunk-script.s
index 4e48ff44c036..08ff4e987118 100644
--- a/lld/test/ELF/aarch64-thunk-script.s
+++ b/lld/test/ELF/aarch64-thunk-script.s
@@ -30,20 +30,12 @@ high_target:
// CHECK-EMPTY:
// CHECK-NEXT: <_start>:
// CHECK-NEXT: 2000: bl 0x200c <__AArch64AbsLongThunk_high_target>
-// CHECK-NEXT: 2004: bl 0x201c <__AArch64AbsLongThunk_>
+// CHECK-NEXT: 2004: bl 0x2010 <__AArch64AbsLongThunk_>
// CHECK-NEXT: ret
// CHECK: <__AArch64AbsLongThunk_high_target>:
-// CHECK-NEXT: 200c: ldr x16, 0x2014
-// CHECK-NEXT: br x16
-// CHECK: <$d>:
-// CHECK-NEXT: 2014: 00 20 00 08 .word 0x08002000
-// CHECK-NEXT: 2018: 00 00 00 00 .word 0x00000000
-// CHECK: <__AArch64AbsLongThunk_>:
-// CHECK-NEXT: 201c: ldr x16, 0x2024
-// CHECK-NEXT: 2020: br x16
-// CHECK: <$d>:
-// CHECK-NEXT: 2024: 04 20 00 08 .word 0x08002004
-// CHECK-NEXT: 2028: 00 00 00 00 .word 0x00000000
+// CHECK-NEXT: 200c: b 0x8002000 <high_target>
+// CHECK: <__AArch64AbsLongThunk_>:
+// CHECK-NEXT: 2010: b 0x8002004 <high_target+0x4>
// CHECK: Disassembly of section .text_high:
// CHECK-EMPTY:
// CHECK-NEXT: <high_target>:
@@ -56,10 +48,8 @@ high_target:
/// Local thunk symbols.
// NM-NEXT: t __AArch64AbsLongThunk_high_target
// NM-NEXT: t $x
-// NM-NEXT: t $d
// NM-NEXT: t __AArch64AbsLongThunk_{{$}}
// NM-NEXT: t $x
-// NM-NEXT: t $d
/// Global symbols.
// NM-NEXT: T _start
// NM-NEXT: T high_target
diff --git a/lld/test/ELF/aarch64-thunk-section-location.s b/lld/test/ELF/aarch64-thunk-section-location.s
index 82ced3ab2fa4..d4b9520429bb 100644
--- a/lld/test/ELF/aarch64-thunk-section-location.s
+++ b/lld/test/ELF/aarch64-thunk-section-location.s
@@ -1,7 +1,7 @@
// REQUIRES: aarch64
// RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t.o
// RUN: ld.lld %t.o -o %t
-// RUN: llvm-objdump -d --start-address=0x81d1008 --stop-address=0x81d1014 --no-show-raw-insn %t | FileCheck %s
+// RUN: llvm-objdump -d --start-address=0x81d1008 --stop-address=0x81d100c --no-show-raw-insn %t | FileCheck %s
// RUN: rm %t.o %t
// Check that the range extension thunks are dumped close to the aarch64 branch
// range of 128 MiB
@@ -36,7 +36,4 @@ high_target:
ret
// CHECK: <__AArch64AbsLongThunk_high_target>:
-// CHECK-NEXT: 81d1008: ldr x16, 0x81d1010
-// CHECK-NEXT: 81d100c: br x16
-// CHECK: <$d>:
-// CHECK-NEXT: 81d1010: 00 20 21 08 .word 0x08212000
+// CHECK-NEXT: 81d1008: b 0x8212000 <high_target>