summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Shaposhnikov <ashaposhnikov@google.com>2022-01-10 05:20:37 +0000
committerAlexander Shaposhnikov <ashaposhnikov@google.com>2022-01-10 05:20:37 +0000
commit8acc3b4ab0c76b9c2a54182e31a02f90ebb96329 (patch)
tree1a5b2d8c9c9ec067ea6e71d70be9d32d98833dfa
parent6b8362eb8dc87be8977e3c1d3a7b2ff35a15898c (diff)
downloadllvm-8acc3b4ab0c76b9c2a54182e31a02f90ebb96329.tar.gz
[lld][ELF] Support adrp+ldr GOT optimization for AArch64
This diff adds the first bits of support for the AArch64 relocation relaxations discussed in https://github.com/ARM-software/abi-aa/pull/106. In particular, it handles the instruction pair `adrp x0, :got:symbol` followed by `ldr x0, [x0, :got_lo12:symbol]`. Test plan: make check-all. Differential revision: https://reviews.llvm.org/D112063
-rw-r--r--lld/ELF/Arch/AArch64.cpp92
-rw-r--r--lld/ELF/InputSection.cpp22
-rw-r--r--lld/ELF/Target.h10
-rw-r--r--lld/test/ELF/aarch64-adrp-ldr-got-symbols.s70
-rw-r--r--lld/test/ELF/aarch64-adrp-ldr-got.s117
5 files changed, 305 insertions, 6 deletions
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index ca3a6aa58dc5..96e16f760a82 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -568,6 +568,98 @@ void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
}
+AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
+ if (!config->relax || config->emachine != EM_AARCH64) {
+ safeToRelaxAdrpLdr = false; // config->relax is off or emachine is not AArch64
+ return;
+ }
+ // Relaxation is only safe if every R_AARCH64_ADR_GOT_PAGE is immediately
+ // followed by R_AARCH64_LD64_GOT_LO12_NC and no such LDR reloc stands alone.
+ size_t i = 0;
+ const size_t size = relocs.size();
+ for (; i != size; ++i) {
+ if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
+ if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
+ ++i; // step over the paired LDR relocation as well
+ continue;
+ }
+ break; // ADRP relocation without an LDR relocation right after it
+ } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
+ break; // LDR relocation that is not directly preceded by an ADRP one
+ }
+ }
+ safeToRelaxAdrpLdr = i == size; // true only if the scan reached the end
+}
+
+bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
+ const Relocation &ldrRel, uint64_t secAddr,
+ uint8_t *buf) const {
+ if (!safeToRelaxAdrpLdr)
+ return false;
+
+ // If sym is defined and not preemptible, the GOT load can be replaced by a
+ // direct address computation, i.e. we may be able to relax
+ // ADRP xn, :got: sym
+ // LDR xn, [xn, :got_lo12: sym]
+ // to
+ // ADRP xn, sym
+ // ADD xn, xn, :lo12: sym
+
+ if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
+ ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
+ return false;
+ // Check if the relocations apply to consecutive (4-byte) instructions.
+ if (adrpRel.offset + 4 != ldrRel.offset)
+ return false;
+ // Check if the relocations reference the same symbol and
+ // skip undefined, preemptible and STT_GNU_IFUNC symbols.
+ if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
+ adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
+ return false;
+ // Check if the addends of both relocations are zero.
+ if (adrpRel.addend != 0 || ldrRel.addend != 0)
+ return false;
+ uint32_t adrpInstr = read32le(buf + adrpRel.offset);
+ uint32_t ldrInstr = read32le(buf + ldrRel.offset);
+ // Check if the first instruction is ADRP and the second instruction is LDR.
+ if ((adrpInstr & 0x9f000000) != 0x90000000 ||
+ (ldrInstr & 0x3b000000) != 0x39000000)
+ return false;
+ // Check the value of the sf bit: only LDRs with bit 31 set are relaxed.
+ if (!(ldrInstr >> 31))
+ return false;
+ uint32_t adrpDestReg = adrpInstr & 0x1f;
+ uint32_t ldrDestReg = ldrInstr & 0x1f;
+ uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
+ // Check if ADRP's destination is both the base and destination of the LDR.
+ if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
+ return false;
+
+ Symbol &sym = *adrpRel.sym;
+ // Check if the page delta fits in a signed 33-bit value (+/-4GB range).
+ int64_t val =
+ getAArch64Page(sym.getVA()) - getAArch64Page(secAddr + adrpRel.offset);
+ if (val != llvm::SignExtend64(val, 33))
+ return false;
+
+ Relocation adrpSymRel = {R_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21,
+ adrpRel.offset, /*addend=*/0, &sym};
+ Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset,
+ /*addend=*/0, &sym};
+
+ // adrp x_<dest_reg> (immediate written as zero; relocate() gets the value)
+ write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg);
+ // add x_<dest_reg>, x_<dest_reg> (immediate written as zero here as well)
+ write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5));
+
+ target->relocate(buf + adrpSymRel.offset, adrpSymRel,
+ SignExtend64(getAArch64Page(sym.getVA()) -
+ getAArch64Page(secAddr + adrpSymRel.offset),
+ 64));
+ target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64));
+ return true; // both instructions were rewritten and relocated
+}
+
// AArch64 may use security features in variant PLT sequences. These are:
// Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
// Indicator (BTI) introduced in armv8.5-a. The additional instructions used
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 844388330d6f..bd1079c9a1db 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -1010,25 +1010,35 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
const unsigned bits = config->wordsize * 8;
const TargetInfo &target = *elf::target;
uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1);
-
- for (const Relocation &rel : relocations) {
+ AArch64Relaxer aarch64relaxer(relocations);
+ for (size_t i = 0, size = relocations.size(); i != size; ++i) {
+ const Relocation &rel = relocations[i];
if (rel.expr == R_NONE)
continue;
uint64_t offset = rel.offset;
uint8_t *bufLoc = buf + offset;
- uint64_t addrLoc = getOutputSection()->addr + offset;
+ uint64_t secAddr = getOutputSection()->addr;
if (auto *sec = dyn_cast<InputSection>(this))
- addrLoc += sec->outSecOff;
+ secAddr += sec->outSecOff;
+ const uint64_t addrLoc = secAddr + offset;
const uint64_t targetVA =
SignExtend64(getRelocTargetVA(file, rel.type, rel.addend, addrLoc,
- *rel.sym, rel.expr), bits);
-
+ *rel.sym, rel.expr),
+ bits);
switch (rel.expr) {
case R_RELAX_GOT_PC:
case R_RELAX_GOT_PC_NOPIC:
target.relaxGot(bufLoc, rel, targetVA);
break;
+ case R_AARCH64_GOT_PAGE_PC:
+ if (i + 1 < size && aarch64relaxer.tryRelaxAdrpLdr(
+ rel, relocations[i + 1], secAddr, buf)) {
+ ++i;
+ continue;
+ }
+ target.relocate(bufLoc, rel, targetVA);
+ break;
case R_PPC64_RELAX_GOT_PC: {
// The R_PPC64_PCREL_OPT relocation must appear immediately after
// R_PPC64_GOT_PCREL34 in the relocations table at the same offset.
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index e0e97301ca98..f7b947ec3aa2 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -221,6 +221,16 @@ void addPPC64SaveRestore();
uint64_t getPPC64TocBase();
uint64_t getAArch64Page(uint64_t expr);
+class AArch64Relaxer {
+ bool safeToRelaxAdrpLdr = true; // set by the constructor; gates relaxation
+
+public:
+ explicit AArch64Relaxer(ArrayRef<Relocation> relocs); // scans relocs once
+
+ bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
+ uint64_t secAddr, uint8_t *buf) const; // true if the pair was rewritten
+};
+
extern const TargetInfo *target;
TargetInfo *getTarget();
diff --git a/lld/test/ELF/aarch64-adrp-ldr-got-symbols.s b/lld/test/ELF/aarch64-adrp-ldr-got-symbols.s
new file mode 100644
index 000000000000..bff7c2fcbb71
--- /dev/null
+++ b/lld/test/ELF/aarch64-adrp-ldr-got-symbols.s
@@ -0,0 +1,70 @@
+## This test verifies that the pair adrp + ldr is relaxed/not relaxed
+## depending on the target symbol properties.
+
+# REQUIRES: aarch64
+# RUN: split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/symbols.s -o %t/symbols.o
+
+# RUN: ld.lld -shared -T %t/linker.t %t/symbols.o -o %t/symbols.so
+# RUN: llvm-objdump --no-show-raw-insn -d %t/symbols.so | \
+# RUN: FileCheck --check-prefix=LIB %s
+
+## Symbol 'hidden_sym' is nonpreemptible, the relaxation should be applied.
+LIB: adrp x0
+LIB-NEXT: add x0
+
+## Symbol 'global_sym' is preemptible, no relaxations should be applied.
+LIB-NEXT: adrp x1
+LIB-NEXT: ldr x1
+
+## Symbol 'undefined_sym' is undefined, no relaxations should be applied.
+LIB-NEXT: adrp x2
+LIB-NEXT: ldr x2
+
+## Symbol 'ifunc_sym' is STT_GNU_IFUNC, no relaxations should be applied.
+LIB-NEXT: adrp x3
+LIB-NEXT: ldr x3
+
+# RUN: ld.lld -T %t/linker.t -z undefs %t/symbols.o -o %t/symbols
+# RUN: llvm-objdump --no-show-raw-insn -d %t/symbols | \
+# RUN: FileCheck --check-prefix=EXE %s
+
+## Symbol 'global_sym' is nonpreemptible, the relaxation should be applied.
+EXE: adrp x1
+EXE-NEXT: add x1
+
+## The linker script ensures that .rodata and .text are sufficiently (>1MB)
+## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
+#--- linker.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text 0x300100: { *(.text) }
+}
+
+#--- symbols.s
+.rodata
+.hidden hidden_sym
+hidden_sym:
+.word 10
+
+.global global_sym
+global_sym:
+.word 10
+
+.text
+.type ifunc_sym STT_GNU_IFUNC
+.hidden ifunc_sym
+ifunc_sym:
+ nop
+
+.global _start
+_start:
+ adrp x0, :got:hidden_sym
+ ldr x0, [x0, #:got_lo12:hidden_sym]
+ adrp x1, :got:global_sym
+ ldr x1, [x1, #:got_lo12:global_sym]
+ adrp x2, :got:undefined_sym
+ ldr x2, [x2, #:got_lo12:undefined_sym]
+ adrp x3, :got:ifunc_sym
+ ldr x3, [x3, #:got_lo12:ifunc_sym]
diff --git a/lld/test/ELF/aarch64-adrp-ldr-got.s b/lld/test/ELF/aarch64-adrp-ldr-got.s
new file mode 100644
index 000000000000..c789e720e02e
--- /dev/null
+++ b/lld/test/ELF/aarch64-adrp-ldr-got.s
@@ -0,0 +1,117 @@
+# REQUIRES: aarch64
+# RUN: split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/unpaired.s -o %t/unpaired.o
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/lone-ldr.s -o %t/lone-ldr.o
+
+# RUN: ld.lld %t/a.o -T %t/linker.t -o %t/a
+# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s
+
+## Symbol 'x' is nonpreemptible, the relaxation should be applied.
+## This test verifies the encoding when the register x1 is used.
+# CHECK: adrp x1
+# CHECK-NEXT: add x1, x1
+
+## ADRP contains a nonzero addend, no relaxations should be applied.
+# CHECK-NEXT: adrp x2
+# CHECK-NEXT: ldr
+
+## LDR contains a nonzero addend, no relaxations should be applied.
+# CHECK-NEXT: adrp x3
+# CHECK-NEXT: ldr
+
+## LDR's destination register differs from ADRP's, no relaxations should be applied.
+# CHECK-NEXT: adrp x4
+# CHECK-NEXT: ldr
+
+## LDR's base register differs from ADRP's destination, no relaxations should be applied.
+# CHECK-NEXT: adrp x6
+# CHECK-NEXT: ldr
+
+## Symbol 'x' is nonpreemptible, but --no-relax suppresses relaxations.
+# RUN: ld.lld %t/a.o -T %t/linker.t --no-relax -o %t/no-relax
+# RUN: llvm-objdump --no-show-raw-insn -d %t/no-relax | \
+# RUN: FileCheck --check-prefix=X1-NO-RELAX %s
+
+# X1-NO-RELAX: adrp x1
+# X1-NO-RELAX-NEXT: ldr
+
+## Symbol 'x' is nonpreemptible, but the address is not within adrp range.
+# RUN: ld.lld %t/a.o -T %t/out-of-range.t -o %t/out-of-range
+# RUN: llvm-objdump --no-show-raw-insn -d %t/out-of-range | \
+# RUN: FileCheck --check-prefix=X1-NO-RELAX %s
+
+## Relocations do not appear in pairs, no relaxations should be applied.
+# RUN: ld.lld %t/unpaired.o -o %t/unpaired
+# RUN: llvm-objdump --no-show-raw-insn -d %t/unpaired | \
+# RUN: FileCheck --check-prefix=UNPAIRED %s
+
+# UNPAIRED: adrp x0
+# UNPAIRED-NEXT: b
+# UNPAIRED-NEXT: adrp x0
+# UNPAIRED: ldr x0
+
+## Relocations do not appear in pairs, no relaxations should be applied.
+# RUN: ld.lld %t/lone-ldr.o -o %t/lone-ldr
+# RUN: llvm-objdump --no-show-raw-insn -d %t/lone-ldr | \
+# RUN: FileCheck --check-prefix=LONE-LDR %s
+
+# LONE-LDR: ldr x0
+
+## This linker script ensures that .rodata and .text are sufficiently (>1M)
+## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop.
+#--- linker.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text 0x200100: { *(.text) }
+}
+
+## This linker script ensures that .rodata and .text are sufficiently (>4GB)
+## far apart so that the adrp + ldr pair cannot be relaxed.
+#--- out-of-range.t
+SECTIONS {
+ .rodata 0x1000: { *(.rodata) }
+ .text 0x100002000: { *(.text) }
+}
+
+#--- a.s
+.rodata
+.hidden x
+x:
+.word 10
+.text
+.global _start
+_start:
+ adrp x1, :got:x
+ ldr x1, [x1, #:got_lo12:x]
+ adrp x2, :got:x+1
+ ldr x2, [x2, #:got_lo12:x]
+ adrp x3, :got:x
+ ldr x3, [x3, #:got_lo12:x+8]
+ adrp x4, :got:x
+ ldr x5, [x4, #:got_lo12:x]
+ adrp x6, :got:x
+ ldr x6, [x0, #:got_lo12:x]
+
+#--- unpaired.s
+.text
+.hidden x
+x:
+ nop
+.global _start
+_start:
+ adrp x0, :got:x
+ b L
+ adrp x0, :got:x
+L:
+ ldr x0, [x0, #:got_lo12:x]
+
+#--- lone-ldr.s
+.text
+.hidden x
+x:
+ nop
+.global _start
+_start:
+ ldr x0, [x0, #:got_lo12:x]