From 8acc3b4ab0c76b9c2a54182e31a02f90ebb96329 Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Mon, 10 Jan 2022 05:20:37 +0000 Subject: [lld][ELF] Support adrp+ldr GOT optimization for AArch64 This diff adds first bits to support relocation relaxations for AArch64 discussed on https://github.com/ARM-software/abi-aa/pull/106. In particular, the case of adrp x0, :got: symbol ldr x0, [x0, :got_lo12: symbol] is handled. Test plan: make check-all Differential revision: https://reviews.llvm.org/D112063 --- lld/ELF/Arch/AArch64.cpp | 92 ++++++++++++++++++++++ lld/ELF/InputSection.cpp | 22 ++++-- lld/ELF/Target.h | 10 +++ lld/test/ELF/aarch64-adrp-ldr-got-symbols.s | 70 +++++++++++++++++ lld/test/ELF/aarch64-adrp-ldr-got.s | 117 ++++++++++++++++++++++++++++ 5 files changed, 305 insertions(+), 6 deletions(-) create mode 100644 lld/test/ELF/aarch64-adrp-ldr-got-symbols.s create mode 100644 lld/test/ELF/aarch64-adrp-ldr-got.s diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index ca3a6aa58dc5..96e16f760a82 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -568,6 +568,98 @@ void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, llvm_unreachable("invalid relocation for TLS IE to LE relaxation"); } +AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) { + if (!config->relax || config->emachine != EM_AARCH64) { + safeToRelaxAdrpLdr = false; + return; + } + // Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC + // always appear in pairs. 
+ size_t i = 0; + const size_t size = relocs.size(); + for (; i != size; ++i) { + if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) { + if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) { + ++i; + continue; + } + break; + } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) { + break; + } + } + safeToRelaxAdrpLdr = i == size; +} + +bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel, + const Relocation &ldrRel, uint64_t secAddr, + uint8_t *buf) const { + if (!safeToRelaxAdrpLdr) + return false; + + // When the definition of sym is not preemptible then we may + // be able to relax + // ADRP xn, :got: sym + // LDR xn, [xn, :got_lo12: sym] + // to + // ADRP xn, sym + // ADD xn, xn, :lo12: sym + + if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE || + ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC) + return false; + // Check if the relocations apply to consecutive instructions. + if (adrpRel.offset + 4 != ldrRel.offset) + return false; + // Check if the relocations reference the same symbol and + // skip undefined, preemptible and STT_GNU_IFUNC symbols. + if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() || + adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc()) + return false; + // Check if the addends of both instructions are zero. + if (adrpRel.addend != 0 || ldrRel.addend != 0) + return false; + uint32_t adrpInstr = read32le(buf + adrpRel.offset); + uint32_t ldrInstr = read32le(buf + ldrRel.offset); + // Check if the first instruction is ADRP and the second instruction is LDR. + if ((adrpInstr & 0x9f000000) != 0x90000000 || + (ldrInstr & 0x3b000000) != 0x39000000) + return false; + // Check the value of the sf bit. + if (!(ldrInstr >> 31)) + return false; + uint32_t adrpDestReg = adrpInstr & 0x1f; + uint32_t ldrDestReg = ldrInstr & 0x1f; + uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f; + // Check if ADRP and LDR use the same register. 
+ if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg) + return false; + + Symbol &sym = *adrpRel.sym; + // Check if the address difference is within 4GB range. + int64_t val = + getAArch64Page(sym.getVA()) - getAArch64Page(secAddr + adrpRel.offset); + if (val != llvm::SignExtend64(val, 33)) + return false; + + Relocation adrpSymRel = {R_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21, + adrpRel.offset, /*addend=*/0, &sym}; + Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset, + /*addend=*/0, &sym}; + + // adrp x_ + write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg); + // add x_, x_ + write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5)); + + target->relocate(buf + adrpSymRel.offset, adrpSymRel, + SignExtend64(getAArch64Page(sym.getVA()) - + getAArch64Page(secAddr + adrpSymRel.offset), + 64)); + target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64)); + return true; +} + // AArch64 may use security features in variant PLT sequences. These are: // Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target // Indicator (BTI) introduced in armv8.5-a. 
The additional instructions used diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 844388330d6f..bd1079c9a1db 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -1010,25 +1010,35 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { const unsigned bits = config->wordsize * 8; const TargetInfo &target = *elf::target; uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1); - - for (const Relocation &rel : relocations) { + AArch64Relaxer aarch64relaxer(relocations); + for (size_t i = 0, size = relocations.size(); i != size; ++i) { + const Relocation &rel = relocations[i]; if (rel.expr == R_NONE) continue; uint64_t offset = rel.offset; uint8_t *bufLoc = buf + offset; - uint64_t addrLoc = getOutputSection()->addr + offset; + uint64_t secAddr = getOutputSection()->addr; if (auto *sec = dyn_cast<InputSection>(this)) - addrLoc += sec->outSecOff; + secAddr += sec->outSecOff; + const uint64_t addrLoc = secAddr + offset; const uint64_t targetVA = SignExtend64(getRelocTargetVA(file, rel.type, rel.addend, addrLoc, - *rel.sym, rel.expr), bits); - + *rel.sym, rel.expr), + bits); switch (rel.expr) { case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: target.relaxGot(bufLoc, rel, targetVA); break; + case R_AARCH64_GOT_PAGE_PC: + if (i + 1 < size && aarch64relaxer.tryRelaxAdrpLdr( + rel, relocations[i + 1], secAddr, buf)) { + ++i; + continue; + } + target.relocate(bufLoc, rel, targetVA); + break; case R_PPC64_RELAX_GOT_PC: { // The R_PPC64_PCREL_OPT relocation must appear immediately after // R_PPC64_GOT_PCREL34 in the relocations table at the same offset. 
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index e0e97301ca98..f7b947ec3aa2 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -221,6 +221,16 @@ void addPPC64SaveRestore(); uint64_t getPPC64TocBase(); uint64_t getAArch64Page(uint64_t expr); +class AArch64Relaxer { + bool safeToRelaxAdrpLdr = true; + +public: + explicit AArch64Relaxer(ArrayRef<Relocation> relocs); + + bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel, + uint64_t secAddr, uint8_t *buf) const; +}; + extern const TargetInfo *target; TargetInfo *getTarget(); diff --git a/lld/test/ELF/aarch64-adrp-ldr-got-symbols.s b/lld/test/ELF/aarch64-adrp-ldr-got-symbols.s new file mode 100644 index 000000000000..bff7c2fcbb71 --- /dev/null +++ b/lld/test/ELF/aarch64-adrp-ldr-got-symbols.s @@ -0,0 +1,70 @@ +## This test verifies that the pair adrp + ldr is relaxed/not relaxed +## depending on the target symbol properties. + +# REQUIRES: aarch64 +# RUN: split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/symbols.s -o %t/symbols.o + +# RUN: ld.lld -shared -T %t/linker.t %t/symbols.o -o %t/symbols.so +# RUN: llvm-objdump --no-show-raw-insn -d %t/symbols.so | \ +# RUN: FileCheck --check-prefix=LIB %s + +## Symbol 'hidden_sym' is nonpreemptible, the relaxation should be applied. +LIB: adrp x0 +LIB-NEXT: add x0 + +## Symbol 'global_sym' is preemptible, no relaxations should be applied. +LIB-NEXT: adrp x1 +LIB-NEXT: ldr x1 + +## Symbol 'undefined_sym' is undefined, no relaxations should be applied. +LIB-NEXT: adrp x2 +LIB-NEXT: ldr x2 + +## Symbol 'ifunc_sym' is STT_GNU_IFUNC, no relaxations should be applied. +LIB-NEXT: adrp x3 +LIB-NEXT: ldr x3 + +# RUN: ld.lld -T %t/linker.t -z undefs %t/symbols.o -o %t/symbols +# RUN: llvm-objdump --no-show-raw-insn -d %t/symbols | \ +# RUN: FileCheck --check-prefix=EXE %s + +## Symbol 'global_sym' is nonpreemptible, the relaxation should be applied. 
+EXE: adrp x1 +EXE-NEXT: add x1 + +## The linker script ensures that .rodata and .text are sufficiently (>1MB) +## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop. +#--- linker.t +SECTIONS { + .rodata 0x1000: { *(.rodata) } + .text 0x300100: { *(.text) } +} + +#--- symbols.s +.rodata +.hidden hidden_sym +hidden_sym: +.word 10 + +.global global_sym +global_sym: +.word 10 + +.text +.type ifunc_sym STT_GNU_IFUNC +.hidden ifunc_sym +ifunc_sym: + nop + +.global _start +_start: + adrp x0, :got:hidden_sym + ldr x0, [x0, #:got_lo12:hidden_sym] + adrp x1, :got:global_sym + ldr x1, [x1, #:got_lo12:global_sym] + adrp x2, :got:undefined_sym + ldr x2, [x2, #:got_lo12:undefined_sym] + adrp x3, :got:ifunc_sym + ldr x3, [x3, #:got_lo12:ifunc_sym] diff --git a/lld/test/ELF/aarch64-adrp-ldr-got.s b/lld/test/ELF/aarch64-adrp-ldr-got.s new file mode 100644 index 000000000000..c789e720e02e --- /dev/null +++ b/lld/test/ELF/aarch64-adrp-ldr-got.s @@ -0,0 +1,117 @@ +# REQUIRES: aarch64 +# RUN: split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o +# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/unpaired.s -o %t/unpaired.o +# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/lone-ldr.s -o %t/lone-ldr.o + +# RUN: ld.lld %t/a.o -T %t/linker.t -o %t/a +# RUN: llvm-objdump --no-show-raw-insn -d %t/a | FileCheck %s + +## Symbol 'x' is nonpreemptible, the relaxation should be applied. +## This test verifies the encoding when the register x1 is used. +# CHECK: adrp x1 +# CHECK-NEXT: add x1, x1 + +## ADRP contains a nonzero addend, no relaxations should be applied. +# CHECK-NEXT: adrp x2 +# CHECK-NEXT: ldr + +## LDR contains a nonzero addend, no relaxations should be applied. +# CHECK-NEXT: adrp x3 +# CHECK-NEXT: ldr + +## LDR and ADRP use different registers, no relaxations should be applied. +# CHECK-NEXT: adrp x4 +# CHECK-NEXT: ldr + +## LDR and ADRP use different registers, no relaxations should be applied. 
+# CHECK-NEXT: adrp x6 +# CHECK-NEXT: ldr + +## Symbol 'x' is nonpreemptible, but --no-relax suppresses relaxations. +# RUN: ld.lld %t/a.o -T %t/linker.t --no-relax -o %t/no-relax +# RUN: llvm-objdump --no-show-raw-insn -d %t/no-relax | \ +# RUN: FileCheck --check-prefix=X1-NO-RELAX %s + +# X1-NO-RELAX: adrp x1 +# X1-NO-RELAX-NEXT: ldr + +## Symbol 'x' is nonpreemptible, but the address is not within adrp range. +# RUN: ld.lld %t/a.o -T %t/out-of-range.t -o %t/out-of-range +# RUN: llvm-objdump --no-show-raw-insn -d %t/out-of-range | \ +# RUN: FileCheck --check-prefix=X1-NO-RELAX %s + +## Relocations do not appear in pairs, no relaxations should be applied. +# RUN: ld.lld %t/unpaired.o -o %t/unpaired +# RUN: llvm-objdump --no-show-raw-insn -d %t/unpaired | \ +# RUN: FileCheck --check-prefix=UNPAIRED %s + +# UNPAIRED: adrp x0 +# UNPAIRED-NEXT: b +# UNPAIRED-NEXT: adrp x0 +# UNPAIRED: ldr x0 + +## Relocations do not appear in pairs, no relaxations should be applied. +# RUN: ld.lld %t/lone-ldr.o -o %t/lone-ldr +# RUN: llvm-objdump --no-show-raw-insn -d %t/lone-ldr | \ +# RUN: FileCheck --check-prefix=LONE-LDR %s + +# LONE-LDR: ldr x0 + +## This linker script ensures that .rodata and .text are sufficiently (>1MB) +## far apart so that the adrp + ldr pair cannot be relaxed to adr + nop. +#--- linker.t +SECTIONS { + .rodata 0x1000: { *(.rodata) } + .text 0x200100: { *(.text) } +} + +## This linker script ensures that .rodata and .text are sufficiently (>4GB) +## far apart so that the adrp + ldr pair cannot be relaxed. 
+#--- out-of-range.t +SECTIONS { + .rodata 0x1000: { *(.rodata) } + .text 0x100002000: { *(.text) } +} + +#--- a.s +.rodata +.hidden x +x: +.word 10 +.text +.global _start +_start: + adrp x1, :got:x + ldr x1, [x1, #:got_lo12:x] + adrp x2, :got:x+1 + ldr x2, [x2, #:got_lo12:x] + adrp x3, :got:x + ldr x3, [x3, #:got_lo12:x+8] + adrp x4, :got:x + ldr x5, [x4, #:got_lo12:x] + adrp x6, :got:x + ldr x6, [x0, #:got_lo12:x] + +#--- unpaired.s +.text +.hidden x +x: + nop +.global _start +_start: + adrp x0, :got:x + b L + adrp x0, :got:x +L: + ldr x0, [x0, #:got_lo12:x] + +#--- lone-ldr.s +.text +.hidden x +x: + nop +.global _start +_start: + ldr x0, [x0, #:got_lo12:x] -- cgit v1.2.1