summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Stellard <tstellar@redhat.com>2019-05-31 22:44:22 +0000
committerTom Stellard <tstellar@redhat.com>2019-05-31 22:44:22 +0000
commit0489682ef3b5659eaf05dd7d51ab6c049575ca6b (patch)
treebd2b645afc25b5816c722df9a27b427f5fe6d220
parentf1cacab458fe9a80b3dcbc6a230d3c7b798a9f57 (diff)
downloadllvm-0489682ef3b5659eaf05dd7d51ab6c049575ca6b.tar.gz
Merging r360405:
------------------------------------------------------------------------ r360405 | maskray | 2019-05-09 22:51:00 -0700 (Thu, 09 May 2019) | 25 lines [PPC64] Define getThunkSectionSpacing() based on the range of R_PPC64_REL24 Suggested by Sean Fertile and Peter Smith. Thunk section spacing decreases the total number of thunks. I measured a decrease of 1% or less in some large programs, with no perceivable slowdown in link time. Override getThunkSectionSpacing() to enable it. 0x2000000 is the farthest point R_PPC64_REL24 can reach. I tried several numbers and found 0x2000000 works the best. Numbers near 0x2000000 work as well but let's just use the simpler number. As demonstrated by the updated tests, this essentially changes placement of most thunks to the end of the output section. We leverage this property to fix PR40740 reported by Alfredo Dal'Ava Júnior: The output section .init consists of input sections from several object files (crti.o crtbegin.o crtend.o crtn.o). Sections other than the last one do not have a terminator. With this patch, we create the thunk after the last .init input section and thus fix the issue. This is not foolproof but works quite well for such sections (with no terminator) in practice. Reviewed By: ruiu, sfertile Differential Revision: https://reviews.llvm.org/D61720 ------------------------------------------------------------------------ llvm-svn: 362274
-rw-r--r--lld/ELF/Arch/PPC64.cpp9
-rw-r--r--lld/test/ELF/ppc64-bsymbolic-toc-restore.s4
-rw-r--r--lld/test/ELF/ppc64-call-reach.s15
-rw-r--r--lld/test/ELF/ppc64-ifunc.s28
-rw-r--r--lld/test/ELF/ppc64-local-dynamic.s2
-rw-r--r--lld/test/ELF/ppc64-long-branch-init.s43
-rw-r--r--lld/test/ELF/ppc64-plt-stub.s11
-rw-r--r--lld/test/ELF/ppc64-toc-restore-recursive-call.s11
-rw-r--r--lld/test/ELF/ppc64-toc-restore.s37
9 files changed, 101 insertions, 59 deletions
diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index 8a320c9a4e9e..cbfa8073d33f 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -113,6 +113,7 @@ public:
void writeGotHeader(uint8_t *Buf) const override;
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
uint64_t BranchAddr, const Symbol &S) const override;
+ uint32_t getThunkSectionSpacing() const override;
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const override;
@@ -759,6 +760,14 @@ bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
return !inBranchRange(Type, BranchAddr, S.getVA());
}
+uint32_t PPC64::getThunkSectionSpacing() const {
+ // See comment in Arch/ARM.cpp for a more detailed explanation of
+ // getThunkSectionSpacing(). For PPC64 we pick the constant here based on
+ // R_PPC64_REL24, which is used by unconditional branch instructions.
+ // 0x2000000 = (1 << (24-1)) * 4
+ return 0x2000000;
+}
+
bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
int64_t Offset = Dst - Src;
if (Type == R_PPC64_REL14)
diff --git a/lld/test/ELF/ppc64-bsymbolic-toc-restore.s b/lld/test/ELF/ppc64-bsymbolic-toc-restore.s
index b7d9edd45d43..d467d22ff7b1 100644
--- a/lld/test/ELF/ppc64-bsymbolic-toc-restore.s
+++ b/lld/test/ELF/ppc64-bsymbolic-toc-restore.s
@@ -53,7 +53,7 @@ caller:
# CHECK-LABEL: caller
# CHECK: bl .+44
# CHECK-NEXT: mr 31, 3
-# CHECK-NEXT: bl .-48
+# CHECK-NEXT: bl .+44
# CHECK-NEXT: ld 2, 24(1)
# CHECK-NEXT: add 3, 3, 31
# CHECK-NEXT: addi 1, 1, 32
@@ -63,6 +63,6 @@ caller:
# CHECK-EMPTY:
# CHECK-NEXT: def:
# CHECK-NEXT: addis 2, 12, 2
-# CHECK-NEXT: addi 2, 2, -32636
+# CHECK-NEXT: addi 2, 2, -32616
# CHECK-NEXT: li 3, 55
# CHECK-NEXT: blr
diff --git a/lld/test/ELF/ppc64-call-reach.s b/lld/test/ELF/ppc64-call-reach.s
index 085e68f9aebd..b843e7e531c9 100644
--- a/lld/test/ELF/ppc64-call-reach.s
+++ b/lld/test/ELF/ppc64-call-reach.s
@@ -65,27 +65,24 @@ test:
# NEGOFFSET: 10010014: bl .-33554432
# NEGOFFSET: 10010024: b .+33554432
+# THUNK-LABEL: test:
+# THUNK: 10010014: bl .+20
+# THUNK: 10010024: b .+20
+
# .branch_lt[0]
# THUNK-LABEL: __long_branch_callee:
-# THUNK-NEXT: 10010000: addis 12, 2, -1
+# THUNK-NEXT: 10010028: addis 12, 2, -1
# THUNK-NEXT: ld 12, -32768(12)
# THUNK-NEXT: mtctr 12
# THUNK-NEXT: bctr
# .branch_lt[1]
# THUNK-LABEL: __long_branch_tail_callee:
-# THUNK-NEXT: 10010010: addis 12, 2, -1
+# THUNK-NEXT: 10010038: addis 12, 2, -1
# THUNK-NEXT: ld 12, -32760(12)
# THUNK-NEXT: mtctr 12
# THUNK-NEXT: bctr
-# Each call now branches to a thunk, and although it is printed as positive
-# the offset is interpreted as a signed 26 bit value so 67108812 is actually
-# -52.
-# THUNK-LABEL: test:
-# THUNK: 10010034: bl .-52
-# THUNK: 10010044: b .+67108812
-
# The offset from the TOC to the .branch_lt section is (-1 << 16) - 32768.
# Name Type Address Off Size
# BRANCHLT: .branch_lt PROGBITS 0000000010020000 020000 000010
diff --git a/lld/test/ELF/ppc64-ifunc.s b/lld/test/ELF/ppc64-ifunc.s
index 4bf50b98db11..32e317f3c059 100644
--- a/lld/test/ELF/ppc64-ifunc.s
+++ b/lld/test/ELF/ppc64-ifunc.s
@@ -15,11 +15,21 @@
# RUN: llvm-readelf -r %t | FileCheck --check-prefix=DYNREL %s
# NM-DAG: 0000000010028000 d .TOC.
-# NM-DAG: 0000000010010028 T ifunc
-# NM-DAG: 000000001001002c T ifunc2
+# NM-DAG: 0000000010010000 T ifunc
+# NM-DAG: 0000000010010004 T ifunc2
# SECTIONS: .plt NOBITS 0000000010030000
+# __plt_ifunc - . = 0x10010020 - 0x10010010 = 16
+# __plt_ifunc2 - . = 0x10010034 - 0x10010018 = 28
+# CHECK: _start:
+# CHECK-NEXT: addis 2, 12, 1
+# CHECK-NEXT: addi 2, 2, 32760
+# CHECK-NEXT: 10010010: bl .+16
+# CHECK-NEXT: ld 2, 24(1)
+# CHECK-NEXT: 10010018: bl .+28
+# CHECK-NEXT: ld 2, 24(1)
+
# .plt[0] - .TOC. = 0x10030000 - 0x10028000 = (1<<16) - 32768
# CHECK: __plt_ifunc:
# CHECK-NEXT: std 2, 24(1)
@@ -36,19 +46,9 @@
# CHECK-NEXT: mtctr 12
# CHECK-NEXT: bctr
-# __plt_ifunc - . = 0x10010000 - 0x10010038 = -56
-# __plt_ifunc2 - . = 0x10010014 - 0x10010040 = -44
-# CHECK: _start:
-# CHECK-NEXT: addis 2, 12, 1
-# CHECK-NEXT: addi 2, 2, 32720
-# CHECK-NEXT: 10010038: bl .-56
-# CHECK-NEXT: ld 2, 24(1)
-# CHECK-NEXT: 10010040: bl .-44
-# CHECK-NEXT: ld 2, 24(1)
-
# Check that we emit 2 R_PPC64_IRELATIVE.
-# DYNREL: R_PPC64_IRELATIVE 10010028
-# DYNREL: R_PPC64_IRELATIVE 1001002c
+# DYNREL: R_PPC64_IRELATIVE 10010000
+# DYNREL: R_PPC64_IRELATIVE 10010004
.type ifunc STT_GNU_IFUNC
.globl ifunc
diff --git a/lld/test/ELF/ppc64-local-dynamic.s b/lld/test/ELF/ppc64-local-dynamic.s
index 8a23863f67de..87e33b784b8b 100644
--- a/lld/test/ELF/ppc64-local-dynamic.s
+++ b/lld/test/ELF/ppc64-local-dynamic.s
@@ -113,7 +113,7 @@ k:
// Dis: test:
// Dis: addis 3, 2, 0
// Dis-NEXT: addi 3, 3, -32760
-// Dis-NEXT: bl .-60
+// Dis-NEXT: bl .+60
// Dis-NEXT: ld 2, 24(1)
// Dis-NEXT: addis 3, 3, 0
// Dis-NEXT: lwa 3, -32768(3)
diff --git a/lld/test/ELF/ppc64-long-branch-init.s b/lld/test/ELF/ppc64-long-branch-init.s
new file mode 100644
index 000000000000..80b3919cc455
--- /dev/null
+++ b/lld/test/ELF/ppc64-long-branch-init.s
@@ -0,0 +1,43 @@
+# REQUIRES: ppc
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64-pc-freebsd13.0 %s -o %t.o
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
+
+## .init consists of sections from several object files. Sections other than the
+## last one do not have a terminator. Check we do not create a long branch stub
+## in the middle.
+## We currently use thunk section spacing to ensure the stub is at the end. This
+## is not foolproof but good enough to not break in practice.
+
+# CHECK: Disassembly of section .init:
+# CHECK-LABEL: _init:
+# CHECK: blr
+# CHECK-EMPTY:
+# CHECK-LABEL: __long_branch_foo:
+
+.globl foo
+foo:
+ .space 0x2000000
+ blr
+
+.section .init,"ax",@progbits,unique,0
+.globl _init
+_init:
+ stdu 1, -48(1)
+ mflr 0
+ std 0, 64(1)
+
+.section .init,"ax",@progbits,unique,1
+ bl foo
+ nop
+
+.section .init,"ax",@progbits,unique,2
+ bl foo
+ nop
+
+.section .init,"ax",@progbits,unique,3
+ ld 1, 0(1)
+ ld 0, 16(1)
+ mtlr 0
+ blr
diff --git a/lld/test/ELF/ppc64-plt-stub.s b/lld/test/ELF/ppc64-plt-stub.s
index 95e28a5850a9..bf3ac09fd516 100644
--- a/lld/test/ELF/ppc64-plt-stub.s
+++ b/lld/test/ELF/ppc64-plt-stub.s
@@ -4,16 +4,19 @@
// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
// RUN: ld.lld -shared %t2.o -o %t2.so
// RUN: ld.lld %t.o %t2.so -o %t
-// RUN: llvm-objdump -d %t | FileCheck %s
+// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
// RUN: ld.lld -shared %t2.o -o %t2.so
// RUN: ld.lld %t.o %t2.so -o %t
-// RUN: llvm-objdump -d %t | FileCheck %s
+// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
// CHECK: Disassembly of section .text:
-// CHECK-NEXT: __plt_foo:
+// CHECK-NEXT: _start:
+// CHECK: 10010008: bl .+16
+
+// CHECK-LABEL: 0000000010010018 __plt_foo:
// CHECK-NEXT: std 2, 24(1)
// CHECK-NEXT: addis 12, 2, 0
// CHECK-NEXT: ld 12, 32560(12)
@@ -21,8 +24,6 @@
// CHECK-NEXT: bctr
-// CHECK: _start:
-// CHECK: bl .-40
.text
.abiversion 2
.globl _start
diff --git a/lld/test/ELF/ppc64-toc-restore-recursive-call.s b/lld/test/ELF/ppc64-toc-restore-recursive-call.s
index 538b12c7c90e..756a058cc565 100644
--- a/lld/test/ELF/ppc64-toc-restore-recursive-call.s
+++ b/lld/test/ELF/ppc64-toc-restore-recursive-call.s
@@ -14,12 +14,11 @@
# for recursive calls as well as keeps the logic for recursive calls consistent
# with non-recursive calls.
-# CHECK-LABEL: __plt_recursive_func:
-# CHECK-NEXT: 10000:
-# CHECK-LABEL: recursive_func
-# CHECK-NEXT: 10014:
-# CHECK: 1003c: bl .-60
-# CHECK-NEXT: 10040: ld 2, 24(1)
+# CHECK-LABEL: 0000000000010000 recursive_func:
+# CHECK: 10028: bl .+32
+# CHECK-NEXT: ld 2, 24(1)
+
+# CHECK-LABEL: 0000000000010048 __plt_recursive_func:
.abiversion 2
.section ".text"
diff --git a/lld/test/ELF/ppc64-toc-restore.s b/lld/test/ELF/ppc64-toc-restore.s
index 8ffe2136591d..d65bef847a7b 100644
--- a/lld/test/ELF/ppc64-toc-restore.s
+++ b/lld/test/ELF/ppc64-toc-restore.s
@@ -28,16 +28,11 @@ _start:
bl foo
nop
bl bar_local
-
-
-// CHECK: Disassembly of section .text:
-// CHECK: _start:
-// CHECK: 1001001c: bl .-28
-// CHECK-NOT: 10010020: nop
-// CHECK: 10010020: ld 2, 24(1)
-// CHECK: 10010024: bl .-16
-// CHECK-NOT: 10010028: nop
-// CHECK-NOT: 10010028: ld 2, 24(1)
+// CHECK-LABEL: _start:
+// CHECK-NEXT: 10010008: bl .+64
+// CHECK-NEXT: 1001000c: ld 2, 24(1)
+// CHECK-NEXT: 10010010: bl .-16
+// CHECK-EMPTY:
# Calling a function in another object file which will have same
# TOC base does not need a nop. If nop present, do not rewrite to
@@ -47,26 +42,24 @@ _diff_object:
bl foo_not_shared
bl foo_not_shared
nop
-
-// CHECK: _diff_object:
-// CHECK-NEXT: 10010028: bl .+24
-// CHECK-NEXT: 1001002c: bl .+20
-// CHECK-NEXT: 10010030: nop
+// CHECK-LABEL: _diff_object:
+// CHECK-NEXT: 10010014: bl .+28
+// CHECK-NEXT: 10010018: bl .+24
+// CHECK-NEXT: 1001001c: nop
# Branching to a local function does not need a nop
.global noretbranch
noretbranch:
b bar_local
-// CHECK: noretbranch:
-// CHECK: 10010034: b .+67108832
-// CHECK-NOT: 10010038: nop
-// CHECK-NOT: 1001003c: ld 2, 24(1)
+// CHECK-LABEL: noretbranch:
+// CHECK: 10010020: b .+67108832
+// CHECK-EMPTY:
// This should come last to check the end-of-buffer condition.
.global last
last:
bl foo
nop
-// CHECK: last:
-// CHECK: 10010038: bl .-56
-// CHECK-NEXT: 1001003c: ld 2, 24(1)
+// CHECK-LABEL: last:
+// CHECK-NEXT: 10010024: bl .+36
+// CHECK-NEXT: 10010028: ld 2, 24(1)