#include <stdlib.h>

#include "elf_compat.h"
#include "ghcplatform.h"

#if defined(aarch64_HOST_ARCH)

#include "elf_plt_aarch64.h"

#if defined(OBJFORMAT_ELF)

/* the stub consists of five 4-byte instructions */
const size_t instSizeAarch64 = 4;
const size_t stubSizeAarch64 = 5 * 4;

/*
 * Compute the number of stubs (PLT entries) for a given section by iterating
 * over its relocations, both those without an explicit addend (Rel) and those
 * with one (Rela), and counting the relocation types that might require a PLT
 * entry.
 *
 * This is an upper bound, and we might not use all stubs. However, by
 * calculating the number of potential stubs beforehand, we can allocate
 * enough space adjacent to the section, such that the PLT is rather close to
 * the section, and the risk of the stubs being out of reach of the
 * instruction to be relocated is minimal. (An illustrative counting sketch
 * follows the two predicates below.)
 */
bool needStubForRelAarch64(Elf_Rel * rel) {
    switch(ELF64_R_TYPE(rel->r_info)) {
        case COMPAT_R_AARCH64_CALL26:
        case COMPAT_R_AARCH64_JUMP26:
            return true;
        default:
            return false;
    }
}

bool needStubForRelaAarch64(Elf_Rela * rela) {
    switch(ELF64_R_TYPE(rela->r_info)) {
        case COMPAT_R_AARCH64_CALL26:
        case COMPAT_R_AARCH64_JUMP26:
            return true;
        default:
            return false;
    }
}
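
/*
 * Illustrative sketch, not part of the RTS proper, hence guarded by the
 * hypothetical ELF_PLT_EXAMPLES macro: computing the upper bound on the
 * number of stubs for one RELA relocation section using the predicate above.
 * The real per-section accounting lives in the generic ELF linker code; the
 * helper name countStubsUpperBoundAarch64 is made up for this example.
 */
#if defined(ELF_PLT_EXAMPLES)
static size_t
countStubsUpperBoundAarch64(Elf_Rela * relas, size_t n_relas) {
    size_t stubs = 0;
    for (size_t i = 0; i < n_relas; i++) {
        if (needStubForRelaAarch64(&relas[i])) {
            stubs++;            /* may over-count; unused stubs are harmless */
        }
    }
    /* the caller would reserve stubs * stubSizeAarch64 bytes adjacent to
     * the section */
    return stubs;
}
#endif /* ELF_PLT_EXAMPLES */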

bool
makeStubAarch64(Stub * s) {
    // We (the linker) may corrupt registers x16 (IP0) and x17 (IP1) [AAPCS64]
    // and the condition flags, according to the "ELF for the ARM64
    // Architecture".
    //
    // [Special purpose regs]
    // X16 and X17 are IP0 and IP1, intra-procedure-call temporary registers.
    // These can be used by call veneers and similar code, or as temporary
    // registers for intermediate values between subroutine calls. They are
    // corruptible by a function. Veneers are small pieces of code which are
    // automatically inserted by the linker, for example when the branch
    // target is out of range of the branch instruction.
    // (Sect 9.9.1 of ARM Cortex-A Series Programmer's Guide for ARMv8-A, V1.0)

    // Move wide (MOVZ)
    // mov <Wd>, #<imm16> (sf == 0)
    // mov <Xd>, #<imm16> (sf == 1) looks like:
    // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
    // sf  1  0  1  0  0  1  0  1 [hw ] [ imm16 ...
    //
    // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
    //  ...            imm16        ] [     Rd      ]
    //
    // hw is the half word shift.

    // Move keep (MOVK)
    // movk <Wd>, #<imm16> (sf == 0)
    // movk <Xd>, #<imm16> (sf == 1) looks like:
    // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
    // sf  1  1  1  0  0  1  0  1 [hw ] [ imm16 ...
    //
    // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
    //  ...            imm16        ] [     Rd      ]
    //
    // hw is the half word shift.

    // br <Xn> is unconditional and looks like:
    // 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
    //  1  1  0  1  0  1  1  0  0  0  0  1  1  1  1  1
    //
    // 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
    //  0  0  0  0  0  0 [     Rn     ]  0  0  0  0  0

    uint32_t mov__hw0_x16 = 0xd2800000 | 16;
    uint32_t movk_hw0_x16 = mov__hw0_x16 | (1 << 29);

    uint32_t mov__hw3_x16 = mov__hw0_x16 | (3 << 21);
    uint32_t movk_hw2_x16 = movk_hw0_x16 | (2 << 21);
    uint32_t movk_hw1_x16 = movk_hw0_x16 | (1 << 21);

    uint32_t br_x16 = 0xd61f0000 | 16 << 5;

    uint32_t *P = (uint32_t*)s->addr;

    /* target address, split into its four half words */
    uint64_t addr = (uint64_t)s->target;
    uint16_t addr_hw0 = (uint16_t)(addr >>  0);
    uint16_t addr_hw1 = (uint16_t)(addr >> 16);
    uint16_t addr_hw2 = (uint16_t)(addr >> 32);
    uint16_t addr_hw3 = (uint16_t)(addr >> 48);

    /* materialise the absolute target address in x16, then branch to it */
    P[0] = mov__hw3_x16 | ((uint32_t)addr_hw3 << 5);
    P[1] = movk_hw2_x16 | ((uint32_t)addr_hw2 << 5);
    P[2] = movk_hw1_x16 | ((uint32_t)addr_hw1 << 5);
    P[3] = movk_hw0_x16 | ((uint32_t)addr_hw0 << 5);
    P[4] = br_x16;

    /* NB: success is signalled with EXIT_SUCCESS (0), not true, despite
     * the bool return type. */
    return EXIT_SUCCESS;
}
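
/*
 * Illustrative sketch, guarded by the hypothetical ELF_PLT_EXAMPLES macro:
 * emitting a stub into a local buffer to show the resulting instruction
 * sequence. For a target of 0x00007fff12345678 the five words decode to
 *
 *     movz x16, #0x0000, lsl #48
 *     movk x16, #0x7fff, lsl #32
 *     movk x16, #0x1234, lsl #16
 *     movk x16, #0x5678
 *     br   x16
 *
 * i.e. the absolute target address is built up in x16 (IP0), high half word
 * first, and then branched to. The helper name exampleStubAarch64 is made up
 * for this example.
 */
#if defined(ELF_PLT_EXAMPLES)
#include <stdio.h>
static void
exampleStubAarch64(void) {
    uint32_t buf[5];                    /* stands in for the reserved PLT space */
    Stub s = {
        .addr   = buf,
        .target = (void *)0x00007fff12345678ULL,
    };
    makeStubAarch64(&s);
    for (int i = 0; i < 5; i++) {
        printf("0x%08x\n", buf[i]);     /* raw instruction words */
    }
}
#endif /* ELF_PLT_EXAMPLES */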
#endif // OBJFORMAT_ELF
#endif // aarch64_HOST_ARCH