summaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/dl-trampoline.S
blob: c1686dae1061c200d2a24ef883e8c774d2580fb5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
/* PLT trampolines.  x86-64 version.
   Copyright (C) 2004, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <sysdep.h>

	.text
	.globl _dl_runtime_resolve
	.type _dl_runtime_resolve, @function
	.align 16
	/* Lazy-binding trampoline entered from the PLT.

	   On entry the stack holds, from %rsp upwards:
	       +0      first value pushed by the PLT  (passed on in %rdi)
	       +8      second value pushed by the PLT (scaled, passed in %rsi)
	       +16     return address of the original caller

	   The seven registers %rax, %rcx, %rdx, %rsi, %rdi, %r8 and %r9 are
	   saved in a 56-byte frame, _dl_fixup is called, the registers are
	   reloaded, both PLT words are dropped and control is transferred
	   to the address _dl_fixup returned.  %r11 is used as scratch.

	   The 56-byte frame plus the two PLT words and the return address
	   add up to 80 bytes, so %rsp is 16-byte aligned at the call as long
	   as the original caller had an aligned stack.  */
	cfi_startproc
_dl_runtime_resolve:
	cfi_adjust_cfa_offset(16) # The two PLT pushes are already on the stack.
	subq $56,%rsp
	cfi_adjust_cfa_offset(56)
	movq %rax,(%rsp)	# Preserve registers otherwise clobbered.
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	movq 64(%rsp), %rsi	# Copy args pushed by PLT in register.
	leaq (%rsi,%rsi,2), %rsi	# Multiply by 24: first times 3 ...
	shlq $3, %rsi		# ... then times 8.
	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset
	call _dl_fixup		# Call resolver.
	movq %rax, %r11		# Save return value
	movq 48(%rsp), %r9	# Get register content back.
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $72, %rsp		# Adjust stack(PLT did 2 pushes)
	cfi_adjust_cfa_offset(-72)
	jmp *%r11		# Jump to function address.
	cfi_endproc
	.size _dl_runtime_resolve, .-_dl_runtime_resolve


#ifndef PROF
	.globl _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
	/* Profiling/auditing variant of the PLT trampoline.

	   On entry the stack holds, from %rsp upwards, the two values pushed
	   by the PLT followed by the return address of the original caller.

	   1. The argument registers, %rbp and the caller's stack pointer are
	      stored in a block starting at 8(%rsp) and _dl_profile_fixup is
	      called with
	          %rdi  first PLT word (link_map)
	          %rsi  second PLT word times 24 (reloc_offset)
	          %rdx  return address of the original caller
	          %rcx  address of the saved-register block
	          %r8   address of the 8-byte slot at 72(%rsp)
	      It returns the target address in %rax; the slot at 72(%rsp) is
	      read back afterwards as a frame size.
	   2. If that frame size is negative the registers are restored and
	      the target is entered with a plain jump.
	   3. Otherwise that many bytes of the caller's stack arguments are
	      copied to a fresh, 16-byte aligned area, the target is called,
	      its %rax, %rdx, %xmm0, %xmm1, %st(0) and %st(1) results are
	      stored, _dl_call_pltexit is called with pointers to the saved
	      input registers (%rdx) and to the stored results (%rcx), the
	      results are reloaded and control returns to the original caller.

	   Frame sizes are chosen so that %rsp is 16-byte aligned at each
	   call, assuming the original caller followed the ABI: on entry
	   %rsp is 8 mod 16 (return address plus two PLT words), so 88 bytes
	   are allocated here and a further 80 bytes for the result block.  */
	cfi_startproc
_dl_runtime_profile:
	cfi_adjust_cfa_offset(16) # The two PLT pushes are already on the stack.
	subq $88, %rsp
	cfi_adjust_cfa_offset(88)
	movq %rax, (%rsp)	# Preserve registers otherwise clobbered.
	movq %rdx, 8(%rsp)
	movq %r8, 16(%rsp)
	movq %r9, 24(%rsp)
	movq %rcx, 32(%rsp)
	movq %rsi, 40(%rsp)
	movq %rdi, 48(%rsp)
	movq %rbp, 56(%rsp)	# Information for auditors.
	leaq 104(%rsp), %rax	# Address of the return-address slot.
	movq %rax, 64(%rsp)
	leaq 8(%rsp), %rcx	# %rcx: saved-register block (starts at %rdx)
	movq 104(%rsp), %rdx	# Load return address if needed
	movq 96(%rsp), %rsi	# Copy args pushed by PLT in register.
	leaq (%rsi,%rsi,2), %rsi	# Multiply by 24: first times 3 ...
	shlq $3, %rsi		# ... then times 8.
	movq 88(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset
	leaq 72(%rsp), %r8	# %r8: where the frame size is stored
	call _dl_profile_fixup	# Call resolver.
	movq %rax, %r11		# Save return value
	movq 8(%rsp), %rdx	# Get back register content.
	movq 16(%rsp), %r8
	movq 24(%rsp), %r9
	movq (%rsp),%rax
	movq 72(%rsp), %r10	# Frame size written through %r8.
	testq %r10, %r10
	jns 1f			# Non-negative: take the copying path.
	movq 32(%rsp), %rcx
	movq 40(%rsp), %rsi
	movq 48(%rsp), %rdi
	addq $104,%rsp		# Adjust stack
	cfi_adjust_cfa_offset (-104)
	jmp *%r11		# Jump to function address.

	/*
	    +104    return address
	    +96     PLT2
	    +88     PLT1
	    +80     free
	    +72     free (frame size, then saved %rbx)
	    +64     %rsp
	    +56     %rbp
	    +48     %rdi
	    +40     %rsi
	    +32     %rcx
	    +24     %r9
	    +16     %r8
	    +8      %rdx
	   %rsp     %rax
	*/
	cfi_adjust_cfa_offset (104)
1:	movq %rbx, 72(%rsp)	# Frame size is already in %r10.
	cfi_rel_offset (3, 72)	# DWARF register 3 is %rbx.
	leaq 112(%rsp), %rsi	# Source: first word above the return address.
	movq %rsp, %rbx		# %rbx anchors the frame across the call.
	cfi_def_cfa_register (3)
	subq %r10, %rsp
	/* Align before copying so that the copied words start exactly at
	   the %rsp the callee is entered with.  */
	andq $0xfffffffffffffff0, %rsp
	movq %rsp, %rdi
	movq %r10, %rcx
	shrq $3, %rcx		# Byte count to 8-byte word count.
	rep
	movsq
	movq 32(%rbx), %rcx
	movq 40(%rbx), %rsi
	movq 48(%rbx), %rdi
	call *%r11
	movq %rbx, %rsp		# Drop the copied arguments.
	cfi_def_cfa_register (7)
	subq $80, %rsp
	cfi_adjust_cfa_offset (80)
	movq %rsp, %rcx
	movq %rax, (%rcx)
	movq %rdx, 8(%rcx)
	/* Even though the stack is correctly aligned to allow using movaps
	   we use movups.  Some callers might provide an incorrectly aligned
	   stack and we do not want to have it blow up here.  */
	movups %xmm0, 16(%rcx)
	movups %xmm1, 32(%rcx)
	fstpt 48(%rcx)
	fstpt 64(%rcx)
	/*
	    +184    return address
	    +176    PLT2
	    +168    PLT1
	    +160    free
	    +152    saved %rbx
	    +144    %rsp
	    +136    %rbp
	    +128    %rdi
	    +120    %rsi
	    +112    %rcx
	    +104    %r9
	    +96     %r8
	    +88     %rdx
	    +64     %st1 result
	    +48     %st result
	    +32     %xmm1 result
	    +16     %xmm0 result
	    +8      %rdx result
	   %rsp     %rax result
	*/
	leaq 88(%rsp), %rdx	# %rdx: saved input registers
	movq 152(%rsp), %rbx
	cfi_restore (3)
	movq 176(%rsp), %rsi	# Copy args pushed by PLT in register.
	leaq (%rsi,%rsi,2), %rsi	# Multiply by 24: first times 3 ...
	shlq $3, %rsi		# ... then times 8.
	movq 168(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset
	call _dl_call_pltexit
	movq (%rsp), %rax
	movq 8(%rsp), %rdx
	movups 16(%rsp), %xmm0
	movups 32(%rsp), %xmm1
	fldt 64(%rsp)		# Reload in reverse order of the stores.
	fldt 48(%rsp)
	addq $184, %rsp
	cfi_adjust_cfa_offset (-184)
	retq
	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
#endif