summaryrefslogtreecommitdiff
path: root/rts/adjustor/NativeAmd64MingwAsm.S
blob: ae80f9b86b9a0b21caa2502fbb9183fc480a3935 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#include "include/ghcconfig.h"

/* Byte offsets of the two fields of the AdjustorContext this code reads:
   the StablePtr to the Haskell closure (HPTR_OFF) and the address of the
   wrapper entrypoint to tail-jump to (WPTR_OFF). Must match the C-side
   AdjustorContext layout -- TODO confirm against the adjustor C sources. */
#define HPTR_OFF    0
#define WPTR_OFF    8

/* Some platforms mangle C symbol names with a leading underscore;
   CSYM(x) produces the correctly-mangled assembly name for this build. */
#if defined(LEADING_UNDERSCORE)
#define CSYM(x) _ ## x
#else
#define CSYM(x) x
#endif

/* Define a C-visible (exported) label in one step. */
#define DECLARE_CSYM(x) \
    .globl CSYM(x) ; \
    CSYM(x):

/* ------------------------------------------------------------------
 * Adjustor for a call with less than 4 integer arguments.
 *
 * Win64 passes the first four arguments in slot-positional registers:
 * rcx, rdx, r8, r9 for integers and xmm0-xmm3 for floats. With fewer
 * than 4 caller arguments we can shift every argument up one slot,
 * freeing slot 1 (rcx) for the StablePtr, with no stack traffic at all.
 * Both register files must be shifted because Win64 slots are
 * positional across the integer and FP files.
 * ------------------------------------------------------------------ */
DECLARE_CSYM(simple_ccall_adjustor)
    // Shuffle the argument registers down
    mov %r8, %r9                             // int arg 3 -> slot 4
    mov %rdx, %r8                            // int arg 2 -> slot 3
    mov %rcx, %rdx                           // int arg 1 -> slot 2
    movsd %xmm2, %xmm3                       // fp arg 3 -> slot 4
    movsd %xmm1, %xmm2                       // fp arg 2 -> slot 3
    movsd %xmm0, %xmm1                       // fp arg 1 -> slot 2
    mov lcl_simple_ccall_adjustor_context(%rip), %rax  // load the address of the context
    mov HPTR_OFF(%rax), %rcx                 // load the StablePtr
    jmp *WPTR_OFF(%rax)                      // jump to the entrypoint
    // NB: tail jump, never call/ret -- see the comment above
    // complex_ccall_ret_code for why we must not return through here.

.align 8
DECLARE_CSYM(simple_ccall_adjustor_context)
// See Note [Adjustors: Local symbol references] in NativeAmd64Asm.S
lcl_simple_ccall_adjustor_context:
    // this will be overwritten with a pointer to the AdjustorContext
    .quad 0
// End-of-template marker: presumably the C side copies
// [simple_ccall_adjustor, simple_ccall_adjustor_end) to make each new
// adjustor -- TODO confirm against the adjustor C sources.
DECLARE_CSYM(simple_ccall_adjustor_end)

/* ------------------------------------------------------------------
 * Adjustor for a call with 4 or more integer arguments and where the fourth
 * argument is not floating-point.
 *
 * The argument in slot 4 (r9 here) cannot stay in a register after the
 * shuffle: it becomes the callee's 5th argument, which Win64 passes on
 * the stack. Stack layout (E = rsp at entry; the caller's return address
 * is at E, its 32-byte shadow space at E+0x08..E+0x27):
 *   after "sub $8"            : rsp = E-0x08
 *   r9 stored at 0x20(%rsp)   : = E+0x18, which the pushq below turns into
 *                               the callee's 5th-argument slot
 *   after "pushq"             : rsp = E-0x10; callee return address on top
 *   old return address copied from 0x10(%rsp) (= E) to 0x30(%rsp)
 *                             : = E+0x20, the last caller shadow slot,
 *                               above the callee's own shadow space so it
 *                               survives the call; complex_ccall_ret_code
 *                               fetches it from there on the way out.
 * ------------------------------------------------------------------ */
DECLARE_CSYM(complex_nofloat_ccall_adjustor)
    sub $8,%rsp
    // Handle the fourth argument; this is the only difference between the
    // float/non-float cases
    mov %r9, 0x20(%rsp)
    // Push the new return address onto the stack
    pushq complex_nofloat_ccall_ret_code_ptr(%rip)
    // But the old return address has been moved up into a spill slot, so we
    // need to move it above them
    mov 0x10(%rsp), %r9
    mov %r9, 0x30(%rsp)
    // Now do the normal argument shuffle
    mov %r8, %r9                             // int arg 3 -> slot 4
    mov %rdx, %r8                            // int arg 2 -> slot 3
    mov %rcx, %rdx                           // int arg 1 -> slot 2; rcx freed for StablePtr
    movsd %xmm2, %xmm3
    movsd %xmm1, %xmm2
    movsd %xmm0, %xmm1
    // Load the address of the context
    mov lcl_complex_nofloat_ccall_adjustor_context(%rip), %rax
    mov HPTR_OFF(%rax), %rcx                 // StablePtr becomes argument 1
    jmpq *WPTR_OFF(%rax)                     // tail-jump to the entrypoint

.align 8
// Indirection cell so the pushq above can be RIP-relative within the
// copied thunk while still targeting the shared return-fixup code.
complex_nofloat_ccall_ret_code_ptr:
    .quad complex_ccall_ret_code
DECLARE_CSYM(complex_nofloat_ccall_adjustor_context)
// See Note [Adjustors: Local symbol references] in NativeAmd64Asm.S
lcl_complex_nofloat_ccall_adjustor_context:
    // this will be overwritten with a pointer to the AdjustorContext
    .quad 0x0
DECLARE_CSYM(complex_nofloat_ccall_adjustor_end)

/* ------------------------------------------------------------------
 * Adjustor for a call with 4 or more integer arguments and where the fourth
 * argument is floating point.
 *
 * Identical to complex_nofloat_ccall_adjustor except that slot 4 lives in
 * xmm3 rather than r9, so it is spilled to the callee's 5th-argument stack
 * slot with movsd. See the stack-layout discussion above
 * complex_nofloat_ccall_adjustor; the offsets here are the same.
 * ------------------------------------------------------------------ */
DECLARE_CSYM(complex_float_ccall_adjustor)
    sub $8,%rsp
    // Handle the fourth argument; this is the only difference between the
    // float/non-float cases
    movsd %xmm3,0x20(%rsp)
    // Push the new return address onto the stack
    pushq complex_float_ccall_ret_code_ptr(%rip)
    // But the old return address has been moved up into a spill slot, so we
    // need to move it above them
    mov 0x10(%rsp),%r9
    mov %r9,0x30(%rsp)
    // Now do the normal argument shuffle
    mov %r8, %r9                             // int arg 3 -> slot 4
    mov %rdx, %r8                            // int arg 2 -> slot 3
    mov %rcx, %rdx                           // int arg 1 -> slot 2; rcx freed for StablePtr
    movsd %xmm2, %xmm3
    movsd %xmm1, %xmm2
    movsd %xmm0, %xmm1
    // Load the address of the context.
    // NB: this must go through the *local* alias of the context symbol,
    // as the other adjustors do -- a reference to the global symbol may be
    // indirected by the linker, breaking this RIP-relative load inside the
    // copied thunk. See Note [Adjustors: Local symbol references] in
    // NativeAmd64Asm.S.
    mov lcl_complex_float_ccall_adjustor_context(%rip), %rax
    mov HPTR_OFF(%rax), %rcx                 // StablePtr becomes argument 1
    jmpq *WPTR_OFF(%rax)                     // tail-jump to the entrypoint

.align 8
// Indirection cell so the pushq above can be RIP-relative within the
// copied thunk while still targeting the shared return-fixup code.
complex_float_ccall_ret_code_ptr:
    .quad complex_ccall_ret_code
DECLARE_CSYM(complex_float_ccall_adjustor_context)
// See Note [Adjustors: Local symbol references] in NativeAmd64Asm.S
lcl_complex_float_ccall_adjustor_context:
    // this will be overwritten with a pointer to the AdjustorContext
    .quad 0x0
DECLARE_CSYM(complex_float_ccall_adjustor_end)


/*
  When generating an adjustor thunk that uses the C calling
  convention, we have to make sure that the thunk kicks off
  the process of jumping into Haskell with a tail jump. Why?
  Because as a result of jumping into Haskell we may end
  up freeing the very adjustor thunk we came from using
  freeHaskellFunctionPtr(). Hence, we better not return to
  the adjustor code on our way out, since it could by then
  point to junk.

  The fix is readily at hand, just include the opcodes
  for the C stack fixup code that we need to perform when
  returning in some static piece of memory and arrange
  to return to it before tail jumping from the adjustor thunk.
*/
// Shared return path for both complex adjustors. On entry rsp points at
// the 8-byte padding slot the adjustor allocated with "sub $8,%rsp"
// (the pushed return-address slot was already popped by the ret that
// brought us here).
complex_ccall_ret_code:
    addq $0x8, %rsp
    /* On Win64, we had to put the original return address after the
       arg 1-4 spill slots, so now we have to move it back */
    // rsp is now back at its original-entry value; 0x20(%rsp) is the
    // shadow-space slot where the adjustor stashed the caller's return
    // address (safe there because it sits above the callee's own shadow
    // space). Restore it to the top of stack and return to the caller.
    movq 0x20(%rsp), %rcx
    movq %rcx, (%rsp)
    ret

/* mark stack as nonexecutable */
// Only takes effect on Linux/ELF builds; on the Windows (PE/COFF) targets
// this file is named for, the guard makes it a no-op.
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",@progbits
#endif