Diffstat (limited to 'libffi/src/aarch64/sysv.S')
-rw-r--r-- | libffi/src/aarch64/sysv.S | 620 |
1 file changed, 358 insertions, 262 deletions
diff --git a/libffi/src/aarch64/sysv.S b/libffi/src/aarch64/sysv.S
index ffb16f84cb1..5c9cdda18c5 100644
--- a/libffi/src/aarch64/sysv.S
+++ b/libffi/src/aarch64/sysv.S
@@ -22,286 +22,382 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #define LIBFFI_ASM
 #include <fficonfig.h>
 #include <ffi.h>
-
-#define cfi_adjust_cfa_offset(off)      .cfi_adjust_cfa_offset off
-#define cfi_rel_offset(reg, off)        .cfi_rel_offset reg, off
-#define cfi_restore(reg)                .cfi_restore reg
-#define cfi_def_cfa_register(reg)       .cfi_def_cfa_register reg
-
-        .text
-        .globl ffi_call_SYSV
-        .type ffi_call_SYSV, #function
-
-/* ffi_call_SYSV()
-
-   Create a stack frame, setup an argument context, call the callee
-   and extract the result.
-
-   The maximum required argument stack size is provided,
-   ffi_call_SYSV() allocates that stack space then calls the
-   prepare_fn to populate register context and stack.  The
-   argument passing registers are loaded from the register
-   context and the callee called, on return the register passing
-   register are saved back to the context.  Our caller will
-   extract the return value from the final state of the saved
-   register context.
-
-   Prototype:
-
-   extern unsigned
-   ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *,
-                           extended_cif *),
-                  struct call_context *context,
-                  extended_cif *,
-                  unsigned required_stack_size,
-                  void (*fn)(void));
+#include <ffi_cfi.h>
+#include "internal.h"
+
+#ifdef HAVE_MACHINE_ASM_H
+#include <machine/asm.h>
+#else
+#ifdef __USER_LABEL_PREFIX__
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels.  */
+#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+#else
+#define CNAME(x) x
+#endif
+#endif
+
+#ifdef __AARCH64EB__
+# define BE(X)  X
+#else
+# define BE(X)  0
+#endif
+
+        .text
+        .align 4
+
+/* ffi_call_SYSV
+   extern void ffi_call_SYSV (void *stack, void *frame,
+                              void (*fn)(void), void *rvalue,
+                              int flags, void *closure);
 
    Therefore on entry we have:
 
-   x0 prepare_fn
-   x1 &context
-   x2 &ecif
-   x3 bytes
-   x4 fn
-
-   This function uses the following stack frame layout:
-
-   ==
-                saved x30(lr)
-   x29(fp)->    saved x29(fp)
-                saved x24
-                saved x23
-                saved x22
-   sp'    ->    saved x21
-                ...
-   sp     ->    (constructed callee stack arguments)
-   ==
-
-   Voila! */
-
-#define ffi_call_SYSV_FS (8 * 4)
-
-        .cfi_startproc
-ffi_call_SYSV:
-        stp     x29, x30, [sp, #-16]!
-        cfi_adjust_cfa_offset (16)
-        cfi_rel_offset (x29, 0)
-        cfi_rel_offset (x30, 8)
-
-        mov     x29, sp
-        cfi_def_cfa_register (x29)
-        sub     sp, sp, #ffi_call_SYSV_FS
-
-        stp     x21, x22, [sp, 0]
-        cfi_rel_offset (x21, 0 - ffi_call_SYSV_FS)
-        cfi_rel_offset (x22, 8 - ffi_call_SYSV_FS)
-
-        stp     x23, x24, [sp, 16]
-        cfi_rel_offset (x23, 16 - ffi_call_SYSV_FS)
-        cfi_rel_offset (x24, 24 - ffi_call_SYSV_FS)
-
-        mov     x21, x1
-        mov     x22, x2
-        mov     x24, x4
-
-        /* Allocate the stack space for the actual arguments, many
-           arguments will be passed in registers, but we assume
-           worst case and allocate sufficient stack for ALL of
-           the arguments.  */
-        sub     sp, sp, x3
-
-        /* unsigned (*prepare_fn) (struct call_context *context,
-                                   unsigned char *stack, extended_cif *ecif);
-         */
-        mov     x23, x0
-        mov     x0, x1
-        mov     x1, sp
-        /* x2 already in place */
-        blr     x23
-
-        /* Preserve the flags returned.  */
-        mov     x23, x0
-
-        /* Figure out if we should touch the vector registers.  */
-        tbz     x23, #AARCH64_FFI_WITH_V_BIT, 1f
-
-        /* Load the vector argument passing registers.  */
-        ldp     q0, q1, [x21, #8*32 + 0]
-        ldp     q2, q3, [x21, #8*32 + 32]
-        ldp     q4, q5, [x21, #8*32 + 64]
-        ldp     q6, q7, [x21, #8*32 + 96]
+   x0 stack
+   x1 frame
+   x2 fn
+   x3 rvalue
+   x4 flags
+   x5 closure
+*/
+
+        cfi_startproc
+CNAME(ffi_call_SYSV):
+        /* Use a stack frame allocated by our caller.  */
+        cfi_def_cfa(x1, 32);
+        stp     x29, x30, [x1]
+        mov     x29, x1
+        mov     sp, x0
+        cfi_def_cfa_register(x29)
+        cfi_rel_offset (x29, 0)
+        cfi_rel_offset (x30, 8)
+
+        mov     x9, x2                  /* save fn  */
+        mov     x8, x3                  /* install structure return  */
+#ifdef FFI_GO_CLOSURES
+        mov     x18, x5                 /* install static chain  */
+#endif
+        stp     x3, x4, [x29, #16]      /* save rvalue and flags  */
+
+        /* Load the vector argument passing registers, if necessary.  */
+        tbz     w4, #AARCH64_FLAG_ARG_V_BIT, 1f
+        ldp     q0, q1, [sp, #0]
+        ldp     q2, q3, [sp, #32]
+        ldp     q4, q5, [sp, #64]
+        ldp     q6, q7, [sp, #96]
 1:
-        /* Load the core argument passing registers.  */
-        ldp     x0, x1, [x21, #0]
-        ldp     x2, x3, [x21, #16]
-        ldp     x4, x5, [x21, #32]
-        ldp     x6, x7, [x21, #48]
-
-        /* Don't forget x8 which may be holding the address of a return buffer.
-         */
-        ldr     x8, [x21, #8*8]
-
-        blr     x24
-
-        /* Save the core argument passing registers.  */
-        stp     x0, x1, [x21, #0]
-        stp     x2, x3, [x21, #16]
-        stp     x4, x5, [x21, #32]
-        stp     x6, x7, [x21, #48]
-
-        /* Note nothing useful ever comes back in x8!  */
-
-        /* Figure out if we should touch the vector registers.  */
-        tbz     x23, #AARCH64_FFI_WITH_V_BIT, 1f
-
-        /* Save the vector argument passing registers.  */
-        stp     q0, q1, [x21, #8*32 + 0]
-        stp     q2, q3, [x21, #8*32 + 32]
-        stp     q4, q5, [x21, #8*32 + 64]
-        stp     q6, q7, [x21, #8*32 + 96]
-1:
-        /* All done, unwind our stack frame.  */
-        ldp     x21, x22, [x29, # - ffi_call_SYSV_FS]
-        cfi_restore (x21)
-        cfi_restore (x22)
-
-        ldp     x23, x24, [x29, # - ffi_call_SYSV_FS + 16]
-        cfi_restore (x23)
-        cfi_restore (x24)
-
-        mov     sp, x29
-        cfi_def_cfa_register (sp)
+        /* Load the core argument passing registers, including
+           the structure return pointer.  */
+        ldp     x0, x1, [sp, #16*N_V_ARG_REG + 0]
+        ldp     x2, x3, [sp, #16*N_V_ARG_REG + 16]
+        ldp     x4, x5, [sp, #16*N_V_ARG_REG + 32]
+        ldp     x6, x7, [sp, #16*N_V_ARG_REG + 48]
 
-        ldp     x29, x30, [sp], #16
-        cfi_adjust_cfa_offset (-16)
-        cfi_restore (x29)
-        cfi_restore (x30)
+        /* Deallocate the context, leaving the stacked arguments.  */
+        add     sp, sp, #CALL_CONTEXT_SIZE
 
-        ret
+        blr     x9                      /* call fn  */
 
-        .cfi_endproc
-        .size ffi_call_SYSV, .-ffi_call_SYSV
+        ldp     x3, x4, [x29, #16]      /* reload rvalue and flags  */
 
-#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
+        /* Partially deconstruct the stack frame.  */
+        mov     sp, x29
+        cfi_def_cfa_register (sp)
+        ldp     x29, x30, [x29]
+
+        /* Save the return value as directed.  */
+        adr     x5, 0f
+        and     w4, w4, #AARCH64_RET_MASK
+        add     x5, x5, x4, lsl #3
+        br      x5
+
+        /* Note that each table entry is 2 insns, and thus 8 bytes.
+           For integer data, note that we're storing into ffi_arg
+           and therefore we want to extend to 64 bits; these types
+           have two consecutive entries allocated for them.  */
+        .align  4
+0:      ret                             /* VOID */
+        nop
+1:      str     x0, [x3]                /* INT64 */
+        ret
+2:      stp     x0, x1, [x3]            /* INT128 */
+        ret
+3:      brk     #1000                   /* UNUSED */
+        ret
+4:      brk     #1000                   /* UNUSED */
+        ret
+5:      brk     #1000                   /* UNUSED */
+        ret
+6:      brk     #1000                   /* UNUSED */
+        ret
+7:      brk     #1000                   /* UNUSED */
+        ret
+8:      st4     { v0.s-v3.s }[0], [x3]  /* S4 */
+        ret
+9:      st3     { v0.s-v2.s }[0], [x3]  /* S3 */
+        ret
+10:     stp     s0, s1, [x3]            /* S2 */
+        ret
+11:     str     s0, [x3]                /* S1 */
+        ret
+12:     st4     { v0.d-v3.d }[0], [x3]  /* D4 */
+        ret
+13:     st3     { v0.d-v2.d }[0], [x3]  /* D3 */
+        ret
+14:     stp     d0, d1, [x3]            /* D2 */
+        ret
+15:     str     d0, [x3]                /* D1 */
+        ret
+16:     str     q3, [x3, #48]           /* Q4 */
+        nop
+17:     str     q2, [x3, #32]           /* Q3 */
+        nop
+18:     stp     q0, q1, [x3]            /* Q2 */
+        ret
+19:     str     q0, [x3]                /* Q1 */
+        ret
+20:     uxtb    w0, w0                  /* UINT8 */
+        str     x0, [x3]
+21:     ret                             /* reserved */
+        nop
+22:     uxth    w0, w0                  /* UINT16 */
+        str     x0, [x3]
+23:     ret                             /* reserved */
+        nop
+24:     mov     w0, w0                  /* UINT32 */
+        str     x0, [x3]
+25:     ret                             /* reserved */
+        nop
+26:     sxtb    x0, w0                  /* SINT8 */
+        str     x0, [x3]
+27:     ret                             /* reserved */
+        nop
+28:     sxth    x0, w0                  /* SINT16 */
+        str     x0, [x3]
+29:     ret                             /* reserved */
+        nop
+30:     sxtw    x0, w0                  /* SINT32 */
+        str     x0, [x3]
+31:     ret                             /* reserved */
+        nop
+
+        cfi_endproc
+
+        .globl  CNAME(ffi_call_SYSV)
+#ifdef __ELF__
+        .type   CNAME(ffi_call_SYSV), #function
+        .hidden CNAME(ffi_call_SYSV)
+        .size   CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
+#endif
 
 /* ffi_closure_SYSV
 
    Closure invocation glue. This is the low level code invoked directly by
    the closure trampoline to setup and call a closure.
 
-   On entry x17 points to a struct trampoline_data, x16 has been clobbered
+   On entry x17 points to a struct ffi_closure, x16 has been clobbered
    all other registers are preserved.
 
    We allocate a call context and save the argument passing registers,
    then invoked the generic C ffi_closure_SYSV_inner() function to do all
    the real work, on return we load the result passing registers back from
    the call context.
+*/
+
+#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)
+
+        .align  4
+CNAME(ffi_closure_SYSV_V):
+        cfi_startproc
+        stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+        cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+        cfi_rel_offset (x29, 0)
+        cfi_rel_offset (x30, 8)
+
+        /* Save the argument passing vector registers.  */
+        stp     q0, q1, [sp, #16 + 0]
+        stp     q2, q3, [sp, #16 + 32]
+        stp     q4, q5, [sp, #16 + 64]
+        stp     q6, q7, [sp, #16 + 96]
+        b       0f
+        cfi_endproc
+
+        .globl  CNAME(ffi_closure_SYSV_V)
+#ifdef __ELF__
+        .type   CNAME(ffi_closure_SYSV_V), #function
+        .hidden CNAME(ffi_closure_SYSV_V)
+        .size   CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V)
+#endif
+
+        .align  4
+        cfi_startproc
+CNAME(ffi_closure_SYSV):
+        stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+        cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+        cfi_rel_offset (x29, 0)
+        cfi_rel_offset (x30, 8)
+0:
+        mov     x29, sp
+
+        /* Save the argument passing core registers.  */
+        stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+        stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+        stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+        stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+
+        /* Load ffi_closure_inner arguments.  */
+        ldp     x0, x1, [x17, #FFI_TRAMPOLINE_SIZE]     /* load cif, fn */
+        ldr     x2, [x17, #FFI_TRAMPOLINE_SIZE+16]      /* load user_data */
+.Ldo_closure:
+        add     x3, sp, #16                             /* load context */
+        add     x4, sp, #ffi_closure_SYSV_FS            /* load stack */
+        add     x5, sp, #16+CALL_CONTEXT_SIZE           /* load rvalue */
+        mov     x6, x8                                  /* load struct_rval */
+        bl      CNAME(ffi_closure_SYSV_inner)
+
+        /* Load the return value as directed.  */
+        adr     x1, 0f
+        and     w0, w0, #AARCH64_RET_MASK
+        add     x1, x1, x0, lsl #3
+        add     x3, sp, #16+CALL_CONTEXT_SIZE
+        br      x1
+
+        /* Note that each table entry is 2 insns, and thus 8 bytes.  */
+        .align  4
+0:      b       99f                     /* VOID */
+        nop
+1:      ldr     x0, [x3]                /* INT64 */
+        b       99f
+2:      ldp     x0, x1, [x3]            /* INT128 */
+        b       99f
+3:      brk     #1000                   /* UNUSED */
+        nop
+4:      brk     #1000                   /* UNUSED */
+        nop
+5:      brk     #1000                   /* UNUSED */
+        nop
+6:      brk     #1000                   /* UNUSED */
+        nop
+7:      brk     #1000                   /* UNUSED */
+        nop
+8:      ldr     s3, [x3, #12]           /* S4 */
+        nop
+9:      ldr     s2, [x2, #8]            /* S3 */
+        nop
+10:     ldp     s0, s1, [x3]            /* S2 */
+        b       99f
+11:     ldr     s0, [x3]                /* S1 */
+        b       99f
+12:     ldr     d3, [x3, #24]           /* D4 */
+        nop
+13:     ldr     d2, [x3, #16]           /* D3 */
+        nop
+14:     ldp     d0, d1, [x3]            /* D2 */
+        b       99f
+15:     ldr     d0, [x3]                /* D1 */
+        b       99f
+16:     ldr     q3, [x3, #48]           /* Q4 */
+        nop
+17:     ldr     q2, [x3, #32]           /* Q3 */
+        nop
+18:     ldp     q0, q1, [x3]            /* Q2 */
+        b       99f
+19:     ldr     q0, [x3]                /* Q1 */
+        b       99f
+20:     ldrb    w0, [x3, #BE(7)]        /* UINT8 */
+        b       99f
+21:     brk     #1000                   /* reserved */
+        nop
+22:     ldrh    w0, [x3, #BE(6)]        /* UINT16 */
+        b       99f
+23:     brk     #1000                   /* reserved */
+        nop
+24:     ldr     w0, [x3, #BE(4)]        /* UINT32 */
+        b       99f
+25:     brk     #1000                   /* reserved */
+        nop
+26:     ldrsb   x0, [x3, #BE(7)]        /* SINT8 */
+        b       99f
+27:     brk     #1000                   /* reserved */
+        nop
+28:     ldrsh   x0, [x3, #BE(6)]        /* SINT16 */
+        b       99f
+29:     brk     #1000                   /* reserved */
+        nop
+30:     ldrsw   x0, [x3, #BE(4)]        /* SINT32 */
+        nop
+31:                                     /* reserved */
+99:     ldp     x29, x30, [sp], #ffi_closure_SYSV_FS
+        cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
+        cfi_restore (x29)
+        cfi_restore (x30)
+        ret
+        cfi_endproc
+
+        .globl  CNAME(ffi_closure_SYSV)
+#ifdef __ELF__
+        .type   CNAME(ffi_closure_SYSV), #function
+        .hidden CNAME(ffi_closure_SYSV)
+        .size   CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV)
+#endif
+
+#ifdef FFI_GO_CLOSURES
+        .align  4
+CNAME(ffi_go_closure_SYSV_V):
+        cfi_startproc
+        stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+        cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+        cfi_rel_offset (x29, 0)
+        cfi_rel_offset (x30, 8)
+
+        /* Save the argument passing vector registers.  */
+        stp     q0, q1, [sp, #16 + 0]
+        stp     q2, q3, [sp, #16 + 32]
+        stp     q4, q5, [sp, #16 + 64]
+        stp     q6, q7, [sp, #16 + 96]
+        b       0f
+        cfi_endproc
+
+        .globl  CNAME(ffi_go_closure_SYSV_V)
+#ifdef __ELF__
+        .type   CNAME(ffi_go_closure_SYSV_V), #function
+        .hidden CNAME(ffi_go_closure_SYSV_V)
+        .size   CNAME(ffi_go_closure_SYSV_V), . - CNAME(ffi_go_closure_SYSV_V)
+#endif
+
+        .align  4
+        cfi_startproc
+CNAME(ffi_go_closure_SYSV):
+        stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+        cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+        cfi_rel_offset (x29, 0)
+        cfi_rel_offset (x30, 8)
+0:
+        mov     x29, sp
+
+        /* Save the argument passing core registers.  */
+        stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+        stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+        stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+        stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+
+        /* Load ffi_closure_inner arguments.  */
+        ldp     x0, x1, [x18, #8]       /* load cif, fn */
+        mov     x2, x18                 /* load user_data */
+        b       .Ldo_closure
+        cfi_endproc
+
+        .globl  CNAME(ffi_go_closure_SYSV)
+#ifdef __ELF__
+        .type   CNAME(ffi_go_closure_SYSV), #function
+        .hidden CNAME(ffi_go_closure_SYSV)
+        .size   CNAME(ffi_go_closure_SYSV), . - CNAME(ffi_go_closure_SYSV)
+#endif
+#endif /* FFI_GO_CLOSURES */
+
+#if defined __ELF__ && defined __linux__
+        .section .note.GNU-stack,"",%progbits
+#endif
 
-   On entry
-
-   extern void
-   ffi_closure_SYSV (struct trampoline_data *);
-
-   struct trampoline_data
-   {
-     UINT64 *ffi_closure;
-     UINT64 flags;
-   };
-
-   This function uses the following stack frame layout:
-
-   ==
-                saved x30(lr)
-   x29(fp)->    saved x29(fp)
-                saved x22
-                saved x21
-                ...
-   sp     ->    call_context
-   ==
-
-   Voila! */
-
-        .text
-        .globl ffi_closure_SYSV
-        .cfi_startproc
-ffi_closure_SYSV:
-        stp     x29, x30, [sp, #-16]!
-        cfi_adjust_cfa_offset (16)
-        cfi_rel_offset (x29, 0)
-        cfi_rel_offset (x30, 8)
-
-        mov     x29, sp
-        cfi_def_cfa_register (x29)
-
-        sub     sp, sp, #ffi_closure_SYSV_FS
-
-        stp     x21, x22, [x29, #-16]
-        cfi_rel_offset (x21, -16)
-        cfi_rel_offset (x22, -8)
-
-        /* Load x21 with &call_context.  */
-        mov     x21, sp
-        /* Preserve our struct trampoline_data *  */
-        mov     x22, x17
-
-        /* Save the rest of the argument passing registers.  */
-        stp     x0, x1, [x21, #0]
-        stp     x2, x3, [x21, #16]
-        stp     x4, x5, [x21, #32]
-        stp     x6, x7, [x21, #48]
-        /* Don't forget we may have been given a result scratch pad address.
-         */
-        str     x8, [x21, #64]
-
-        /* Figure out if we should touch the vector registers.  */
-        ldr     x0, [x22, #8]
-        tbz     x0, #AARCH64_FFI_WITH_V_BIT, 1f
-
-        /* Save the argument passing vector registers.  */
-        stp     q0, q1, [x21, #8*32 + 0]
-        stp     q2, q3, [x21, #8*32 + 32]
-        stp     q4, q5, [x21, #8*32 + 64]
-        stp     q6, q7, [x21, #8*32 + 96]
-1:
-        /* Load &ffi_closure..  */
-        ldr     x0, [x22, #0]
-        mov     x1, x21
-        /* Compute the location of the stack at the point that the
-           trampoline was called.  */
-        add     x2, x29, #16
-
-        bl      ffi_closure_SYSV_inner
-
-        /* Figure out if we should touch the vector registers.  */
-        ldr     x0, [x22, #8]
-        tbz     x0, #AARCH64_FFI_WITH_V_BIT, 1f
-
-        /* Load the result passing vector registers.  */
-        ldp     q0, q1, [x21, #8*32 + 0]
-        ldp     q2, q3, [x21, #8*32 + 32]
-        ldp     q4, q5, [x21, #8*32 + 64]
-        ldp     q6, q7, [x21, #8*32 + 96]
-1:
-        /* Load the result passing core registers.  */
-        ldp     x0, x1, [x21, #0]
-        ldp     x2, x3, [x21, #16]
-        ldp     x4, x5, [x21, #32]
-        ldp     x6, x7, [x21, #48]
-        /* Note nothing usefull is returned in x8.  */
-
-        /* We are done, unwind our frame.  */
-        ldp     x21, x22, [x29, #-16]
-        cfi_restore (x21)
-        cfi_restore (x22)
-
-        mov     sp, x29
-        cfi_def_cfa_register (sp)
-
-        ldp     x29, x30, [sp], #16
-        cfi_adjust_cfa_offset (-16)
-        cfi_restore (x29)
-        cfi_restore (x30)
-
-        ret
-        .cfi_endproc
-        .size ffi_closure_SYSV, .-ffi_closure_SYSV
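Notes

The rewritten ffi_call_SYSV above drops the old prepare_fn callback protocol: the C code in ffi.c now lays out the whole register context and the outgoing stack arguments before entering the assembly, and passes everything in six fixed registers. A minimal C-side sketch of that contract, assuming the context layout implied by the 16*N_V_ARG_REG offsets above (names are illustrative; the real definitions live in src/aarch64/internal.h and ffi.c):

    #include <stdint.h>

    #define N_X_ARG_REG 8              /* x0-x7 */
    #define N_V_ARG_REG 8              /* q0-q7 */

    /* The block at *stack: q0-q7 first (16 bytes each), then x0-x7,
       then the stacked arguments; the asm pops CALL_CONTEXT_SIZE
       (16*N_V_ARG_REG + 8*N_X_ARG_REG bytes) before the call.  */
    struct call_context_sketch {
      uint8_t  v[N_V_ARG_REG * 16];    /* read only when the ARG_V flag bit is set */
      uint64_t x[N_X_ARG_REG];
    };

    /* stack: context plus outgoing args; frame: 32 bytes holding the
       x29/x30 pair and the saved rvalue/flags; flags carries the
       return-type code in its low bits; closure becomes the static
       chain (x18).  */
    extern void ffi_call_SYSV (void *stack, void *frame, void (*fn) (void),
                               void *rvalue, int flags, void *closure);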
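The two numbered tables replace a compare-and-branch chain over return types: the low bits of the flags word, masked with AARCH64_RET_MASK, index the table; each row is exactly two instructions (8 bytes), so the adr / add ..., lsl #3 / br sequence computes the row address directly. A C analogue of the integer rows of the store table, as a sketch (the RET_* values mirror the row numbers above; the real AARCH64_RET_* constants are in internal.h):

    #include <stdint.h>
    #include <string.h>

    #define RET_MASK 31                /* stand-in for AARCH64_RET_MASK */
    enum { RET_VOID = 0, RET_INT64 = 1, RET_UINT8 = 20, RET_SINT8 = 26 };

    /* Widen x0 as the type code directs, then store a full 64-bit
       ffi_arg slot, which is the effect of branching into rows 0-31.  */
    static void store_int_result (int flags, uint64_t x0, void *rvalue)
    {
      uint64_t v;
      switch (flags & RET_MASK)
        {
        case RET_VOID:  return;
        case RET_INT64: v = x0; break;
        case RET_UINT8: v = (uint8_t) x0; break;                      /* uxtb */
        case RET_SINT8: v = (uint64_t) (int64_t) (int8_t) x0; break;  /* sxtb */
        default:        return;        /* float/vector rows omitted */
        }
      memcpy (rvalue, &v, sizeof v);
    }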
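Both closure entry points funnel into .Ldo_closure and differ only in where the closure data lives: ffi_closure_SYSV finds cif, fun and user_data just past the trampoline bytes addressed by x17, while ffi_go_closure_SYSV reads cif and fun through the static chain x18 and passes x18 itself as user_data. A sketch of the layouts those loads assume (field order taken from the loads above; the FFI_TRAMPOLINE_SIZE value of 24 is the usual AArch64 one and is an assumption here):

    #define FFI_TRAMPOLINE_SIZE 24     /* assumed; defined in ffitarget.h */

    typedef struct ffi_cif ffi_cif;    /* opaque for this sketch */

    struct closure_sketch {            /* addressed by x17 */
      char tramp[FFI_TRAMPOLINE_SIZE]; /* trampoline code that jumped here */
      ffi_cif *cif;                    /* x0 <- [x17, #FFI_TRAMPOLINE_SIZE] */
      void (*fun) (ffi_cif *, void *, void **, void *);  /* x1, same ldp */
      void *user_data;                 /* x2 <- [x17, #FFI_TRAMPOLINE_SIZE+16] */
    };

    struct go_closure_sketch {         /* addressed by x18 */
      void *tramp;
      ffi_cif *cif;                    /* x0, x1 <- ldp [x18, #8] */
      void (*fun) (ffi_cif *, void *, void **, void *);
      /* no user_data field: mov x2, x18 passes the closure itself */
    };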