diff options
-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | configure.host | 12 | ||||
-rw-r--r-- | src/x86/darwin.S | 444 | ||||
-rw-r--r-- | src/x86/darwin64.S | 416 | ||||
-rw-r--r-- | src/x86/sysv.S | 83 | ||||
-rw-r--r-- | src/x86/unix64.S | 67 |
6 files changed, 99 insertions, 924 deletions
diff --git a/Makefile.am b/Makefile.am index dfdcea6..6fb3d47 100644 --- a/Makefile.am +++ b/Makefile.am @@ -144,7 +144,6 @@ EXTRA_libffi_la_SOURCES = \ src/x86/ffi.c src/x86/sysv.S \ src/x86/ffiw64.c src/x86/win64.S \ src/x86/ffi64.c src/x86/unix64.S \ - src/x86/darwin64.S src/x86/darwin.S \ src/xtensa/ffi.c src/xtensa/sysv.S TARGET_OBJ = @TARGET_OBJ@ diff --git a/configure.host b/configure.host index bc3e838..5ee632c 100644 --- a/configure.host +++ b/configure.host @@ -84,7 +84,12 @@ case "${host}" in ;; i?86-*-darwin* | x86_64-*-darwin*) - TARGET=X86_DARWIN; TARGETDIR=x86 + TARGETDIR=x86 + if test $ac_cv_sizeof_size_t = 4; then + TARGET=X86_DARWIN + else + TARGET=X86_64 + fi ;; i?86-*-* | x86_64-*-* | amd64-*) @@ -237,7 +242,7 @@ case "${TARGET}" in POWERPC_FREEBSD) SOURCES="ffi.c ffi_sysv.c sysv.S ppc_closure.S" ;; - X86 | X86_FREEBSD | X86_WIN32) + X86 | X86_DARWIN | X86_FREEBSD | X86_WIN32) SOURCES="ffi.c sysv.S" ;; X86_64) @@ -246,9 +251,6 @@ case "${TARGET}" in X86_WIN64) SOURCES="ffiw64.c win64.S" ;; - X86_DARWIN) - SOURCES="ffi.c darwin.S ffi64.c darwin64.S" - ;; esac # If we failed to configure SOURCES, we can't do anything. diff --git a/src/x86/darwin.S b/src/x86/darwin.S deleted file mode 100644 index 8f0f070..0000000 --- a/src/x86/darwin.S +++ /dev/null @@ -1,444 +0,0 @@ -/* ----------------------------------------------------------------------- - darwin.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc. - Copyright (C) 2008 Free Software Foundation, Inc. - - X86 Foreign Function Interface - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - ``Software''), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. - ----------------------------------------------------------------------- - */ - -#ifndef __x86_64__ - -#define LIBFFI_ASM -#include <fficonfig.h> -#include <ffi.h> - -.text - -.globl _ffi_prep_args - - .align 4 -.globl _ffi_call_SYSV - -_ffi_call_SYSV: -.LFB1: - pushl %ebp -.LCFI0: - movl %esp,%ebp -.LCFI1: - subl $8,%esp - /* Make room for all of the new args. */ - movl 16(%ebp),%ecx - subl %ecx,%esp - - movl %esp,%eax - - /* Place all of the ffi_prep_args in position */ - subl $8,%esp - pushl 12(%ebp) - pushl %eax - call *8(%ebp) - - /* Return stack to previous state and call the function */ - addl $16,%esp - - call *28(%ebp) - - /* Load %ecx with the return type code */ - movl 20(%ebp),%ecx - - /* Protect %esi. We're going to pop it in the epilogue. */ - pushl %esi - - /* If the return value pointer is NULL, assume no return value. */ - cmpl $0,24(%ebp) - jne 0f - - /* Even if there is no space for the return value, we are - obliged to handle floating-point values. */ - cmpl $FFI_TYPE_FLOAT,%ecx - jne noretval - fstp %st(0) - - jmp epilogue -0: - .align 4 - call 1f -.Lstore_table: - .long noretval-.Lstore_table /* FFI_TYPE_VOID */ - .long retint-.Lstore_table /* FFI_TYPE_INT */ - .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */ - .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */ - .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ - .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */ - .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */ - .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */ - .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */ - .long retint-.Lstore_table /* FFI_TYPE_UINT32 */ - .long retint-.Lstore_table /* FFI_TYPE_SINT32 */ - .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */ - .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */ - .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */ - .long retint-.Lstore_table /* FFI_TYPE_POINTER */ - .long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */ - .long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */ -1: - pop %esi - add (%esi, %ecx, 4), %esi - jmp *%esi - - /* Sign/zero extend as appropriate. */ -retsint8: - movsbl %al, %eax - jmp retint - -retsint16: - movswl %ax, %eax - jmp retint - -retuint8: - movzbl %al, %eax - jmp retint - -retuint16: - movzwl %ax, %eax - jmp retint - -retfloat: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - fstps (%ecx) - jmp epilogue - -retdouble: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - fstpl (%ecx) - jmp epilogue - -retlongdouble: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - fstpt (%ecx) - jmp epilogue - -retint64: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movl %eax,0(%ecx) - movl %edx,4(%ecx) - jmp epilogue - -retstruct1b: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movb %al,0(%ecx) - jmp epilogue - -retstruct2b: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movw %ax,0(%ecx) - jmp epilogue - -retint: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movl %eax,0(%ecx) - -retstruct: - /* Nothing to do! */ - -noretval: -epilogue: - popl %esi - movl %ebp,%esp - popl %ebp - ret - -.LFE1: -.ffi_call_SYSV_end: - - .align 4 -FFI_HIDDEN (ffi_closure_SYSV) -.globl _ffi_closure_SYSV - -_ffi_closure_SYSV: -.LFB2: - pushl %ebp -.LCFI2: - movl %esp, %ebp -.LCFI3: - subl $40, %esp - leal -24(%ebp), %edx - movl %edx, -12(%ebp) /* resp */ - leal 8(%ebp), %edx - movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ - leal -12(%ebp), %edx - movl %edx, (%esp) /* &resp */ - movl %ebx, 8(%esp) -.LCFI7: - call L_ffi_closure_SYSV_inner$stub - movl 8(%esp), %ebx - movl -12(%ebp), %ecx - cmpl $FFI_TYPE_INT, %eax - je .Lcls_retint - - /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, - FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ - cmpl $FFI_TYPE_UINT64, %eax - jge 0f - cmpl $FFI_TYPE_UINT8, %eax - jge .Lcls_retint - -0: cmpl $FFI_TYPE_FLOAT, %eax - je .Lcls_retfloat - cmpl $FFI_TYPE_DOUBLE, %eax - je .Lcls_retdouble - cmpl $FFI_TYPE_LONGDOUBLE, %eax - je .Lcls_retldouble - cmpl $FFI_TYPE_SINT64, %eax - je .Lcls_retllong - cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax - je .Lcls_retstruct1b - cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax - je .Lcls_retstruct2b - cmpl $FFI_TYPE_STRUCT, %eax - je .Lcls_retstruct -.Lcls_epilogue: - movl %ebp, %esp - popl %ebp - ret -.Lcls_retint: - movl (%ecx), %eax - jmp .Lcls_epilogue -.Lcls_retfloat: - flds (%ecx) - jmp .Lcls_epilogue -.Lcls_retdouble: - fldl (%ecx) - jmp .Lcls_epilogue -.Lcls_retldouble: - fldt (%ecx) - jmp .Lcls_epilogue -.Lcls_retllong: - movl (%ecx), %eax - movl 4(%ecx), %edx - jmp .Lcls_epilogue -.Lcls_retstruct1b: - movsbl (%ecx), %eax - jmp .Lcls_epilogue -.Lcls_retstruct2b: - movswl (%ecx), %eax - jmp .Lcls_epilogue -.Lcls_retstruct: - lea -8(%ebp),%esp - movl %ebp, %esp - popl %ebp - ret $4 -.LFE2: - -#if !FFI_NO_RAW_API - -#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) -#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) -#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) -#define CIF_FLAGS_OFFSET 20 - - .align 4 -FFI_HIDDEN (ffi_closure_raw_SYSV) -.globl _ffi_closure_raw_SYSV - -_ffi_closure_raw_SYSV: -.LFB3: - pushl %ebp -.LCFI4: - movl %esp, %ebp -.LCFI5: - pushl %esi -.LCFI6: - subl $36, %esp - movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ - movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ - movl %edx, 12(%esp) /* user_data */ - leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ - movl %edx, 8(%esp) /* raw_args */ - leal -24(%ebp), %edx - movl %edx, 4(%esp) /* &res */ - movl %esi, (%esp) /* cif */ - call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ - movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ - cmpl $FFI_TYPE_INT, %eax - je .Lrcls_retint - - /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, - FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ - cmpl $FFI_TYPE_UINT64, %eax - jge 0f - cmpl $FFI_TYPE_UINT8, %eax - jge .Lrcls_retint -0: - cmpl $FFI_TYPE_FLOAT, %eax - je .Lrcls_retfloat - cmpl $FFI_TYPE_DOUBLE, %eax - je .Lrcls_retdouble - cmpl $FFI_TYPE_LONGDOUBLE, %eax - je .Lrcls_retldouble - cmpl $FFI_TYPE_SINT64, %eax - je .Lrcls_retllong -.Lrcls_epilogue: - addl $36, %esp - popl %esi - popl %ebp - ret -.Lrcls_retint: - movl -24(%ebp), %eax - jmp .Lrcls_epilogue -.Lrcls_retfloat: - flds -24(%ebp) - jmp .Lrcls_epilogue -.Lrcls_retdouble: - fldl -24(%ebp) - jmp .Lrcls_epilogue -.Lrcls_retldouble: - fldt -24(%ebp) - jmp .Lrcls_epilogue -.Lrcls_retllong: - movl -24(%ebp), %eax - movl -20(%ebp), %edx - jmp .Lrcls_epilogue -.LFE3: -#endif - -.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5 -L_ffi_closure_SYSV_inner$stub: - .indirect_symbol _ffi_closure_SYSV_inner - hlt ; hlt ; hlt ; hlt ; hlt - - -.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support -EH_frame1: - .set L$set$0,LECIE1-LSCIE1 - .long L$set$0 -LSCIE1: - .long 0x0 - .byte 0x1 - .ascii "zR\0" - .byte 0x1 - .byte 0x7c - .byte 0x8 - .byte 0x1 - .byte 0x10 - .byte 0xc - .byte 0x5 - .byte 0x4 - .byte 0x88 - .byte 0x1 - .align 2 -LECIE1: -.globl _ffi_call_SYSV.eh -_ffi_call_SYSV.eh: -LSFDE1: - .set L$set$1,LEFDE1-LASFDE1 - .long L$set$1 -LASFDE1: - .long LASFDE1-EH_frame1 - .long .LFB1-. - .set L$set$2,.LFE1-.LFB1 - .long L$set$2 - .byte 0x0 - .byte 0x4 - .set L$set$3,.LCFI0-.LFB1 - .long L$set$3 - .byte 0xe - .byte 0x8 - .byte 0x84 - .byte 0x2 - .byte 0x4 - .set L$set$4,.LCFI1-.LCFI0 - .long L$set$4 - .byte 0xd - .byte 0x4 - .align 2 -LEFDE1: -.globl _ffi_closure_SYSV.eh -_ffi_closure_SYSV.eh: -LSFDE2: - .set L$set$5,LEFDE2-LASFDE2 - .long L$set$5 -LASFDE2: - .long LASFDE2-EH_frame1 - .long .LFB2-. - .set L$set$6,.LFE2-.LFB2 - .long L$set$6 - .byte 0x0 - .byte 0x4 - .set L$set$7,.LCFI2-.LFB2 - .long L$set$7 - .byte 0xe - .byte 0x8 - .byte 0x84 - .byte 0x2 - .byte 0x4 - .set L$set$8,.LCFI3-.LCFI2 - .long L$set$8 - .byte 0xd - .byte 0x4 - .align 2 -LEFDE2: - -#if !FFI_NO_RAW_API - -.globl _ffi_closure_raw_SYSV.eh -_ffi_closure_raw_SYSV.eh: -LSFDE3: - .set L$set$10,LEFDE3-LASFDE3 - .long L$set$10 -LASFDE3: - .long LASFDE3-EH_frame1 - .long .LFB3-. - .set L$set$11,.LFE3-.LFB3 - .long L$set$11 - .byte 0x0 - .byte 0x4 - .set L$set$12,.LCFI4-.LFB3 - .long L$set$12 - .byte 0xe - .byte 0x8 - .byte 0x84 - .byte 0x2 - .byte 0x4 - .set L$set$13,.LCFI5-.LCFI4 - .long L$set$13 - .byte 0xd - .byte 0x4 - .byte 0x4 - .set L$set$14,.LCFI6-.LCFI5 - .long L$set$14 - .byte 0x85 - .byte 0x3 - .align 2 -LEFDE3: - -#endif - -#endif /* ifndef __x86_64__ */ diff --git a/src/x86/darwin64.S b/src/x86/darwin64.S deleted file mode 100644 index 2f7394e..0000000 --- a/src/x86/darwin64.S +++ /dev/null @@ -1,416 +0,0 @@ -/* ----------------------------------------------------------------------- - darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc. - Copyright (c) 2008 Red Hat, Inc. - derived from unix64.S - - x86-64 Foreign Function Interface for Darwin. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - ``Software''), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR - OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - OTHER DEALINGS IN THE SOFTWARE. - ----------------------------------------------------------------------- */ - -#ifdef __x86_64__ -#define LIBFFI_ASM -#include <fficonfig.h> -#include <ffi.h> - - .file "darwin64.S" -.text - -/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, - void *raddr, void (*fnaddr)(void)); - - Bit o trickiness here -- ARGS+BYTES is the base of the stack frame - for this function. This has been allocated by ffi_call. We also - deallocate some of the stack that has been alloca'd. */ - - .align 3 - .globl _ffi_call_unix64 - -_ffi_call_unix64: -LUW0: - movq (%rsp), %r10 /* Load return address. */ - leaq (%rdi, %rsi), %rax /* Find local stack base. */ - movq %rdx, (%rax) /* Save flags. */ - movq %rcx, 8(%rax) /* Save raddr. */ - movq %rbp, 16(%rax) /* Save old frame pointer. */ - movq %r10, 24(%rax) /* Relocate return address. */ - movq %rax, %rbp /* Finalize local stack frame. */ -LUW1: - movq %rdi, %r10 /* Save a copy of the register area. */ - movq %r8, %r11 /* Save a copy of the target fn. */ - movl %r9d, %eax /* Set number of SSE registers. */ - - /* Load up all argument registers. */ - movq (%r10), %rdi - movq 8(%r10), %rsi - movq 16(%r10), %rdx - movq 24(%r10), %rcx - movq 32(%r10), %r8 - movq 40(%r10), %r9 - testl %eax, %eax - jnz Lload_sse -Lret_from_load_sse: - - /* Deallocate the reg arg area. */ - leaq 176(%r10), %rsp - - /* Call the user function. */ - call *%r11 - - /* Deallocate stack arg area; local stack frame in redzone. */ - leaq 24(%rbp), %rsp - - movq 0(%rbp), %rcx /* Reload flags. */ - movq 8(%rbp), %rdi /* Reload raddr. */ - movq 16(%rbp), %rbp /* Reload old frame pointer. */ -LUW2: - - /* The first byte of the flags contains the FFI_TYPE. */ - movzbl %cl, %r10d - leaq Lstore_table(%rip), %r11 - movslq (%r11, %r10, 4), %r10 - addq %r11, %r10 - jmp *%r10 - -Lstore_table: - .long Lst_void-Lstore_table /* FFI_TYPE_VOID */ - .long Lst_sint32-Lstore_table /* FFI_TYPE_INT */ - .long Lst_float-Lstore_table /* FFI_TYPE_FLOAT */ - .long Lst_double-Lstore_table /* FFI_TYPE_DOUBLE */ - .long Lst_ldouble-Lstore_table /* FFI_TYPE_LONGDOUBLE */ - .long Lst_uint8-Lstore_table /* FFI_TYPE_UINT8 */ - .long Lst_sint8-Lstore_table /* FFI_TYPE_SINT8 */ - .long Lst_uint16-Lstore_table /* FFI_TYPE_UINT16 */ - .long Lst_sint16-Lstore_table /* FFI_TYPE_SINT16 */ - .long Lst_uint32-Lstore_table /* FFI_TYPE_UINT32 */ - .long Lst_sint32-Lstore_table /* FFI_TYPE_SINT32 */ - .long Lst_int64-Lstore_table /* FFI_TYPE_UINT64 */ - .long Lst_int64-Lstore_table /* FFI_TYPE_SINT64 */ - .long Lst_struct-Lstore_table /* FFI_TYPE_STRUCT */ - .long Lst_int64-Lstore_table /* FFI_TYPE_POINTER */ - - .text - .align 3 -Lst_void: - ret - .align 3 -Lst_uint8: - movzbq %al, %rax - movq %rax, (%rdi) - ret - .align 3 -Lst_sint8: - movsbq %al, %rax - movq %rax, (%rdi) - ret - .align 3 -Lst_uint16: - movzwq %ax, %rax - movq %rax, (%rdi) - .align 3 -Lst_sint16: - movswq %ax, %rax - movq %rax, (%rdi) - ret - .align 3 -Lst_uint32: - movl %eax, %eax - movq %rax, (%rdi) - .align 3 -Lst_sint32: - cltq - movq %rax, (%rdi) - ret - .align 3 -Lst_int64: - movq %rax, (%rdi) - ret - .align 3 -Lst_float: - movss %xmm0, (%rdi) - ret - .align 3 -Lst_double: - movsd %xmm0, (%rdi) - ret -Lst_ldouble: - fstpt (%rdi) - ret - .align 3 -Lst_struct: - leaq -20(%rsp), %rsi /* Scratch area in redzone. */ - - /* We have to locate the values now, and since we don't want to - write too much data into the user's return value, we spill the - value to a 16 byte scratch area first. Bits 8, 9, and 10 - control where the values are located. Only one of the three - bits will be set; see ffi_prep_cif_machdep for the pattern. */ - movd %xmm0, %r10 - movd %xmm1, %r11 - testl $0x100, %ecx - cmovnz %rax, %rdx - cmovnz %r10, %rax - testl $0x200, %ecx - cmovnz %r10, %rdx - testl $0x400, %ecx - cmovnz %r10, %rax - cmovnz %r11, %rdx - movq %rax, (%rsi) - movq %rdx, 8(%rsi) - - /* Bits 12-31 contain the true size of the structure. Copy from - the scratch area to the true destination. */ - shrl $12, %ecx - rep movsb - ret - - /* Many times we can avoid loading any SSE registers at all. - It's not worth an indirect jump to load the exact set of - SSE registers needed; zero or all is a good compromise. */ - .align 3 -LUW3: -Lload_sse: - movdqa 48(%r10), %xmm0 - movdqa 64(%r10), %xmm1 - movdqa 80(%r10), %xmm2 - movdqa 96(%r10), %xmm3 - movdqa 112(%r10), %xmm4 - movdqa 128(%r10), %xmm5 - movdqa 144(%r10), %xmm6 - movdqa 160(%r10), %xmm7 - jmp Lret_from_load_sse - -LUW4: - .align 3 - .globl _ffi_closure_unix64 - -_ffi_closure_unix64: -LUW5: - /* The carry flag is set by the trampoline iff SSE registers - are used. Don't clobber it before the branch instruction. */ - leaq -200(%rsp), %rsp -LUW6: - movq %rdi, (%rsp) - movq %rsi, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rcx, 24(%rsp) - movq %r8, 32(%rsp) - movq %r9, 40(%rsp) - jc Lsave_sse -Lret_from_save_sse: - - movq %r10, %rdi - leaq 176(%rsp), %rsi - movq %rsp, %rdx - leaq 208(%rsp), %rcx - call _ffi_closure_unix64_inner - - /* Deallocate stack frame early; return value is now in redzone. */ - addq $200, %rsp -LUW7: - - /* The first byte of the return value contains the FFI_TYPE. */ - movzbl %al, %r10d - leaq Lload_table(%rip), %r11 - movslq (%r11, %r10, 4), %r10 - addq %r11, %r10 - jmp *%r10 - -Lload_table: - .long Lld_void-Lload_table /* FFI_TYPE_VOID */ - .long Lld_int32-Lload_table /* FFI_TYPE_INT */ - .long Lld_float-Lload_table /* FFI_TYPE_FLOAT */ - .long Lld_double-Lload_table /* FFI_TYPE_DOUBLE */ - .long Lld_ldouble-Lload_table /* FFI_TYPE_LONGDOUBLE */ - .long Lld_int8-Lload_table /* FFI_TYPE_UINT8 */ - .long Lld_int8-Lload_table /* FFI_TYPE_SINT8 */ - .long Lld_int16-Lload_table /* FFI_TYPE_UINT16 */ - .long Lld_int16-Lload_table /* FFI_TYPE_SINT16 */ - .long Lld_int32-Lload_table /* FFI_TYPE_UINT32 */ - .long Lld_int32-Lload_table /* FFI_TYPE_SINT32 */ - .long Lld_int64-Lload_table /* FFI_TYPE_UINT64 */ - .long Lld_int64-Lload_table /* FFI_TYPE_SINT64 */ - .long Lld_struct-Lload_table /* FFI_TYPE_STRUCT */ - .long Lld_int64-Lload_table /* FFI_TYPE_POINTER */ - - .text - .align 3 -Lld_void: - ret - .align 3 -Lld_int8: - movzbl -24(%rsp), %eax - ret - .align 3 -Lld_int16: - movzwl -24(%rsp), %eax - ret - .align 3 -Lld_int32: - movl -24(%rsp), %eax - ret - .align 3 -Lld_int64: - movq -24(%rsp), %rax - ret - .align 3 -Lld_float: - movss -24(%rsp), %xmm0 - ret - .align 3 -Lld_double: - movsd -24(%rsp), %xmm0 - ret - .align 3 -Lld_ldouble: - fldt -24(%rsp) - ret - .align 3 -Lld_struct: - /* There are four possibilities here, %rax/%rdx, %xmm0/%rax, - %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading - both rdx and xmm1 with the second word. For the remaining, - bit 8 set means xmm0 gets the second word, and bit 9 means - that rax gets the second word. */ - movq -24(%rsp), %rcx - movq -16(%rsp), %rdx - movq -16(%rsp), %xmm1 - testl $0x100, %eax - cmovnz %rdx, %rcx - movd %rcx, %xmm0 - testl $0x200, %eax - movq -24(%rsp), %rax - cmovnz %rdx, %rax - ret - - /* See the comment above Lload_sse; the same logic applies here. */ - .align 3 -LUW8: -Lsave_sse: - movdqa %xmm0, 48(%rsp) - movdqa %xmm1, 64(%rsp) - movdqa %xmm2, 80(%rsp) - movdqa %xmm3, 96(%rsp) - movdqa %xmm4, 112(%rsp) - movdqa %xmm5, 128(%rsp) - movdqa %xmm6, 144(%rsp) - movdqa %xmm7, 160(%rsp) - jmp Lret_from_save_sse - -LUW9: -.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support -EH_frame1: - .set L$set$0,LECIE1-LSCIE1 /* CIE Length */ - .long L$set$0 -LSCIE1: - .long 0x0 /* CIE Identifier Tag */ - .byte 0x1 /* CIE Version */ - .ascii "zR\0" /* CIE Augmentation */ - .byte 0x1 /* uleb128 0x1; CIE Code Alignment Factor */ - .byte 0x78 /* sleb128 -8; CIE Data Alignment Factor */ - .byte 0x10 /* CIE RA Column */ - .byte 0x1 /* uleb128 0x1; Augmentation size */ - .byte 0x10 /* FDE Encoding (pcrel sdata4) */ - .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ - .byte 0x7 /* uleb128 0x7 */ - .byte 0x8 /* uleb128 0x8 */ - .byte 0x90 /* DW_CFA_offset, column 0x10 */ - .byte 0x1 - .align 3 -LECIE1: - .globl _ffi_call_unix64.eh -_ffi_call_unix64.eh: -LSFDE1: - .set L$set$1,LEFDE1-LASFDE1 /* FDE Length */ - .long L$set$1 -LASFDE1: - .long LASFDE1-EH_frame1 /* FDE CIE offset */ - .quad LUW0-. /* FDE initial location */ - .set L$set$2,LUW4-LUW0 /* FDE address range */ - .quad L$set$2 - .byte 0x0 /* Augmentation size */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .set L$set$3,LUW1-LUW0 - .long L$set$3 - - /* New stack frame based off rbp. This is a itty bit of unwind - trickery in that the CFA *has* changed. There is no easy way - to describe it correctly on entry to the function. Fortunately, - it doesn't matter too much since at all points we can correctly - unwind back to ffi_call. Note that the location to which we - moved the return address is (the new) CFA-8, so from the - perspective of the unwind info, it hasn't moved. */ - .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */ - .byte 0x6 - .byte 0x20 - .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */ - .byte 0x2 - .byte 0xa /* DW_CFA_remember_state */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .set L$set$4,LUW2-LUW1 - .long L$set$4 - .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ - .byte 0x7 - .byte 0x8 - .byte 0xc0+6 /* DW_CFA_restore, %rbp */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .set L$set$5,LUW3-LUW2 - .long L$set$5 - .byte 0xb /* DW_CFA_restore_state */ - - .align 3 -LEFDE1: - .globl _ffi_closure_unix64.eh -_ffi_closure_unix64.eh: -LSFDE3: - .set L$set$6,LEFDE3-LASFDE3 /* FDE Length */ - .long L$set$6 -LASFDE3: - .long LASFDE3-EH_frame1 /* FDE CIE offset */ - .quad LUW5-. /* FDE initial location */ - .set L$set$7,LUW9-LUW5 /* FDE address range */ - .quad L$set$7 - .byte 0x0 /* Augmentation size */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .set L$set$8,LUW6-LUW5 - .long L$set$8 - .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte 208,1 /* uleb128 208 */ - .byte 0xa /* DW_CFA_remember_state */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .set L$set$9,LUW7-LUW6 - .long L$set$9 - .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte 0x8 - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .set L$set$10,LUW8-LUW7 - .long L$set$10 - .byte 0xb /* DW_CFA_restore_state */ - - .align 3 -LEFDE3: - .subsections_via_symbols - -#endif /* __x86_64__ */ diff --git a/src/x86/sysv.S b/src/x86/sysv.S index ebd1693..5dac8c2 100644 --- a/src/x86/sysv.S +++ b/src/x86/sysv.S @@ -112,7 +112,7 @@ ffi_call_i386: andl $X86_RET_TYPE_MASK, %ecx #ifdef __PIC__ - call __x86.get_pc_thunk.bx + call C(__x86.get_pc_thunk.bx) 1: leal 0f-1b(%ebx, %ecx, 8), %ebx #else leal 0f(,%ecx, 8), %ebx @@ -212,39 +212,41 @@ ENDF(ffi_call_i386) movl %ecx, 32(%esp); \ movl %eax, 36(%esp) +# define FFI_CLOSURE_CALL_INNER \ + movl %esp, %ecx; /* load closure_data */ \ + leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ + call ffi_closure_inner +#define FFI_CLOSURE_MASK_AND_JUMP \ + andl $X86_RET_TYPE_MASK, %eax; \ + leal 0f(, %eax, 8), %eax; \ + jmp *%eax #ifdef __PIC__ -/* We're going to always load the got register here, even if .hidden says - we're going to avoid the PLT call. We'll use the got register in - FFI_CLOSURE_MASK_AND_JUMP. */ -# if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE -# define PLT(X) X +# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE +# undef FFI_CLOSURE_MASK_AND_JUMP +# define FFI_CLOSURE_MASK_AND_JUMP \ + andl $X86_RET_TYPE_MASK, %eax; \ + call C(__x86.get_pc_thunk.dx); \ +1: leal 0f-1b(%edx, %eax, 8), %eax; \ + jmp *%eax # else -# define PLT(X) X@PLT -# endif -# define FFI_CLOSURE_CALL_INNER \ +# undef FFI_CLOSURE_CALL_INNER +# define FFI_CLOSURE_CALL_INNER \ movl %esp, %ecx; /* load closure_data */ \ leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ movl %ebx, 40(%esp); /* save ebx */ \ cfi_rel_offset(%ebx, 40); \ - call __x86.get_pc_thunk.bx; /* load got register */ \ - addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \ - call PLT(ffi_closure_inner) -#define FFI_CLOSURE_MASK_AND_JUMP \ + call C(__x86.get_pc_thunk.bx); /* load got register */ \ +1: addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \ + call ffi_closure_inner@PLT +# undef FFI_CLOSURE_MASK_AND_JUMP +# define FFI_CLOSURE_MASK_AND_JUMP \ andl $X86_RET_TYPE_MASK, %eax; \ leal 0f@GOTOFF(%ebx, %eax, 8), %eax; \ movl 40(%esp), %ebx; /* restore ebx */ \ cfi_restore(%ebx); \ jmp *%eax -#else -# define FFI_CLOSURE_CALL_INNER \ - movl %esp, %ecx; /* load closure_data */ \ - leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ - call ffi_closure_inner -#define FFI_CLOSURE_MASK_AND_JUMP \ - andl $X86_RET_TYPE_MASK, %eax; \ - leal 0f(, %eax, 8), %eax; \ - jmp *%eax +# endif /* DARWIN || HIDDEN */ #endif /* __PIC__ */ #define FFI_GO_CLOSURE(suffix, chain, t1, t2) \ @@ -511,7 +513,7 @@ C(ffi_closure_raw_SYSV): movl 20(%ebx), %eax /* load cif->flags */ andl $X86_RET_TYPE_MASK, %eax #ifdef __PIC__ - call __x86.get_pc_thunk.bx + call C(__x86.get_pc_thunk.bx) 1: leal 0f-1b(%ebx, %eax, 8), %eax #else leal 0f(,%eax, 8), %eax @@ -615,7 +617,7 @@ C(ffi_closure_raw_THISCALL): movl 20(%ebx), %eax /* load cif->flags */ andl $X86_RET_TYPE_MASK, %eax #ifdef __PIC__ - call __x86.get_pc_thunk.bx + call C(__x86.get_pc_thunk.bx) 1: leal 0f-1b(%ebx, %eax, 8), %eax #else leal 0f(,%eax, 8), %eax @@ -685,20 +687,41 @@ ENDF(C(ffi_closure_raw_THISCALL)) #endif /* !FFI_NO_RAW_API */ +#ifdef X86_DARWIN +# define COMDAT(X) \ + .section __TEXT,__textcoal_nt,coalesced,pure_instructions; \ + .weak_definition X; \ + .private_extern X +#elif defined __ELF__ +# define COMDAT(X) \ + .section .text.X,"axG",@progbits,X,comdat; \ + .globl X; \ + FFI_HIDDEN(X) +#else +# define COMDAT(X) +#endif + #if defined(__PIC__) - .section .text.__x86.get_pc_thunk.bx,"axG",@progbits,__x86.get_pc_thunk.bx,comdat - .globl __x86.get_pc_thunk.bx - .hidden __x86.get_pc_thunk.bx - .type __x86.get_pc_thunk.bx,@function -__x86.get_pc_thunk.bx: + COMDAT(C(__x86.get_pc_thunk.bx)) +C(__x86.get_pc_thunk.bx): cfi_startproc movl (%esp), %ebx ret cfi_endproc - .size __x86.get_pc_thunk.bx, . - __x86.get_pc_thunk.bx +ENDF(C(__x86.get_pc_thunk.bx)) +# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE + COMDAT(C(__x86.get_pc_thunk.dx)) +C(__x86.get_pc_thunk.dx): + cfi_startproc + movl (%esp), %edx + ret + cfi_endproc +ENDF(C(__x86.get_pc_thunk.dx)) +#endif /* DARWIN || HIDDEN */ #endif /* __PIC__ */ #endif /* ifndef __x86_64__ */ + #if defined __ELF__ && defined __linux__ .section .note.GNU-stack,"",@progbits #endif diff --git a/src/x86/unix64.S b/src/x86/unix64.S index 58cb153..770d48e 100644 --- a/src/x86/unix64.S +++ b/src/x86/unix64.S @@ -35,6 +35,22 @@ .text +#define C2(X, Y) X ## Y +#define C1(X, Y) C2(X, Y) +#ifdef __USER_LABEL_PREFIX__ +# define C(X) C1(__USER_LABEL_PREFIX__, X) +#else +# define C(X) X +#endif + +#ifdef __ELF__ +# define PLT(X) X@PLT +# define ENDF(X) .type X,@function; .size X, . - X +#else +# define PLT(X) X +# define ENDF(X) +#endif + /* This macro allows the safe creation of jump tables without an actual table. The entry points into the table are all 8 bytes. The use of ORG asserts that we're at the correct location. */ @@ -54,11 +70,10 @@ deallocate some of the stack that has been alloca'd. */ .align 8 - .globl ffi_call_unix64 - .type ffi_call_unix64,@function - FFI_HIDDEN(ffi_call_unix64) + .globl C(ffi_call_unix64) + FFI_HIDDEN(C(ffi_call_unix64)) -ffi_call_unix64: +C(ffi_call_unix64): cfi_startproc movq (%rsp), %r10 /* Load return address. */ leaq (%rdi, %rsi), %rax /* Find local stack base. */ @@ -187,7 +202,7 @@ E UNIX64_RET_ST_RAX_RDX rep movsb ret -9: call abort@PLT +9: call PLT(C(abort)) /* Many times we can avoid loading any SSE registers at all. It's not worth an indirect jump to load the exact set of @@ -206,7 +221,7 @@ E UNIX64_RET_ST_RAX_RDX jmp .Lret_from_load_sse cfi_endproc - .size ffi_call_unix64,.-ffi_call_unix64 +ENDF(C(ffi_call_unix64)) /* 6 general registers, 8 vector registers, 32 bytes of rvalue, 8 bytes of alignment. */ @@ -219,11 +234,10 @@ E UNIX64_RET_ST_RAX_RDX #define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS) .align 2 - .globl ffi_closure_unix64_sse - .type ffi_closure_unix64_sse,@function - FFI_HIDDEN(ffi_closure_unix64_sse) + .globl C(ffi_closure_unix64_sse) + FFI_HIDDEN(C(ffi_closure_unix64_sse)) -ffi_closure_unix64_sse: +C(ffi_closure_unix64_sse): cfi_startproc subq $ffi_closure_FS, %rsp /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ @@ -240,14 +254,13 @@ ffi_closure_unix64_sse: jmp 0f cfi_endproc - .size ffi_closure_unix64_sse,.-ffi_closure_unix64_sse +ENDF(C(ffi_closure_unix64_sse)) .align 2 - .globl ffi_closure_unix64 - .type ffi_closure_unix64,@function - FFI_HIDDEN(ffi_closure_unix64) + .globl C(ffi_closure_unix64) + FFI_HIDDEN(C(ffi_closure_unix64)) -ffi_closure_unix64: +C(ffi_closure_unix64): cfi_startproc subq $ffi_closure_FS, %rsp /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ @@ -273,7 +286,7 @@ ffi_closure_unix64: leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */ movq %rsp, %r8 /* Load reg_args */ leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */ - call ffi_closure_unix64_inner + call C(ffi_closure_unix64_inner) /* Deallocate stack frame early; return value is now in redzone. */ addq $ffi_closure_FS, %rsp @@ -343,17 +356,16 @@ E UNIX64_RET_ST_RAX_RDX 3: movq (%rsi), %xmm0 ret -9: call abort@PLT +9: call PLT(C(abort)) cfi_endproc - .size ffi_closure_unix64,.-ffi_closure_unix64 +ENDF(C(ffi_closure_unix64)) .align 2 - .globl ffi_go_closure_unix64_sse - .type ffi_go_closure_unix64_sse,@function - FFI_HIDDEN(ffi_go_closure_unix64_sse) + .globl C(ffi_go_closure_unix64_sse) + FFI_HIDDEN(C(ffi_go_closure_unix64_sse)) -ffi_go_closure_unix64_sse: +C(ffi_go_closure_unix64_sse): cfi_startproc subq $ffi_closure_FS, %rsp /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ @@ -370,14 +382,13 @@ ffi_go_closure_unix64_sse: jmp 0f cfi_endproc - .size ffi_go_closure_unix64_sse,.-ffi_go_closure_unix64_sse +ENDF(C(ffi_go_closure_unix64_sse)) .align 2 - .globl ffi_go_closure_unix64 - .type ffi_go_closure_unix64,@function - FFI_HIDDEN(ffi_go_closure_unix64) + .globl C(ffi_go_closure_unix64) + FFI_HIDDEN(C(ffi_go_closure_unix64)) -ffi_go_closure_unix64: +C(ffi_go_closure_unix64): cfi_startproc subq $ffi_closure_FS, %rsp /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ @@ -402,7 +413,7 @@ ffi_go_closure_unix64: jmp .Ldo_closure cfi_endproc - .size ffi_go_closure_unix64,.-ffi_go_closure_unix64 +ENDF(C(ffi_go_closure_unix64)) #endif /* __x86_64__ */ #if defined __ELF__ && defined __linux__ |