diff options
Diffstat (limited to 'libffi/src/powerpc/darwin.S')
-rw-r--r-- | libffi/src/powerpc/darwin.S | 290 |
1 files changed, 214 insertions, 76 deletions
diff --git a/libffi/src/powerpc/darwin.S b/libffi/src/powerpc/darwin.S index d8a1df5fe98..3b425da78b8 100644 --- a/libffi/src/powerpc/darwin.S +++ b/libffi/src/powerpc/darwin.S @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- darwin.S - Copyright (c) 2000 John Hornkvist - Copyright (c) 2004 Free Software Foundation, Inc. + Copyright (c) 2004, 2010 Free Software Foundation, Inc. PowerPC Assembly glue. @@ -24,51 +24,92 @@ OTHER DEALINGS IN THE SOFTWARE. ----------------------------------------------------------------------- */ +#define LIBFFI_ASM #if defined(__ppc64__) #define MODE_CHOICE(x, y) y #else #define MODE_CHOICE(x, y) x #endif -#define g_long MODE_CHOICE(long, quad) /* usage is ".g_long" */ +#define machine_choice MODE_CHOICE(ppc7400,ppc64) -#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) */ +; Define some pseudo-opcodes for size-independent load & store of GPRs ... +#define lgu MODE_CHOICE(lwzu, ldu) +#define lg MODE_CHOICE(lwz,ld) +#define sg MODE_CHOICE(stw,std) +#define sgu MODE_CHOICE(stwu,stdu) +#define sgux MODE_CHOICE(stwux,stdux) + +; ... and the size of GPRs and their storage indicator. +#define GPR_BYTES MODE_CHOICE(4,8) +#define LOG2_GPR_BYTES MODE_CHOICE(2,3) /* log2(GPR_BYTES) */ +#define g_long MODE_CHOICE(long, quad) /* usage is ".g_long" */ + +; From the ABI doc: "Mac OS X ABI Function Call Guide" Version 2009-02-04. +#define LINKAGE_SIZE MODE_CHOICE(24,48) +#define PARAM_AREA MODE_CHOICE(32,64) +#define SAVED_LR_OFFSET MODE_CHOICE(8,16) /* save position for lr */ + +/* If there is any FP stuff we make space for all of the regs. */ +#define SAVED_FPR_COUNT 13 +#define FPR_SIZE 8 +#define RESULT_BYTES 16 + +/* This should be kept in step with the same value in ffi_darwin.c. */ +#define ASM_NEEDS_REGISTERS 4 +#define SAVE_REGS_SIZE (ASM_NEEDS_REGISTERS * GPR_BYTES) -#define LIBFFI_ASM #include <fficonfig.h> #include <ffi.h> + #define JUMPTARGET(name) name #define L(x) x -.text - .align 2 -.globl _ffi_prep_args -.text + .text .align 2 -.globl _ffi_call_DARWIN -.text + .globl _ffi_prep_args + .align 2 + .globl _ffi_call_DARWIN + + /* We arrive here with: + r3 = ptr to extended cif. + r4 = -bytes. + r5 = cif flags. + r6 = ptr to return value. + r7 = fn pointer (user func). + r8 = fn pointer (ffi_prep_args). + r9 = ffi_type* for the ret val. */ + _ffi_call_DARWIN: -LFB0: +Lstartcode: mr r12,r8 /* We only need r12 until the call, - so it doesn't have to be saved. */ + so it does not have to be saved. */ LFB1: /* Save the old stack pointer as AP. */ mr r8,r1 LCFI0: + + /* Save the retval type in parents frame. */ + sg r9,(LINKAGE_SIZE+6*GPR_BYTES)(r8) + /* Allocate the stack space we need. */ - stwux r1,r1,r4 + sgux r1,r1,r4 /* Save registers we use. */ mflr r9 + sg r9,SAVED_LR_OFFSET(r8) + + sg r28,-(4 * GPR_BYTES)(r8) + sg r29,-(3 * GPR_BYTES)(r8) + sg r30,-(2 * GPR_BYTES)(r8) + sg r31,-( GPR_BYTES)(r8) - stw r28,-16(r8) - stw r29,-12(r8) - stw r30,-8(r8) - stw r31,-4(r8) +#if !defined(POWERPC_DARWIN) + /* The TOC slot is reserved in the Darwin ABI and r2 is volatile. */ + sg r2,(5 * GPR_BYTES)(r1) +#endif - stw r9,8(r8) - stw r2,20(r1) LCFI1: /* Save arguments over call. */ @@ -77,14 +118,17 @@ LCFI1: mr r29,r7 /* function address, */ mr r28,r8 /* our AP. */ LCFI2: - /* Call ffi_prep_args. */ + /* Call ffi_prep_args. r3 = extended cif, r4 = stack ptr copy. */ mr r4,r1 li r9,0 mtctr r12 /* r12 holds address of _ffi_prep_args. */ bctrl - lwz r2,20(r1) +#if !defined(POWERPC_DARWIN) + /* The TOC slot is reserved in the Darwin ABI and r2 is volatile. */ + lg r2,(5 * GPR_BYTES)(r1) +#endif /* Now do the call. Set up cr1 with bits 4-7 of the flags. */ mtcrf 0x40,r31 @@ -92,71 +136,130 @@ LCFI2: mtctr r29 /* Load all those argument registers. We have set up a nice stack frame, just load it into registers. */ - lwz r3,20+(1*4)(r1) - lwz r4,20+(2*4)(r1) - lwz r5,20+(3*4)(r1) - lwz r6,20+(4*4)(r1) + lg r3, (LINKAGE_SIZE )(r1) + lg r4, (LINKAGE_SIZE + GPR_BYTES)(r1) + lg r5, (LINKAGE_SIZE + 2 * GPR_BYTES)(r1) + lg r6, (LINKAGE_SIZE + 3 * GPR_BYTES)(r1) nop - lwz r7,20+(5*4)(r1) - lwz r8,20+(6*4)(r1) - lwz r9,20+(7*4)(r1) - lwz r10,20+(8*4)(r1) + lg r7, (LINKAGE_SIZE + 4 * GPR_BYTES)(r1) + lg r8, (LINKAGE_SIZE + 5 * GPR_BYTES)(r1) + lg r9, (LINKAGE_SIZE + 6 * GPR_BYTES)(r1) + lg r10,(LINKAGE_SIZE + 7 * GPR_BYTES)(r1) L1: - /* Load all the FP registers. */ + /* ... Load all the FP registers. */ bf 6,L2 /* No floats to load. */ - lfd f1,-16-(13*8)(r28) - lfd f2,-16-(12*8)(r28) - lfd f3,-16-(11*8)(r28) - lfd f4,-16-(10*8)(r28) + lfd f1, -SAVE_REGS_SIZE-(13*FPR_SIZE)(r28) + lfd f2, -SAVE_REGS_SIZE-(12*FPR_SIZE)(r28) + lfd f3, -SAVE_REGS_SIZE-(11*FPR_SIZE)(r28) + lfd f4, -SAVE_REGS_SIZE-(10*FPR_SIZE)(r28) nop - lfd f5,-16-(9*8)(r28) - lfd f6,-16-(8*8)(r28) - lfd f7,-16-(7*8)(r28) - lfd f8,-16-(6*8)(r28) + lfd f5, -SAVE_REGS_SIZE-( 9*FPR_SIZE)(r28) + lfd f6, -SAVE_REGS_SIZE-( 8*FPR_SIZE)(r28) + lfd f7, -SAVE_REGS_SIZE-( 7*FPR_SIZE)(r28) + lfd f8, -SAVE_REGS_SIZE-( 6*FPR_SIZE)(r28) nop - lfd f9,-16-(5*8)(r28) - lfd f10,-16-(4*8)(r28) - lfd f11,-16-(3*8)(r28) - lfd f12,-16-(2*8)(r28) + lfd f9, -SAVE_REGS_SIZE-( 5*FPR_SIZE)(r28) + lfd f10,-SAVE_REGS_SIZE-( 4*FPR_SIZE)(r28) + lfd f11,-SAVE_REGS_SIZE-( 3*FPR_SIZE)(r28) + lfd f12,-SAVE_REGS_SIZE-( 2*FPR_SIZE)(r28) nop - lfd f13,-16-(1*8)(r28) + lfd f13,-SAVE_REGS_SIZE-( 1*FPR_SIZE)(r28) L2: mr r12,r29 /* Put the target address in r12 as specified. */ mtctr r12 nop nop + /* Make the call. */ bctrl /* Now, deal with the return value. */ - mtcrf 0x01,r31 - bt 30,L(done_return_value) - bt 29,L(fp_return_value) - stw r3,0(r30) - bf 28,L(done_return_value) - stw r4,4(r30) + /* m64 structure returns can occupy the same set of registers as + would be used to pass such a structure as arg0 - so take care + not to step on any possibly hot regs. */ - /* Fall through. */ + /* Get the flags.. */ + mtcrf 0x03,r31 ; we need c6 & cr7 now. + ; FLAG_RETURNS_NOTHING also covers struct ret-by-ref. + bt 30,L(done_return_value) ; FLAG_RETURNS_NOTHING + bf 27,L(scalar_return_value) ; not FLAG_RETURNS_STRUCT + + /* OK, so we have a struct. */ +#if defined(__ppc64__) + bt 31,L(maybe_return_128) ; FLAG_RETURNS_128BITS, special case -L(done_return_value): - /* Restore the registers we used and return. */ - lwz r9,8(r28) - lwz r31,-4(r28) - mtlr r9 - lwz r30,-8(r28) - lwz r29,-12(r28) - lwz r28,-16(r28) - lwz r1,0(r1) - blr + /* OK, we have to map the return back to a mem struct. + We are about to trample the parents param area, so recover the + return type. r29 is free, since the call is done. */ + lg r29,(LINKAGE_SIZE + 6 * GPR_BYTES)(r28) + + sg r3, (LINKAGE_SIZE )(r28) + sg r4, (LINKAGE_SIZE + GPR_BYTES)(r28) + sg r5, (LINKAGE_SIZE + 2 * GPR_BYTES)(r28) + sg r6, (LINKAGE_SIZE + 3 * GPR_BYTES)(r28) + nop + sg r7, (LINKAGE_SIZE + 4 * GPR_BYTES)(r28) + sg r8, (LINKAGE_SIZE + 5 * GPR_BYTES)(r28) + sg r9, (LINKAGE_SIZE + 6 * GPR_BYTES)(r28) + sg r10,(LINKAGE_SIZE + 7 * GPR_BYTES)(r28) + /* OK, so do the block move - we trust that memcpy will not trample + the fprs... */ + mr r3,r30 ; dest + addi r4,r28,LINKAGE_SIZE ; source + /* The size is a size_t, should be long. */ + lg r5,0(r29) + /* Figure out small structs */ + cmpi 0,r5,4 + bgt L3 ; 1, 2 and 4 bytes have special rules. + cmpi 0,r5,3 + beq L3 ; not 3 + addi r4,r4,8 + subf r4,r5,r4 +L3: + bl _memcpy + + /* ... do we need the FP registers? - recover the flags.. */ + mtcrf 0x03,r31 ; we need c6 & cr7 now. + bf 29,L(done_return_value) /* No floats in the struct. */ + stfd f1, -SAVE_REGS_SIZE-(13*FPR_SIZE)(r28) + stfd f2, -SAVE_REGS_SIZE-(12*FPR_SIZE)(r28) + stfd f3, -SAVE_REGS_SIZE-(11*FPR_SIZE)(r28) + stfd f4, -SAVE_REGS_SIZE-(10*FPR_SIZE)(r28) + nop + stfd f5, -SAVE_REGS_SIZE-( 9*FPR_SIZE)(r28) + stfd f6, -SAVE_REGS_SIZE-( 8*FPR_SIZE)(r28) + stfd f7, -SAVE_REGS_SIZE-( 7*FPR_SIZE)(r28) + stfd f8, -SAVE_REGS_SIZE-( 6*FPR_SIZE)(r28) + nop + stfd f9, -SAVE_REGS_SIZE-( 5*FPR_SIZE)(r28) + stfd f10,-SAVE_REGS_SIZE-( 4*FPR_SIZE)(r28) + stfd f11,-SAVE_REGS_SIZE-( 3*FPR_SIZE)(r28) + stfd f12,-SAVE_REGS_SIZE-( 2*FPR_SIZE)(r28) + nop + stfd f13,-SAVE_REGS_SIZE-( 1*FPR_SIZE)(r28) + + mr r3,r29 ; ffi_type * + mr r4,r30 ; dest + addi r5,r28,-SAVE_REGS_SIZE-(13*FPR_SIZE) ; fprs + xor r6,r6,r6 + sg r6,(LINKAGE_SIZE + 7 * GPR_BYTES)(r28) + addi r6,r28,(LINKAGE_SIZE + 7 * GPR_BYTES) ; point to a zeroed counter. + bl _darwin64_struct_floats_to_mem + + b L(done_return_value) +#else + stw r3,0(r30) ; m32 the only struct return in reg is 4 bytes. +#endif + b L(done_return_value) L(fp_return_value): /* Do we have long double to store? */ - bf 31,L(fd_return_value) + bf 31,L(fd_return_value) ; FLAG_RETURNS_128BITS stfd f1,0(r30) - stfd f2,8(r30) + stfd f2,FPR_SIZE(r30) b L(done_return_value) L(fd_return_value): @@ -170,21 +273,57 @@ L(float_return_value): stfs f1,0(r30) b L(done_return_value) +L(scalar_return_value): + bt 29,L(fp_return_value) ; FLAG_RETURNS_FP + ; ffi_arg is defined as unsigned long. + sg r3,0(r30) ; Save the reg. + bf 28,L(done_return_value) ; not FLAG_RETURNS_64BITS + +#if defined(__ppc64__) +L(maybe_return_128): + std r3,0(r30) + bf 31,L(done_return_value) ; not FLAG_RETURNS_128BITS + std r4,8(r30) +#else + stw r4,4(r30) +#endif + + /* Fall through. */ + /* We want this at the end to simplify eh epilog computation. */ + +L(done_return_value): + /* Restore the registers we used and return. */ + lg r29,SAVED_LR_OFFSET(r28) + ; epilog + lg r31,-(1 * GPR_BYTES)(r28) + mtlr r29 + lg r30,-(2 * GPR_BYTES)(r28) + lg r29,-(3 * GPR_BYTES)(r28) + lg r28,-(4 * GPR_BYTES)(r28) + lg r1,0(r1) + blr LFE1: + .align 1 /* END(_ffi_call_DARWIN) */ /* Provide a null definition of _ffi_call_AIX. */ -.text - .align 2 -.globl _ffi_call_AIX -.text + .text + .globl _ffi_call_AIX .align 2 _ffi_call_AIX: blr /* END(_ffi_call_AIX) */ -.data -.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms +/* EH stuff. */ + +#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78) + + .static_data + .align LOG2_GPR_BYTES +LLFB0$non_lazy_ptr: + .g_long Lstartcode + + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support EH_frame1: .set L$set$0,LECIE1-LSCIE1 .long L$set$0 ; Length of Common Information Entry @@ -193,7 +332,7 @@ LSCIE1: .byte 0x1 ; CIE Version .ascii "zR\0" ; CIE Augmentation .byte 0x1 ; uleb128 0x1; CIE Code Alignment Factor - .byte 0x7c ; sleb128 -4; CIE Data Alignment Factor + .byte EH_DATA_ALIGN_FACT ; sleb128 -4; CIE Data Alignment Factor .byte 0x41 ; CIE RA Column .byte 0x1 ; uleb128 0x1; Augmentation size .byte 0x90 ; FDE Encoding (indirect pcrel) @@ -202,7 +341,8 @@ LSCIE1: .byte 0x0 ; uleb128 0x0 .align LOG2_GPR_BYTES LECIE1: -.globl _ffi_call_DARWIN.eh + + .globl _ffi_call_DARWIN.eh _ffi_call_DARWIN.eh: LSFDE1: .set L$set$1,LEFDE1-LASFDE1 @@ -210,11 +350,11 @@ LSFDE1: LASFDE1: .long LASFDE1-EH_frame1 ; FDE CIE offset .g_long LLFB0$non_lazy_ptr-. ; FDE initial location - .set L$set$3,LFE1-LFB0 + .set L$set$3,LFE1-Lstartcode .g_long L$set$3 ; FDE address range .byte 0x0 ; uleb128 0x0; Augmentation size .byte 0x4 ; DW_CFA_advance_loc4 - .set L$set$4,LCFI0-LFB1 + .set L$set$4,LCFI0-Lstartcode .long L$set$4 .byte 0xd ; DW_CFA_def_cfa_register .byte 0x08 ; uleb128 0x08 @@ -239,7 +379,5 @@ LASFDE1: .byte 0x1c ; uleb128 0x1c .align LOG2_GPR_BYTES LEFDE1: -.data - .align LOG2_GPR_BYTES -LLFB0$non_lazy_ptr: - .g_long LFB0 + .align 1 + |