author    Richard Henderson <rth@redhat.com>  2014-10-22 21:53:30 -0400
committer Richard Henderson <rth@twiddle.net> 2014-11-12 09:28:44 +0100
commit    12cf89ee04dc01650f71bc38ce414c953c83199a (patch)
tree      e7e39cdcd13999a05214d2674fd878414590bacc /src/aarch64
parent    4fe1aea1211aaf228c2db867b8ac6f1620de72d4 (diff)
download  libffi-12cf89ee04dc01650f71bc38ce414c953c83199a.tar.gz
aarch64: Move return value handling into ffi_closure_SYSV
As with the change to ffi_call_SYSV, this avoids copying data into a temporary buffer.
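For orientation, here is a minimal closure example built only on the public libffi API (my example, not part of this patch). A closure returning a double exercises exactly the path being moved: after this change the result written by the handler is loaded straight into d0 by the ffi_closure_SYSV dispatch table, rather than being copied into a temporary call-context buffer by the C inner function.

#include <ffi.h>
#include <stdio.h>

/* Handler: args[i] points at the i-th argument value.  */
static void add_fn (ffi_cif *cif, void *ret, void **args, void *user_data)
{
  (void) cif; (void) user_data;
  /* The double stored here is picked up directly from the rvalue scratch
     area by the new assembly epilogue (the D1 entry of its load table).  */
  *(double *) ret = *(double *) args[0] + *(double *) args[1];
}

int main (void)
{
  ffi_cif cif;
  ffi_type *arg_types[2] = { &ffi_type_double, &ffi_type_double };
  void *code;
  ffi_closure *closure = ffi_closure_alloc (sizeof (ffi_closure), &code);

  if (closure == NULL
      || ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2,
                       &ffi_type_double, arg_types) != FFI_OK
      || ffi_prep_closure_loc (closure, &cif, add_fn, NULL, code) != FFI_OK)
    return 1;

  double (*fn) (double, double) = (double (*)(double, double)) code;
  printf ("%f\n", fn (1.5, 2.25));   /* prints 3.750000 */

  ffi_closure_free (closure);
  return 0;
}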
Diffstat (limited to 'src/aarch64')
-rw-r--r--  src/aarch64/ffi.c        | 196
-rw-r--r--  src/aarch64/ffitarget.h  |   2
-rw-r--r--  src/aarch64/sysv.S       | 249
3 files changed, 176 insertions, 271 deletions
diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index ffa1363..c5a429a 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -71,9 +71,6 @@ ffi_clear_cache (void *start, void *end)
#endif
}
-extern void
-ffi_closure_SYSV (ffi_closure *);
-
/* Test for an FFI floating point representation. */
static unsigned
@@ -211,69 +208,6 @@ is_hfa(const ffi_type *ty)
return (ele_count << 8) | candidate;
}
-/* Test if an ffi_type is a candidate for passing in a register.
-
- This test does not check that sufficient registers of the
- appropriate class are actually available, merely that IFF
- sufficient registers are available then the argument will be passed
- in register(s).
-
- Note that an ffi_type that is deemed to be a register candidate
- will always be returned in registers.
-
- Returns 1 if a register candidate else 0. */
-
-static int
-is_register_candidate (ffi_type *ty)
-{
- switch (ty->type)
- {
- case FFI_TYPE_VOID:
- return 0;
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- case FFI_TYPE_UINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_POINTER:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_INT:
- case FFI_TYPE_SINT64:
- return 1;
-
- case FFI_TYPE_STRUCT:
- if (is_hfa (ty))
- {
- return 1;
- }
- else if (ty->size > 16)
- {
- /* Too large. Will be replaced with a pointer to memory. The
- pointer MAY be passed in a register, but the value will
- not. This test specifically fails since the argument will
- never be passed by value in registers. */
- return 0;
- }
- else
- {
- /* Might be passed in registers depending on the number of
- registers required. */
- return (ty->size + 7) / 8 < N_X_ARG_REG;
- }
- break;
-
- default:
- FFI_ASSERT (0);
- break;
- }
-
- return 0;
-}
-
/* Test if an ffi_type argument or result is a candidate for a vector
register. */
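The deleted is_register_candidate() spells out the AAPCS64 classification that the flags-based scheme now captures up front: HFAs go in the SIMD registers, aggregates of at most 16 bytes are register candidates (and are always returned in registers), and larger aggregates are passed and returned through memory via the x8 pointer. A hedged illustration with made-up C types, not taken from this patch:

typedef struct { float x, y, z; } hfa3;    /* HFA of 3 floats: returned in s0-s2  */
typedef struct { long a, b; }     pair;    /* 16 bytes: returned in x0/x1         */
typedef struct { long a, b, c; }  triple;  /* 24 bytes: returned via memory in x8 */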
@@ -797,42 +731,42 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, void **avalue)
memcpy (orig_rvalue, rvalue, rtype_size);
}
-static unsigned char trampoline [] =
-{ 0x70, 0x00, 0x00, 0x58, /* ldr x16, 1f */
- 0x91, 0x00, 0x00, 0x10, /* adr x17, 2f */
- 0x00, 0x02, 0x1f, 0xd6 /* br x16 */
-};
-
/* Build a trampoline. */
-#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS) \
- ({unsigned char *__tramp = (unsigned char*)(TRAMP); \
- UINT64 __fun = (UINT64)(FUN); \
- UINT64 __ctx = (UINT64)(CTX); \
- UINT64 __flags = (UINT64)(FLAGS); \
- memcpy (__tramp, trampoline, sizeof (trampoline)); \
- memcpy (__tramp + 12, &__fun, sizeof (__fun)); \
- memcpy (__tramp + 20, &__ctx, sizeof (__ctx)); \
- memcpy (__tramp + 28, &__flags, sizeof (__flags)); \
- ffi_clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE); \
- })
+extern void ffi_closure_SYSV (void) FFI_HIDDEN;
+extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
ffi_status
-ffi_prep_closure_loc (ffi_closure* closure,
+ffi_prep_closure_loc (ffi_closure *closure,
ffi_cif* cif,
void (*fun)(ffi_cif*,void*,void**,void*),
void *user_data,
void *codeloc)
{
+ static const unsigned char trampoline[16] = {
+ 0x90, 0x00, 0x00, 0x58, /* ldr x16, tramp+16 */
+ 0xf1, 0xff, 0xff, 0x10, /* adr x17, tramp+0 */
+ 0x00, 0x02, 0x1f, 0xd6 /* br x16 */
+ };
+ char *tramp = closure->tramp;
+ void (*start)(void);
+
if (cif->abi != FFI_SYSV)
return FFI_BAD_ABI;
- FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc,
- cif->aarch64_flags);
-
- closure->cif = cif;
+ closure->cif = cif;
+ closure->fun = fun;
closure->user_data = user_data;
- closure->fun = fun;
+
+ memcpy (tramp, trampoline, sizeof(trampoline));
+
+ if (cif->flags & AARCH64_FLAG_ARG_V)
+ start = ffi_closure_SYSV_V;
+ else
+ start = ffi_closure_SYSV;
+ *(UINT64 *)(tramp + 16) = (uintptr_t)start;
+
+ ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
return FFI_OK;
}
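Below is a sketch of the 24-byte trampoline that ffi_prep_closure_loc now lays down, reconstructed from the code above (the helper name is mine; the instruction encodings are copied from the patch). Because tramp[] is the first field of ffi_closure, the adr leaves the closure pointer itself in x17 for the assembly entry points to use.

#include <stdint.h>
#include <string.h>

#define TRAMP_SIZE 24   /* matches the new FFI_TRAMPOLINE_SIZE */

/* Hypothetical stand-alone re-creation of the layout; a real build would
   follow this with ffi_clear_cache over all 24 bytes.  */
static void
write_trampoline (unsigned char *tramp, uint64_t entry)
{
  static const unsigned char insns[16] = {
    0x90, 0x00, 0x00, 0x58,   /* ldr x16, tramp+16  (entry point stored below) */
    0xf1, 0xff, 0xff, 0x10,   /* adr x17, tramp+0   (== the ffi_closure)       */
    0x00, 0x02, 0x1f, 0xd6    /* br  x16                                       */
  };                          /* bytes 12..15 of the template are padding      */
  memcpy (tramp, insns, sizeof insns);           /* bytes  0..15 */
  memcpy (tramp + 16, &entry, sizeof entry);     /* bytes 16..23 */
}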
@@ -853,20 +787,20 @@ ffi_prep_closure_loc (ffi_closure* closure,
descriptors, invokes the wrapped function, then marshalls the return
value back into the call context. */
-void FFI_HIDDEN
-ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
- void *stack)
+int FFI_HIDDEN
+ffi_closure_SYSV_inner (ffi_cif *cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ struct call_context *context,
+ void *stack, void *rvalue)
{
- ffi_cif *cif = closure->cif;
void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
- void *rvalue = NULL;
- int i, h, nargs = cif->nargs;
+ int i, h, nargs, flags;
struct arg_state state;
- ffi_type *rtype;
arg_init (&state);
- for (i = 0; i < nargs; i++)
+ for (i = 0, nargs = cif->nargs; i < nargs; i++)
{
ffi_type *ty = cif->arg_types[i];
int t = ty->type;
@@ -955,69 +889,11 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
}
}
- /* Figure out where the return value will be passed, either in registers
- or in a memory block allocated by the caller and passed in x8. */
- rtype = cif->rtype;
- if (is_register_candidate (rtype))
- {
- size_t s = rtype->size;
- int t;
-
- /* Register candidates are *always* returned in registers. */
-
- /* Allocate a scratchpad for the return value, we will let the
- callee scrible the result into the scratch pad then move the
- contents into the appropriate return value location for the
- call convention. */
- rvalue = alloca (s);
- (closure->fun) (cif, rvalue, avalue, closure->user_data);
-
- /* Copy the return value into the call context so that it is returned
- as expected to our caller. */
- t = rtype->type;
- switch (t)
- {
- case FFI_TYPE_VOID:
- break;
-
- case FFI_TYPE_INT:
- case FFI_TYPE_UINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_POINTER:
- context->x[0] = extend_integer_type (rvalue, t);
- break;
-
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- extend_hfa_type (&context->v[0], rvalue, 0x100 + t);
- break;
+ flags = cif->flags;
+ if (flags & AARCH64_RET_IN_MEM)
+ rvalue = (void *)(uintptr_t)context->x8;
- case FFI_TYPE_STRUCT:
- h = is_hfa (cif->rtype);
- if (h)
- extend_hfa_type (&context->v[0], rvalue, h);
- else
- {
- FFI_ASSERT (s <= 16);
- memcpy (&context->x[0], rvalue, s);
- }
- break;
+ fun (cif, rvalue, avalue, user_data);
- default:
- abort();
- }
- }
- else
- {
- rvalue = (void *)(uintptr_t)context->x8;
- (closure->fun) (cif, rvalue, avalue, closure->user_data);
- }
+ return flags;
}
-
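To see the AARCH64_RET_IN_MEM path that the rewritten inner function takes, here is a hedged example using only the public libffi API (my example, not from this patch). A closure returning a 24-byte struct cannot use registers, so ffi_closure_SYSV_inner redirects rvalue to the block the caller passed in x8 and the handler writes the result there directly, again with no temporary copy.

#include <ffi.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { int64_t a, b, c; } big;   /* 24 bytes: returned via x8 */

static void make_big (ffi_cif *cif, void *ret, void **args, void *user_data)
{
  (void) cif; (void) args; (void) user_data;
  /* ret already points at the caller's result buffer (from x8).  */
  *(big *) ret = (big) { 1, 2, 3 };
}

int main (void)
{
  ffi_type *elems[4] =
    { &ffi_type_sint64, &ffi_type_sint64, &ffi_type_sint64, NULL };
  ffi_type big_type =
    { .size = 0, .alignment = 0, .type = FFI_TYPE_STRUCT, .elements = elems };
  ffi_cif cif;
  void *code;
  ffi_closure *closure = ffi_closure_alloc (sizeof (ffi_closure), &code);

  if (closure == NULL
      || ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 0, &big_type, NULL) != FFI_OK
      || ffi_prep_closure_loc (closure, &cif, make_big, NULL, code) != FFI_OK)
    return 1;

  big (*fn) (void) = (big (*)(void)) code;
  big r = fn ();
  printf ("%lld %lld %lld\n",
          (long long) r.a, (long long) r.b, (long long) r.c);   /* 1 2 3 */

  ffi_closure_free (closure);
  return 0;
}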
diff --git a/src/aarch64/ffitarget.h b/src/aarch64/ffitarget.h
index 336f28a..b488bbe 100644
--- a/src/aarch64/ffitarget.h
+++ b/src/aarch64/ffitarget.h
@@ -42,7 +42,7 @@ typedef enum ffi_abi
/* ---- Definitions for closures ----------------------------------------- */
#define FFI_CLOSURES 1
-#define FFI_TRAMPOLINE_SIZE 36
+#define FFI_TRAMPOLINE_SIZE 24
#define FFI_NATIVE_RAW_API 0
/* ---- Internal ---- */
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
index ba15663..abd848d 100644
--- a/src/aarch64/sysv.S
+++ b/src/aarch64/sysv.S
@@ -39,15 +39,15 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#endif
#endif
+#ifdef __AARCH64EB__
+# define BE(X) X
+#else
+# define BE(X) 0
+#endif
+
.text
.align 4
- .globl CNAME(ffi_call_SYSV)
-#ifdef __ELF__
- .type CNAME(ffi_call_SYSV), #function
- .hidden CNAME(ffi_call_SYSV)
-#endif
-
/* ffi_call_SYSV
extern void ffi_call_SYSV (void *stack, void *frame,
void (*fn)(void), void *rvalue, int flags);
@@ -179,131 +179,160 @@ CNAME(ffi_call_SYSV):
nop
cfi_endproc
+
+ .globl CNAME(ffi_call_SYSV)
#ifdef __ELF__
- .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
+ .type CNAME(ffi_call_SYSV), #function
+ .hidden CNAME(ffi_call_SYSV)
+ .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
#endif
-#define ffi_closure_SYSV_FS (8 * 2 + CALL_CONTEXT_SIZE)
-
/* ffi_closure_SYSV
Closure invocation glue. This is the low level code invoked directly by
the closure trampoline to setup and call a closure.
- On entry x17 points to a struct trampoline_data, x16 has been clobbered
+ On entry x17 points to a struct ffi_closure, x16 has been clobbered;
all other registers are preserved.
We allocate a call context and save the argument passing registers,
then invoke the generic C ffi_closure_SYSV_inner() function to do all
the real work; on return we load the result passing registers back from
the call context.
+*/
- On entry
-
- extern void
- ffi_closure_SYSV (struct trampoline_data *);
-
- struct trampoline_data
- {
- UINT64 *ffi_closure;
- UINT64 flags;
- };
-
- This function uses the following stack frame layout:
-
- ==
- saved x30(lr)
- x29(fp)-> saved x29(fp)
- saved x22
- saved x21
- ...
- sp -> call_context
- ==
+#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)
- Voila! */
+ .align 4
+CNAME(ffi_closure_SYSV_V):
+ cfi_startproc
+ stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+ cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
- .text
- .align 4
+ /* Save the argument passing vector registers. */
+ stp q0, q1, [sp, #16 + 0]
+ stp q2, q3, [sp, #16 + 32]
+ stp q4, q5, [sp, #16 + 64]
+ stp q6, q7, [sp, #16 + 96]
+ b 0f
+ cfi_endproc
- .globl CNAME(ffi_closure_SYSV)
+ .globl CNAME(ffi_closure_SYSV_V)
#ifdef __ELF__
- .type CNAME(ffi_closure_SYSV), #function
- .hidden CNAME(ffi_closure_SYSV)
+ .type CNAME(ffi_closure_SYSV_V), #function
+ .hidden CNAME(ffi_closure_SYSV_V)
+ .size CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V)
#endif
- cfi_startproc
-CNAME(ffi_closure_SYSV):
- stp x29, x30, [sp, #-16]!
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
-
- mov x29, sp
- cfi_def_cfa_register (x29)
-
- sub sp, sp, #ffi_closure_SYSV_FS
-
- stp x21, x22, [x29, #-16]
- cfi_rel_offset (x21, -16)
- cfi_rel_offset (x22, -8)
-
- /* Load x21 with &call_context. */
- mov x21, sp
- /* Preserve our struct trampoline_data * */
- mov x22, x17
-
- /* Save the rest of the argument passing registers, including
- the structure return pointer. */
- stp x0, x1, [x21, #16*N_V_ARG_REG + 0]
- stp x2, x3, [x21, #16*N_V_ARG_REG + 16]
- stp x4, x5, [x21, #16*N_V_ARG_REG + 32]
- stp x6, x7, [x21, #16*N_V_ARG_REG + 48]
- str x8, [x21, #16*N_V_ARG_REG + 64]
-
- /* Figure out if we should touch the vector registers. */
- ldr x0, [x22, #8]
- tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f
-
- /* Save the argument passing vector registers. */
- stp q0, q1, [x21, #0]
- stp q2, q3, [x21, #32]
- stp q4, q5, [x21, #64]
- stp q6, q7, [x21, #96]
-1:
- /* Load &ffi_closure.. */
- ldr x0, [x22, #0]
- mov x1, x21
- /* Compute the location of the stack at the point that the
- trampoline was called. */
- add x2, x29, #16
-
- bl CNAME(ffi_closure_SYSV_inner)
-
- /* Figure out if we should touch the vector registers. */
- ldr x0, [x22, #8]
- tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f
-
- /* Load the result passing vector registers. */
- ldp q0, q1, [x21, #0]
- ldp q2, q3, [x21, #32]
-1:
- /* Load the result passing core registers. */
- ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
-
- /* We are done, unwind our frame. */
- ldp x21, x22, [x29, #-16]
- cfi_restore (x21)
- cfi_restore (x22)
- mov sp, x29
- cfi_def_cfa_register (sp)
-
- ldp x29, x30, [sp], #16
- cfi_adjust_cfa_offset (-16)
- cfi_restore (x29)
- cfi_restore (x30)
-
- ret
+ .align 4
+ cfi_startproc
+CNAME(ffi_closure_SYSV):
+ stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+ cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+0:
+ mov x29, sp
+
+ /* Save the argument passing core registers. */
+ stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+ stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+ stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+ stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+ str x8, [sp, #16 + 16*N_V_ARG_REG + 64]
+
+ /* Load ffi_closure_inner arguments. */
+ ldp x0, x1, [x17, #FFI_TRAMPOLINE_SIZE] /* load cif, fn */
+ ldr x2, [x17, #FFI_TRAMPOLINE_SIZE+16] /* load user_data */
+ add x3, sp, #16 /* load context */
+ add x4, sp, #ffi_closure_SYSV_FS /* load stack */
+ add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */
+ bl CNAME(ffi_closure_SYSV_inner)
+
+ /* Load the return value as directed. */
+ adr x1, 0f
+ and w0, w0, #AARCH64_RET_MASK
+ add x1, x1, x0, lsl #3
+ add x3, sp, #16+CALL_CONTEXT_SIZE
+ br x1
+
+ /* Note that each table entry is 2 insns, and thus 8 bytes. */
+ .align 4
+0: b 99f /* VOID */
+ nop
+1: ldr x0, [x3] /* INT64 */
+ b 99f
+2: ldp x0, x1, [x3] /* INT128 */
+ b 99f
+3: brk #1000 /* UNUSED */
+ nop
+4: brk #1000 /* UNUSED */
+ nop
+5: brk #1000 /* UNUSED */
+ nop
+6: brk #1000 /* UNUSED */
+ nop
+7: brk #1000 /* UNUSED */
+ nop
+8: ldr s3, [x3, #12] /* S4 */
+ nop
9: ldr s2, [x3, #8] /* S3 */
+ nop
+10: ldp s0, s1, [x3] /* S2 */
+ b 99f
+11: ldr s0, [x3] /* S1 */
+ b 99f
+12: ldr d3, [x3, #24] /* D4 */
+ nop
+13: ldr d2, [x3, #16] /* D3 */
+ nop
+14: ldp d0, d1, [x3] /* D2 */
+ b 99f
+15: ldr d0, [x3] /* D1 */
+ b 99f
+16: ldr q3, [x3, #48] /* Q4 */
+ nop
+17: ldr q2, [x3, #32] /* Q3 */
+ nop
+18: ldp q0, q1, [x3] /* Q2 */
+ b 99f
+19: ldr q0, [x3] /* Q1 */
+ b 99f
+20: ldrb w0, [x3, #BE(7)] /* UINT8 */
+ b 99f
+21: brk #1000 /* reserved */
+ nop
+22: ldrh w0, [x3, #BE(6)] /* UINT16 */
+ b 99f
+23: brk #1000 /* reserved */
+ nop
+24: ldr w0, [x3, #BE(4)] /* UINT32 */
+ b 99f
+25: brk #1000 /* reserved */
+ nop
+26: ldrsb x0, [x3, #BE(7)] /* SINT8 */
+ b 99f
+27: brk #1000 /* reserved */
+ nop
+28: ldrsh x0, [x3, #BE(6)] /* SINT16 */
+ b 99f
+29: brk #1000 /* reserved */
+ nop
+30: ldrsw x0, [x3, #BE(4)] /* SINT32 */
+ nop
+31: /* reserved */
+99: ldp x29, x30, [sp], #ffi_closure_SYSV_FS
+ cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
+ cfi_restore (x29)
+ cfi_restore (x30)
+ ret
cfi_endproc
+
+ .globl CNAME(ffi_closure_SYSV)
#ifdef __ELF__
- .size CNAME(ffi_closure_SYSV), .-CNAME(ffi_closure_SYSV)
+ .type CNAME(ffi_closure_SYSV), #function
+ .hidden CNAME(ffi_closure_SYSV)
+ .size CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV)
#endif
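For readers less used to computed branches, here is a hedged C model of how the new epilogue consumes the value returned by ffi_closure_SYSV_inner. The 8-byte stride and the masking mirror the assembly above; the exact value of AARCH64_RET_MASK lives in this patch's internal header, so 31 here is an assumption consistent with the 32-entry table. The BE() macro in the table adds a big-endian byte offset so that sub-word integers are read from the correct end of their 8-byte slot.

#include <stdint.h>

/* Model of:  adr x1, 0f
              and w0, w0, #AARCH64_RET_MASK
              add x1, x1, x0, lsl #3
              br  x1                                                        */
static uintptr_t
dispatch_target (uintptr_t table_base, int flags)
{
  unsigned index = (unsigned) flags & 31;         /* assumed AARCH64_RET_MASK */
  return table_base + ((uintptr_t) index << 3);   /* two insns = 8 bytes/slot */
  /* The branch then runs one two-instruction entry, e.g. index 1 performs
     "ldr x0, [x3]" for an INT64 result and index 15 "ldr d0, [x3]" for D1,
     where x3 points at the rvalue scratch area in the closure's frame.     */
}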