diff options
author | Andy Wingo <wingo@pobox.com> | 2018-07-29 15:36:07 +0200 |
---|---|---|
committer | Andy Wingo <wingo@pobox.com> | 2018-07-29 15:47:07 +0200 |
commit | b8a9a666f140282fc3928f1027f235f01bad1ade (patch) | |
tree | 508dd64aae9dd247cf2b7fcd5b14adb380b01cce /libguile/gsubr.c | |
parent | 5077e6737128b04e840b96775627b000e29c63f1 (diff) | |
download | guile-b8a9a666f140282fc3928f1027f235f01bad1ade.tar.gz |
Rewrite subr implementation
* libguile/gsubr.c: Reimplement to store subr names and procedures in a
side table, and to allocate fresh vcode for each subr. This allows
JIT of subrs, moves to a uniform all-code-starts-with-instrument-entry
regime, and also allows statprof to distinguish between subrs based on
IP.
* libguile/gsubr.h (SCM_SUBRF, SCM_SUBR_NAME): Call out to functions,
now that these are in a side table.
(scm_subr_function, scm_subr_name): New exports.
(scm_i_primitive_name): New internal function, for looking up a
primitive name based on IP.
(scm_apply_subr): Take the subr index.
* libguile/vm-engine.c (subr-call):
* libguile/jit.c (compile_subr_call): Adapt to take index as arg.
* module/statprof.scm (sample-stack-procs, count-call):
(stack-samples->procedure-data): Update to always record IP in stack
samples and call counts.
* module/system/vm/frame.scm (frame-procedure-name): Simplify.
(frame-instruction-pointer-or-primitive-procedure-name): Removed.
* libguile/programs.h:
* libguile/programs.c (scm_primitive_code_name): New function.
* module/system/vm/program.scm (primitive-code-name): New export.
Diffstat (limited to 'libguile/gsubr.c')
-rw-r--r-- | libguile/gsubr.c | 542 |
1 files changed, 333 insertions, 209 deletions
diff --git a/libguile/gsubr.c b/libguile/gsubr.c index bc12acfd9..bd9da0fda 100644 --- a/libguile/gsubr.c +++ b/libguile/gsubr.c @@ -25,16 +25,19 @@ #include <stdio.h> #include <stdarg.h> +#include <string.h> #include "foreign.h" #include "frames.h" #include "instructions.h" +#include "jit.h" #include "modules.h" #include "numbers.h" #include "private-options.h" #include "programs.h" #include "srfi-4.h" #include "symbols.h" +#include "threads.h" #include "gsubr.h" @@ -46,224 +49,292 @@ * and rest arguments. */ +static scm_i_pthread_mutex_t admin_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER; + +static void **subrs = NULL; +static uint32_t next_subr_idx = 0; +static uint32_t subrs_array_size = 0; + +static uint32_t +alloc_subr_idx (void *subr) +{ + uint32_t idx; + + scm_i_pthread_mutex_lock (&admin_mutex); + + idx = next_subr_idx++; + + if (idx > 0xffffff) abort (); + + if (idx >= subrs_array_size) + { + void **new_subrs; + + if (subrs_array_size) + subrs_array_size *= 2; + else + /* In July 2018 there were 1140 subrs defined in stock Guile. */ + subrs_array_size = 1500; + + /* Leak this allocation, as code lives as long as the program + does. In the likely case, we only make one malloc for the + program; in the general case it's still O(n) in number of subrs + because of the geometric factor. Use malloc instead of GC + allocations because it's not traceable and not collectable. */ + new_subrs = malloc (subrs_array_size * sizeof (void*)); + memcpy (new_subrs, subrs, idx * sizeof (void*)); + subrs = new_subrs; + } + + subrs[idx] = subr; + + scm_i_pthread_mutex_unlock (&admin_mutex); + + return idx; +} + -/* OK here goes nothing: we're going to define VM assembly trampolines for - invoking subrs. Ready? Right! */ - -/* There's a maximum of 10 args, so the number of possible combinations is: - (REQ-OPT-REST) - for 0 args: 1 (000) (1 + 0) - for 1 arg: 3 (100, 010, 001) (2 + 1) - for 2 args: 5 (200, 110, 020, 101, 011) (3 + 2) - for 3 args: 7 (300, 210, 120, 030, 201, 111, 021) (4 + 3) - for N args: 2N+1 - - and the index at which N args starts: - for 0 args: 0 - for 1 args: 1 - for 2 args: 4 - for 3 args: 9 - for N args: N^2 - - One can prove this: - - (1 + 3 + 5 + ... + (2N+1)) - = ((2N+1)+1)/2 * (N+1) - = 2(N+1)/2 * (N+1) - = (N+1)^2 - - Thus the total sum is 11^2 = 121. Let's just generate all of them as - read-only data. -*/ - -/* A: req; B: opt; C: rest */ -#define A(nreq) \ - SCM_PACK_OP_24 (assert_nargs_ee, nreq + 1), \ - SCM_PACK_OP_24 (subr_call, 0), \ - SCM_PACK_OP_24 (handle_interrupts, 0), \ - SCM_PACK_OP_24 (return_values, 0), \ - 0, \ - 0 - -#define B(nopt) \ - SCM_PACK_OP_24 (assert_nargs_le, nopt + 1), \ - SCM_PACK_OP_24 (alloc_frame, nopt + 1), \ - SCM_PACK_OP_24 (subr_call, 0), \ - SCM_PACK_OP_24 (handle_interrupts, 0), \ - SCM_PACK_OP_24 (return_values, 0), \ - 0 - -#define C() \ - SCM_PACK_OP_24 (bind_rest, 1), \ - SCM_PACK_OP_24 (subr_call, 0), \ - SCM_PACK_OP_24 (handle_interrupts, 0), \ - SCM_PACK_OP_24 (return_values, 0), \ - 0, \ - 0 - -#define AB(nreq, nopt) \ - SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1), \ - SCM_PACK_OP_24 (assert_nargs_le, nreq + nopt + 1), \ - SCM_PACK_OP_24 (alloc_frame, nreq + nopt + 1), \ - SCM_PACK_OP_24 (subr_call, 0), \ - SCM_PACK_OP_24 (handle_interrupts, 0), \ - SCM_PACK_OP_24 (return_values, 0) - -#define AC(nreq) \ - SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1), \ - SCM_PACK_OP_24 (bind_rest, nreq + 1), \ - SCM_PACK_OP_24 (subr_call, 0), \ - SCM_PACK_OP_24 (handle_interrupts, 0), \ - SCM_PACK_OP_24 (return_values, 0), \ - 0 - -#define BC(nopt) \ - SCM_PACK_OP_24 (bind_rest, nopt + 1), \ - SCM_PACK_OP_24 (subr_call, 0), \ - SCM_PACK_OP_24 (handle_interrupts, 0), \ - SCM_PACK_OP_24 (return_values, 0), \ - 0, \ - 0 - -#define ABC(nreq, nopt) \ - SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1), \ - SCM_PACK_OP_24 (bind_rest, nreq + nopt + 1), \ - SCM_PACK_OP_24 (subr_call, 0), \ - SCM_PACK_OP_24 (handle_interrupts, 0), \ - SCM_PACK_OP_24 (return_values, 0), \ - 0 +static SCM *names = NULL; +static uint32_t names_array_size = 0; +static void +record_subr_name (uint32_t idx, SCM name) +{ + scm_i_pthread_mutex_lock (&admin_mutex); -/* - (defun generate-bytecode (n) - "Generate bytecode for N arguments" - (interactive "p") - (insert (format "/\* %d arguments *\/\n " n)) - (let ((nreq n)) - (while (<= 0 nreq) - (let ((nopt (- n nreq))) - (insert - (if (< 0 nreq) - (if (< 0 nopt) - (format " AB(%d,%d)," nreq nopt) - (format " A(%d)," nreq)) - (if (< 0 nopt) - (format " B(%d)," nopt) - (format " A(0),")))) - (setq nreq (1- nreq)))) - (insert "\n ") - (setq nreq (1- n)) - (while (<= 0 nreq) - (let ((nopt (- n nreq 1))) - (insert - (if (< 0 nreq) - (if (< 0 nopt) - (format " ABC(%d,%d)," nreq nopt) - (format " AC(%d)," nreq)) - (if (< 0 nopt) - (format " BC(%d)," nopt) - (format " C(),")))) - (setq nreq (1- nreq)))) - (insert "\n\n "))) - - (defun generate-bytecodes (n) - "Generate bytecodes for up to N arguments" - (interactive "p") - (let ((i 0)) - (while (<= i n) - (generate-bytecode i) - (setq i (1+ i))))) -*/ -static const uint32_t subr_stub_code[] = { - /* C-u 1 0 M-x generate-bytecodes RET */ - /* 0 arguments */ - A(0), - - /* 1 arguments */ - A(1), B(1), - C(), - - /* 2 arguments */ - A(2), AB(1,1), B(2), - AC(1), BC(1), - - /* 3 arguments */ - A(3), AB(2,1), AB(1,2), B(3), - AC(2), ABC(1,1), BC(2), - - /* 4 arguments */ - A(4), AB(3,1), AB(2,2), AB(1,3), B(4), - AC(3), ABC(2,1), ABC(1,2), BC(3), - - /* 5 arguments */ - A(5), AB(4,1), AB(3,2), AB(2,3), AB(1,4), B(5), - AC(4), ABC(3,1), ABC(2,2), ABC(1,3), BC(4), - - /* 6 arguments */ - A(6), AB(5,1), AB(4,2), AB(3,3), AB(2,4), AB(1,5), B(6), - AC(5), ABC(4,1), ABC(3,2), ABC(2,3), ABC(1,4), BC(5), - - /* 7 arguments */ - A(7), AB(6,1), AB(5,2), AB(4,3), AB(3,4), AB(2,5), AB(1,6), B(7), - AC(6), ABC(5,1), ABC(4,2), ABC(3,3), ABC(2,4), ABC(1,5), BC(6), - - /* 8 arguments */ - A(8), AB(7,1), AB(6,2), AB(5,3), AB(4,4), AB(3,5), AB(2,6), AB(1,7), B(8), - AC(7), ABC(6,1), ABC(5,2), ABC(4,3), ABC(3,4), ABC(2,5), ABC(1,6), BC(7), - - /* 9 arguments */ - A(9), AB(8,1), AB(7,2), AB(6,3), AB(5,4), AB(4,5), AB(3,6), AB(2,7), AB(1,8), B(9), - AC(8), ABC(7,1), ABC(6,2), ABC(5,3), ABC(4,4), ABC(3,5), ABC(2,6), ABC(1,7), BC(8), - - /* 10 arguments */ - A(10), AB(9,1), AB(8,2), AB(7,3), AB(6,4), AB(5,5), AB(4,6), AB(3,7), AB(2,8), AB(1,9), B(10), - AC(9), ABC(8,1), ABC(7,2), ABC(6,3), ABC(5,4), ABC(4,5), ABC(3,6), ABC(2,7), ABC(1,8), BC(9), + if (idx >= names_array_size) + { + SCM *new_names; + uint32_t new_size; + + /* See comments in alloc_subr_idx about how we choose 1500 as + initial size. It's a GC-managed allocation though. */ + + if (names_array_size) + new_size = names_array_size * 2; + else + new_size = 1500; + + new_names = SCM_GC_MALLOC (new_size * sizeof (SCM)); + memcpy (new_names, names, names_array_size * sizeof (SCM)); + while (names_array_size < new_size) + new_names[names_array_size++] = SCM_BOOL_F; + names = new_names; + names_array_size = new_size; + } + + names[idx] = name; + + scm_i_pthread_mutex_unlock (&admin_mutex); +} + + + +static char *arena = NULL; +static size_t arena_used = 0; +static size_t arena_size = 0; + +static size_t +round_up_power_of_two (size_t n, size_t m) +{ + return (n + (m-1)) & ~(m-1); +} + +static char * +alloc (size_t byte_size) +{ + char *ret; + + byte_size = round_up_power_of_two (byte_size, sizeof (void *)); + + scm_i_pthread_mutex_lock (&admin_mutex); + + while (arena_used + byte_size > arena_size) + { + char *new_arena; + + /* See comments in alloc_subr_idx about how we choose 1500 as + initial size and why we leak the allocation. */ + + if (arena_size) + arena_size *= 2; + else + { + size_t avg_size = 6 * sizeof(uint32_t); + avg_size += sizeof(struct scm_jit_function_data); + arena_size = 1500 * avg_size; + } + + new_arena = malloc (arena_size); + memcpy (new_arena, arena, arena_used); + arena = new_arena; + } + + ret = arena + arena_used; + arena_used += byte_size; + + scm_i_pthread_mutex_unlock (&admin_mutex); + + memset (ret, 0, byte_size); + + return ret; +} + +static uint32_t * +alloc_primitive_code_with_instrumentation (size_t uint32_count, + uint32_t **write_ptr) +{ + char *ptr; + uint32_t *ret; + struct scm_jit_function_data *data; + size_t byte_size, padded_byte_size; + + /* Reserve space for instrument-entry. */ + byte_size = (2 + uint32_count) * sizeof (uint32_t); + padded_byte_size = round_up_power_of_two (byte_size, sizeof (void *)); + /* Reserve space for jit data. */ + ptr = alloc (padded_byte_size + sizeof (struct scm_jit_function_data)); + ret = (uint32_t *) ptr; + data = (struct scm_jit_function_data*) (ret + padded_byte_size); + + ret[0] = SCM_PACK_OP_24 (instrument_entry, 0); + ret[1] = padded_byte_size / 4; + + *write_ptr = ret + 2; + + data->mcode = NULL; + data->counter = 0; + data->start = -padded_byte_size; + data->end = -(padded_byte_size - byte_size); + + return (uint32_t *) ret; +} + +static int +is_primitive_code (const void *ptr) +{ + const char *cptr = ptr; + int ret; + + scm_i_pthread_mutex_lock (&admin_mutex); + ret = cptr >= arena && (cptr - arena) < arena_used; + scm_i_pthread_mutex_unlock (&admin_mutex); + + return ret; +} + +static uint32_t * +alloc_subr_code (uint32_t subr_idx, uint32_t code[], size_t code_size) +{ + uint32_t post[3] = { SCM_PACK_OP_24 (subr_call, subr_idx), + SCM_PACK_OP_24 (handle_interrupts, 0), + SCM_PACK_OP_24 (return_values, 0) }; + uint32_t *ret, *write; + + ret = alloc_primitive_code_with_instrumentation (code_size + 3, &write); + + memcpy (write, code, code_size * sizeof (uint32_t)); + write += code_size; + memcpy (write, post, 3 * sizeof (uint32_t)); + + return ret; +} + +enum arity_kind { + NULLARY = 0, + REQ = 1, + OPT = 2, + REST = 4, + REQ_OPT = REQ + OPT, + REQ_REST = REQ + REST, + OPT_REST = OPT + REST, + REQ_OPT_REST = REQ + OPT + REST }; -#undef A -#undef B -#undef C -#undef AB -#undef AC -#undef BC -#undef ABC - -/* (nargs * nargs) + nopt + rest * (nargs + 1) */ -#define SUBR_STUB_CODE(nreq,nopt,rest) \ - &subr_stub_code[((nreq + nopt + rest) * (nreq + nopt + rest) \ - + nopt + rest * (nreq + nopt + rest + 1)) * 6] - -static const uint32_t* -get_subr_stub_code (unsigned int nreq, unsigned int nopt, unsigned int rest) +static uint32_t* +get_subr_stub_code (uint32_t subr_idx, + unsigned int nreq, unsigned int nopt, unsigned int rest) { + enum arity_kind kind = NULLARY; + if (SCM_UNLIKELY (rest > 1 || nreq + nopt + rest > 10)) scm_out_of_range ("make-subr", scm_from_uint (nreq + nopt + rest)); - return SUBR_STUB_CODE (nreq, nopt, rest); + if (nreq) kind += REQ; + if (nopt) kind += OPT; + if (rest) kind += REST; + + switch (kind) + { + case NULLARY: + case REQ: + { + uint32_t code[1] = { SCM_PACK_OP_24 (assert_nargs_ee, nreq + 1) }; + return alloc_subr_code (subr_idx, code, 1); + } + case OPT: + { + uint32_t code[2] = { SCM_PACK_OP_24 (assert_nargs_le, nopt + 1), + SCM_PACK_OP_24 (alloc_frame, nopt + 1) }; + return alloc_subr_code (subr_idx, code, 2); + } + case REST: + { + uint32_t code[1] = { SCM_PACK_OP_24 (bind_rest, 1) }; + return alloc_subr_code (subr_idx, code, 1); + } + case REQ_OPT: + { + uint32_t code[3] = { SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1), + SCM_PACK_OP_24 (assert_nargs_le, nreq + nopt + 1), + SCM_PACK_OP_24 (alloc_frame, nreq + nopt + 1) }; + return alloc_subr_code (subr_idx, code, 3); + } + case REQ_REST: + { + uint32_t code[2] = { SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1), + SCM_PACK_OP_24 (bind_rest, nreq + 1) }; + return alloc_subr_code (subr_idx, code, 2); + } + case OPT_REST: + { + uint32_t code[1] = { SCM_PACK_OP_24 (bind_rest, nopt + 1) }; + return alloc_subr_code (subr_idx, code, 1); + } + case REQ_OPT_REST: + { + uint32_t code[2] = { SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1), + SCM_PACK_OP_24 (bind_rest, nreq + nopt + 1) }; + return alloc_subr_code (subr_idx, code, 2); + } + default: + abort (); + } } static SCM create_subr (int define, const char *name, unsigned int nreq, unsigned int nopt, unsigned int rest, - SCM (*fcn) (), SCM *generic_loc) + void *fcn, SCM *generic_loc) { SCM ret, sname; + uint32_t idx; scm_t_bits flags; - scm_t_bits nfree = generic_loc ? 3 : 2; + scm_t_bits nfree = generic_loc ? 1 : 0; + idx = alloc_subr_idx (fcn); sname = scm_from_utf8_symbol (name); flags = SCM_F_PROGRAM_IS_PRIMITIVE; flags |= generic_loc ? SCM_F_PROGRAM_IS_PRIMITIVE_GENERIC : 0; ret = scm_words (scm_tc7_program | (nfree << 16) | flags, nfree + 2); - SCM_SET_CELL_WORD_1 (ret, get_subr_stub_code (nreq, nopt, rest)); - SCM_PROGRAM_FREE_VARIABLE_SET (ret, 0, scm_from_pointer (fcn, NULL)); - SCM_PROGRAM_FREE_VARIABLE_SET (ret, 1, sname); + SCM_SET_CELL_WORD_1 (ret, get_subr_stub_code (idx, nreq, nopt, rest)); + record_subr_name (idx, sname); if (generic_loc) - SCM_PROGRAM_FREE_VARIABLE_SET (ret, 2, - scm_from_pointer (generic_loc, NULL)); + SCM_PROGRAM_FREE_VARIABLE_SET (ret, 0, + scm_from_pointer (generic_loc, NULL)); if (define) scm_define (sname, ret); @@ -274,33 +345,86 @@ create_subr (int define, const char *name, int scm_i_primitive_code_p (const uint32_t *code) { - if (code < subr_stub_code) - return 0; - if (code > subr_stub_code + (sizeof(subr_stub_code) / sizeof(uint32_t))) - return 0; + return is_primitive_code (code); +} + +static uintptr_t +primitive_call_ip (const uint32_t *code) +{ + int direction = 0; + while (1) + { + switch (code[0] & 0xff) + { + case scm_op_instrument_entry: + if (direction < 0) abort (); + direction = 1; + code += 2; + break; + case scm_op_assert_nargs_ee: + case scm_op_assert_nargs_le: + case scm_op_assert_nargs_ge: + case scm_op_bind_rest: + case scm_op_alloc_frame: + if (direction < 0) abort (); + direction = 1; + code += 1; + break; + case scm_op_subr_call: + return (uintptr_t) code; + case scm_op_return_values: + case scm_op_handle_interrupts: + /* Going back isn't possible for instruction streams where we + don't know the length of the preceding instruction, but for + the code we emit, these particular opcodes are only ever + preceded by 4-byte instructions. */ + if (direction > 0) abort (); + direction = -1; + code -= 1; + break; + default: + abort (); + } + } +} - return 1; +static uint32_t +primitive_subr_idx (const uint32_t *code) +{ + uintptr_t call_ip = primitive_call_ip (code); + uint32_t idx = ((uint32_t *) call_ip)[0] >> 8; + if (idx >= next_subr_idx) abort(); + return idx; } uintptr_t scm_i_primitive_call_ip (SCM subr) { - size_t i; - const uint32_t *code = SCM_PROGRAM_CODE (subr); - - /* A stub is 6 32-bit words long, or 24 bytes. The call will be one - instruction, in either the fourth, third, or second word. Return a - byte offset from the entry. */ - for (i = 1; i < 4; i++) - if ((code[i] & 0xff) == scm_op_subr_call) - return (uintptr_t) (code + i); - abort (); + return primitive_call_ip (SCM_PROGRAM_CODE (subr)); +} + +SCM +scm_i_primitive_name (const uint32_t *code) +{ + return names[primitive_subr_idx (code)]; +} + +scm_t_subr +scm_subr_function (SCM subr) +{ + return subrs[primitive_subr_idx (SCM_PROGRAM_CODE (subr))]; +} + +SCM +scm_subr_name (SCM subr) +{ + return scm_i_primitive_name (SCM_PROGRAM_CODE (subr)); } SCM -scm_apply_subr (union scm_vm_stack_element *sp, ptrdiff_t nslots) +scm_apply_subr (union scm_vm_stack_element *sp, uint32_t idx, ptrdiff_t nslots) { - SCM (*subr)() = SCM_SUBRF (sp[nslots - 1].as_scm); + SCM (*subr)() = subrs[idx]; #define ARG(i) (sp[i].as_scm) switch (nslots - 1) |