summaryrefslogtreecommitdiff
path: root/libguile/gsubr.c
diff options
context:
space:
mode:
authorAndy Wingo <wingo@pobox.com>2018-07-29 15:36:07 +0200
committerAndy Wingo <wingo@pobox.com>2018-07-29 15:47:07 +0200
commitb8a9a666f140282fc3928f1027f235f01bad1ade (patch)
tree508dd64aae9dd247cf2b7fcd5b14adb380b01cce /libguile/gsubr.c
parent5077e6737128b04e840b96775627b000e29c63f1 (diff)
downloadguile-b8a9a666f140282fc3928f1027f235f01bad1ade.tar.gz
Rewrite subr implementation
* libguile/gsubr.c: Reimplement to store subr names and procedures in a side table, and to allocate fresh vcode for each subr. This allows JIT of subrs, moves to a uniform all-code-starts-with-instrument-entry regime, and also allows statprof to distinguish between subrs based on IP. * libguile/gsubr.h (SCM_SUBRF, SCM_SUBR_NAME): Call out to functions, now that these are in a side table. (scm_subr_function, scm_subr_name): New exports. (scm_i_primitive_name): New internal function, for looking up a primitive name based on IP. (scm_apply_subr): Take the subr index. * libguile/vm-engine.c (subr-call): * libguile/jit.c (compile_subr_call): Adapt to take index as arg. * module/statprof.scm (sample-stack-procs, count-call): (stack-samples->procedure-data): Update to always record IP in stack samples and call counts. * module/system/vm/frame.scm (frame-procedure-name): Simplify. (frame-instruction-pointer-or-primitive-procedure-name): Removed. * libguile/programs.h: * libguile/programs.c (scm_primitive_code_name): New function. * module/system/vm/program.scm (primitive-code-name): New export.
Diffstat (limited to 'libguile/gsubr.c')
-rw-r--r--libguile/gsubr.c542
1 files changed, 333 insertions, 209 deletions
diff --git a/libguile/gsubr.c b/libguile/gsubr.c
index bc12acfd9..bd9da0fda 100644
--- a/libguile/gsubr.c
+++ b/libguile/gsubr.c
@@ -25,16 +25,19 @@
#include <stdio.h>
#include <stdarg.h>
+#include <string.h>
#include "foreign.h"
#include "frames.h"
#include "instructions.h"
+#include "jit.h"
#include "modules.h"
#include "numbers.h"
#include "private-options.h"
#include "programs.h"
#include "srfi-4.h"
#include "symbols.h"
+#include "threads.h"
#include "gsubr.h"
@@ -46,224 +49,292 @@
* and rest arguments.
*/
+static scm_i_pthread_mutex_t admin_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
+
+static void **subrs = NULL;
+static uint32_t next_subr_idx = 0;
+static uint32_t subrs_array_size = 0;
+
+static uint32_t
+alloc_subr_idx (void *subr)
+{
+ uint32_t idx;
+
+ scm_i_pthread_mutex_lock (&admin_mutex);
+
+ idx = next_subr_idx++;
+
+ if (idx > 0xffffff) abort ();
+
+ if (idx >= subrs_array_size)
+ {
+ void **new_subrs;
+
+ if (subrs_array_size)
+ subrs_array_size *= 2;
+ else
+ /* In July 2018 there were 1140 subrs defined in stock Guile. */
+ subrs_array_size = 1500;
+
+ /* Leak this allocation, as code lives as long as the program
+ does. In the likely case, we only make one malloc for the
+ program; in the general case it's still O(n) in number of subrs
+ because of the geometric factor. Use malloc instead of GC
+ allocations because it's not traceable and not collectable. */
+ new_subrs = malloc (subrs_array_size * sizeof (void*));
+ memcpy (new_subrs, subrs, idx * sizeof (void*));
+ subrs = new_subrs;
+ }
+
+ subrs[idx] = subr;
+
+ scm_i_pthread_mutex_unlock (&admin_mutex);
+
+ return idx;
+}
+
-/* OK here goes nothing: we're going to define VM assembly trampolines for
- invoking subrs. Ready? Right! */
-
-/* There's a maximum of 10 args, so the number of possible combinations is:
- (REQ-OPT-REST)
- for 0 args: 1 (000) (1 + 0)
- for 1 arg: 3 (100, 010, 001) (2 + 1)
- for 2 args: 5 (200, 110, 020, 101, 011) (3 + 2)
- for 3 args: 7 (300, 210, 120, 030, 201, 111, 021) (4 + 3)
- for N args: 2N+1
-
- and the index at which N args starts:
- for 0 args: 0
- for 1 args: 1
- for 2 args: 4
- for 3 args: 9
- for N args: N^2
-
- One can prove this:
-
- (1 + 3 + 5 + ... + (2N+1))
- = ((2N+1)+1)/2 * (N+1)
- = 2(N+1)/2 * (N+1)
- = (N+1)^2
-
- Thus the total sum is 11^2 = 121. Let's just generate all of them as
- read-only data.
-*/
-
-/* A: req; B: opt; C: rest */
-#define A(nreq) \
- SCM_PACK_OP_24 (assert_nargs_ee, nreq + 1), \
- SCM_PACK_OP_24 (subr_call, 0), \
- SCM_PACK_OP_24 (handle_interrupts, 0), \
- SCM_PACK_OP_24 (return_values, 0), \
- 0, \
- 0
-
-#define B(nopt) \
- SCM_PACK_OP_24 (assert_nargs_le, nopt + 1), \
- SCM_PACK_OP_24 (alloc_frame, nopt + 1), \
- SCM_PACK_OP_24 (subr_call, 0), \
- SCM_PACK_OP_24 (handle_interrupts, 0), \
- SCM_PACK_OP_24 (return_values, 0), \
- 0
-
-#define C() \
- SCM_PACK_OP_24 (bind_rest, 1), \
- SCM_PACK_OP_24 (subr_call, 0), \
- SCM_PACK_OP_24 (handle_interrupts, 0), \
- SCM_PACK_OP_24 (return_values, 0), \
- 0, \
- 0
-
-#define AB(nreq, nopt) \
- SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1), \
- SCM_PACK_OP_24 (assert_nargs_le, nreq + nopt + 1), \
- SCM_PACK_OP_24 (alloc_frame, nreq + nopt + 1), \
- SCM_PACK_OP_24 (subr_call, 0), \
- SCM_PACK_OP_24 (handle_interrupts, 0), \
- SCM_PACK_OP_24 (return_values, 0)
-
-#define AC(nreq) \
- SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1), \
- SCM_PACK_OP_24 (bind_rest, nreq + 1), \
- SCM_PACK_OP_24 (subr_call, 0), \
- SCM_PACK_OP_24 (handle_interrupts, 0), \
- SCM_PACK_OP_24 (return_values, 0), \
- 0
-
-#define BC(nopt) \
- SCM_PACK_OP_24 (bind_rest, nopt + 1), \
- SCM_PACK_OP_24 (subr_call, 0), \
- SCM_PACK_OP_24 (handle_interrupts, 0), \
- SCM_PACK_OP_24 (return_values, 0), \
- 0, \
- 0
-
-#define ABC(nreq, nopt) \
- SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1), \
- SCM_PACK_OP_24 (bind_rest, nreq + nopt + 1), \
- SCM_PACK_OP_24 (subr_call, 0), \
- SCM_PACK_OP_24 (handle_interrupts, 0), \
- SCM_PACK_OP_24 (return_values, 0), \
- 0
+static SCM *names = NULL;
+static uint32_t names_array_size = 0;
+static void
+record_subr_name (uint32_t idx, SCM name)
+{
+ scm_i_pthread_mutex_lock (&admin_mutex);
-/*
- (defun generate-bytecode (n)
- "Generate bytecode for N arguments"
- (interactive "p")
- (insert (format "/\* %d arguments *\/\n " n))
- (let ((nreq n))
- (while (<= 0 nreq)
- (let ((nopt (- n nreq)))
- (insert
- (if (< 0 nreq)
- (if (< 0 nopt)
- (format " AB(%d,%d)," nreq nopt)
- (format " A(%d)," nreq))
- (if (< 0 nopt)
- (format " B(%d)," nopt)
- (format " A(0),"))))
- (setq nreq (1- nreq))))
- (insert "\n ")
- (setq nreq (1- n))
- (while (<= 0 nreq)
- (let ((nopt (- n nreq 1)))
- (insert
- (if (< 0 nreq)
- (if (< 0 nopt)
- (format " ABC(%d,%d)," nreq nopt)
- (format " AC(%d)," nreq))
- (if (< 0 nopt)
- (format " BC(%d)," nopt)
- (format " C(),"))))
- (setq nreq (1- nreq))))
- (insert "\n\n ")))
-
- (defun generate-bytecodes (n)
- "Generate bytecodes for up to N arguments"
- (interactive "p")
- (let ((i 0))
- (while (<= i n)
- (generate-bytecode i)
- (setq i (1+ i)))))
-*/
-static const uint32_t subr_stub_code[] = {
- /* C-u 1 0 M-x generate-bytecodes RET */
- /* 0 arguments */
- A(0),
-
- /* 1 arguments */
- A(1), B(1),
- C(),
-
- /* 2 arguments */
- A(2), AB(1,1), B(2),
- AC(1), BC(1),
-
- /* 3 arguments */
- A(3), AB(2,1), AB(1,2), B(3),
- AC(2), ABC(1,1), BC(2),
-
- /* 4 arguments */
- A(4), AB(3,1), AB(2,2), AB(1,3), B(4),
- AC(3), ABC(2,1), ABC(1,2), BC(3),
-
- /* 5 arguments */
- A(5), AB(4,1), AB(3,2), AB(2,3), AB(1,4), B(5),
- AC(4), ABC(3,1), ABC(2,2), ABC(1,3), BC(4),
-
- /* 6 arguments */
- A(6), AB(5,1), AB(4,2), AB(3,3), AB(2,4), AB(1,5), B(6),
- AC(5), ABC(4,1), ABC(3,2), ABC(2,3), ABC(1,4), BC(5),
-
- /* 7 arguments */
- A(7), AB(6,1), AB(5,2), AB(4,3), AB(3,4), AB(2,5), AB(1,6), B(7),
- AC(6), ABC(5,1), ABC(4,2), ABC(3,3), ABC(2,4), ABC(1,5), BC(6),
-
- /* 8 arguments */
- A(8), AB(7,1), AB(6,2), AB(5,3), AB(4,4), AB(3,5), AB(2,6), AB(1,7), B(8),
- AC(7), ABC(6,1), ABC(5,2), ABC(4,3), ABC(3,4), ABC(2,5), ABC(1,6), BC(7),
-
- /* 9 arguments */
- A(9), AB(8,1), AB(7,2), AB(6,3), AB(5,4), AB(4,5), AB(3,6), AB(2,7), AB(1,8), B(9),
- AC(8), ABC(7,1), ABC(6,2), ABC(5,3), ABC(4,4), ABC(3,5), ABC(2,6), ABC(1,7), BC(8),
-
- /* 10 arguments */
- A(10), AB(9,1), AB(8,2), AB(7,3), AB(6,4), AB(5,5), AB(4,6), AB(3,7), AB(2,8), AB(1,9), B(10),
- AC(9), ABC(8,1), ABC(7,2), ABC(6,3), ABC(5,4), ABC(4,5), ABC(3,6), ABC(2,7), ABC(1,8), BC(9),
+ if (idx >= names_array_size)
+ {
+ SCM *new_names;
+ uint32_t new_size;
+
+ /* See comments in alloc_subr_idx about how we choose 1500 as
+ initial size. It's a GC-managed allocation though. */
+
+ if (names_array_size)
+ new_size = names_array_size * 2;
+ else
+ new_size = 1500;
+
+ new_names = SCM_GC_MALLOC (new_size * sizeof (SCM));
+ memcpy (new_names, names, names_array_size * sizeof (SCM));
+ while (names_array_size < new_size)
+ new_names[names_array_size++] = SCM_BOOL_F;
+ names = new_names;
+ names_array_size = new_size;
+ }
+
+ names[idx] = name;
+
+ scm_i_pthread_mutex_unlock (&admin_mutex);
+}
+
+
+
+static char *arena = NULL;
+static size_t arena_used = 0;
+static size_t arena_size = 0;
+
+static size_t
+round_up_power_of_two (size_t n, size_t m)
+{
+ return (n + (m-1)) & ~(m-1);
+}
+
+static char *
+alloc (size_t byte_size)
+{
+ char *ret;
+
+ byte_size = round_up_power_of_two (byte_size, sizeof (void *));
+
+ scm_i_pthread_mutex_lock (&admin_mutex);
+
+ while (arena_used + byte_size > arena_size)
+ {
+ char *new_arena;
+
+ /* See comments in alloc_subr_idx about how we choose 1500 as
+ initial size and why we leak the allocation. */
+
+ if (arena_size)
+ arena_size *= 2;
+ else
+ {
+ size_t avg_size = 6 * sizeof(uint32_t);
+ avg_size += sizeof(struct scm_jit_function_data);
+ arena_size = 1500 * avg_size;
+ }
+
+ new_arena = malloc (arena_size);
+ memcpy (new_arena, arena, arena_used);
+ arena = new_arena;
+ }
+
+ ret = arena + arena_used;
+ arena_used += byte_size;
+
+ scm_i_pthread_mutex_unlock (&admin_mutex);
+
+ memset (ret, 0, byte_size);
+
+ return ret;
+}
+
+static uint32_t *
+alloc_primitive_code_with_instrumentation (size_t uint32_count,
+ uint32_t **write_ptr)
+{
+ char *ptr;
+ uint32_t *ret;
+ struct scm_jit_function_data *data;
+ size_t byte_size, padded_byte_size;
+
+ /* Reserve space for instrument-entry. */
+ byte_size = (2 + uint32_count) * sizeof (uint32_t);
+ padded_byte_size = round_up_power_of_two (byte_size, sizeof (void *));
+ /* Reserve space for jit data. */
+ ptr = alloc (padded_byte_size + sizeof (struct scm_jit_function_data));
+ ret = (uint32_t *) ptr;
+ data = (struct scm_jit_function_data*) (ret + padded_byte_size);
+
+ ret[0] = SCM_PACK_OP_24 (instrument_entry, 0);
+ ret[1] = padded_byte_size / 4;
+
+ *write_ptr = ret + 2;
+
+ data->mcode = NULL;
+ data->counter = 0;
+ data->start = -padded_byte_size;
+ data->end = -(padded_byte_size - byte_size);
+
+ return (uint32_t *) ret;
+}
+
+static int
+is_primitive_code (const void *ptr)
+{
+ const char *cptr = ptr;
+ int ret;
+
+ scm_i_pthread_mutex_lock (&admin_mutex);
+ ret = cptr >= arena && (cptr - arena) < arena_used;
+ scm_i_pthread_mutex_unlock (&admin_mutex);
+
+ return ret;
+}
+
+static uint32_t *
+alloc_subr_code (uint32_t subr_idx, uint32_t code[], size_t code_size)
+{
+ uint32_t post[3] = { SCM_PACK_OP_24 (subr_call, subr_idx),
+ SCM_PACK_OP_24 (handle_interrupts, 0),
+ SCM_PACK_OP_24 (return_values, 0) };
+ uint32_t *ret, *write;
+
+ ret = alloc_primitive_code_with_instrumentation (code_size + 3, &write);
+
+ memcpy (write, code, code_size * sizeof (uint32_t));
+ write += code_size;
+ memcpy (write, post, 3 * sizeof (uint32_t));
+
+ return ret;
+}
+
+enum arity_kind {
+ NULLARY = 0,
+ REQ = 1,
+ OPT = 2,
+ REST = 4,
+ REQ_OPT = REQ + OPT,
+ REQ_REST = REQ + REST,
+ OPT_REST = OPT + REST,
+ REQ_OPT_REST = REQ + OPT + REST
};
-#undef A
-#undef B
-#undef C
-#undef AB
-#undef AC
-#undef BC
-#undef ABC
-
-/* (nargs * nargs) + nopt + rest * (nargs + 1) */
-#define SUBR_STUB_CODE(nreq,nopt,rest) \
- &subr_stub_code[((nreq + nopt + rest) * (nreq + nopt + rest) \
- + nopt + rest * (nreq + nopt + rest + 1)) * 6]
-
-static const uint32_t*
-get_subr_stub_code (unsigned int nreq, unsigned int nopt, unsigned int rest)
+static uint32_t*
+get_subr_stub_code (uint32_t subr_idx,
+ unsigned int nreq, unsigned int nopt, unsigned int rest)
{
+ enum arity_kind kind = NULLARY;
+
if (SCM_UNLIKELY (rest > 1 || nreq + nopt + rest > 10))
scm_out_of_range ("make-subr", scm_from_uint (nreq + nopt + rest));
- return SUBR_STUB_CODE (nreq, nopt, rest);
+ if (nreq) kind += REQ;
+ if (nopt) kind += OPT;
+ if (rest) kind += REST;
+
+ switch (kind)
+ {
+ case NULLARY:
+ case REQ:
+ {
+ uint32_t code[1] = { SCM_PACK_OP_24 (assert_nargs_ee, nreq + 1) };
+ return alloc_subr_code (subr_idx, code, 1);
+ }
+ case OPT:
+ {
+ uint32_t code[2] = { SCM_PACK_OP_24 (assert_nargs_le, nopt + 1),
+ SCM_PACK_OP_24 (alloc_frame, nopt + 1) };
+ return alloc_subr_code (subr_idx, code, 2);
+ }
+ case REST:
+ {
+ uint32_t code[1] = { SCM_PACK_OP_24 (bind_rest, 1) };
+ return alloc_subr_code (subr_idx, code, 1);
+ }
+ case REQ_OPT:
+ {
+ uint32_t code[3] = { SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1),
+ SCM_PACK_OP_24 (assert_nargs_le, nreq + nopt + 1),
+ SCM_PACK_OP_24 (alloc_frame, nreq + nopt + 1) };
+ return alloc_subr_code (subr_idx, code, 3);
+ }
+ case REQ_REST:
+ {
+ uint32_t code[2] = { SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1),
+ SCM_PACK_OP_24 (bind_rest, nreq + 1) };
+ return alloc_subr_code (subr_idx, code, 2);
+ }
+ case OPT_REST:
+ {
+ uint32_t code[1] = { SCM_PACK_OP_24 (bind_rest, nopt + 1) };
+ return alloc_subr_code (subr_idx, code, 1);
+ }
+ case REQ_OPT_REST:
+ {
+ uint32_t code[2] = { SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1),
+ SCM_PACK_OP_24 (bind_rest, nreq + nopt + 1) };
+ return alloc_subr_code (subr_idx, code, 2);
+ }
+ default:
+ abort ();
+ }
}
static SCM
create_subr (int define, const char *name,
unsigned int nreq, unsigned int nopt, unsigned int rest,
- SCM (*fcn) (), SCM *generic_loc)
+ void *fcn, SCM *generic_loc)
{
SCM ret, sname;
+ uint32_t idx;
scm_t_bits flags;
- scm_t_bits nfree = generic_loc ? 3 : 2;
+ scm_t_bits nfree = generic_loc ? 1 : 0;
+ idx = alloc_subr_idx (fcn);
sname = scm_from_utf8_symbol (name);
flags = SCM_F_PROGRAM_IS_PRIMITIVE;
flags |= generic_loc ? SCM_F_PROGRAM_IS_PRIMITIVE_GENERIC : 0;
ret = scm_words (scm_tc7_program | (nfree << 16) | flags, nfree + 2);
- SCM_SET_CELL_WORD_1 (ret, get_subr_stub_code (nreq, nopt, rest));
- SCM_PROGRAM_FREE_VARIABLE_SET (ret, 0, scm_from_pointer (fcn, NULL));
- SCM_PROGRAM_FREE_VARIABLE_SET (ret, 1, sname);
+ SCM_SET_CELL_WORD_1 (ret, get_subr_stub_code (idx, nreq, nopt, rest));
+ record_subr_name (idx, sname);
if (generic_loc)
- SCM_PROGRAM_FREE_VARIABLE_SET (ret, 2,
- scm_from_pointer (generic_loc, NULL));
+ SCM_PROGRAM_FREE_VARIABLE_SET (ret, 0,
+ scm_from_pointer (generic_loc, NULL));
if (define)
scm_define (sname, ret);
@@ -274,33 +345,86 @@ create_subr (int define, const char *name,
int
scm_i_primitive_code_p (const uint32_t *code)
{
- if (code < subr_stub_code)
- return 0;
- if (code > subr_stub_code + (sizeof(subr_stub_code) / sizeof(uint32_t)))
- return 0;
+ return is_primitive_code (code);
+}
+
+static uintptr_t
+primitive_call_ip (const uint32_t *code)
+{
+ int direction = 0;
+ while (1)
+ {
+ switch (code[0] & 0xff)
+ {
+ case scm_op_instrument_entry:
+ if (direction < 0) abort ();
+ direction = 1;
+ code += 2;
+ break;
+ case scm_op_assert_nargs_ee:
+ case scm_op_assert_nargs_le:
+ case scm_op_assert_nargs_ge:
+ case scm_op_bind_rest:
+ case scm_op_alloc_frame:
+ if (direction < 0) abort ();
+ direction = 1;
+ code += 1;
+ break;
+ case scm_op_subr_call:
+ return (uintptr_t) code;
+ case scm_op_return_values:
+ case scm_op_handle_interrupts:
+ /* Going back isn't possible for instruction streams where we
+ don't know the length of the preceding instruction, but for
+ the code we emit, these particular opcodes are only ever
+ preceded by 4-byte instructions. */
+ if (direction > 0) abort ();
+ direction = -1;
+ code -= 1;
+ break;
+ default:
+ abort ();
+ }
+ }
+}
- return 1;
+static uint32_t
+primitive_subr_idx (const uint32_t *code)
+{
+ uintptr_t call_ip = primitive_call_ip (code);
+ uint32_t idx = ((uint32_t *) call_ip)[0] >> 8;
+ if (idx >= next_subr_idx) abort();
+ return idx;
}
uintptr_t
scm_i_primitive_call_ip (SCM subr)
{
- size_t i;
- const uint32_t *code = SCM_PROGRAM_CODE (subr);
-
- /* A stub is 6 32-bit words long, or 24 bytes. The call will be one
- instruction, in either the fourth, third, or second word. Return a
- byte offset from the entry. */
- for (i = 1; i < 4; i++)
- if ((code[i] & 0xff) == scm_op_subr_call)
- return (uintptr_t) (code + i);
- abort ();
+ return primitive_call_ip (SCM_PROGRAM_CODE (subr));
+}
+
+SCM
+scm_i_primitive_name (const uint32_t *code)
+{
+ return names[primitive_subr_idx (code)];
+}
+
+scm_t_subr
+scm_subr_function (SCM subr)
+{
+ return subrs[primitive_subr_idx (SCM_PROGRAM_CODE (subr))];
+}
+
+SCM
+scm_subr_name (SCM subr)
+{
+ return scm_i_primitive_name (SCM_PROGRAM_CODE (subr));
}
SCM
-scm_apply_subr (union scm_vm_stack_element *sp, ptrdiff_t nslots)
+scm_apply_subr (union scm_vm_stack_element *sp, uint32_t idx, ptrdiff_t nslots)
{
- SCM (*subr)() = SCM_SUBRF (sp[nslots - 1].as_scm);
+ SCM (*subr)() = subrs[idx];
#define ARG(i) (sp[i].as_scm)
switch (nslots - 1)