diff options
author | Ben Gamari <ben@smart-cactus.org> | 2022-02-06 20:41:06 -0500 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-02-13 03:26:14 -0500 |
commit | be591e27c76f1eaee79df671d8cf7e39d3d41720 (patch) | |
tree | d80283255c65abd71226b2124d4d0aca7c14056f | |
parent | 191dfd2d88f6d2fd7a879f14da26fe88e616c8ea (diff) | |
download | haskell-be591e27c76f1eaee79df671d8cf7e39d3d41720.tar.gz |
rts: Initial commit of AdjustorPool
-rw-r--r-- | rts/adjustor/AdjustorPool.c | 334 | ||||
-rw-r--r-- | rts/adjustor/AdjustorPool.h | 29 | ||||
-rw-r--r-- | rts/ghc.mk | 1 | ||||
-rw-r--r-- | rts/rts.cabal.in | 1 |
4 files changed, 365 insertions, 0 deletions
diff --git a/rts/adjustor/AdjustorPool.c b/rts/adjustor/AdjustorPool.c new file mode 100644 index 0000000000..b8171d4cc6 --- /dev/null +++ b/rts/adjustor/AdjustorPool.c @@ -0,0 +1,334 @@ +#include "rts/PosixSource.h" +#include "Rts.h" + +#include "sm/OSMem.h" +#include "RtsUtils.h" +#include "linker/MMap.h" +#include "AdjustorPool.h" + +#include <string.h> + +/* + * Note [Adjustor pools] + * ~~~~~~~~~~~~~~~~~~~~~ + * Memory management for adjustors is somewhat complicated by the fact that + * modern operating systems highly discourage users from simultaneously + * mapping memory as both writable and executable. This means that when GHC + * creates an adjustor, it must immediately re-map it to be non-writable, + * thus precluding the possibility of adding further adjustors to the same + * page. + * + * For a long time we would simply allocate a fresh page for each adjustor. + * Given that the adjustor code is quite small (on the order of a dozen + * bytes), this incurred significant space overhead. This was particularly + * bad on Windows, where address space can only be allocated in units of 16 + * pages (64 kBytes). Consequently, even allocating a couple of thousand + * adjustors can consume gigabytes of memory. + * + * AdjustorPool is a specialized allocator for managing adjustors. + * Specifically, it exploits the fact that an adjustor's code can be + * constructed even if we don't know the location of closure to which the + * adjustor will eventually refer. Consequently, we can allocate an entire + * page (a "chunk") of adjustors at a time, populate it with code, and remap + * it as non-writable/executable. Each adjustor's code is constructed to take + * its closure pointer from another region of memory that we leave as + * writable. + * + * To construct an AdjustorPool one uses newAdjustorPool and provide: + * + * - the size of the adjustor code, in bytes + * - a function to construct the adjustor code + * + * After a pool has been constructed, new adjustors can be allocated from it + * using alloc_adjustor and freed using free_adjustor. The pool maintains a + * free list and will reallocate into free adjustor slots when possible. + * We currently make no attempt at freeing AdjustorChunks which contain no + * live adjustors. + * + * The AdjustorPool module also exposes a high-level interface, + * new_adjustor_pool_from_template, capturing the common case where the + * adjustor code can be simply copied from a "template" and the adjustor fixed + * up. + * + * ┌───────────────────────────────────────┬─────────────────────────┐ + * │ │ │ + * ▼AdjustorPool AdjustorChunk │ AdjustorChunk │ + * ┌────────────┐ ┌──►┌───────────────┐ │ ┌──►┌────────────────┐ │ + * │code_size │ │ │owner ├──┘ │ │owner ├──┘ + * │free_list ├───┤ ┌─┤exec_page │ │ │exec_page_next │ + * └────────────┘ │ │ │free_list_next ├────┘ │free_list_next ├───► ... + * │ │ │first_free │ │first_free │ + * │ │ ├───────────────┤ ├────────────────┤ + * │ │ │slot_bitmap │ │slot_bitmap │ + * │ │ │ │ │ │ + * │ │ │ │ │ │ + * │ │ ├───────────────┤ ├────────────────┤ + * │ │ │context[0] │ │context[0] │ + * │ │ │context[1] │ │context[1] │ + * │ │ │... │ │... │ + * │ │ └───────────────┘ └────────────────┘ + * │ │ + * │ │ + * │ │ + * │ └─┐ + * │ ▼AdjustorExecPage + * │ ┌──────────────────┐ + * └───┤owner │ + * ├──────────────────┤ + * │adjustor 0 code │ N.B. the code of each adjustor + * │ │ contains a reference back to + * ├──────────────────┤ its slot's chunk->context + * │adjustor 1 code │ entry. + * │ │ + * ├──────────────────┤ + * │adjustor 3 code │ + * │ │ + * ├──────────────────┤ + * │... │ + * │ │ + * └──────────────────┘ + * + */ + +// Round up the N to the nearest multiple of s +#define ROUND_UP(n, s) ((((n) + (s) - 1) / (s)) * (s)) + +// Forward declarations +struct AdjustorExecPage; +struct AdjustorChunk; +struct AdjustorPool; + +static struct AdjustorChunk *alloc_adjustor_chunk(struct AdjustorPool *owner); + +#define ADJUSTOR_EXEC_PAGE_MAGIC 0xddeeffaabbcc0011ULL + +struct AdjustorExecPage { + uint64_t magic; + /* since FFI is ripe for human error, we include a magic number at the + * beginning of the page to ensure that free_adjustor can catch when it + * is passed an invalid pointer. + */ + struct AdjustorChunk *owner; + uint8_t adjustor_code[]; +}; + +struct AdjustorPool { + mk_adjustor_code_fn make_code; + void *user_data; /* user data to be passed to make_code */ + + size_t adjustor_code_size; /* how many bytes of code does each adjustor require? */ + size_t chunk_slots; /* how many adjustors per chunk? */ + struct AdjustorChunk *free_list; +#if defined(THREADED_RTS) + Mutex lock; +#endif +}; + +struct AdjustorChunk { + size_t first_free; + /* index of the first free adjustor slot. + * Invariant: this must be the index of a slot with unset bit in slot_bitmap + * if the chunk is on its owning AdjustorPool's free_list. If the chunk is not + * on free_list then first_free == pool->chunk_slots. + */ + struct AdjustorPool *owner; + /* the pool which owns this chunk */ + struct AdjustorChunk *free_list_next; + /* the next chunk in the pool's free list */ + struct AdjustorExecPage *exec_page; + /* an AdjustorExecPage containing the code for each adjustor */ + struct AdjustorContext *contexts; + /* an AdjustorContext for each adjustor slot. This points to the contexts + * array which lives after slot_bitmap */ + uint8_t slot_bitmap[]; + /* a bit for each adjustor slot; bit is set if the slot is allocated */ +}; + +struct AdjustorPool * +new_adjustor_pool(size_t code_size, mk_adjustor_code_fn make_code, void *user_data) +{ + struct AdjustorPool *pool = stgMallocBytes(sizeof(struct AdjustorPool), "newAdjustorPool"); + const size_t code_alignment = 16; + pool->make_code = make_code; + pool->user_data = user_data; + pool->adjustor_code_size = code_size; + size_t usable_exec_page_sz = getPageSize() - ROUND_UP(sizeof(struct AdjustorExecPage), code_alignment); + pool->chunk_slots = usable_exec_page_sz / ROUND_UP(code_size, code_alignment); + pool->free_list = NULL; +#if defined(THREADED_RTS) + initMutex(&pool->lock); +#endif + return pool; +} + +/* Return the index of the first unset bit of the given bitmap or + * length_in_bits. */ +static size_t +bitmap_first_unset(uint8_t *bitmap, size_t length_in_bits, size_t start_idx) +{ + for (size_t i = start_idx; i < length_in_bits; i += 8) { + uint8_t x = bitmap[i / 8]; + if (x != 0xff) { + return i + __builtin_clz(~x); + } + } + return length_in_bits; +} + +static void +bitmap_set(uint8_t *bitmap, size_t idx, bool value) +{ + size_t word_n = idx / 8; + uint8_t bit = 1 << (idx % 8); + if (value) { + bitmap[word_n] |= bit; + } else { + bitmap[word_n] &= ~bit; + } +} + +static bool +bitmap_get(uint8_t *bitmap, size_t idx) +{ + size_t word_n = idx / 8; + uint8_t bit = 1 << (idx % 8); + return bitmap[word_n] & bit; +} + +void * +alloc_adjustor(struct AdjustorPool *pool, struct AdjustorContext context) +{ + size_t slot_idx; + struct AdjustorChunk *chunk; + + ACQUIRE_LOCK(&pool->lock); + // allocate a new chunk if free_list is empty. + if (pool->free_list == NULL) { + pool->free_list = alloc_adjustor_chunk(pool); + } + + chunk = pool->free_list; + slot_idx = chunk->first_free; + ASSERT(slot_idx < pool->chunk_slots); + bitmap_set(chunk->slot_bitmap, slot_idx, 1); + + // advance first_free + chunk->first_free = bitmap_first_unset(chunk->slot_bitmap, pool->chunk_slots, slot_idx+1); + if (chunk->first_free == pool->chunk_slots) { + // there are no free slots left in this chunk; remove it from + // free_list. + pool->free_list = chunk->free_list_next; + chunk->free_list_next = NULL; + } + + ASSERT(bitmap_get(chunk->slot_bitmap, slot_idx)); + bitmap_set(chunk->slot_bitmap, slot_idx, true); + chunk->contexts[slot_idx] = context; + void *adjustor = &chunk->exec_page->adjustor_code[pool->adjustor_code_size * slot_idx]; + RELEASE_LOCK(&pool->lock); + + return adjustor; +} + +/* Free an adjustor previously allocated with alloc_adjustor, returning its + * AdjustorContext. + */ +struct AdjustorContext +free_adjustor(void *adjustor) { + uintptr_t exec_page_mask = ~(getPageSize() - 1ULL); + struct AdjustorExecPage *exec_page = (struct AdjustorExecPage *) ((uintptr_t) adjustor & exec_page_mask); + if (exec_page->magic != ADJUSTOR_EXEC_PAGE_MAGIC) { + barf("free_adjustor was passed an invalid adjustor"); + } + struct AdjustorChunk *chunk = exec_page->owner; + struct AdjustorPool *pool = chunk->owner; + + size_t slot_off = (uint8_t *) adjustor - exec_page->adjustor_code; + size_t slot_idx = slot_off / pool->adjustor_code_size; + // ensure that the slot is aligned as we would expect. + ASSERT(slot_off % pool->adjustor_code_size == 0); + + ACQUIRE_LOCK(&pool->lock); + + // ensure that the slot is in fact allocated. + ASSERT(bitmap_get(chunk->slot_bitmap, slot_idx)); + // mark it as free. + bitmap_set(chunk->slot_bitmap, slot_idx, false); + // add the chunk to the pool's free_list if necessary. + if (chunk->first_free == pool->chunk_slots) { + chunk->free_list_next = pool->free_list; + pool->free_list = chunk; + } + + // update first_free + if (chunk->first_free > slot_idx) { + chunk->first_free = slot_idx; + } + + struct AdjustorContext context = chunk->contexts[slot_idx]; + chunk->contexts[slot_idx] = (struct AdjustorContext) { NULL, NULL }; + + RELEASE_LOCK(&pool->lock); + return context; +} + +/* Must hold owner->lock */ +static struct AdjustorChunk * +alloc_adjustor_chunk(struct AdjustorPool *owner) { + size_t pg_sz = getPageSize(); + struct AdjustorExecPage *exec_page = mmapAnonForLinker(pg_sz); + if (exec_page == NULL) { + barf("alloc_adjustor_chunk: failed to allocate"); + } + exec_page->magic = ADJUSTOR_EXEC_PAGE_MAGIC; + + // N.B. pad bitmap to ensure that .contexts is aligned. + size_t bitmap_sz = ROUND_UP(owner->chunk_slots, 8*sizeof(void*)) / 8; + size_t contexts_sz = sizeof(struct AdjustorContext) * owner->chunk_slots; + size_t alloc_sz = sizeof(struct AdjustorChunk) + bitmap_sz + contexts_sz; + struct AdjustorChunk *chunk = stgMallocBytes(alloc_sz, "allocAdjustorChunk"); + chunk->owner = owner; + chunk->first_free = 0; + chunk->contexts = (struct AdjustorContext *) (chunk->slot_bitmap + bitmap_sz); + chunk->free_list_next = NULL; + chunk->exec_page = exec_page; + chunk->exec_page->owner = chunk; + + // initialize the slot bitmap + memset(chunk->slot_bitmap, 0, bitmap_sz); + + size_t code_sz = owner->adjustor_code_size; + for (size_t i = 0; i < owner->chunk_slots; i++) { + struct AdjustorContext *ctxt = &chunk->contexts[i]; + owner->make_code(&exec_page->adjustor_code[i*code_sz], ctxt, owner->user_data); + *ctxt = (struct AdjustorContext) { NULL, NULL }; + } + + // Remap the executable page as executable + mprotectForLinker(exec_page, pg_sz, MEM_READ_EXECUTE); + + return chunk; +} + +static void mk_adjustor_from_template( + uint8_t *exec_code, + const struct AdjustorContext *context, + void *user_data) +{ + struct AdjustorTemplate *tmpl = (struct AdjustorTemplate *) user_data; + + // Copy the code + memcpy(exec_code, tmpl->code_start, tmpl->code_end - tmpl->code_start); + + // Fix up the context pointer + size_t context_off = (uint8_t *) tmpl->context_ptr - tmpl->code_start; + const struct AdjustorContext **context_ptr = (const struct AdjustorContext **) (exec_code + context_off); + *context_ptr = context; +} + +struct AdjustorPool *new_adjustor_pool_from_template(const struct AdjustorTemplate *tmpl) +{ + size_t code_size = tmpl->code_end - tmpl->code_start; + return new_adjustor_pool(code_size, mk_adjustor_from_template, (void *) tmpl); +} + diff --git a/rts/adjustor/AdjustorPool.h b/rts/adjustor/AdjustorPool.h new file mode 100644 index 0000000000..f71fe2fba2 --- /dev/null +++ b/rts/adjustor/AdjustorPool.h @@ -0,0 +1,29 @@ +#pragma once + +#include "BeginPrivate.h" + +struct AdjustorPool; + +/* N.B. the adjustor assembler snippets rely on the layout of this structure + */ +struct AdjustorContext { + StgStablePtr hptr; + StgFunPtr wptr; +}; + +typedef void (*mk_adjustor_code_fn)(uint8_t *exec_code, const struct AdjustorContext *context, void *user_data); + +struct AdjustorPool *new_adjustor_pool(size_t code_size, mk_adjustor_code_fn make_code, void *user_data); +void *alloc_adjustor(struct AdjustorPool *pool, struct AdjustorContext context); +struct AdjustorContext free_adjustor(void *adjustor); + +/* High-level interface: Adjustors from code template */ +struct AdjustorTemplate { + uint8_t *code_start; + uint8_t *code_end; + const struct AdjustorContext **context_ptr; +}; + +struct AdjustorPool *new_adjustor_pool_from_template(const struct AdjustorTemplate *tmpl); + +#include "EndPrivate.h" diff --git a/rts/ghc.mk b/rts/ghc.mk index 370c88b01f..d9a1d60c21 100644 --- a/rts/ghc.mk +++ b/rts/ghc.mk @@ -64,6 +64,7 @@ endif ifneq "$(CLEANING)" "YES" # N.B. we don't source config.mk when CLEANING=YES so none of the below # variables will be set. See #20166. +rts_C_SRCS += rts/adjustor/AdjustorPool.c ifeq "$(UseLibffiForAdjustors)" "YES" rts_C_SRCS += rts/adjustor/LibffiAdjustor.c else diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in index 0c40c2f6bb..6465a1f8e6 100644 --- a/rts/rts.cabal.in +++ b/rts/rts.cabal.in @@ -470,6 +470,7 @@ library asm-sources: StgCRunAsm.S c-sources: Adjustor.c + adjustor/AdjustorPool.c ExecPage.c Arena.c Capability.c |