author | Ben Gamari <ben@smart-cactus.org> | 2019-10-28 13:37:39 -0400
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2019-11-01 04:55:24 -0400
commit | c6759080a91804266feb0e4e3a2c614f330649f5 (patch)
tree | 51fa5e23839ab359c6f8995fe400c27269b3f136
parent | 70b62c97622f6527cf606af052fb1f0f14783b28 (diff)
download | haskell-c6759080a91804266feb0e4e3a2c614f330649f5.tar.gz
rts: Make m32 allocator per-ObjectCode
macOS Catalina is finally going to force our hand by forbidding writable
executable mappings. Unfortunately, this is quite incompatible with the
current global m32 allocator, which mixes symbols from various objects
in a single page. The problem is that some of these symbols may not yet
be resolved (e.g. may not yet have had their relocations performed),
since resolution happens lazily; until it has happened we cannot make
the page read-only, and therefore cannot make it executable.
The easiest way around this is to simply create one m32 allocator per
ObjectCode. This may slightly increase fragmentation for short-running
programs, but I suspect it will actually reduce fragmentation for
programs that do lots of loading and unloading, since we can free all
of the pages allocated to an object when it is unloaded (although this
ability will only be implemented in a later patch).
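
To make the new shape of the API concrete, here is a minimal sketch (not part of the patch) of how the per-object allocator is meant to be used. The `m32_*` entry points and the `oc->m32` field are the ones this patch introduces; the surrounding `load_object_sketch` helper and its arguments are purely illustrative.

```c
#include "LinkerInternals.h"   /* ObjectCode, RTS_LINKER_USE_MMAP */
#include "linker/M32Alloc.h"   /* m32_allocator_new/alloc/flush/free */

/* Illustrative only: a hypothetical helper showing the allocator's
 * lifecycle across loading and unloading of a single object. */
static void load_object_sketch(ObjectCode *oc, size_t section_size)
{
#if RTS_LINKER_USE_MMAP
    /* mkOc() now gives every ObjectCode its own allocator. */
    oc->m32 = m32_allocator_new();

    /* Small sections and symbol extras are carved out of the object's
     * own allocator rather than out of a process-global one. */
    void *start = m32_alloc(oc->m32, section_size, 8);
    if (start == NULL) return;

    /* ocTryLoad() flushes the allocator once relocation has finished,
     * dropping its claim on any pages it was still filling. */
    m32_allocator_flush(oc->m32);

    /* freeObjectCode() destroys the allocator when the object is
     * unloaded; freeing all of its pages in one go is left to a
     * later patch, as noted above. */
    m32_allocator_free(oc->m32);
#else
    (void)oc; (void)section_size;
#endif
}
```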
-rw-r--r-- | rts/Linker.c | 10
-rw-r--r-- | rts/LinkerInternals.h | 6
-rw-r--r-- | rts/linker/Elf.c | 2
-rw-r--r-- | rts/linker/LoadArchive.c | 3
-rw-r--r-- | rts/linker/M32Alloc.c | 95
-rw-r--r-- | rts/linker/M32Alloc.h | 11
-rw-r--r-- | rts/linker/SymbolExtras.c | 9
7 files changed, 83 insertions, 53 deletions
```diff
diff --git a/rts/Linker.c b/rts/Linker.c
index 34028d4490..544e7675f0 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -501,9 +501,6 @@ initLinker_ (int retain_cafs)
         mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
     }
 
-    if (RTS_LINKER_USE_MMAP)
-        m32_allocator_init();
-
 #if defined(OBJFORMAT_PEi386)
     initLinker_PEi386();
 #endif
@@ -1233,6 +1230,7 @@ void freeObjectCode (ObjectCode *oc)
     ocDeinit_ELF(oc);
 #endif
 
+    m32_allocator_free(oc->m32);
     stgFree(oc->fileName);
     stgFree(oc->archiveMemberName);
 
@@ -1311,6 +1309,10 @@ mkOc( pathchar *path, char *image, int imageSize,
    /* chain it onto the list of objects */
    oc->next = NULL;
 
+#if RTS_LINKER_USE_MMAP
+   oc->m32 = m32_allocator_new();
+#endif
+
    IF_DEBUG(linker, debugBelch("mkOc: done\n"));
    return oc;
 }
@@ -1629,6 +1631,8 @@ int ocTryLoad (ObjectCode* oc) {
 #   endif
     if (!r) { return r; }
 
+    m32_allocator_flush(oc->m32);
+
     // run init/init_array/ctors/mod_init_func
     IF_DEBUG(linker, debugBelch("ocTryLoad: ocRunInit start\n"));
 
diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h
index 9bdd7530d5..79fb46eb2b 100644
--- a/rts/LinkerInternals.h
+++ b/rts/LinkerInternals.h
@@ -10,6 +10,7 @@
 
 #include "Rts.h"
 #include "Hash.h"
+#include "linker/M32Alloc.h"
 
 #if RTS_LINKER_USE_MMAP
 #include <sys/mman.h>
@@ -244,6 +245,11 @@ typedef struct _ObjectCode {
        require extra information.*/
     HashTable *extraInfos;
 
+#if RTS_LINKER_USE_MMAP == 1
+    /* The m32 allocator used for allocating small sections
+     * and symbol extras during loading */
+    m32_allocator *m32;
+#endif
 } ObjectCode;
 
 #define OC_INFORMATIVE_FILENAME(OC) \
diff --git a/rts/linker/Elf.c b/rts/linker/Elf.c
index 79786ccd3d..313666197b 100644
--- a/rts/linker/Elf.c
+++ b/rts/linker/Elf.c
@@ -778,7 +778,7 @@ ocGetNames_ELF ( ObjectCode* oc )
       // (i.e. we cannot map the secions separately), or if the section
       // size is small.
       else if (!oc->imageMapped || size < getPageSize() / 3) {
-          start = m32_alloc(size, 8);
+          start = m32_alloc(oc->m32, size, 8);
          if (start == NULL) goto fail;
          memcpy(start, oc->image + offset, size);
          alloc = SECTION_M32;
diff --git a/rts/linker/LoadArchive.c b/rts/linker/LoadArchive.c
index a92f86fff0..936c0e5261 100644
--- a/rts/linker/LoadArchive.c
+++ b/rts/linker/LoadArchive.c
@@ -616,9 +616,6 @@ fail:
 #endif
     }
 
-    if (RTS_LINKER_USE_MMAP)
-        m32_allocator_flush();
-
     DEBUG_LOG("done\n");
     return retcode;
 }
diff --git a/rts/linker/M32Alloc.c b/rts/linker/M32Alloc.c
index 33c4335286..ca8d865e35 100644
--- a/rts/linker/M32Alloc.c
+++ b/rts/linker/M32Alloc.c
@@ -8,6 +8,7 @@
 
 #include "Rts.h"
 #include "sm/OSMem.h"
+#include "RtsUtils.h"
 #include "linker/M32Alloc.h"
 #include "LinkerInternals.h"
 
@@ -123,12 +124,9 @@ struct m32_alloc_t {
  * Currently an allocator is just a set of pages being filled. The maximum
  * number of pages can be configured with M32_MAX_PAGES.
  */
-typedef struct m32_allocator_t {
+struct m32_allocator_t {
    struct m32_alloc_t pages[M32_MAX_PAGES];
-} m32_allocator;
-
-// We use a global memory allocator
-static struct m32_allocator_t alloc;
+};
 
 /**
  * Wrapper for `unmap` that handles error cases.
@@ -150,23 +148,37 @@ munmapForLinker (void * addr, size_t size)
  * This is the real implementation. There is another dummy implementation below.
  * See the note titled "Compile Time Trickery" at the top of this file.
  */
-void
-m32_allocator_init(void)
+m32_allocator *
+m32_allocator_new()
 {
-   memset(&alloc, 0, sizeof(struct m32_allocator_t));
-   // Preallocate the initial M32_MAX_PAGES to ensure that they don't
-   // fragment the memory.
-   size_t pgsz = getPageSize();
-   char* bigchunk = mmapForLinker(pgsz * M32_MAX_PAGES,MAP_ANONYMOUS,-1,0);
-   if (bigchunk == NULL)
-      barf("m32_allocator_init: Failed to map");
+   m32_allocator *alloc =
+     stgMallocBytes(sizeof(m32_allocator), "m32_new_allocator");
+   memset(alloc, 0, sizeof(struct m32_allocator_t));
+
+   // Preallocate the initial M32_MAX_PAGES to ensure that they don't
+   // fragment the memory.
+   size_t pgsz = getPageSize();
+   char* bigchunk = mmapForLinker(pgsz * M32_MAX_PAGES,MAP_ANONYMOUS,-1,0);
+   if (bigchunk == NULL)
+      barf("m32_allocator_init: Failed to map");
+
+   int i;
+   for (i=0; i<M32_MAX_PAGES; i++) {
+      alloc->pages[i].base_addr = bigchunk + i*pgsz;
+      *((uintptr_t*)alloc->pages[i].base_addr) = 1;
+      alloc->pages[i].current_size = M32_REFCOUNT_BYTES;
+   }
+   return alloc;
+}
 
-   int i;
-   for (i=0; i<M32_MAX_PAGES; i++) {
-      alloc.pages[i].base_addr = bigchunk + i*pgsz;
-      *((uintptr_t*)alloc.pages[i].base_addr) = 1;
-      alloc.pages[i].current_size = M32_REFCOUNT_BYTES;
-   }
+/**
+ * Free an m32_allocator. Note that this doesn't free the pages
+ * allocated using the allocator. This must be done separately with m32_free.
+ */
+void m32_allocator_free(m32_allocator *alloc)
+{
+   m32_allocator_flush(alloc);
+   stgFree(alloc);
 }
 
 /**
@@ -193,10 +205,10 @@ m32_free_internal(void * addr) {
  * See the note titled "Compile Time Trickery" at the top of this file.
  */
 void
-m32_allocator_flush(void) {
+m32_allocator_flush(m32_allocator *alloc) {
    int i;
    for (i=0; i<M32_MAX_PAGES; i++) {
-      void * addr =  __sync_fetch_and_and(&alloc.pages[i].base_addr, 0x0);
+      void * addr =  __sync_fetch_and_and(&alloc->pages[i].base_addr, 0x0);
       if (addr != 0) {
          m32_free_internal(addr);
       }
@@ -243,7 +255,7 @@ m32_free(void *addr, size_t size)
  * See the note titled "Compile Time Trickery" at the top of this file.
  */
 void *
-m32_alloc(size_t size, size_t alignment)
+m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment)
 {
    size_t pgsz = getPageSize();
 
@@ -259,7 +271,7 @@ m32_alloc(size_t size, size_t alignment)
    int i;
    for (i=0; i<M32_MAX_PAGES; i++) {
       // empty page
-      if (alloc.pages[i].base_addr == 0) {
+      if (alloc->pages[i].base_addr == 0) {
          empty = empty == -1 ? i : empty;
          continue;
       }
@@ -268,21 +280,21 @@ m32_alloc(size_t size, size_t alignment)
       // few bytes left to allocate and we don't get to use or free them
       // until we use up all the "filling" pages. This will unnecessarily
       // allocate new pages and fragment the address space.
-      if (*((uintptr_t*)(alloc.pages[i].base_addr)) == 1) {
-         alloc.pages[i].current_size = M32_REFCOUNT_BYTES;
+      if (*((uintptr_t*)(alloc->pages[i].base_addr)) == 1) {
+         alloc->pages[i].current_size = M32_REFCOUNT_BYTES;
       }
       // page can contain the buffer?
-      size_t alsize = ROUND_UP(alloc.pages[i].current_size, alignment);
+      size_t alsize = ROUND_UP(alloc->pages[i].current_size, alignment);
       if (size <= pgsz - alsize) {
-         void * addr = (char*)alloc.pages[i].base_addr + alsize;
-         alloc.pages[i].current_size = alsize + size;
+         void * addr = (char*)alloc->pages[i].base_addr + alsize;
+         alloc->pages[i].current_size = alsize + size;
          // increment the counter atomically
-         __sync_fetch_and_add((uintptr_t*)alloc.pages[i].base_addr, 1);
+         __sync_fetch_and_add((uintptr_t*)alloc->pages[i].base_addr, 1);
          return addr;
       }
       // most filled?
       if (most_filled == -1
-          || alloc.pages[most_filled].current_size < alloc.pages[i].current_size)
+          || alloc->pages[most_filled].current_size < alloc->pages[i].current_size)
       {
          most_filled = i;
       }
@@ -290,9 +302,9 @@ m32_alloc(size_t size, size_t alignment)
 
    // If we haven't found an empty page, flush the most filled one
    if (empty == -1) {
-      m32_free_internal(alloc.pages[most_filled].base_addr);
-      alloc.pages[most_filled].base_addr = 0;
-      alloc.pages[most_filled].current_size = 0;
+      m32_free_internal(alloc->pages[most_filled].base_addr);
+      alloc->pages[most_filled].base_addr = 0;
+      alloc->pages[most_filled].current_size = 0;
       empty = most_filled;
    }
 
@@ -301,9 +313,9 @@ m32_alloc(size_t size, size_t alignment)
    if (addr == NULL) {
       return NULL;
    }
-   alloc.pages[empty].base_addr = addr;
+   alloc->pages[empty].base_addr = addr;
    // Add M32_REFCOUNT_BYTES bytes for the counter + padding
-   alloc.pages[empty].current_size =
+   alloc->pages[empty].current_size =
       size+ROUND_UP(M32_REFCOUNT_BYTES,alignment);
    // Initialize the counter:
    // 1 for the allocator + 1 for the returned allocated memory
@@ -317,14 +329,19 @@ m32_alloc(size_t size, size_t alignment)
 // they are, there is a bug at the call site.
 // See the note titled "Compile Time Trickery" at the top of this file.
 
-void
-m32_allocator_init(void)
+m32_allocator *
+m32_allocator_new(void)
+{
+    barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
+}
+
+void m32_allocator_free(m32_allocator *alloc)
 {
     barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
 }
 
 void
-m32_allocator_flush(void)
+m32_flush(void)
 {
     barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP);
 }
diff --git a/rts/linker/M32Alloc.h b/rts/linker/M32Alloc.h
index 645f6b2422..8af9235921 100644
--- a/rts/linker/M32Alloc.h
+++ b/rts/linker/M32Alloc.h
@@ -26,12 +26,17 @@
 #define M32_NO_RETURN GNUC3_ATTRIBUTE(__noreturn__)
 #endif
 
-void m32_allocator_init(void) M32_NO_RETURN;
+struct m32_allocator_t;
+typedef struct m32_allocator_t m32_allocator;
 
-void m32_allocator_flush(void) M32_NO_RETURN;
+m32_allocator *m32_allocator_new(void) M32_NO_RETURN;
+
+void m32_allocator_free(m32_allocator *alloc) M32_NO_RETURN;
+
+void m32_allocator_flush(m32_allocator *alloc) M32_NO_RETURN;
 
 void m32_free(void *addr, size_t size) M32_NO_RETURN;
 
-void * m32_alloc(size_t size, size_t alignment) M32_NO_RETURN;
+void * m32_alloc(m32_allocator *alloc, size_t size, size_t alignment) M32_NO_RETURN;
 
 #include "EndPrivate.h"
diff --git a/rts/linker/SymbolExtras.c b/rts/linker/SymbolExtras.c
index 4149522ba0..04a678898f 100644
--- a/rts/linker/SymbolExtras.c
+++ b/rts/linker/SymbolExtras.c
@@ -53,6 +53,7 @@ int ocAllocateExtras(ObjectCode* oc, int count, int first, int bssSize)
 {
   void* oldImage = oc->image;
+  const size_t extras_size = sizeof(SymbolExtra) * count;
 
   if (count > 0 || bssSize > 0) {
     if (!RTS_LINKER_USE_MMAP) {
 
@@ -64,7 +65,7 @@ int ocAllocateExtras(ObjectCode* oc, int count, int first, int bssSize)
       oc->image -= misalignment;
       oc->image = stgReallocBytes( oc->image,
                                misalignment +
-                               aligned + sizeof (SymbolExtra) * count,
+                               aligned + extras_size,
                                "ocAllocateExtras" );
       oc->image += misalignment;
 
@@ -73,7 +74,7 @@ int ocAllocateExtras(ObjectCode* oc, int count, int first, int bssSize)
       /* Keep image, bssExtras and symbol_extras contiguous */
       size_t n = roundUpToPage(oc->fileSize);
       bssSize = roundUpToAlign(bssSize, 8);
-      size_t allocated_size = n + bssSize + (sizeof(SymbolExtra) * count);
+      size_t allocated_size = n + bssSize + extras_size;
       void *new = mmapForLinker(allocated_size, MAP_ANONYMOUS, -1, 0);
       if (new) {
         memcpy(new, oc->image, oc->fileSize);
@@ -92,13 +93,13 @@ int ocAllocateExtras(ObjectCode* oc, int count, int first, int bssSize)
         return 0;
       }
     } else {
-      oc->symbol_extras = m32_alloc(sizeof(SymbolExtra) * count, 8);
+      oc->symbol_extras = m32_alloc(oc->m32, extras_size, 8);
       if (oc->symbol_extras == NULL) return 0;
     }
   }
 
   if (oc->symbol_extras != NULL) {
-    memset( oc->symbol_extras, 0, sizeof (SymbolExtra) * count );
+    memset( oc->symbol_extras, 0, extras_size );
   }
 
   // ObjectCodeFormatInfo contains computed addresses based on offset to
```