diff options
author | Erik de Castro Lopo <erikd@mega-nerd.com> | 2016-05-24 19:26:56 +1000 |
---|---|---|
committer | Erik de Castro Lopo <erikd@mega-nerd.com> | 2016-05-25 07:32:09 +1000 |
commit | fe8a4e5d77ef8b2bdb2e7e87d50eb477c94bce43 (patch) | |
tree | 36b0c877bce4ecc04beb8dccc60f1209c1692dc1 | |
parent | 95dfdceb8b4dcc54a366949577d9ee389bad5bc3 (diff) | |
download | haskell-fe8a4e5d77ef8b2bdb2e7e87d50eb477c94bce43.tar.gz |
Runtime linker: Break m32 allocator out into its own file
This makes the code a little more modular and allows the removal of some
CPP hackery. By providing dummy implementations of of the `m32_*`
functions (which simply call `errorBelch`) it means that the call sites
for these functions are syntax checked even when `RTS_LINKER_USE_MMAP`
is `0`.
Also changes some size parameter types from `unsigned int` to `size_t`.
Test Plan: Validate on Linux, OS X and Windows
Reviewers: Phyx, hsyl20, bgamari, simonmar, austin
Reviewed By: simonmar, austin
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D2237
-rw-r--r-- | rts/Linker.c | 321 | ||||
-rw-r--r-- | rts/ghc.mk | 2 | ||||
-rw-r--r-- | rts/linker/M32Alloc.c | 345 | ||||
-rw-r--r-- | rts/linker/M32Alloc.h | 47 | ||||
-rw-r--r-- | rts/posix/OSMem.c | 12 | ||||
-rw-r--r-- | rts/sm/OSMem.h | 17 | ||||
-rw-r--r-- | rts/win32/OSMem.c | 12 |
7 files changed, 435 insertions, 321 deletions
diff --git a/rts/Linker.c b/rts/Linker.c index 9deefcadb5..4f1ec921a0 100644 --- a/rts/Linker.c +++ b/rts/Linker.c @@ -25,6 +25,8 @@ #include "Stable.h" #include "RtsSymbols.h" #include "Profiling.h" +#include "sm/OSMem.h" +#include "linker/M32Alloc.h" #if !defined(mingw32_HOST_OS) #include "posix/Signals.h" @@ -52,17 +54,6 @@ #include <dlfcn.h> #endif -#if RTS_LINKER_USE_MMAP -#include <fcntl.h> -#include <sys/mman.h> - -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif - -#endif - - /* PowerPC and ARM have relative branch instructions with only 24 bit * displacements and therefore need jump islands contiguous with each object * code module. @@ -381,35 +372,6 @@ typedef WINBOOL(WINAPI *LPRemoveDLLDirectory)(DLL_DIRECTORY_COOKIE Cookie); static void freeProddableBlocks (ObjectCode *oc); -#if RTS_LINKER_USE_MMAP -/** - * An allocated page being filled by the allocator - */ -struct m32_alloc_t { - void * base_addr; // Page address - unsigned int current_size; // Number of bytes already reserved -}; - -#define M32_MAX_PAGES 32 -#define M32_REFCOUNT_BYTES 8 - -/** - * Allocator - * - * Currently an allocator is just a set of pages being filled. The maximum - * number of pages can be configured with M32_MAX_PAGES. - */ -typedef struct m32_allocator_t { - struct m32_alloc_t pages[M32_MAX_PAGES]; -} * m32_allocator; - -// We use a global memory allocator -static struct m32_allocator_t allocator; - -struct m32_allocator_t; -static void m32_allocator_init(struct m32_allocator_t *m32); -#endif - /* on x86_64 we have a problem with relocating symbol references in * code that was compiled without -fPIC. By default, the small memory * model is used, which assumes that symbol references can fit in a @@ -510,11 +472,6 @@ static void m32_allocator_init(struct m32_allocator_t *m32); static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT; #endif -/* MAP_ANONYMOUS is MAP_ANON on some systems, e.g. OpenBSD */ -#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) -#define MAP_ANONYMOUS MAP_ANON -#endif - static void ghciRemoveSymbolTable(HashTable *table, const char *key, ObjectCode *owner) { @@ -809,9 +766,8 @@ initLinker_ (int retain_cafs) addDLLHandle(WSTR("*.exe"), GetModuleHandle(NULL)); #endif -#if RTS_LINKER_USE_MMAP - m32_allocator_init(&allocator); -#endif + if (RTS_LINKER_USE_MMAP) + m32_allocator_init(); IF_DEBUG(linker, debugBelch("initLinker: done\n")); return; @@ -1478,38 +1434,15 @@ void ghci_enquire ( char* addr ) #endif #if RTS_LINKER_USE_MMAP -#define ROUND_UP(x,size) ((x + size - 1) & ~(size - 1)) -#define ROUND_DOWN(x,size) (x & ~(size - 1)) - -static StgWord getPageSize(void) -{ - static StgWord pagesize = 0; - if (pagesize == 0) { - pagesize = sysconf(_SC_PAGESIZE); - } - return pagesize; -} - -static StgWord roundUpToPage (StgWord size) -{ - return ROUND_UP(size, getPageSize()); -} - -#ifdef OBJFORMAT_ELF -static StgWord roundDownToPage (StgWord size) -{ - return ROUND_DOWN(size, getPageSize()); -} -#endif - // // Returns NULL on failure. // -static void * mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset) +void * +mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset) { void *map_addr = NULL; void *result; - StgWord size; + size_t size; static uint32_t fixed = 0; IF_DEBUG(linker, debugBelch("mmapForLinker: start\n")); @@ -1594,231 +1527,7 @@ mmap_again: return result; } - -/* - -Note [M32 Allocator] -~~~~~~~~~~~~~~~~~~~~ - -A memory allocator that allocates only pages in the 32-bit range (lower 2GB). -This is useful on 64-bit platforms to ensure that addresses of allocated -objects can be referenced with a 32-bit relative offset. - -Initially, the linker used `mmap` to allocate a page per object. Hence it -wasted a lot of space for small objects (see #9314). With this allocator, we -try to fill pages as much as we can for small objects. - -How does it work? ------------------ - -For small objects, a Word64 counter is added at the beginning of the page they -are stored in. It indicates the number of objects that are still alive in the -page. When the counter drops down to zero, the page is freed. The counter is -atomically decremented, hence the deallocation is thread-safe. - -During the allocation phase, the allocator keeps track of some pages that are -not totally filled: the number of pages in the "filling" list is configurable -with M32_MAX_PAGES. Allocation consists in finding some place in one of these -pages or starting a new one, then increasing the page counter. If none of the -pages in the "filling" list has enough free space, the most filled one is -flushed (see below) and a new one is allocated. - -The allocator holds a reference on pages in the "filling" list: the counter in -these pages is 1+n where n is the current number of objects allocated in the -page. Hence allocated objects can be freed while the allocator is using -(filling) the page. Flushing a page consists in decreasing its counter and -removing it from the "filling" list. By extension, flushing the allocator -consists in flushing all the pages in the "filling" list. Don't forget to -flush the allocator at the end of the allocation phase in order to avoid space -leaks! - -Large objects are objects that are larger than a page (minus the bytes required -for the counter and the optional padding). These objects are allocated into -their own set of pages. We can differentiate large and small objects from -their address: large objects are aligned on page size while small objects never -are (because of the space reserved for the page's object counter). - -For large objects, the remaining space at the end of the last page is left -unused by the allocator. It can be used with care as it will be freed with the -associated large object. GHC linker uses this feature/hack, hence changing the -implementation of the M32 allocator must be done with care (i.e. do not try to -improve the allocator to avoid wasting this space without modifying the linker -code accordingly). - -Object allocation is *not* thread-safe (however it could be done easily with a -lock in the allocator structure). Object deallocation is thread-safe. - -*/ - -/**************************************************************************** - * M32 ALLOCATOR (see Note [M32 Allocator] - ***************************************************************************/ - -/** - * Wrapper for `unmap` that handles error cases. - */ -static void munmapForLinker (void * addr, size_t size) -{ - int r = munmap(addr,size); - if (r == -1) { - // Should we abort here? - sysErrorBelch("munmap"); - } -} - -/** - * Initialize the allocator structure - */ -static void m32_allocator_init(m32_allocator m32) { - memset(m32, 0, sizeof(struct m32_allocator_t)); - // Preallocate the initial M32_MAX_PAGES to ensure that they don't - // fragment the memory. - unsigned int pgsz = (unsigned int)getPageSize(); - char* bigchunk = mmapForLinker(pgsz * M32_MAX_PAGES,MAP_ANONYMOUS,-1,0); - int i; - for (i=0; i<M32_MAX_PAGES; i++) { - m32->pages[i].base_addr = bigchunk + i*pgsz; - *((uintptr_t*)m32->pages[i].base_addr) = 1; - m32->pages[i].current_size = M32_REFCOUNT_BYTES; - } -} - -/** - * Atomically decrement the object counter on the given page and release the - * page if necessary. The given address must be the *base address* of the page. - * - * You shouldn't have to use this method. Use `m32_free` instead. - */ -static void m32_free_internal(void * addr) { - uintptr_t c = __sync_sub_and_fetch((uintptr_t*)addr, 1); - if (c == 0) { - munmapForLinker(addr, getPageSize()); - } -} - -/** - * Release the allocator's reference to pages on the "filling" list. This - * should be called when it is believed that no more allocations will be needed - * from the allocator to ensure that empty pages waiting to be filled aren't - * unnecessarily held. - */ -static void m32_allocator_flush(m32_allocator m32) { - int i; - for (i=0; i<M32_MAX_PAGES; i++) { - void * addr = __sync_fetch_and_and(&m32->pages[i].base_addr, 0x0); - if (addr != 0) { - m32_free_internal(addr); - } - } -} - -// Return true if the object has its own dedicated set of pages -#define m32_is_large_object(size,alignment) \ - (size >= getPageSize() - ROUND_UP(M32_REFCOUNT_BYTES,alignment)) - -// Return true if the object has its own dedicated set of pages -#define m32_is_large_object_addr(addr) \ - ((uintptr_t) addr % getPageSize() == 0) - -/** - * Free the memory associated with an object. - * - * If the object is "small", the object counter of the page it is allocated in - * is decremented and the page is not freed until all of its objects are freed. - */ -static void m32_free(void *addr, unsigned int size) { - uintptr_t m = (uintptr_t) addr % getPageSize(); - - if (m == 0) { - // large object - munmapForLinker(addr,ROUND_UP(size,getPageSize())); - } - else { - // small object - void * page_addr = (void*)((uintptr_t)addr - m); - m32_free_internal(page_addr); - } -} - -/** - * Allocate `size` bytes of memory with the given alignment - */ -static void * -m32_alloc(m32_allocator m32, unsigned int size, - unsigned int alignment) { - - unsigned int pgsz = (unsigned int)getPageSize(); - - if (m32_is_large_object(size,alignment)) { - // large object - return mmapForLinker(size,MAP_ANONYMOUS,-1,0); - } - else { - // small object - // Try to find a page that can contain it - int empty = -1; - int most_filled = -1; - int i; - for (i=0; i<M32_MAX_PAGES; i++) { - // empty page - if (m32->pages[i].base_addr == 0) { - empty = empty == -1 ? i : empty; - continue; - } - // If the page is referenced only by the allocator, we can reuse it. - // If we don't then we'll be left with a bunch of pages that have a - // few bytes left to allocate and we don't get to use or free them - // until we use up all the "filling" pages. This will unnecessarily - // allocate new pages and fragment the address space. - if (*((uintptr_t*)(m32->pages[i].base_addr)) == 1) { - m32->pages[i].current_size = M32_REFCOUNT_BYTES; - } - // page can contain the buffer? - unsigned int alsize = ROUND_UP(m32->pages[i].current_size, alignment); - if (size <= pgsz - alsize) { - void * addr = (char*)m32->pages[i].base_addr + alsize; - m32->pages[i].current_size = alsize + size; - // increment the counter atomically - __sync_fetch_and_add((uintptr_t*)m32->pages[i].base_addr, 1); - return addr; - } - // most filled? - if (most_filled == -1 - || m32->pages[most_filled].current_size < m32->pages[i].current_size) - { - most_filled = i; - } - } - - // If we haven't found an empty page, flush the most filled one - if (empty == -1) { - m32_free_internal(m32->pages[most_filled].base_addr); - m32->pages[most_filled].base_addr = 0; - m32->pages[most_filled].current_size = 0; - empty = most_filled; - } - - // Allocate a new page - void * addr = mmapForLinker(pgsz,MAP_ANONYMOUS,-1,0); - if (addr == NULL) { - return NULL; - } - m32->pages[empty].base_addr = addr; - // Add M32_REFCOUNT_BYTES bytes for the counter + padding - m32->pages[empty].current_size = - size+ROUND_UP(M32_REFCOUNT_BYTES,alignment); - // Initialize the counter: - // 1 for the allocator + 1 for the returned allocated memory - *((uintptr_t*)addr) = 2; - return (char*)addr + ROUND_UP(M32_REFCOUNT_BYTES,alignment); - } -} - -/**************************************************************************** - * END (M32 ALLOCATOR) - ***************************************************************************/ - -#endif // RTS_LINKER_USE_MMAP +#endif /* * Remove symbols from the symbol table, and free oc->symbols. @@ -2533,9 +2242,8 @@ static HsInt loadArchive_ (pathchar *path) #endif } -#if RTS_LINKER_USE_MMAP - m32_allocator_flush(&allocator); -#endif + if (RTS_LINKER_USE_MMAP) + m32_allocator_flush(); IF_DEBUG(linker, debugBelch("loadArchive: done\n")); return 1; @@ -3038,7 +2746,7 @@ addSection (Section *s, SectionKind kind, SectionAlloc alloc, static int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first ) { - StgWord n; + size_t n; if (RTS_LINKER_USE_MMAP && USE_CONTIGUOUS_MMAP) { n = roundUpToPage(oc->fileSize); @@ -3065,8 +2773,7 @@ static int ocAllocateSymbolExtras( ObjectCode* oc, int count, int first ) if (RTS_LINKER_USE_MMAP) { n = roundUpToPage(oc->fileSize); - oc->symbol_extras = m32_alloc(&allocator, - sizeof(SymbolExtra) * count, 8); + oc->symbol_extras = m32_alloc(sizeof(SymbolExtra) * count, 8); if (oc->symbol_extras == NULL) return 0; } else { @@ -5074,7 +4781,7 @@ mapObjectFileSection (int fd, Elf_Word offset, Elf_Word size, StgWord *mapped_offset) { void *p; - StgWord pageOffset, pageSize; + size_t pageOffset, pageSize; pageOffset = roundDownToPage(offset); pageSize = roundUpToPage(offset-pageOffset+size); @@ -5155,7 +4862,7 @@ ocGetNames_ELF ( ObjectCode* oc ) // (i.e. we cannot map the secions separately), or if the section // size is small. else if (!oc->imageMapped || size < getPageSize() / 3) { - start = m32_alloc(&allocator, size, 8); + start = m32_alloc(size, 8); if (start == NULL) goto fail; memcpy(start, oc->image + offset, size); alloc = SECTION_M32; diff --git a/rts/ghc.mk b/rts/ghc.mk index d7a46f909a..6fdc2cc213 100644 --- a/rts/ghc.mk +++ b/rts/ghc.mk @@ -37,7 +37,7 @@ $(eval $(call all-target,rts,$(ALL_RTS_LIBS))) # ----------------------------------------------------------------------------- # Defining the sources -ALL_DIRS = hooks sm eventlog +ALL_DIRS = hooks sm eventlog linker ifeq "$(HostOS_CPP)" "mingw32" ALL_DIRS += win32 diff --git a/rts/linker/M32Alloc.c b/rts/linker/M32Alloc.c new file mode 100644 index 0000000000..bdc2fdbe35 --- /dev/null +++ b/rts/linker/M32Alloc.c @@ -0,0 +1,345 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2000-2012 + * + * RTS Object Linker + * + * ---------------------------------------------------------------------------*/ + +#include "Rts.h" +#include "sm/OSMem.h" +#include "linker/M32Alloc.h" + +#include <inttypes.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +/* + +Note [Compile Time Trickery] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This file implements two versions of each of the `m32_*` functions. At the top +of the file there is the real implementaion (compiled in when +`RTS_LINKER_USE_MMAP` is true) and a dummy implementation that exists only to +satisfy the compiler and which hould never be called. If any of these dummy +implementaions are called the program will abort. + +The rationale for this is to allow the calling code to be written without using +the C pre-processor (CPP) `#if` hackery. The value of `RTS_LINKER_USE_MMAP` is +known at compile time, code like: + + if (RTS_LINKER_USE_MMAP) + m32_allocator_init(); + +will be compiled to call to `m32_allocator_init` if `RTS_LINKER_USE_MMAP` is +true and will be optimised awat to nothing if `RTS_LINKER_USE_MMAP` is false. +However, regardless of the value of `RTS_LINKER_USE_MMAP` the compiler will +still check the call for syntax and correct function parameter types. + +*/ + +#if RTS_LINKER_USE_MMAP == 1 + +/* + +Note [M32 Allocator] +~~~~~~~~~~~~~~~~~~~~ + +A memory allocator that allocates only pages in the 32-bit range (lower 2GB). +This is useful on 64-bit platforms to ensure that addresses of allocated +objects can be referenced with a 32-bit relative offset. + +Initially, the linker used `mmap` to allocate a page per object. Hence it +wasted a lot of space for small objects (see #9314). With this allocator, we +try to fill pages as much as we can for small objects. + +How does it work? +----------------- + +For small objects, a Word64 counter is added at the beginning of the page they +are stored in. It indicates the number of objects that are still alive in the +page. When the counter drops down to zero, the page is freed. The counter is +atomically decremented, hence the deallocation is thread-safe. + +During the allocation phase, the allocator keeps track of some pages that are +not totally filled: the number of pages in the "filling" list is configurable +with M32_MAX_PAGES. Allocation consists in finding some place in one of these +pages or starting a new one, then increasing the page counter. If none of the +pages in the "filling" list has enough free space, the most filled one is +flushed (see below) and a new one is allocated. + +The allocator holds a reference on pages in the "filling" list: the counter in +these pages is 1+n where n is the current number of objects allocated in the +page. Hence allocated objects can be freed while the allocator is using +(filling) the page. Flushing a page consists in decreasing its counter and +removing it from the "filling" list. By extension, flushing the allocator +consists in flushing all the pages in the "filling" list. Don't forget to +flush the allocator at the end of the allocation phase in order to avoid space +leaks! + +Large objects are objects that are larger than a page (minus the bytes required +for the counter and the optional padding). These objects are allocated into +their own set of pages. We can differentiate large and small objects from +their address: large objects are aligned on page size while small objects never +are (because of the space reserved for the page's object counter). + +For large objects, the remaining space at the end of the last page is left +unused by the allocator. It can be used with care as it will be freed with the +associated large object. GHC linker uses this feature/hack, hence changing the +implementation of the M32 allocator must be done with care (i.e. do not try to +improve the allocator to avoid wasting this space without modifying the linker +code accordingly). + +Object allocation is *not* thread-safe (however it could be done easily with a +lock in the allocator structure). Object deallocation is thread-safe. + +*/ + +#define ROUND_UP(x,size) ((x + size - 1) & ~(size - 1)) +#define ROUND_DOWN(x,size) (x & ~(size - 1)) + +/**************************************************************************** + * M32 ALLOCATOR (see Note [M32 Allocator] + ***************************************************************************/ + +#define M32_MAX_PAGES 32 +#define M32_REFCOUNT_BYTES 8 + + +/** + * An allocated page being filled by the allocator + */ +struct m32_alloc_t { + void * base_addr; // Page address + size_t current_size; // Number of bytes already reserved +}; + +/** + * Allocator + * + * Currently an allocator is just a set of pages being filled. The maximum + * number of pages can be configured with M32_MAX_PAGES. + */ +typedef struct m32_allocator_t { + struct m32_alloc_t pages[M32_MAX_PAGES]; +} m32_allocator; + +// We use a global memory allocator +static struct m32_allocator_t alloc; + +/** + * Wrapper for `unmap` that handles error cases. + * This is the real implementation. There is another dummy implementation below. + * See the note titled "Compile Time Trickery" at the top of this file. + */ +static void +munmapForLinker (void * addr, size_t size) +{ + int r = munmap(addr,size); + if (r == -1) { + // Should we abort here? + sysErrorBelch("munmap"); + } +} + +/** + * Initialize the allocator structure + * This is the real implementation. There is another dummy implementation below. + * See the note titled "Compile Time Trickery" at the top of this file. + */ +void +m32_allocator_init(void) +{ + memset(&alloc, 0, sizeof(struct m32_allocator_t)); + // Preallocate the initial M32_MAX_PAGES to ensure that they don't + // fragment the memory. + size_t pgsz = getPageSize(); + char* bigchunk = mmapForLinker(pgsz * M32_MAX_PAGES,MAP_ANONYMOUS,-1,0); + int i; + for (i=0; i<M32_MAX_PAGES; i++) { + alloc.pages[i].base_addr = bigchunk + i*pgsz; + *((uintptr_t*)alloc.pages[i].base_addr) = 1; + alloc.pages[i].current_size = M32_REFCOUNT_BYTES; + } +} + +/** + * Atomically decrement the object counter on the given page and release the + * page if necessary. The given address must be the *base address* of the page. + * + * You shouldn't have to use this method. Use `m32_free` instead. + */ +static void +m32_free_internal(void * addr) { + uintptr_t c = __sync_sub_and_fetch((uintptr_t*)addr, 1); + if (c == 0) { + munmapForLinker(addr, getPageSize()); + } +} + +/** + * Release the allocator's reference to pages on the "filling" list. This + * should be called when it is believed that no more allocations will be needed + * from the allocator to ensure that empty pages waiting to be filled aren't + * unnecessarily held. + * + * This is the real implementation. There is another dummy implementation below. + * See the note titled "Compile Time Trickery" at the top of this file. + */ +void +m32_allocator_flush(void) { + int i; + for (i=0; i<M32_MAX_PAGES; i++) { + void * addr = __sync_fetch_and_and(&alloc.pages[i].base_addr, 0x0); + if (addr != 0) { + m32_free_internal(addr); + } + } +} + +// Return true if the object has its own dedicated set of pages +#define m32_is_large_object(size,alignment) \ + (size >= getPageSize() - ROUND_UP(M32_REFCOUNT_BYTES,alignment)) + +// Return true if the object has its own dedicated set of pages +#define m32_is_large_object_addr(addr) \ + ((uintptr_t) addr % getPageSize() == 0) + +/** + * Free the memory associated with an object. + * + * If the object is "small", the object counter of the page it is allocated in + * is decremented and the page is not freed until all of its objects are freed. + * + * This is the real implementation. There is another dummy implementation below. + * See the note titled "Compile Time Trickery" at the top of this file. + */ +void +m32_free(void *addr, size_t size) +{ + uintptr_t m = (uintptr_t) addr % getPageSize(); + + if (m == 0) { + // large object + munmapForLinker(addr,roundUpToPage(size)); + } + else { + // small object + void * page_addr = (void*)((uintptr_t)addr - m); + m32_free_internal(page_addr); + } +} + +/** + * Allocate `size` bytes of memory with the given alignment. + * + * This is the real implementation. There is another dummy implementation below. + * See the note titled "Compile Time Trickery" at the top of this file. + */ +void * +m32_alloc(size_t size, size_t alignment) +{ + size_t pgsz = getPageSize(); + + if (m32_is_large_object(size,alignment)) { + // large object + return mmapForLinker(size,MAP_ANONYMOUS,-1,0); + } + + // small object + // Try to find a page that can contain it + int empty = -1; + int most_filled = -1; + int i; + for (i=0; i<M32_MAX_PAGES; i++) { + // empty page + if (alloc.pages[i].base_addr == 0) { + empty = empty == -1 ? i : empty; + continue; + } + // If the page is referenced only by the allocator, we can reuse it. + // If we don't then we'll be left with a bunch of pages that have a + // few bytes left to allocate and we don't get to use or free them + // until we use up all the "filling" pages. This will unnecessarily + // allocate new pages and fragment the address space. + if (*((uintptr_t*)(alloc.pages[i].base_addr)) == 1) { + alloc.pages[i].current_size = M32_REFCOUNT_BYTES; + } + // page can contain the buffer? + size_t alsize = ROUND_UP(alloc.pages[i].current_size, alignment); + if (size <= pgsz - alsize) { + void * addr = (char*)alloc.pages[i].base_addr + alsize; + alloc.pages[i].current_size = alsize + size; + // increment the counter atomically + __sync_fetch_and_add((uintptr_t*)alloc.pages[i].base_addr, 1); + return addr; + } + // most filled? + if (most_filled == -1 + || alloc.pages[most_filled].current_size < alloc.pages[i].current_size) + { + most_filled = i; + } + } + + // If we haven't found an empty page, flush the most filled one + if (empty == -1) { + m32_free_internal(alloc.pages[most_filled].base_addr); + alloc.pages[most_filled].base_addr = 0; + alloc.pages[most_filled].current_size = 0; + empty = most_filled; + } + + // Allocate a new page + void * addr = mmapForLinker(pgsz,MAP_ANONYMOUS,-1,0); + if (addr == NULL) { + return NULL; + } + alloc.pages[empty].base_addr = addr; + // Add M32_REFCOUNT_BYTES bytes for the counter + padding + alloc.pages[empty].current_size = + size+ROUND_UP(M32_REFCOUNT_BYTES,alignment); + // Initialize the counter: + // 1 for the allocator + 1 for the returned allocated memory + *((uintptr_t*)addr) = 2; + return (char*)addr + ROUND_UP(M32_REFCOUNT_BYTES,alignment); +} + +#elif RTS_LINKER_USE_MMAP == 0 + +// The following implementations of these functions should never be called. If +// they are, there is a bug at the call site. +// See the note titled "Compile Time Trickery" at the top of this file. + +void +m32_allocator_init(void) +{ + barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP); +} + +void +m32_allocator_flush(void) +{ + barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP); +} + +void +m32_free(void *addr STG_UNUSED, size_t size STG_UNUSED) +{ + barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP); +} + +void * +m32_alloc(size_t size STG_UNUSED, size_t alignment STG_UNUSED) +{ + barf("%s: RTS_LINKER_USE_MMAP is %d", __func__, RTS_LINKER_USE_MMAP); + return NULL; +} + +#else + +#error RTS_LINKER_USE_MMAP should be either `0` or `1`. + +#endif diff --git a/rts/linker/M32Alloc.h b/rts/linker/M32Alloc.h new file mode 100644 index 0000000000..3d53a26bb3 --- /dev/null +++ b/rts/linker/M32Alloc.h @@ -0,0 +1,47 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2000-2012 + * + * RTS Object Linker + * + * ---------------------------------------------------------------------------*/ + +#ifndef RTS_LINKER_M32ALLOC +#define RTS_LINKER_M32ALLOC + +#if RTS_LINKER_USE_MMAP +#include <fcntl.h> +#include <sys/mman.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#endif + +/* MAP_ANONYMOUS is MAP_ANON on some systems, e.g. OS X, OpenBSD etc */ +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif + +#include "BeginPrivate.h" + +#if RTS_LINKER_USE_MMAP +#define M32_NO_RETURN /* Nothing */ +#else +#define M32_NO_RETURN GNUC3_ATTRIBUTE(__noreturn__) +#endif + +void m32_allocator_init(void) M32_NO_RETURN; + +void m32_allocator_flush(void) M32_NO_RETURN; + +void m32_free(void *addr, size_t size) M32_NO_RETURN; + +void * m32_alloc(size_t size, size_t alignment) M32_NO_RETURN; + +void * mmapForLinker (size_t bytes, uint32_t flags, int fd, int offset); + +#include "EndPrivate.h" + +#endif diff --git a/rts/posix/OSMem.c b/rts/posix/OSMem.c index 5c997ababb..5ff4bc86e4 100644 --- a/rts/posix/OSMem.c +++ b/rts/posix/OSMem.c @@ -315,20 +315,20 @@ void osFreeAllMBlocks(void) } } -W_ getPageSize (void) +size_t getPageSize (void) { - static W_ pageSize = 0; - if (pageSize) { - return pageSize; - } else { + static size_t pageSize = 0; + + if (pageSize == 0) { long ret; ret = sysconf(_SC_PAGESIZE); if (ret == -1) { barf("getPageSize: cannot get page size"); } pageSize = ret; - return ret; } + + return pageSize; } /* Returns 0 if physical memory size cannot be identified */ diff --git a/rts/sm/OSMem.h b/rts/sm/OSMem.h index a9d4fc9cd2..8518f05d1b 100644 --- a/rts/sm/OSMem.h +++ b/rts/sm/OSMem.h @@ -16,10 +16,25 @@ void *osGetMBlocks(uint32_t n); void osFreeMBlocks(void *addr, uint32_t n); void osReleaseFreeMemory(void); void osFreeAllMBlocks(void); -W_ getPageSize (void); +size_t getPageSize (void); StgWord64 getPhysicalMemorySize (void); void setExecutable (void *p, W_ len, rtsBool exec); +INLINE_HEADER size_t +roundDownToPage (size_t x) +{ + size_t size = getPageSize(); + return (x & ~(size - 1)); +} + +INLINE_HEADER size_t +roundUpToPage (size_t x) +{ + size_t size = getPageSize(); + return ((x + size - 1) & ~(size - 1)); +} + + #ifdef USE_LARGE_ADDRESS_SPACE /* diff --git a/rts/win32/OSMem.c b/rts/win32/OSMem.c index c51aab7d20..f0680e953a 100644 --- a/rts/win32/OSMem.c +++ b/rts/win32/OSMem.c @@ -381,17 +381,17 @@ osFreeAllMBlocks(void) } } -W_ getPageSize (void) +size_t getPageSize (void) { - static W_ pagesize = 0; - if (pagesize) { - return pagesize; - } else { + static size_t pagesize = 0; + + if (pagesize == 0) { SYSTEM_INFO sSysInfo; GetSystemInfo(&sSysInfo); pagesize = sSysInfo.dwPageSize; - return pagesize; } + + return pagesize; } /* Returns 0 if physical memory size cannot be identified */ |