/* ----------------------------------------------------------------------------- * * (c) The GHC Team, 2000-2012 * * RTS Object Linker * * ---------------------------------------------------------------------------*/ #include "Rts.h" #include "sm/OSMem.h" #include "linker/M32Alloc.h" #include "LinkerInternals.h" #include #include #include #include /* Note [Compile Time Trickery] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This file implements two versions of each of the `m32_*` functions. At the top of the file there is the real implementation (compiled in when `RTS_LINKER_USE_MMAP` is true) and a dummy implementation that exists only to satisfy the compiler and which hould never be called. If any of these dummy implementations are called the program will abort. The rationale for this is to allow the calling code to be written without using the C pre-processor (CPP) `#if` hackery. The value of `RTS_LINKER_USE_MMAP` is known at compile time, code like: if (RTS_LINKER_USE_MMAP) m32_allocator_init(); will be compiled to call to `m32_allocator_init` if `RTS_LINKER_USE_MMAP` is true and will be optimised awat to nothing if `RTS_LINKER_USE_MMAP` is false. However, regardless of the value of `RTS_LINKER_USE_MMAP` the compiler will still check the call for syntax and correct function parameter types. */ #if RTS_LINKER_USE_MMAP == 1 /* Note [M32 Allocator] ~~~~~~~~~~~~~~~~~~~~ A memory allocator that allocates only pages in the 32-bit range (lower 2GB). This is useful on 64-bit platforms to ensure that addresses of allocated objects can be referenced with a 32-bit relative offset. Initially, the linker used `mmap` to allocate a page per object. Hence it wasted a lot of space for small objects (see #9314). With this allocator, we try to fill pages as much as we can for small objects. How does it work? ----------------- For small objects, a Word64 counter is added at the beginning of the page they are stored in. It indicates the number of objects that are still alive in the page. When the counter drops down to zero, the page is freed. The counter is atomically decremented, hence the deallocation is thread-safe. During the allocation phase, the allocator keeps track of some pages that are not totally filled: the number of pages in the "filling" list is configurable with M32_MAX_PAGES. Allocation consists in finding some place in one of these pages or starting a new one, then increasing the page counter. If none of the pages in the "filling" list has enough free space, the most filled one is flushed (see below) and a new one is allocated. The allocator holds a reference on pages in the "filling" list: the counter in these pages is 1+n where n is the current number of objects allocated in the page. Hence allocated objects can be freed while the allocator is using (filling) the page. Flushing a page consists in decreasing its counter and removing it from the "filling" list. By extension, flushing the allocator consists in flushing all the pages in the "filling" list. Don't forget to flush the allocator at the end of the allocation phase in order to avoid space leaks! Large objects are objects that are larger than a page (minus the bytes required for the counter and the optional padding). These objects are allocated into their own set of pages. We can differentiate large and small objects from their address: large objects are aligned on page size while small objects never are (because of the space reserved for the page's object counter). For large objects, the remaining space at the end of the last page is left unused by the allocator. It can be used with care as it will be freed with the associated large object. GHC linker uses this feature/hack, hence changing the implementation of the M32 allocator must be done with care (i.e. do not try to improve the allocator to avoid wasting this space without modifying the linker code accordingly). Object allocation is *not* thread-safe (however it could be done easily with a lock in the allocator structure). Object deallocation is thread-safe. */ #define ROUND_UP(x,size) ((x + size - 1) & ~(size - 1)) #define ROUND_DOWN(x,size) (x & ~(size - 1)) /**************************************************************************** * M32 ALLOCATOR (see Note [M32 Allocator] ***************************************************************************/ #define M32_MAX_PAGES 32 #define M32_REFCOUNT_BYTES 8 /** * An allocated page being filled by the allocator */ struct m32_alloc_t { void * base_addr; // Page address size_t current_size; // Number of bytes already reserved }; /** * Allocator * * Currently an allocator is just a set of pages being filled. The maximum * number of pages can be configured with M32_MAX_PAGES. */ typedef struct m32_allocator_t { struct m32_alloc_t pages[M32_MAX_PAGES]; } m32_allocator; // We use a global memory allocator static struct m32_allocator_t alloc; /** * Wrapper for `unmap` that handles error cases. * This is the real implementation. There is another dummy implementation below. * See the note titled "Compile Time Trickery" at the top of this file. */ static void munmapForLinker (void * addr, size_t size) { int r = munmap(addr,size); if (r == -1) { // Should we abort here? sysErrorBelch("munmap"); } } /** * Initialize the allocator structure * This is the real implementation. There is another dummy implementation below. * See the note titled "Compile Time Trickery" at the top of this file. */ void m32_allocator_init(void) { memset(&alloc, 0, sizeof(struct m32_allocator_t)); // Preallocate the initial M32_MAX_PAGES to ensure that they don't // fragment the memory. size_t pgsz = getPageSize(); char* bigchunk = mmapForLinker(pgsz * M32_MAX_PAGES,MAP_ANONYMOUS,-1,0); int i; for (i=0; i= getPageSize() - ROUND_UP(M32_REFCOUNT_BYTES,alignment)) // Return true if the object has its own dedicated set of pages #define m32_is_large_object_addr(addr) \ ((uintptr_t) addr % getPageSize() == 0) /** * Free the memory associated with an object. * * If the object is "small", the object counter of the page it is allocated in * is decremented and the page is not freed until all of its objects are freed. * * This is the real implementation. There is another dummy implementation below. * See the note titled "Compile Time Trickery" at the top of this file. */ void m32_free(void *addr, size_t size) { uintptr_t m = (uintptr_t) addr % getPageSize(); if (m == 0) { // large object munmapForLinker(addr,roundUpToPage(size)); } else { // small object void * page_addr = (void*)((uintptr_t)addr - m); m32_free_internal(page_addr); } } /** * Allocate `size` bytes of memory with the given alignment. * * This is the real implementation. There is another dummy implementation below. * See the note titled "Compile Time Trickery" at the top of this file. */ void * m32_alloc(size_t size, size_t alignment) { size_t pgsz = getPageSize(); if (m32_is_large_object(size,alignment)) { // large object return mmapForLinker(size,MAP_ANONYMOUS,-1,0); } // small object // Try to find a page that can contain it int empty = -1; int most_filled = -1; int i; for (i=0; i