diff options
Diffstat (limited to 'rts')
-rw-r--r-- | rts/CheckUnload.c | 6 | ||||
-rw-r--r-- | rts/RetainerProfile.c | 3 | ||||
-rw-r--r-- | rts/Sparks.c | 1 | ||||
-rw-r--r-- | rts/posix/OSMem.c | 200 | ||||
-rw-r--r-- | rts/sm/BlockAlloc.c | 14 | ||||
-rw-r--r-- | rts/sm/Compact.c | 4 | ||||
-rw-r--r-- | rts/sm/Evac.c | 95 | ||||
-rw-r--r-- | rts/sm/GC.c | 48 | ||||
-rw-r--r-- | rts/sm/GC.h | 2 | ||||
-rw-r--r-- | rts/sm/GCAux.c | 10 | ||||
-rw-r--r-- | rts/sm/GCThread.h | 7 | ||||
-rw-r--r-- | rts/sm/HeapAlloc.h | 224 | ||||
-rw-r--r-- | rts/sm/MBlock.c | 399 | ||||
-rw-r--r-- | rts/sm/OSMem.h | 41 | ||||
-rw-r--r-- | rts/sm/Sanity.c | 3 | ||||
-rw-r--r-- | rts/sm/Scav.c | 11 | ||||
-rw-r--r-- | rts/sm/Storage.c | 10 | ||||
-rw-r--r-- | rts/sm/Storage.h | 57 | ||||
-rw-r--r-- | rts/win32/OSMem.c | 77 |
19 files changed, 1040 insertions, 172 deletions
diff --git a/rts/CheckUnload.c b/rts/CheckUnload.c index 2c01113f63..34f976db4d 100644 --- a/rts/CheckUnload.c +++ b/rts/CheckUnload.c @@ -271,7 +271,8 @@ void checkUnload (StgClosure *static_objects) addrs = allocHashTable(); - for (p = static_objects; p != END_OF_STATIC_LIST; p = link) { + for (p = static_objects; p != END_OF_STATIC_OBJECT_LIST; p = link) { + p = UNTAG_STATIC_LIST_PTR(p); checkAddress(addrs, p); info = get_itbl(p); link = *STATIC_LINK(info, p); @@ -279,8 +280,9 @@ void checkUnload (StgClosure *static_objects) // CAFs on revertible_caf_list are not on static_objects for (p = (StgClosure*)revertible_caf_list; - p != END_OF_STATIC_LIST; + p != END_OF_CAF_LIST; p = ((StgIndStatic *)p)->static_link) { + p = UNTAG_STATIC_LIST_PTR(p); checkAddress(addrs, p); } diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c index 78daa892ad..ba58c199f0 100644 --- a/rts/RetainerProfile.c +++ b/rts/RetainerProfile.c @@ -1881,7 +1881,8 @@ resetStaticObjectForRetainerProfiling( StgClosure *static_objects ) count = 0; #endif p = static_objects; - while (p != END_OF_STATIC_LIST) { + while (p != END_OF_STATIC_OBJECT_LIST) { + p = UNTAG_STATIC_LIST_PTR(p); #ifdef DEBUG_RETAINER count++; #endif diff --git a/rts/Sparks.c b/rts/Sparks.c index ada2adfd3a..ec075805bf 100644 --- a/rts/Sparks.c +++ b/rts/Sparks.c @@ -14,6 +14,7 @@ #include "Trace.h" #include "Prelude.h" #include "Sparks.h" +#include "sm/HeapAlloc.h" #if defined(THREADED_RTS) diff --git a/rts/posix/OSMem.c b/rts/posix/OSMem.c index 359df7022b..125ae10367 100644 --- a/rts/posix/OSMem.c +++ b/rts/posix/OSMem.c @@ -13,6 +13,7 @@ #include "RtsUtils.h" #include "sm/OSMem.h" +#include "sm/HeapAlloc.h" #ifdef HAVE_UNISTD_H #include <unistd.h> @@ -72,23 +73,67 @@ void osMemInit(void) -------------------------------------------------------------------------- */ -// A wrapper around mmap(), to abstract away from OS differences in -// the mmap() interface. +/* + A wrapper around mmap(), to abstract away from OS differences in + the mmap() interface. + + It supports the following operations: + - reserve: find a new chunk of available address space, and make it so + that we own it (no other library will get it), but don't actually + allocate memory for it + the addr is a hint for where to place the memory (and most + of the time the OS happily ignores!) + - commit: given a chunk of address space that we know we own, make sure + there is some memory backing it + the addr is not a hint, it must point into previously reserved + address space, or bad things happen + - reserve&commit: do both at the same time + + The naming is chosen from the Win32 API (VirtualAlloc) which does the + same thing and has done so forever, while support for this in Unix systems + has only been added recently and is hidden in the posix portability mess. + It is confusing because to get the reserve behavior we need MAP_NORESERVE + (which tells the kernel not to allocate backing space), but heh... +*/ +enum +{ + MEM_RESERVE = 1, + MEM_COMMIT = 2, + MEM_RESERVE_AND_COMMIT = MEM_RESERVE | MEM_COMMIT +}; static void * -my_mmap (void *addr, W_ size) +my_mmap (void *addr, W_ size, int operation) { void *ret; + int prot, flags; + + if (operation & MEM_COMMIT) + prot = PROT_READ | PROT_WRITE; + else + prot = PROT_NONE; + if (operation == MEM_RESERVE) + flags = MAP_NORESERVE; + else if (operation == MEM_COMMIT) + flags = MAP_FIXED; + else + flags = 0; #if defined(solaris2_HOST_OS) || defined(irix_HOST_OS) { - int fd = open("/dev/zero",O_RDONLY); - ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); - close(fd); + if (operation & MEM_RESERVE) + { + int fd = open("/dev/zero",O_RDONLY); + ret = mmap(addr, size, prot, flags | MAP_PRIVATE, fd, 0); + close(fd); + } + else + { + ret = mmap(addr, size, prot, flags | MAP_PRIVATE, -1, 0); + } } #elif hpux_HOST_OS - ret = mmap(addr, size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ret = mmap(addr, size, prot, flags | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); #elif darwin_HOST_OS // Without MAP_FIXED, Apple's mmap ignores addr. // With MAP_FIXED, it overwrites already mapped regions, whic @@ -100,10 +145,16 @@ my_mmap (void *addr, W_ size) kern_return_t err = 0; ret = addr; - if(addr) // try to allocate at address - err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, FALSE); - if(!addr || err) // try to allocate anywhere - err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, TRUE); + + if(operation & MEM_RESERVE) + { + if(addr) // try to allocate at address + err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, + size, FALSE); + if(!addr || err) // try to allocate anywhere + err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, + size, TRUE); + } if(err) { // don't know what the error codes mean exactly, assume it's @@ -111,23 +162,24 @@ my_mmap (void *addr, W_ size) errorBelch("memory allocation failed (requested %" FMT_Word " bytes)", size); stg_exit(EXIT_FAILURE); - } else { + } + + if(operation & MEM_COMMIT) { vm_protect(mach_task_self(), (vm_address_t)ret, size, FALSE, VM_PROT_READ|VM_PROT_WRITE); } + #elif linux_HOST_OS - ret = mmap(addr, size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ret = mmap(addr, size, prot, flags | MAP_ANON | MAP_PRIVATE, -1, 0); if (ret == (void *)-1 && errno == EPERM) { // Linux may return EPERM if it tried to give us // a chunk of address space below mmap_min_addr, // See Trac #7500. - if (addr != 0) { + if (addr != 0 && (operation & MEM_RESERVE)) { // Try again with no hint address. // It's not clear that this can ever actually help, // but since our alternative is to abort, we may as well try. - ret = mmap(0, size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ret = mmap(0, size, prot, flags | MAP_ANON | MAP_PRIVATE, -1, 0); } if (ret == (void *)-1 && errno == EPERM) { // Linux is not willing to give us any mapping, @@ -137,8 +189,7 @@ my_mmap (void *addr, W_ size) } } #else - ret = mmap(addr, size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ret = mmap(addr, size, prot, flags | MAP_ANON | MAP_PRIVATE, -1, 0); #endif if (ret == (void *)-1) { @@ -168,7 +219,7 @@ gen_map_mblocks (W_ size) // Try to map a larger block, and take the aligned portion from // it (unmap the rest). size += MBLOCK_SIZE; - ret = my_mmap(0, size); + ret = my_mmap(0, size, MEM_RESERVE_AND_COMMIT); // unmap the slop bits around the chunk we allocated slop = (W_)ret & MBLOCK_MASK; @@ -207,7 +258,7 @@ osGetMBlocks(nat n) // use gen_map_mblocks the first time. ret = gen_map_mblocks(size); } else { - ret = my_mmap(next_request, size); + ret = my_mmap(next_request, size, MEM_RESERVE_AND_COMMIT); if (((W_)ret & MBLOCK_MASK) != 0) { // misaligned block! @@ -244,10 +295,11 @@ void osReleaseFreeMemory(void) { void osFreeAllMBlocks(void) { void *mblock; + void *state; - for (mblock = getFirstMBlock(); + for (mblock = getFirstMBlock(&state); mblock != NULL; - mblock = getNextMBlock(mblock)) { + mblock = getNextMBlock(&state, mblock)) { munmap(mblock, MBLOCK_SIZE); } } @@ -318,3 +370,103 @@ void setExecutable (void *p, W_ len, rtsBool exec) barf("setExecutable: failed to protect 0x%p\n", p); } } + +#ifdef USE_LARGE_ADDRESS_SPACE + +static void * +osTryReserveHeapMemory (void *hint) +{ + void *base, *top; + void *start, *end; + + /* We try to allocate MBLOCK_SPACE_SIZE + MBLOCK_SIZE, + because we need memory which is MBLOCK_SIZE aligned, + and then we discard what we don't need */ + + base = my_mmap(hint, MBLOCK_SPACE_SIZE + MBLOCK_SIZE, MEM_RESERVE); + top = (void*)((W_)base + MBLOCK_SPACE_SIZE + MBLOCK_SIZE); + + if (((W_)base & MBLOCK_MASK) != 0) { + start = MBLOCK_ROUND_UP(base); + end = MBLOCK_ROUND_DOWN(top); + ASSERT(((W_)end - (W_)start) == MBLOCK_SPACE_SIZE); + + if (munmap(base, (W_)start-(W_)base) < 0) { + sysErrorBelch("unable to release slop before heap"); + } + if (munmap(end, (W_)top-(W_)end) < 0) { + sysErrorBelch("unable to release slop after heap"); + } + } else { + start = base; + } + + return start; +} + +void *osReserveHeapMemory(void) +{ + int attempt; + void *at; + + /* We want to ensure the heap starts at least 8 GB inside the address space, + to make sure that any dynamically loaded code will be close enough to the + original code so that short relocations will work. This is in particular + important on Darwin/Mach-O, because object files not compiled as shared + libraries are position independent but cannot be loaded about 4GB. + + We do so with a hint to the mmap, and we verify the OS satisfied our + hint. We loop a few times in case there is already something allocated + there, but we bail if we cannot allocate at all. + */ + + attempt = 0; + do { + at = osTryReserveHeapMemory((void*)((W_)8 * (1 << 30) + + attempt * BLOCK_SIZE)); + } while ((W_)at < ((W_)8 * (1 << 30))); + + return at; +} + +void osCommitMemory(void *at, W_ size) +{ + my_mmap(at, size, MEM_COMMIT); +} + +void osDecommitMemory(void *at, W_ size) +{ + int r; + + // First make the memory unaccessible (so that we get a segfault + // at the next attempt to touch it) + // We only do this in DEBUG because it forces the OS to remove + // all MMU entries for this page range, and there is no reason + // to do so unless there is memory pressure +#ifdef DEBUG + r = mprotect(at, size, PROT_NONE); + if(r < 0) + sysErrorBelch("unable to make released memory unaccessible"); +#endif + +#ifdef MADV_FREE + // Try MADV_FREE first, FreeBSD has both and MADV_DONTNEED + // just swaps memory out + r = madvise(at, size, MADV_FREE); +#else + r = madvise(at, size, MADV_DONTNEED); +#endif + if(r < 0) + sysErrorBelch("unable to decommit memory"); +} + +void osReleaseHeapMemory(void) +{ + int r; + + r = munmap((void*)mblock_address_space_begin, MBLOCK_SPACE_SIZE); + if(r < 0) + sysErrorBelch("unable to release address space"); +} + +#endif diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c index c2a5913963..e721fb13b6 100644 --- a/rts/sm/BlockAlloc.c +++ b/rts/sm/BlockAlloc.c @@ -736,7 +736,14 @@ void returnMemoryToOS(nat n /* megablocks */) } free_mblock_list = bd; - osReleaseFreeMemory(); + // Ask the OS to release any address space portion + // that was associated with the just released MBlocks + // + // Historically, we used to ask the OS directly (via + // osReleaseFreeMemory()) - now the MBlock layer might + // have a reason to preserve the address space range, + // so we keep it + releaseFreeMemory(); IF_DEBUG(gc, if (n != 0) { @@ -869,11 +876,12 @@ void reportUnmarkedBlocks (void) { void *mblock; + void *state; bdescr *bd; debugBelch("Unreachable blocks:\n"); - for (mblock = getFirstMBlock(); mblock != NULL; - mblock = getNextMBlock(mblock)) { + for (mblock = getFirstMBlock(&state); mblock != NULL; + mblock = getNextMBlock(&state, mblock)) { for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) { if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) { debugBelch(" %p\n",bd); diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c index a053dc3b4e..4ee88da11c 100644 --- a/rts/sm/Compact.c +++ b/rts/sm/Compact.c @@ -197,8 +197,8 @@ thread_static( StgClosure* p ) // keep going until we've threaded all the objects on the linked // list... - while (p != END_OF_STATIC_LIST) { - + while (p != END_OF_STATIC_OBJECT_LIST) { + p = UNTAG_STATIC_LIST_PTR(p); info = get_itbl(p); switch (info->type) { diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index b0ef807768..bc8cb9ad13 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -324,6 +324,38 @@ evacuate_large(StgPtr p) } /* ---------------------------------------------------------------------------- + Evacuate static objects + + When a static object is visited for the first time in this GC, it + is chained on to the gct->static_objects list. + + evacuate_static_object (link_field, q) + - link_field must be STATIC_LINK(q) + ------------------------------------------------------------------------- */ + +STATIC_INLINE void +evacuate_static_object (StgClosure **link_field, StgClosure *q) +{ + StgWord link = (StgWord)*link_field; + + // See Note [STATIC_LINK fields] for how the link field bits work + if ((((StgWord)(link)&STATIC_BITS) | prev_static_flag) != 3) { + StgWord new_list_head = (StgWord)q | static_flag; +#ifndef THREADED_RTS + *link_field = gct->static_objects; + gct->static_objects = (StgClosure *)new_list_head; +#else + StgWord prev; + prev = cas((StgVolatilePtr)link_field, link, + (StgWord)gct->static_objects); + if (prev == link) { + gct->static_objects = (StgClosure *)new_list_head; + } +#endif + } +} + +/* ---------------------------------------------------------------------------- Evacuate This is called (eventually) for every live object in the system. @@ -392,38 +424,13 @@ loop: case THUNK_STATIC: if (info->srt_bitmap != 0) { - if (*THUNK_STATIC_LINK((StgClosure *)q) == NULL) { -#ifndef THREADED_RTS - *THUNK_STATIC_LINK((StgClosure *)q) = gct->static_objects; - gct->static_objects = (StgClosure *)q; -#else - StgPtr link; - link = (StgPtr)cas((StgPtr)THUNK_STATIC_LINK((StgClosure *)q), - (StgWord)NULL, - (StgWord)gct->static_objects); - if (link == NULL) { - gct->static_objects = (StgClosure *)q; - } -#endif - } + evacuate_static_object(THUNK_STATIC_LINK((StgClosure *)q), q); } return; case FUN_STATIC: - if (info->srt_bitmap != 0 && - *FUN_STATIC_LINK((StgClosure *)q) == NULL) { -#ifndef THREADED_RTS - *FUN_STATIC_LINK((StgClosure *)q) = gct->static_objects; - gct->static_objects = (StgClosure *)q; -#else - StgPtr link; - link = (StgPtr)cas((StgPtr)FUN_STATIC_LINK((StgClosure *)q), - (StgWord)NULL, - (StgWord)gct->static_objects); - if (link == NULL) { - gct->static_objects = (StgClosure *)q; - } -#endif + if (info->srt_bitmap != 0) { + evacuate_static_object(FUN_STATIC_LINK((StgClosure *)q), q); } return; @@ -432,39 +439,11 @@ loop: * on the CAF list, so don't do anything with it here (we'll * scavenge it later). */ - if (*IND_STATIC_LINK((StgClosure *)q) == NULL) { -#ifndef THREADED_RTS - *IND_STATIC_LINK((StgClosure *)q) = gct->static_objects; - gct->static_objects = (StgClosure *)q; -#else - StgPtr link; - link = (StgPtr)cas((StgPtr)IND_STATIC_LINK((StgClosure *)q), - (StgWord)NULL, - (StgWord)gct->static_objects); - if (link == NULL) { - gct->static_objects = (StgClosure *)q; - } -#endif - } + evacuate_static_object(IND_STATIC_LINK((StgClosure *)q), q); return; case CONSTR_STATIC: - if (*STATIC_LINK(info,(StgClosure *)q) == NULL) { -#ifndef THREADED_RTS - *STATIC_LINK(info,(StgClosure *)q) = gct->static_objects; - gct->static_objects = (StgClosure *)q; -#else - StgPtr link; - link = (StgPtr)cas((StgPtr)STATIC_LINK(info,(StgClosure *)q), - (StgWord)NULL, - (StgWord)gct->static_objects); - if (link == NULL) { - gct->static_objects = (StgClosure *)q; - } -#endif - } - /* I am assuming that static_objects pointers are not - * written to other objects, and thus, no need to retag. */ + evacuate_static_object(STATIC_LINK(info,(StgClosure *)q), q); return; case CONSTR_NOCAF_STATIC: diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 52d7f98fa0..e6a23395eb 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -134,6 +134,9 @@ long copied; // *words* copied & scavenged during this GC rtsBool work_stealing; +nat static_flag = STATIC_FLAG_B; +nat prev_static_flag = STATIC_FLAG_A; + DECLARE_GCT /* ----------------------------------------------------------------------------- @@ -141,7 +144,6 @@ DECLARE_GCT -------------------------------------------------------------------------- */ static void mark_root (void *user, StgClosure **root); -static void zero_static_object_list (StgClosure* first_static); static void prepare_collected_gen (generation *gen); static void prepare_uncollected_gen (generation *gen); static void init_gc_thread (gc_thread *t); @@ -246,6 +248,12 @@ GarbageCollect (nat collect_gen, N = collect_gen; major_gc = (N == RtsFlags.GcFlags.generations-1); + if (major_gc) { + prev_static_flag = static_flag; + static_flag = + static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A; + } + #if defined(THREADED_RTS) work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled && N >= RtsFlags.ParFlags.parGcLoadBalancingGen; @@ -672,20 +680,6 @@ GarbageCollect (nat collect_gen, resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects); #endif - // zero the scavenged static object list - if (major_gc) { - nat i; - if (n_gc_threads == 1) { - zero_static_object_list(gct->scavenged_static_objects); - } else { - for (i = 0; i < n_gc_threads; i++) { - if (!gc_threads[i]->idle) { - zero_static_object_list(gc_threads[i]->scavenged_static_objects); - } - } - } - } - // Start any pending finalizers. Must be after // updateStableTables() and stableUnlock() (see #4221). RELEASE_SM_LOCK; @@ -1427,8 +1421,8 @@ collect_pinned_object_blocks (void) static void init_gc_thread (gc_thread *t) { - t->static_objects = END_OF_STATIC_LIST; - t->scavenged_static_objects = END_OF_STATIC_LIST; + t->static_objects = END_OF_STATIC_OBJECT_LIST; + t->scavenged_static_objects = END_OF_STATIC_OBJECT_LIST; t->scan_bd = NULL; t->mut_lists = t->cap->mut_lists; t->evac_gen_no = 0; @@ -1465,24 +1459,6 @@ mark_root(void *user USED_IF_THREADS, StgClosure **root) SET_GCT(saved_gct); } -/* ----------------------------------------------------------------------------- - Initialising the static object & mutable lists - -------------------------------------------------------------------------- */ - -static void -zero_static_object_list(StgClosure* first_static) -{ - StgClosure* p; - StgClosure* link; - const StgInfoTable *info; - - for (p = first_static; p != END_OF_STATIC_LIST; p = link) { - info = get_itbl(p); - link = *STATIC_LINK(info, p); - *STATIC_LINK(info,p) = NULL; - } -} - /* ---------------------------------------------------------------------------- Reset the sizes of the older generations when we do a major collection. @@ -1728,7 +1704,7 @@ static void gcCAFs(void) p = debug_caf_list; prev = NULL; - for (p = debug_caf_list; p != (StgIndStatic*)END_OF_STATIC_LIST; + for (p = debug_caf_list; p != (StgIndStatic*)END_OF_CAF_LIST; p = (StgIndStatic*)p->saved_info) { info = get_itbl((StgClosure*)p); diff --git a/rts/sm/GC.h b/rts/sm/GC.h index 571aa07110..5744eb95a8 100644 --- a/rts/sm/GC.h +++ b/rts/sm/GC.h @@ -16,6 +16,8 @@ #include "BeginPrivate.h" +#include "HeapAlloc.h" + void GarbageCollect (rtsBool force_major_gc, rtsBool do_heap_census, nat gc_type, Capability *cap); diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c index 13316e4d29..d3cbdaefb4 100644 --- a/rts/sm/GCAux.c +++ b/rts/sm/GCAux.c @@ -118,14 +118,14 @@ revertCAFs( void ) StgIndStatic *c; for (c = revertible_caf_list; - c != (StgIndStatic *)END_OF_STATIC_LIST; + c != (StgIndStatic *)END_OF_CAF_LIST; c = (StgIndStatic *)c->static_link) { SET_INFO((StgClosure *)c, c->saved_info); c->saved_info = NULL; // could, but not necessary: c->static_link = NULL; } - revertible_caf_list = (StgIndStatic*)END_OF_STATIC_LIST; + revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST; } void @@ -134,15 +134,17 @@ markCAFs (evac_fn evac, void *user) StgIndStatic *c; for (c = dyn_caf_list; - c != (StgIndStatic*)END_OF_STATIC_LIST; + c != (StgIndStatic*)END_OF_CAF_LIST; c = (StgIndStatic *)c->static_link) { + c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c); evac(user, &c->indirectee); } for (c = revertible_caf_list; - c != (StgIndStatic*)END_OF_STATIC_LIST; + c != (StgIndStatic*)END_OF_CAF_LIST; c = (StgIndStatic *)c->static_link) { + c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c); evac(user, &c->indirectee); } } diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h index cbe4346afe..d42b89f973 100644 --- a/rts/sm/GCThread.h +++ b/rts/sm/GCThread.h @@ -131,8 +131,11 @@ typedef struct gc_thread_ { // during GC without accessing the block // allocators spin lock. - StgClosure* static_objects; // live static objects - StgClosure* scavenged_static_objects; // static objects scavenged so far + // These two lists are chained through the STATIC_LINK() fields of static + // objects. Pointers are tagged with the current static_flag, so before + // following a pointer, untag it with UNTAG_STATIC_LIST_PTR(). + StgClosure* static_objects; // live static objects + StgClosure* scavenged_static_objects; // static objects scavenged so far W_ gc_count; // number of GCs this thread has done diff --git a/rts/sm/HeapAlloc.h b/rts/sm/HeapAlloc.h new file mode 100644 index 0000000000..c914b5db40 --- /dev/null +++ b/rts/sm/HeapAlloc.h @@ -0,0 +1,224 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The University of Glasgow 2006-2008 + * + * The HEAP_ALLOCED() test. + * + * ---------------------------------------------------------------------------*/ + +#ifndef SM_HEAP_ALLOC_H +#define SM_HEAP_ALLOC_H + +#include "BeginPrivate.h" + +/* ----------------------------------------------------------------------------- + The HEAP_ALLOCED() test. + + HEAP_ALLOCED is called FOR EVERY SINGLE CLOSURE during GC. + It needs to be FAST. + + See wiki commentary at + http://ghc.haskell.org/trac/ghc/wiki/Commentary/HeapAlloced + + Implementation of HEAP_ALLOCED + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Since heap is allocated in chunks of megablocks (MBLOCK_SIZE), we + can just use a table to record which megablocks in the address + space belong to the heap. On a 32-bit machine, with 1Mb + megablocks, using 8 bits for each entry in the table, the table + requires 4k. Lookups during GC will be fast, because the table + will be quickly cached (indeed, performance measurements showed no + measurable difference between doing the table lookup and using a + constant comparison). + + On 64-bit machines, we have two possibilities. One is to request + a single chunk of address space that we deem "large enough" + (currently 1TB, could easily be extended to, say 16TB or more). + Memory from that chunk is GC memory, everything else is not. This + case is tricky in that it requires support from the OS to allocate + address space without allocating memory (in practice, all modern + OSes do this). It's also tricky in that it is the only case where + a successful HEAP_ALLOCED(p) check can trigger a segfault when + accessing p (and for debugging purposes, it will). + + Alternatively, the older implementation caches one 12-bit block map + that describes 4096 megablocks or 4GB of memory. If HEAP_ALLOCED is + called for an address that is not in the cache, it calls + slowIsHeapAlloced (see MBlock.c) which will find the block map for + the 4GB block in question. + -------------------------------------------------------------------------- */ + +#ifdef USE_LARGE_ADDRESS_SPACE + +extern W_ mblock_address_space_begin; +# define MBLOCK_SPACE_SIZE ((StgWord)1 << 40) /* 1 TB */ +# define HEAP_ALLOCED(p) ((W_)(p) >= mblock_address_space_begin && \ + (W_)(p) < (mblock_address_space_begin + \ + MBLOCK_SPACE_SIZE)) +# define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p) + +#elif SIZEOF_VOID_P == 4 +extern StgWord8 mblock_map[]; + +/* On a 32-bit machine a 4KB table is always sufficient */ +# define MBLOCK_MAP_SIZE 4096 +# define MBLOCK_MAP_ENTRY(p) ((StgWord)(p) >> MBLOCK_SHIFT) +# define HEAP_ALLOCED(p) mblock_map[MBLOCK_MAP_ENTRY(p)] +# define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p) + +/* ----------------------------------------------------------------------------- + HEAP_ALLOCED for 64-bit machines (without LARGE_ADDRESS_SPACE). + + Here are some cache layout options: + + [1] + 16KB cache of 16-bit entries, 1MB lines (capacity 8GB) + mblock size = 20 bits + entries = 8192 13 bits + line size = 0 bits (1 bit of value) + tag size = 15 bits + = 48 bits + + [2] + 32KB cache of 16-bit entries, 4MB lines (capacity 32GB) + mblock size = 20 bits + entries = 16384 14 bits + line size = 2 bits (4 bits of value) + tag size = 12 bits + = 48 bits + + [3] + 16KB cache of 16-bit entries, 2MB lines (capacity 16GB) + mblock size = 20 bits + entries = 8192 13 bits + line size = 1 bits (2 bits of value) + tag size = 14 bits + = 48 bits + + [4] + 4KB cache of 32-bit entries, 16MB lines (capacity 16GB) + mblock size = 20 bits + entries = 1024 10 bits + line size = 4 bits (16 bits of value) + tag size = 14 bits + = 48 bits + + [5] + 4KB cache of 64-bit entries, 32MB lines (capacity 16GB) + mblock size = 20 bits + entries = 512 9 bits + line size = 5 bits (32 bits of value) + tag size = 14 bits + = 48 bits + + We actually use none of the above. After much experimentation it was + found that optimising the lookup is the most important factor, + followed by reducing the number of misses. To that end, we use a + variant of [1] in which each cache entry is ((mblock << 1) + value) + where value is 0 for non-heap and 1 for heap. The cache entries can + be 32 bits, since the mblock number is 48-20 = 28 bits, and we need + 1 bit for the value. The cache can be as big as we like, but + currently we use 8k entries, giving us 8GB capacity. + + ---------------------------------------------------------------------------- */ + +#elif SIZEOF_VOID_P == 8 + +#define MBC_LINE_BITS 0 +#define MBC_TAG_BITS 15 + +#if x86_64_HOST_ARCH +// 32bits are enough for 'entry' as modern amd64 boxes have +// only 48bit sized virtual addres. +typedef StgWord32 MbcCacheLine; +#else +// 32bits is not enough here as some arches (like ia64) use +// upper address bits to distinct memory areas. +typedef StgWord64 MbcCacheLine; +#endif + +typedef StgWord8 MBlockMapLine; + +#define MBLOCK_MAP_LINE(p) (((StgWord)p & 0xffffffff) >> (MBLOCK_SHIFT + MBC_LINE_BITS)) + +#define MBC_LINE_SIZE (1<<MBC_LINE_BITS) +#define MBC_SHIFT (48 - MBLOCK_SHIFT - MBC_LINE_BITS - MBC_TAG_BITS) +#define MBC_ENTRIES (1<<MBC_SHIFT) + +extern MbcCacheLine mblock_cache[]; + +#define MBC_LINE(p) ((StgWord)p >> (MBLOCK_SHIFT + MBC_LINE_BITS)) + +#define MBLOCK_MAP_ENTRIES (1 << (32 - MBLOCK_SHIFT - MBC_LINE_BITS)) + +typedef struct { + StgWord32 addrHigh32; + MBlockMapLine lines[MBLOCK_MAP_ENTRIES]; +} MBlockMap; + +extern W_ mpc_misses; + +StgBool HEAP_ALLOCED_miss(StgWord mblock, void *p); + +INLINE_HEADER +StgBool HEAP_ALLOCED(void *p) +{ + StgWord mblock; + nat entry_no; + MbcCacheLine entry, value; + + mblock = (StgWord)p >> MBLOCK_SHIFT; + entry_no = mblock & (MBC_ENTRIES-1); + entry = mblock_cache[entry_no]; + value = entry ^ (mblock << 1); + // this formulation coaxes gcc into prioritising the value==1 + // case, which we expect to be the most common. + // __builtin_expect() didn't have any useful effect (gcc-4.3.0). + if (value == 1) { + return 1; + } else if (value == 0) { + return 0; + } else { + // putting the rest out of line turned out to be a slight + // performance improvement: + return HEAP_ALLOCED_miss(mblock,p); + } +} + +// In the parallel GC, the cache itself is safe to *read*, and can be +// updated atomically, but we need to place a lock around operations +// that touch the MBlock map. +INLINE_HEADER +StgBool HEAP_ALLOCED_GC(void *p) +{ + StgWord mblock; + nat entry_no; + MbcCacheLine entry, value; + StgBool b; + + mblock = (StgWord)p >> MBLOCK_SHIFT; + entry_no = mblock & (MBC_ENTRIES-1); + entry = mblock_cache[entry_no]; + value = entry ^ (mblock << 1); + if (value == 1) { + return 1; + } else if (value == 0) { + return 0; + } else { + // putting the rest out of line turned out to be a slight + // performance improvement: + ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync); + b = HEAP_ALLOCED_miss(mblock,p); + RELEASE_SPIN_LOCK(&gc_alloc_block_sync); + return b; + } +} + +#else +# error HEAP_ALLOCED not defined +#endif + +#include "EndPrivate.h" + +#endif /* SM_HEAP_ALLOC_H */ diff --git a/rts/sm/MBlock.c b/rts/sm/MBlock.c index f626e1f43b..35a11bf589 100644 --- a/rts/sm/MBlock.c +++ b/rts/sm/MBlock.c @@ -23,9 +23,320 @@ W_ mblocks_allocated = 0; W_ mpc_misses = 0; /* ----------------------------------------------------------------------------- - The MBlock Map: provides our implementation of HEAP_ALLOCED() + The MBlock Map: provides our implementation of HEAP_ALLOCED() and the + utilities to walk the really allocated (thus accessible without risk of + segfault) heap -------------------------------------------------------------------------- */ +/* + There are two different cases here: either we use "large address + space" (which really means two-step allocation), so we have to + manage which memory is good (= accessible without fear of segfault) + and which is not owned by us, or we use the older method and get + good memory straight from the system. + + Both code paths need to provide: + + void *getFirstMBlock(void ** state) + return the first (lowest address) mblock + that was actually committed + + void *getNextMBlock(void ** state, void * mblock) + return the first (lowest address) mblock + that was committed, after the given one + + For both these calls, @state is an in-out parameter that points to + an opaque state threading the calls togheter. The calls should only + be used in an interation fashion. Pass NULL if @state is not + interesting,or pass a pointer to NULL if you don't have a state. + + void *getCommittedMBlocks(nat n) + return @n new mblocks, ready to be used (reserved and committed) + + void *decommitMBlocks(char *addr, nat n) + release memory for @n mblocks, starting at the given address + + void releaseFreeMemory() + potentially release any address space that was associated + with recently decommitted blocks +*/ + +#ifdef USE_LARGE_ADDRESS_SPACE + +// Large address space means we use two-step allocation: reserve +// something large upfront, and then commit as needed +// (This is normally only useful on 64-bit, where we can assume +// that reserving 1TB is possible) +// +// There is no block map in this case, but there is a free list +// of blocks that were committed and decommitted at least once, +// which we use to choose which block to commit next in the already +// reserved space. +// +// We cannot let the OS choose it as we do in the +// non large address space case, because the committing wants to +// know the exact address upfront. +// +// The free list is coalesced and ordered, which means that +// allocate and free are worst-case O(n), but benchmarks have shown +// that this is not a significant problem, because large (>=2MB) +// allocations are infrequent and their time is mostly insignificant +// compared to the time to use that memory. +// +// The free list is stored in malloc()'d memory, unlike the other free +// lists in BlockAlloc.c which are stored in block descriptors, +// because we cannot touch the contents of decommitted mblocks. + +typedef struct free_list { + struct free_list *prev; + struct free_list *next; + W_ address; + W_ size; +} free_list; + +static free_list *free_list_head; +static W_ mblock_high_watermark; +W_ mblock_address_space_begin = 0; + +static void *getAllocatedMBlock(free_list **start_iter, W_ startingAt) +{ + free_list *iter; + W_ p = startingAt; + + for (iter = *start_iter; iter != NULL; iter = iter->next) + { + if (p < iter->address) + break; + + if (p == iter->address) + p += iter->size; + } + + *start_iter = iter; + + if (p >= mblock_high_watermark) + return NULL; + + return (void*)p; +} + +void * getFirstMBlock(void **state STG_UNUSED) +{ + free_list *fake_state; + free_list **casted_state; + + if (state) + casted_state = (free_list**)state; + else + casted_state = &fake_state; + + *casted_state = free_list_head; + return getAllocatedMBlock(casted_state, mblock_address_space_begin); +} + +void * getNextMBlock(void **state STG_UNUSED, void *mblock) +{ + free_list *fake_state = free_list_head; + free_list **casted_state; + + if (state) + casted_state = (free_list**)state; + else + casted_state = &fake_state; + + return getAllocatedMBlock(casted_state, (W_)mblock + MBLOCK_SIZE); +} + +static void *getReusableMBlocks(nat n) +{ + struct free_list *iter; + W_ size = MBLOCK_SIZE * (W_)n; + + for (iter = free_list_head; iter != NULL; iter = iter->next) { + void *addr; + + if (iter->size < size) + continue; + + addr = (void*)iter->address; + iter->address += size; + iter->size -= size; + if (iter->size == 0) { + struct free_list *prev, *next; + + prev = iter->prev; + next = iter->next; + if (prev == NULL) { + ASSERT(free_list_head == iter); + free_list_head = next; + } else { + prev->next = next; + } + if (next != NULL) { + next->prev = prev; + } + stgFree(iter); + } + + osCommitMemory(addr, size); + return addr; + } + + return NULL; +} + +static void *getFreshMBlocks(nat n) +{ + W_ size = MBLOCK_SIZE * (W_)n; + void *addr = (void*)mblock_high_watermark; + + if (mblock_high_watermark + size > + mblock_address_space_begin + MBLOCK_SPACE_SIZE) + { + // whoa, 1 TB of heap? + errorBelch("out of memory"); + stg_exit(EXIT_HEAPOVERFLOW); + } + + osCommitMemory(addr, size); + mblock_high_watermark += size; + return addr; +} + +static void *getCommittedMBlocks(nat n) +{ + void *p; + + p = getReusableMBlocks(n); + if (p == NULL) { + p = getFreshMBlocks(n); + } + + ASSERT(p != NULL && p != (void*)-1); + return p; +} + +static void decommitMBlocks(char *addr, nat n) +{ + struct free_list *iter, *prev; + W_ size = MBLOCK_SIZE * (W_)n; + W_ address = (W_)addr; + + osDecommitMemory(addr, size); + + prev = NULL; + for (iter = free_list_head; iter != NULL; iter = iter->next) + { + prev = iter; + + if (iter->address + iter->size < address) + continue; + + if (iter->address + iter->size == address) { + iter->size += size; + + if (address + size == mblock_high_watermark) { + mblock_high_watermark -= iter->size; + if (iter->prev) { + iter->prev->next = NULL; + } else { + ASSERT(iter == free_list_head); + free_list_head = NULL; + } + stgFree(iter); + return; + } + + if (iter->next && + iter->next->address == iter->address + iter->size) { + struct free_list *next; + + next = iter->next; + iter->size += next->size; + iter->next = next->next; + + if (iter->next) { + iter->next->prev = iter; + + /* We don't need to consolidate more */ + ASSERT(iter->next->address > iter->address + iter->size); + } + + stgFree(next); + } + return; + } else if (address + size == iter->address) { + iter->address = address; + iter->size += size; + + /* We don't need to consolidate backwards + (because otherwise it would have been handled by + the previous iteration) */ + if (iter->prev) { + ASSERT(iter->prev->address + iter->prev->size < iter->address); + } + return; + } else { + struct free_list *new_iter; + + /* All other cases have been handled */ + ASSERT(iter->address > address + size); + + new_iter = stgMallocBytes(sizeof(struct free_list), "freeMBlocks"); + new_iter->address = address; + new_iter->size = size; + new_iter->next = iter; + new_iter->prev = iter->prev; + if (new_iter->prev) { + new_iter->prev->next = new_iter; + } else { + ASSERT(iter == free_list_head); + free_list_head = new_iter; + } + iter->prev = new_iter; + return; + } + } + + /* We're past the last free list entry, so we must + be the highest allocation so far + */ + ASSERT(address + size <= mblock_high_watermark); + + /* Fast path the case of releasing high or all memory */ + if (address + size == mblock_high_watermark) { + mblock_high_watermark -= size; + } else { + struct free_list *new_iter; + + new_iter = stgMallocBytes(sizeof(struct free_list), "freeMBlocks"); + new_iter->address = address; + new_iter->size = size; + new_iter->next = NULL; + new_iter->prev = prev; + if (new_iter->prev) { + ASSERT(new_iter->prev->next == NULL); + new_iter->prev->next = new_iter; + } else { + ASSERT(free_list_head == NULL); + free_list_head = new_iter; + } + } +} + +void releaseFreeMemory(void) +{ + // This function exists for releasing address space + // on Windows 32 bit + // + // Do nothing if USE_LARGE_ADDRESS_SPACE, we never want + // to release address space + + debugTrace(DEBUG_gc, "mblock_high_watermark: %p\n", mblock_high_watermark); +} + +#else // !USE_LARGE_ADDRESS_SPACE + #if SIZEOF_VOID_P == 4 StgWord8 mblock_map[MBLOCK_MAP_SIZE]; // initially all zeros @@ -108,6 +419,7 @@ setHeapAlloced(void *p, StgWord8 i) mblock_cache[entry_no] = (mblock << 1) + i; } } + #endif static void @@ -130,7 +442,7 @@ void * mapEntryToMBlock(nat i) return (void *)((StgWord)i << MBLOCK_SHIFT); } -void * getFirstMBlock(void) +void * getFirstMBlock(void **state STG_UNUSED) { nat i; @@ -140,7 +452,7 @@ void * getFirstMBlock(void) return NULL; } -void * getNextMBlock(void *mblock) +void * getNextMBlock(void **state STG_UNUSED, void *mblock) { nat i; @@ -152,7 +464,7 @@ void * getNextMBlock(void *mblock) #elif SIZEOF_VOID_P == 8 -void * getNextMBlock(void *p) +void * getNextMBlock(void **state STG_UNUSED, void *p) { MBlockMap *map; nat off, j; @@ -189,7 +501,7 @@ void * getNextMBlock(void *p) return NULL; } -void * getFirstMBlock(void) +void * getFirstMBlock(void **state STG_UNUSED) { MBlockMap *map = mblock_maps[0]; nat line_no, off; @@ -210,7 +522,38 @@ void * getFirstMBlock(void) return NULL; } -#endif // SIZEOF_VOID_P +#endif // SIZEOF_VOID_P == 8 + +static void *getCommittedMBlocks(nat n) +{ + // The OS layer returns committed memory directly + void *ret = osGetMBlocks(n); + nat i; + + // fill in the table + for (i = 0; i < n; i++) { + markHeapAlloced( (StgWord8*)ret + i * MBLOCK_SIZE ); + } + + return ret; +} + +static void decommitMBlocks(void *p, nat n) +{ + osFreeMBlocks(p, n); + nat i; + + for (i = 0; i < n; i++) { + markHeapUnalloced( (StgWord8*)p + i * MBLOCK_SIZE ); + } +} + +void releaseFreeMemory(void) +{ + osReleaseFreeMemory(); +} + +#endif /* !USE_LARGE_ADDRESS_SPACE */ /* ----------------------------------------------------------------------------- Allocate new mblock(s) @@ -228,18 +571,12 @@ getMBlock(void) void * getMBlocks(nat n) { - nat i; void *ret; - ret = osGetMBlocks(n); + ret = getCommittedMBlocks(n); debugTrace(DEBUG_gc, "allocated %d megablock(s) at %p",n,ret); - // fill in the table - for (i = 0; i < n; i++) { - markHeapAlloced( (StgWord8*)ret + i * MBLOCK_SIZE ); - } - mblocks_allocated += n; peak_mblocks_allocated = stg_max(peak_mblocks_allocated, mblocks_allocated); @@ -249,17 +586,11 @@ getMBlocks(nat n) void freeMBlocks(void *addr, nat n) { - nat i; - debugTrace(DEBUG_gc, "freeing %d megablock(s) at %p",n,addr); mblocks_allocated -= n; - for (i = 0; i < n; i++) { - markHeapUnalloced( (StgWord8*)addr + i * MBLOCK_SIZE ); - } - - osFreeMBlocks(addr, n); + decommitMBlocks(addr, n); } void @@ -267,6 +598,22 @@ freeAllMBlocks(void) { debugTrace(DEBUG_gc, "freeing all megablocks"); +#ifdef USE_LARGE_ADDRESS_SPACE + { + struct free_list *iter, *next; + + for (iter = free_list_head; iter != NULL; iter = next) + { + next = iter->next; + stgFree(iter); + } + } + + osReleaseHeapMemory(); + + mblock_address_space_begin = (W_)-1; + mblock_high_watermark = (W_)-1; +#else osFreeAllMBlocks(); #if SIZEOF_VOID_P == 8 @@ -276,13 +623,23 @@ freeAllMBlocks(void) } stgFree(mblock_maps); #endif + +#endif } void initMBlocks(void) { osMemInit(); -#if SIZEOF_VOID_P == 8 + +#ifdef USE_LARGE_ADDRESS_SPACE + { + void *addr = osReserveHeapMemory(); + + mblock_address_space_begin = (W_)addr; + mblock_high_watermark = (W_)addr; + } +#elif SIZEOF_VOID_P == 8 memset(mblock_cache,0xff,sizeof(mblock_cache)); #endif } diff --git a/rts/sm/OSMem.h b/rts/sm/OSMem.h index db704fc78b..9a6ccdd7ec 100644 --- a/rts/sm/OSMem.h +++ b/rts/sm/OSMem.h @@ -20,6 +20,47 @@ W_ getPageSize (void); StgWord64 getPhysicalMemorySize (void); void setExecutable (void *p, W_ len, rtsBool exec); +#ifdef USE_LARGE_ADDRESS_SPACE + +/* + If "large address space" is enabled, we allocate memory in two + steps: first we request some address space, and then we request some + memory in it. This allows us to ask for much more address space that + we will ever need, which keeps everything nice and consecutive. +*/ + +// Reserve the large address space blob, and return the address that +// the OS has chosen for it. It is not safe to access the memory +// pointed to by the return value, until that memory is committed +// using osCommitMemory(). +// +// This function is called once when the block allocator is initialized. +void *osReserveHeapMemory(void); + +// Commit (allocate memory for) a piece of address space, which must +// be within the previously reserved space After this call, it is safe +// to access @p up to @len bytes. +// +// There is no guarantee on the contents of the memory pointed to by +// @p, in particular it must not be assumed to contain all zeros. +void osCommitMemory(void *p, W_ len); + +// Decommit (release backing memory for) a piece of address space, +// which must be within the previously reserve space and must have +// been previously committed After this call, it is again unsafe to +// access @p (up to @len bytes), but there is no guarantee that the +// memory will be released to the system (as far as eg. RSS statistics +// from top are concerned). +void osDecommitMemory(void *p, W_ len); + +// Release the address space previously obtained and undo the effects of +// osReserveHeapMemory +// +// This function is called once, when the block allocator is deinitialized +// before the program terminates. +void osReleaseHeapMemory(void); +#endif + #include "EndPrivate.h" #endif /* SM_OSMEM_H */ diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index c4a699e59a..e7a8401145 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -637,7 +637,8 @@ checkStaticObjects ( StgClosure* static_objects ) StgClosure *p = static_objects; StgInfoTable *info; - while (p != END_OF_STATIC_LIST) { + while (p != END_OF_STATIC_OBJECT_LIST) { + p = UNTAG_STATIC_LIST_PTR(p); checkClosure(p); info = get_itbl(p); switch (info->type) { diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index a8f0ab037f..dfad0bef58 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -1672,7 +1672,7 @@ scavenge_capability_mut_lists (Capability *cap) static void scavenge_static(void) { - StgClosure* p; + StgClosure *flagged_p, *p; const StgInfoTable *info; debugTrace(DEBUG_gc, "scavenging static objects"); @@ -1690,10 +1690,11 @@ scavenge_static(void) * be more stuff on this list after each evacuation... * (static_objects is a global) */ - p = gct->static_objects; - if (p == END_OF_STATIC_LIST) { + flagged_p = gct->static_objects; + if (flagged_p == END_OF_STATIC_OBJECT_LIST) { break; } + p = UNTAG_STATIC_LIST_PTR(flagged_p); ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl(p); @@ -1708,7 +1709,7 @@ scavenge_static(void) */ gct->static_objects = *STATIC_LINK(info,p); *STATIC_LINK(info,p) = gct->scavenged_static_objects; - gct->scavenged_static_objects = p; + gct->scavenged_static_objects = flagged_p; switch (info -> type) { @@ -2066,7 +2067,7 @@ loop: work_to_do = rtsFalse; // scavenge static objects - if (major_gc && gct->static_objects != END_OF_STATIC_LIST) { + if (major_gc && gct->static_objects != END_OF_STATIC_OBJECT_LIST) { IF_DEBUG(sanity, checkStaticObjects(gct->static_objects)); scavenge_static(); } diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 6e9b0634b7..65f5b70c21 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -175,9 +175,9 @@ initStorage (void) generations[0].max_blocks = 0; - dyn_caf_list = (StgIndStatic*)END_OF_STATIC_LIST; - debug_caf_list = (StgIndStatic*)END_OF_STATIC_LIST; - revertible_caf_list = (StgIndStatic*)END_OF_STATIC_LIST; + dyn_caf_list = (StgIndStatic*)END_OF_CAF_LIST; + debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST; + revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST; /* initialise the allocate() interface */ large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W; @@ -427,7 +427,7 @@ newCAF(StgRegTable *reg, StgIndStatic *caf) ACQUIRE_SM_LOCK; // dyn_caf_list is global, locked by sm_mutex caf->static_link = (StgClosure*)dyn_caf_list; - dyn_caf_list = caf; + dyn_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST); RELEASE_SM_LOCK; } else @@ -484,7 +484,7 @@ StgInd* newRetainedCAF (StgRegTable *reg, StgIndStatic *caf) ACQUIRE_SM_LOCK; caf->static_link = (StgClosure*)revertible_caf_list; - revertible_caf_list = caf; + revertible_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST); RELEASE_SM_LOCK; diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index a4421db3f2..d0094b60fb 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -133,12 +133,59 @@ W_ calcLiveWords (void); extern bdescr *exec_block; -#define END_OF_STATIC_LIST ((StgClosure*)1) - void move_STACK (StgStack *src, StgStack *dest); /* ----------------------------------------------------------------------------- - CAF lists + Note [STATIC_LINK fields] + + The low 2 bits of the static link field have the following meaning: + + 00 we haven't seen this static object before + + 01/10 if it equals static_flag, then we saw it in this GC, otherwise + we saw it in the previous GC. + + 11 ignore during GC. This value is used in two ways + - When we put CAFs on a list (see Note [CAF lists]) + - a static constructor that was determined to have no CAF + references at compile time is given this value, so we + don't traverse it during GC + + This choice of values is quite deliberate, because it means we can + decide whether a static object should be traversed during GC using a + single test: + + bits = link_field & 3; + if ((bits | prev_static_flag) != 3) { ... } + + -------------------------------------------------------------------------- */ + +#define STATIC_BITS 3 + +#define STATIC_FLAG_A 1 +#define STATIC_FLAG_B 2 +#define STATIC_FLAG_LIST 3 + +#define END_OF_CAF_LIST ((StgClosure*)STATIC_FLAG_LIST) + +// The previous and current values of the static flag. These flip +// between STATIC_FLAG_A and STATIC_FLAG_B at each major GC. +extern nat prev_static_flag, static_flag; + +// In the chain of static objects built up during GC, all the link +// fields are tagged with the current static_flag value. How to mark +// the end of the chain? It must be a special value so that we can +// tell it is the end of the chain, but note that we're going to store +// this value in the link field of a static object, which means that +// during the NEXT GC we should treat it like any other object that +// has not been visited during this GC. Therefore, we use static_flag +// as the sentinel value. +#define END_OF_STATIC_OBJECT_LIST ((StgClosure*)(StgWord)static_flag) + +#define UNTAG_STATIC_LIST_PTR(p) ((StgClosure*)((StgWord)(p) & ~STATIC_BITS)) + +/* ----------------------------------------------------------------------------- + Note [CAF lists] dyn_caf_list (CAFs chained through static_link) This is a chain of all CAFs in the program, used for @@ -154,6 +201,10 @@ void move_STACK (StgStack *src, StgStack *dest); A chain of CAFs in object code loaded with the RTS linker. These CAFs can be reverted to their unevaluated state using revertCAFs. + + Pointers in these lists are tagged with STATIC_FLAG_LIST, so when + traversing the list remember to untag each pointer with + UNTAG_STATIC_LIST_PTR(). --------------------------------------------------------------------------- */ extern StgIndStatic * dyn_caf_list; diff --git a/rts/win32/OSMem.c b/rts/win32/OSMem.c index afa5113638..716171b3fc 100644 --- a/rts/win32/OSMem.c +++ b/rts/win32/OSMem.c @@ -8,6 +8,7 @@ #include "Rts.h" #include "sm/OSMem.h" +#include "sm/HeapAlloc.h" #include "RtsUtils.h" #if HAVE_WINDOWS_H @@ -28,7 +29,11 @@ typedef struct block_rec_ { /* allocs are kept in ascending order, and are the memory regions as returned by the OS as we need to have matching VirtualAlloc and - VirtualFree calls. */ + VirtualFree calls. + + If USE_LARGE_ADDRESS_SPACE is defined, this list will contain only + one element. +*/ static alloc_rec* allocs = NULL; /* free_blocks are kept in ascending order, and adjacent blocks are merged */ @@ -207,12 +212,9 @@ osGetMBlocks(nat n) { return ret; } -void osFreeMBlocks(char *addr, nat n) +static void decommitBlocks(char *addr, W_ nBytes) { alloc_rec *p; - W_ nBytes = (W_)n * MBLOCK_SIZE; - - insertFree(addr, nBytes); p = allocs; while ((p != NULL) && (addr >= (p->base + p->size))) { @@ -243,6 +245,14 @@ void osFreeMBlocks(char *addr, nat n) } } +void osFreeMBlocks(char *addr, nat n) +{ + W_ nBytes = (W_)n * MBLOCK_SIZE; + + insertFree(addr, nBytes); + decommitBlocks(addr, nBytes); +} + void osReleaseFreeMemory(void) { alloc_rec *prev_a, *a; @@ -414,3 +424,60 @@ void setExecutable (void *p, W_ len, rtsBool exec) stg_exit(EXIT_FAILURE); } } + +#ifdef USE_LARGE_ADDRESS_SPACE + +static void* heap_base = NULL; + +void *osReserveHeapMemory (void) +{ + void *start; + + heap_base = VirtualAlloc(NULL, MBLOCK_SPACE_SIZE + MBLOCK_SIZE, + MEM_RESERVE, PAGE_READWRITE); + if (heap_base == NULL) { + if (GetLastError() == ERROR_NOT_ENOUGH_MEMORY) { + errorBelch("out of memory"); + } else { + sysErrorBelch( + "osReserveHeapMemory: VirtualAlloc MEM_RESERVE %llu bytes failed", + MBLOCK_SPACE_SIZE + MBLOCK_SIZE); + } + stg_exit(EXIT_FAILURE); + } + + // VirtualFree MEM_RELEASE must always match a + // previous MEM_RESERVE call, in address and size + // so we necessarily leak some address space here, + // before and after the aligned area + // It is not a huge problem because we never commit + // that memory + start = MBLOCK_ROUND_UP(heap_base); + + return start; +} + +void osCommitMemory (void *at, W_ size) +{ + void *temp; + temp = VirtualAlloc(at, size, MEM_COMMIT, PAGE_READWRITE); + if (temp == NULL) { + sysErrorBelch("osCommitMemory: VirtualAlloc MEM_COMMIT failed"); + stg_exit(EXIT_FAILURE); + } +} + +void osDecommitMemory (void *at, W_ size) +{ + if (!VirtualFree(at, size, MEM_DECOMMIT)) { + sysErrorBelch("osDecommitMemory: VirtualFree MEM_DECOMMIT failed"); + stg_exit(EXIT_FAILURE); + } +} + +void osReleaseHeapMemory (void) +{ + VirtualFree(heap_base, 0, MEM_RELEASE); +} + +#endif |