Diffstat (limited to 'rts')
-rw-r--r--   rts/CheckUnload.c       |    6
-rw-r--r--   rts/RetainerProfile.c   |    3
-rw-r--r--   rts/Sparks.c            |    1
-rw-r--r--   rts/posix/OSMem.c       |  200
-rw-r--r--   rts/sm/BlockAlloc.c     |   14
-rw-r--r--   rts/sm/Compact.c        |    4
-rw-r--r--   rts/sm/Evac.c           |   95
-rw-r--r--   rts/sm/GC.c             |   48
-rw-r--r--   rts/sm/GC.h             |    2
-rw-r--r--   rts/sm/GCAux.c          |   10
-rw-r--r--   rts/sm/GCThread.h       |    7
-rw-r--r--   rts/sm/HeapAlloc.h      |  224
-rw-r--r--   rts/sm/MBlock.c         |  399
-rw-r--r--   rts/sm/OSMem.h          |   41
-rw-r--r--   rts/sm/Sanity.c         |    3
-rw-r--r--   rts/sm/Scav.c           |   11
-rw-r--r--   rts/sm/Storage.c        |   10
-rw-r--r--   rts/sm/Storage.h        |   57
-rw-r--r--   rts/win32/OSMem.c       |   77
19 files changed, 1040 insertions, 172 deletions
diff --git a/rts/CheckUnload.c b/rts/CheckUnload.c
index 2c01113f63..34f976db4d 100644
--- a/rts/CheckUnload.c
+++ b/rts/CheckUnload.c
@@ -271,7 +271,8 @@ void checkUnload (StgClosure *static_objects)
addrs = allocHashTable();
- for (p = static_objects; p != END_OF_STATIC_LIST; p = link) {
+ for (p = static_objects; p != END_OF_STATIC_OBJECT_LIST; p = link) {
+ p = UNTAG_STATIC_LIST_PTR(p);
checkAddress(addrs, p);
info = get_itbl(p);
link = *STATIC_LINK(info, p);
@@ -279,8 +280,9 @@ void checkUnload (StgClosure *static_objects)
// CAFs on revertible_caf_list are not on static_objects
for (p = (StgClosure*)revertible_caf_list;
- p != END_OF_STATIC_LIST;
+ p != END_OF_CAF_LIST;
p = ((StgIndStatic *)p)->static_link) {
+ p = UNTAG_STATIC_LIST_PTR(p);
checkAddress(addrs, p);
}
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c
index 78daa892ad..ba58c199f0 100644
--- a/rts/RetainerProfile.c
+++ b/rts/RetainerProfile.c
@@ -1881,7 +1881,8 @@ resetStaticObjectForRetainerProfiling( StgClosure *static_objects )
count = 0;
#endif
p = static_objects;
- while (p != END_OF_STATIC_LIST) {
+ while (p != END_OF_STATIC_OBJECT_LIST) {
+ p = UNTAG_STATIC_LIST_PTR(p);
#ifdef DEBUG_RETAINER
count++;
#endif
diff --git a/rts/Sparks.c b/rts/Sparks.c
index ada2adfd3a..ec075805bf 100644
--- a/rts/Sparks.c
+++ b/rts/Sparks.c
@@ -14,6 +14,7 @@
#include "Trace.h"
#include "Prelude.h"
#include "Sparks.h"
+#include "sm/HeapAlloc.h"
#if defined(THREADED_RTS)
diff --git a/rts/posix/OSMem.c b/rts/posix/OSMem.c
index 359df7022b..125ae10367 100644
--- a/rts/posix/OSMem.c
+++ b/rts/posix/OSMem.c
@@ -13,6 +13,7 @@
#include "RtsUtils.h"
#include "sm/OSMem.h"
+#include "sm/HeapAlloc.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
@@ -72,23 +73,67 @@ void osMemInit(void)
-------------------------------------------------------------------------- */
-// A wrapper around mmap(), to abstract away from OS differences in
-// the mmap() interface.
+/*
+ A wrapper around mmap(), to abstract away from OS differences in
+ the mmap() interface.
+
+ It supports the following operations:
+ - reserve: find a new chunk of available address space, and make it so
+ that we own it (no other library will get it), but don't actually
+ allocate memory for it
+ the addr is a hint for where to place the memory (which most
+ of the time the OS happily ignores!)
+ - commit: given a chunk of address space that we know we own, make sure
+ there is some memory backing it
+ the addr is not a hint, it must point into previously reserved
+ address space, or bad things happen
+ - reserve&commit: do both at the same time
+
+ The naming is chosen from the Win32 API (VirtualAlloc) which does the
+ same thing and has done so forever, while support for this in Unix systems
+ has only been added recently and is hidden in the posix portability mess.
+ It is confusing because to get the reserve behavior on Unix we need
+ MAP_NORESERVE (which merely tells the kernel not to allocate backing space).
+*/
+enum
+{
+ MEM_RESERVE = 1,
+ MEM_COMMIT = 2,
+ MEM_RESERVE_AND_COMMIT = MEM_RESERVE | MEM_COMMIT
+};
static void *
-my_mmap (void *addr, W_ size)
+my_mmap (void *addr, W_ size, int operation)
{
void *ret;
+ int prot, flags;
+
+ if (operation & MEM_COMMIT)
+ prot = PROT_READ | PROT_WRITE;
+ else
+ prot = PROT_NONE;
+ if (operation == MEM_RESERVE)
+ flags = MAP_NORESERVE;
+ else if (operation == MEM_COMMIT)
+ flags = MAP_FIXED;
+ else
+ flags = 0;
#if defined(solaris2_HOST_OS) || defined(irix_HOST_OS)
{
- int fd = open("/dev/zero",O_RDONLY);
- ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
- close(fd);
+ if (operation & MEM_RESERVE)
+ {
+ int fd = open("/dev/zero",O_RDONLY);
+ ret = mmap(addr, size, prot, flags | MAP_PRIVATE, fd, 0);
+ close(fd);
+ }
+ else
+ {
+ ret = mmap(addr, size, prot, flags | MAP_PRIVATE, -1, 0);
+ }
}
#elif hpux_HOST_OS
- ret = mmap(addr, size, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ret = mmap(addr, size, prot, flags | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#elif darwin_HOST_OS
// Without MAP_FIXED, Apple's mmap ignores addr.
// With MAP_FIXED, it overwrites already mapped regions, whic
@@ -100,10 +145,16 @@ my_mmap (void *addr, W_ size)
kern_return_t err = 0;
ret = addr;
- if(addr) // try to allocate at address
- err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, FALSE);
- if(!addr || err) // try to allocate anywhere
- err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, TRUE);
+
+ if(operation & MEM_RESERVE)
+ {
+ if(addr) // try to allocate at address
+ err = vm_allocate(mach_task_self(),(vm_address_t*) &ret,
+ size, FALSE);
+ if(!addr || err) // try to allocate anywhere
+ err = vm_allocate(mach_task_self(),(vm_address_t*) &ret,
+ size, TRUE);
+ }
if(err) {
// don't know what the error codes mean exactly, assume it's
@@ -111,23 +162,24 @@ my_mmap (void *addr, W_ size)
errorBelch("memory allocation failed (requested %" FMT_Word " bytes)",
size);
stg_exit(EXIT_FAILURE);
- } else {
+ }
+
+ if(operation & MEM_COMMIT) {
vm_protect(mach_task_self(), (vm_address_t)ret, size, FALSE,
VM_PROT_READ|VM_PROT_WRITE);
}
+
#elif linux_HOST_OS
- ret = mmap(addr, size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ret = mmap(addr, size, prot, flags | MAP_ANON | MAP_PRIVATE, -1, 0);
if (ret == (void *)-1 && errno == EPERM) {
// Linux may return EPERM if it tried to give us
// a chunk of address space below mmap_min_addr,
// See Trac #7500.
- if (addr != 0) {
+ if (addr != 0 && (operation & MEM_RESERVE)) {
// Try again with no hint address.
// It's not clear that this can ever actually help,
// but since our alternative is to abort, we may as well try.
- ret = mmap(0, size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ret = mmap(0, size, prot, flags | MAP_ANON | MAP_PRIVATE, -1, 0);
}
if (ret == (void *)-1 && errno == EPERM) {
// Linux is not willing to give us any mapping,
@@ -137,8 +189,7 @@ my_mmap (void *addr, W_ size)
}
}
#else
- ret = mmap(addr, size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ret = mmap(addr, size, prot, flags | MAP_ANON | MAP_PRIVATE, -1, 0);
#endif
if (ret == (void *)-1) {
@@ -168,7 +219,7 @@ gen_map_mblocks (W_ size)
// Try to map a larger block, and take the aligned portion from
// it (unmap the rest).
size += MBLOCK_SIZE;
- ret = my_mmap(0, size);
+ ret = my_mmap(0, size, MEM_RESERVE_AND_COMMIT);
// unmap the slop bits around the chunk we allocated
slop = (W_)ret & MBLOCK_MASK;
@@ -207,7 +258,7 @@ osGetMBlocks(nat n)
// use gen_map_mblocks the first time.
ret = gen_map_mblocks(size);
} else {
- ret = my_mmap(next_request, size);
+ ret = my_mmap(next_request, size, MEM_RESERVE_AND_COMMIT);
if (((W_)ret & MBLOCK_MASK) != 0) {
// misaligned block!
@@ -244,10 +295,11 @@ void osReleaseFreeMemory(void) {
void osFreeAllMBlocks(void)
{
void *mblock;
+ void *state;
- for (mblock = getFirstMBlock();
+ for (mblock = getFirstMBlock(&state);
mblock != NULL;
- mblock = getNextMBlock(mblock)) {
+ mblock = getNextMBlock(&state, mblock)) {
munmap(mblock, MBLOCK_SIZE);
}
}
@@ -318,3 +370,103 @@ void setExecutable (void *p, W_ len, rtsBool exec)
barf("setExecutable: failed to protect 0x%p\n", p);
}
}
+
+#ifdef USE_LARGE_ADDRESS_SPACE
+
+static void *
+osTryReserveHeapMemory (void *hint)
+{
+ void *base, *top;
+ void *start, *end;
+
+ /* We try to allocate MBLOCK_SPACE_SIZE + MBLOCK_SIZE,
+ because we need memory which is MBLOCK_SIZE aligned,
+ and then we discard what we don't need */
+
+ base = my_mmap(hint, MBLOCK_SPACE_SIZE + MBLOCK_SIZE, MEM_RESERVE);
+ top = (void*)((W_)base + MBLOCK_SPACE_SIZE + MBLOCK_SIZE);
+
+ if (((W_)base & MBLOCK_MASK) != 0) {
+ start = MBLOCK_ROUND_UP(base);
+ end = MBLOCK_ROUND_DOWN(top);
+ ASSERT(((W_)end - (W_)start) == MBLOCK_SPACE_SIZE);
+
+ if (munmap(base, (W_)start-(W_)base) < 0) {
+ sysErrorBelch("unable to release slop before heap");
+ }
+ if (munmap(end, (W_)top-(W_)end) < 0) {
+ sysErrorBelch("unable to release slop after heap");
+ }
+ } else {
+ start = base;
+ }
+
+ return start;
+}
+
+void *osReserveHeapMemory(void)
+{
+ int attempt;
+ void *at;
+
+ /* We want to ensure the heap starts at least 8 GB inside the address space,
+ to make sure that any dynamically loaded code will be close enough to the
+ original code so that short relocations will work. This is in particular
+ important on Darwin/Mach-O, because object files not compiled as shared
+ libraries are position independent but cannot be loaded above 4GB.
+
+ We do so with a hint to the mmap, and we verify the OS satisfied our
+ hint. We loop a few times in case there is already something allocated
+ there, but we bail if we cannot allocate at all.
+ */
+
+ attempt = 0;
+ do {
+ at = osTryReserveHeapMemory((void*)((W_)8 * (1 << 30) +
+ attempt * BLOCK_SIZE));
+ attempt++;
+ } while ((W_)at < ((W_)8 * (1 << 30)));
+
+ return at;
+}
+
+void osCommitMemory(void *at, W_ size)
+{
+ my_mmap(at, size, MEM_COMMIT);
+}
+
+void osDecommitMemory(void *at, W_ size)
+{
+ int r;
+
+ // First make the memory inaccessible (so that we get a segfault
+ // at the next attempt to touch it).
+ // We only do this in DEBUG because it forces the OS to remove
+ // all MMU entries for this page range, and there is no reason
+ // to do so unless there is memory pressure.
+#ifdef DEBUG
+ r = mprotect(at, size, PROT_NONE);
+ if(r < 0)
+ sysErrorBelch("unable to make released memory inaccessible");
+#endif
+
+#ifdef MADV_FREE
+ // Try MADV_FREE first; FreeBSD has both, and there MADV_DONTNEED
+ // just swaps the memory out
+ r = madvise(at, size, MADV_FREE);
+#else
+ r = madvise(at, size, MADV_DONTNEED);
+#endif
+ if(r < 0)
+ sysErrorBelch("unable to decommit memory");
+}
+
+void osReleaseHeapMemory(void)
+{
+ int r;
+
+ r = munmap((void*)mblock_address_space_begin, MBLOCK_SPACE_SIZE);
+ if(r < 0)
+ sysErrorBelch("unable to release address space");
+}
+
+#endif
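
The reserve/commit/decommit operations that my_mmap() and the new os* entry
points implement map onto plain POSIX calls roughly as in the following
standalone sketch. This is a minimal illustration only (Linux-flavoured flags,
not the RTS code itself):

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
        size_t reserved  = (size_t)1 << 30;  /* reserve 1 GB of address space */
        size_t committed = 1 << 20;          /* commit the first 1 MB of it   */

        /* "reserve": take address space but no backing memory (PROT_NONE) */
        void *base = mmap(NULL, reserved, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
        if (base == MAP_FAILED) { perror("reserve"); return 1; }

        /* "commit": back part of the reservation with real memory;
           MAP_FIXED makes it land exactly inside the reserved range */
        void *p = mmap(base, committed, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
        if (p == MAP_FAILED) { perror("commit"); return 1; }

        memset(p, 0, committed);             /* safe: this range is committed */

        /* "decommit": drop the backing memory but keep the address space */
        madvise(p, committed, MADV_DONTNEED);

        /* "release": give the whole reservation back to the OS */
        munmap(base, reserved);
        return 0;
    }
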
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index c2a5913963..e721fb13b6 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -736,7 +736,14 @@ void returnMemoryToOS(nat n /* megablocks */)
}
free_mblock_list = bd;
- osReleaseFreeMemory();
+ // Ask the OS to release any address space portion
+ // that was associated with the just-released MBlocks.
+ //
+ // Historically, we used to ask the OS directly (via
+ // osReleaseFreeMemory()); now the MBlock layer might
+ // have a reason to preserve the address space range,
+ // so we let it decide what to release.
+ releaseFreeMemory();
IF_DEBUG(gc,
if (n != 0) {
@@ -869,11 +876,12 @@ void
reportUnmarkedBlocks (void)
{
void *mblock;
+ void *state;
bdescr *bd;
debugBelch("Unreachable blocks:\n");
- for (mblock = getFirstMBlock(); mblock != NULL;
- mblock = getNextMBlock(mblock)) {
+ for (mblock = getFirstMBlock(&state); mblock != NULL;
+ mblock = getNextMBlock(&state, mblock)) {
for (bd = FIRST_BDESCR(mblock); bd <= LAST_BDESCR(mblock); ) {
if (!(bd->flags & BF_KNOWN) && bd->free != (P_)-1) {
debugBelch(" %p\n",bd);
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index a053dc3b4e..4ee88da11c 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -197,8 +197,8 @@ thread_static( StgClosure* p )
// keep going until we've threaded all the objects on the linked
// list...
- while (p != END_OF_STATIC_LIST) {
-
+ while (p != END_OF_STATIC_OBJECT_LIST) {
+ p = UNTAG_STATIC_LIST_PTR(p);
info = get_itbl(p);
switch (info->type) {
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index b0ef807768..bc8cb9ad13 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -324,6 +324,38 @@ evacuate_large(StgPtr p)
}
/* ----------------------------------------------------------------------------
+ Evacuate static objects
+
+ When a static object is visited for the first time in this GC, it
+ is chained on to the gct->static_objects list.
+
+ evacuate_static_object (link_field, q)
+ - link_field must be STATIC_LINK(q)
+ ------------------------------------------------------------------------- */
+
+STATIC_INLINE void
+evacuate_static_object (StgClosure **link_field, StgClosure *q)
+{
+ StgWord link = (StgWord)*link_field;
+
+ // See Note [STATIC_LINK fields] for how the link field bits work
+ if ((((StgWord)(link)&STATIC_BITS) | prev_static_flag) != 3) {
+ StgWord new_list_head = (StgWord)q | static_flag;
+#ifndef THREADED_RTS
+ *link_field = gct->static_objects;
+ gct->static_objects = (StgClosure *)new_list_head;
+#else
+ StgWord prev;
+ prev = cas((StgVolatilePtr)link_field, link,
+ (StgWord)gct->static_objects);
+ if (prev == link) {
+ gct->static_objects = (StgClosure *)new_list_head;
+ }
+#endif
+ }
+}
+
+/* ----------------------------------------------------------------------------
Evacuate
This is called (eventually) for every live object in the system.
@@ -392,38 +424,13 @@ loop:
case THUNK_STATIC:
if (info->srt_bitmap != 0) {
- if (*THUNK_STATIC_LINK((StgClosure *)q) == NULL) {
-#ifndef THREADED_RTS
- *THUNK_STATIC_LINK((StgClosure *)q) = gct->static_objects;
- gct->static_objects = (StgClosure *)q;
-#else
- StgPtr link;
- link = (StgPtr)cas((StgPtr)THUNK_STATIC_LINK((StgClosure *)q),
- (StgWord)NULL,
- (StgWord)gct->static_objects);
- if (link == NULL) {
- gct->static_objects = (StgClosure *)q;
- }
-#endif
- }
+ evacuate_static_object(THUNK_STATIC_LINK((StgClosure *)q), q);
}
return;
case FUN_STATIC:
- if (info->srt_bitmap != 0 &&
- *FUN_STATIC_LINK((StgClosure *)q) == NULL) {
-#ifndef THREADED_RTS
- *FUN_STATIC_LINK((StgClosure *)q) = gct->static_objects;
- gct->static_objects = (StgClosure *)q;
-#else
- StgPtr link;
- link = (StgPtr)cas((StgPtr)FUN_STATIC_LINK((StgClosure *)q),
- (StgWord)NULL,
- (StgWord)gct->static_objects);
- if (link == NULL) {
- gct->static_objects = (StgClosure *)q;
- }
-#endif
+ if (info->srt_bitmap != 0) {
+ evacuate_static_object(FUN_STATIC_LINK((StgClosure *)q), q);
}
return;
@@ -432,39 +439,11 @@ loop:
* on the CAF list, so don't do anything with it here (we'll
* scavenge it later).
*/
- if (*IND_STATIC_LINK((StgClosure *)q) == NULL) {
-#ifndef THREADED_RTS
- *IND_STATIC_LINK((StgClosure *)q) = gct->static_objects;
- gct->static_objects = (StgClosure *)q;
-#else
- StgPtr link;
- link = (StgPtr)cas((StgPtr)IND_STATIC_LINK((StgClosure *)q),
- (StgWord)NULL,
- (StgWord)gct->static_objects);
- if (link == NULL) {
- gct->static_objects = (StgClosure *)q;
- }
-#endif
- }
+ evacuate_static_object(IND_STATIC_LINK((StgClosure *)q), q);
return;
case CONSTR_STATIC:
- if (*STATIC_LINK(info,(StgClosure *)q) == NULL) {
-#ifndef THREADED_RTS
- *STATIC_LINK(info,(StgClosure *)q) = gct->static_objects;
- gct->static_objects = (StgClosure *)q;
-#else
- StgPtr link;
- link = (StgPtr)cas((StgPtr)STATIC_LINK(info,(StgClosure *)q),
- (StgWord)NULL,
- (StgWord)gct->static_objects);
- if (link == NULL) {
- gct->static_objects = (StgClosure *)q;
- }
-#endif
- }
- /* I am assuming that static_objects pointers are not
- * written to other objects, and thus, no need to retag. */
+ evacuate_static_object(STATIC_LINK(info,(StgClosure *)q), q);
return;
case CONSTR_NOCAF_STATIC:
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 52d7f98fa0..e6a23395eb 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -134,6 +134,9 @@ long copied; // *words* copied & scavenged during this GC
rtsBool work_stealing;
+nat static_flag = STATIC_FLAG_B;
+nat prev_static_flag = STATIC_FLAG_A;
+
DECLARE_GCT
/* -----------------------------------------------------------------------------
@@ -141,7 +144,6 @@ DECLARE_GCT
-------------------------------------------------------------------------- */
static void mark_root (void *user, StgClosure **root);
-static void zero_static_object_list (StgClosure* first_static);
static void prepare_collected_gen (generation *gen);
static void prepare_uncollected_gen (generation *gen);
static void init_gc_thread (gc_thread *t);
@@ -246,6 +248,12 @@ GarbageCollect (nat collect_gen,
N = collect_gen;
major_gc = (N == RtsFlags.GcFlags.generations-1);
+ if (major_gc) {
+ prev_static_flag = static_flag;
+ static_flag =
+ static_flag == STATIC_FLAG_A ? STATIC_FLAG_B : STATIC_FLAG_A;
+ }
+
#if defined(THREADED_RTS)
work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
@@ -672,20 +680,6 @@ GarbageCollect (nat collect_gen,
resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
#endif
- // zero the scavenged static object list
- if (major_gc) {
- nat i;
- if (n_gc_threads == 1) {
- zero_static_object_list(gct->scavenged_static_objects);
- } else {
- for (i = 0; i < n_gc_threads; i++) {
- if (!gc_threads[i]->idle) {
- zero_static_object_list(gc_threads[i]->scavenged_static_objects);
- }
- }
- }
- }
-
// Start any pending finalizers. Must be after
// updateStableTables() and stableUnlock() (see #4221).
RELEASE_SM_LOCK;
@@ -1427,8 +1421,8 @@ collect_pinned_object_blocks (void)
static void
init_gc_thread (gc_thread *t)
{
- t->static_objects = END_OF_STATIC_LIST;
- t->scavenged_static_objects = END_OF_STATIC_LIST;
+ t->static_objects = END_OF_STATIC_OBJECT_LIST;
+ t->scavenged_static_objects = END_OF_STATIC_OBJECT_LIST;
t->scan_bd = NULL;
t->mut_lists = t->cap->mut_lists;
t->evac_gen_no = 0;
@@ -1465,24 +1459,6 @@ mark_root(void *user USED_IF_THREADS, StgClosure **root)
SET_GCT(saved_gct);
}
-/* -----------------------------------------------------------------------------
- Initialising the static object & mutable lists
- -------------------------------------------------------------------------- */
-
-static void
-zero_static_object_list(StgClosure* first_static)
-{
- StgClosure* p;
- StgClosure* link;
- const StgInfoTable *info;
-
- for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
- info = get_itbl(p);
- link = *STATIC_LINK(info, p);
- *STATIC_LINK(info,p) = NULL;
- }
-}
-
/* ----------------------------------------------------------------------------
Reset the sizes of the older generations when we do a major
collection.
@@ -1728,7 +1704,7 @@ static void gcCAFs(void)
p = debug_caf_list;
prev = NULL;
- for (p = debug_caf_list; p != (StgIndStatic*)END_OF_STATIC_LIST;
+ for (p = debug_caf_list; p != (StgIndStatic*)END_OF_CAF_LIST;
p = (StgIndStatic*)p->saved_info) {
info = get_itbl((StgClosure*)p);
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index 571aa07110..5744eb95a8 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -16,6 +16,8 @@
#include "BeginPrivate.h"
+#include "HeapAlloc.h"
+
void GarbageCollect (rtsBool force_major_gc,
rtsBool do_heap_census,
nat gc_type, Capability *cap);
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index 13316e4d29..d3cbdaefb4 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -118,14 +118,14 @@ revertCAFs( void )
StgIndStatic *c;
for (c = revertible_caf_list;
- c != (StgIndStatic *)END_OF_STATIC_LIST;
+ c != (StgIndStatic *)END_OF_CAF_LIST;
c = (StgIndStatic *)c->static_link)
{
SET_INFO((StgClosure *)c, c->saved_info);
c->saved_info = NULL;
// could, but not necessary: c->static_link = NULL;
}
- revertible_caf_list = (StgIndStatic*)END_OF_STATIC_LIST;
+ revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
}
void
@@ -134,15 +134,17 @@ markCAFs (evac_fn evac, void *user)
StgIndStatic *c;
for (c = dyn_caf_list;
- c != (StgIndStatic*)END_OF_STATIC_LIST;
+ c != (StgIndStatic*)END_OF_CAF_LIST;
c = (StgIndStatic *)c->static_link)
{
+ c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c);
evac(user, &c->indirectee);
}
for (c = revertible_caf_list;
- c != (StgIndStatic*)END_OF_STATIC_LIST;
+ c != (StgIndStatic*)END_OF_CAF_LIST;
c = (StgIndStatic *)c->static_link)
{
+ c = (StgIndStatic *)UNTAG_STATIC_LIST_PTR(c);
evac(user, &c->indirectee);
}
}
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h
index cbe4346afe..d42b89f973 100644
--- a/rts/sm/GCThread.h
+++ b/rts/sm/GCThread.h
@@ -131,8 +131,11 @@ typedef struct gc_thread_ {
// during GC without accessing the block
// allocators spin lock.
- StgClosure* static_objects; // live static objects
- StgClosure* scavenged_static_objects; // static objects scavenged so far
+ // These two lists are chained through the STATIC_LINK() fields of static
+ // objects. Pointers are tagged with the current static_flag, so before
+ // following a pointer, untag it with UNTAG_STATIC_LIST_PTR().
+ StgClosure* static_objects; // live static objects
+ StgClosure* scavenged_static_objects; // static objects scavenged so far
W_ gc_count; // number of GCs this thread has done
diff --git a/rts/sm/HeapAlloc.h b/rts/sm/HeapAlloc.h
new file mode 100644
index 0000000000..c914b5db40
--- /dev/null
+++ b/rts/sm/HeapAlloc.h
@@ -0,0 +1,224 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow 2006-2008
+ *
+ * The HEAP_ALLOCED() test.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef SM_HEAP_ALLOC_H
+#define SM_HEAP_ALLOC_H
+
+#include "BeginPrivate.h"
+
+/* -----------------------------------------------------------------------------
+ The HEAP_ALLOCED() test.
+
+ HEAP_ALLOCED is called FOR EVERY SINGLE CLOSURE during GC.
+ It needs to be FAST.
+
+ See wiki commentary at
+ http://ghc.haskell.org/trac/ghc/wiki/Commentary/HeapAlloced
+
+ Implementation of HEAP_ALLOCED
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Since heap is allocated in chunks of megablocks (MBLOCK_SIZE), we
+ can just use a table to record which megablocks in the address
+ space belong to the heap. On a 32-bit machine, with 1Mb
+ megablocks, using 8 bits for each entry in the table, the table
+ requires 4k. Lookups during GC will be fast, because the table
+ will be quickly cached (indeed, performance measurements showed no
+ measurable difference between doing the table lookup and using a
+ constant comparison).
+
+ On 64-bit machines, we have two possibilities. One is to request
+ a single chunk of address space that we deem "large enough"
+ (currently 1TB, could easily be extended to, say 16TB or more).
+ Memory from that chunk is GC memory, everything else is not. This
+ case is tricky in that it requires support from the OS to allocate
+ address space without allocating memory (in practice, all modern
+ OSes do this). It's also tricky in that it is the only case where
+ a successful HEAP_ALLOCED(p) check can trigger a segfault when
+ accessing p (and for debugging purposes, it will).
+
+ Alternatively, the older implementation caches one 12-bit block map
+ that describes 4096 megablocks or 4GB of memory. If HEAP_ALLOCED is
+ called for an address that is not in the cache, it calls
+ HEAP_ALLOCED_miss (see MBlock.c) which will find the block map for
+ the 4GB block in question.
+ -------------------------------------------------------------------------- */
+
+#ifdef USE_LARGE_ADDRESS_SPACE
+
+extern W_ mblock_address_space_begin;
+# define MBLOCK_SPACE_SIZE ((StgWord)1 << 40) /* 1 TB */
+# define HEAP_ALLOCED(p) ((W_)(p) >= mblock_address_space_begin && \
+ (W_)(p) < (mblock_address_space_begin + \
+ MBLOCK_SPACE_SIZE))
+# define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p)
+
+#elif SIZEOF_VOID_P == 4
+extern StgWord8 mblock_map[];
+
+/* On a 32-bit machine a 4KB table is always sufficient */
+# define MBLOCK_MAP_SIZE 4096
+# define MBLOCK_MAP_ENTRY(p) ((StgWord)(p) >> MBLOCK_SHIFT)
+# define HEAP_ALLOCED(p) mblock_map[MBLOCK_MAP_ENTRY(p)]
+# define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p)
+
+/* -----------------------------------------------------------------------------
+ HEAP_ALLOCED for 64-bit machines (without LARGE_ADDRESS_SPACE).
+
+ Here are some cache layout options:
+
+ [1]
+ 16KB cache of 16-bit entries, 1MB lines (capacity 8GB)
+ mblock size = 20 bits
+ entries = 8192 13 bits
+ line size = 0 bits (1 bit of value)
+ tag size = 15 bits
+ = 48 bits
+
+ [2]
+ 32KB cache of 16-bit entries, 4MB lines (capacity 32GB)
+ mblock size = 20 bits
+ entries = 16384 14 bits
+ line size = 2 bits (4 bits of value)
+ tag size = 12 bits
+ = 48 bits
+
+ [3]
+ 16KB cache of 16-bit entries, 2MB lines (capacity 16GB)
+ mblock size = 20 bits
+ entries = 8192 13 bits
+ line size = 1 bits (2 bits of value)
+ tag size = 14 bits
+ = 48 bits
+
+ [4]
+ 4KB cache of 32-bit entries, 16MB lines (capacity 16GB)
+ mblock size = 20 bits
+ entries = 1024 10 bits
+ line size = 4 bits (16 bits of value)
+ tag size = 14 bits
+ = 48 bits
+
+ [5]
+ 4KB cache of 64-bit entries, 32MB lines (capacity 16GB)
+ mblock size = 20 bits
+ entries = 512 9 bits
+ line size = 5 bits (32 bits of value)
+ tag size = 14 bits
+ = 48 bits
+
+ We actually use none of the above. After much experimentation it was
+ found that optimising the lookup is the most important factor,
+ followed by reducing the number of misses. To that end, we use a
+ variant of [1] in which each cache entry is ((mblock << 1) + value)
+ where value is 0 for non-heap and 1 for heap. The cache entries can
+ be 32 bits, since the mblock number is 48-20 = 28 bits, and we need
+ 1 bit for the value. The cache can be as big as we like, but
+ currently we use 8k entries, giving us 8GB capacity.
+
+ ---------------------------------------------------------------------------- */
+
+#elif SIZEOF_VOID_P == 8
+
+#define MBC_LINE_BITS 0
+#define MBC_TAG_BITS 15
+
+#if x86_64_HOST_ARCH
+// 32 bits are enough for 'entry' as modern amd64 boxes have
+// only 48-bit virtual addresses.
+typedef StgWord32 MbcCacheLine;
+#else
+// 32 bits are not enough here as some arches (like ia64) use
+// upper address bits to distinguish memory areas.
+typedef StgWord64 MbcCacheLine;
+#endif
+
+typedef StgWord8 MBlockMapLine;
+
+#define MBLOCK_MAP_LINE(p) (((StgWord)p & 0xffffffff) >> (MBLOCK_SHIFT + MBC_LINE_BITS))
+
+#define MBC_LINE_SIZE (1<<MBC_LINE_BITS)
+#define MBC_SHIFT (48 - MBLOCK_SHIFT - MBC_LINE_BITS - MBC_TAG_BITS)
+#define MBC_ENTRIES (1<<MBC_SHIFT)
+
+extern MbcCacheLine mblock_cache[];
+
+#define MBC_LINE(p) ((StgWord)p >> (MBLOCK_SHIFT + MBC_LINE_BITS))
+
+#define MBLOCK_MAP_ENTRIES (1 << (32 - MBLOCK_SHIFT - MBC_LINE_BITS))
+
+typedef struct {
+ StgWord32 addrHigh32;
+ MBlockMapLine lines[MBLOCK_MAP_ENTRIES];
+} MBlockMap;
+
+extern W_ mpc_misses;
+
+StgBool HEAP_ALLOCED_miss(StgWord mblock, void *p);
+
+INLINE_HEADER
+StgBool HEAP_ALLOCED(void *p)
+{
+ StgWord mblock;
+ nat entry_no;
+ MbcCacheLine entry, value;
+
+ mblock = (StgWord)p >> MBLOCK_SHIFT;
+ entry_no = mblock & (MBC_ENTRIES-1);
+ entry = mblock_cache[entry_no];
+ value = entry ^ (mblock << 1);
+ // this formulation coaxes gcc into prioritising the value==1
+ // case, which we expect to be the most common.
+ // __builtin_expect() didn't have any useful effect (gcc-4.3.0).
+ if (value == 1) {
+ return 1;
+ } else if (value == 0) {
+ return 0;
+ } else {
+ // putting the rest out of line turned out to be a slight
+ // performance improvement:
+ return HEAP_ALLOCED_miss(mblock,p);
+ }
+}
+
+// In the parallel GC, the cache itself is safe to *read*, and can be
+// updated atomically, but we need to place a lock around operations
+// that touch the MBlock map.
+INLINE_HEADER
+StgBool HEAP_ALLOCED_GC(void *p)
+{
+ StgWord mblock;
+ nat entry_no;
+ MbcCacheLine entry, value;
+ StgBool b;
+
+ mblock = (StgWord)p >> MBLOCK_SHIFT;
+ entry_no = mblock & (MBC_ENTRIES-1);
+ entry = mblock_cache[entry_no];
+ value = entry ^ (mblock << 1);
+ if (value == 1) {
+ return 1;
+ } else if (value == 0) {
+ return 0;
+ } else {
+ // putting the rest out of line turned out to be a slight
+ // performance improvement:
+ ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
+ b = HEAP_ALLOCED_miss(mblock,p);
+ RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
+ return b;
+ }
+}
+
+#else
+# error HEAP_ALLOCED not defined
+#endif
+
+#include "EndPrivate.h"
+
+#endif /* SM_HEAP_ALLOC_H */
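
To see why the single XOR in HEAP_ALLOCED() suffices, here is a standalone
sketch of the cache encoding described above, with MBLOCK_SHIFT taken as 20
and an 8k-entry cache; cache_set and cache_lookup are illustrative helpers,
not the RTS functions:

    #include <stdint.h>
    #include <stdio.h>

    #define MBLOCK_SHIFT 20                  /* 1 MB megablocks              */
    #define MBC_ENTRIES  (1 << 13)           /* 8192 cache entries           */

    static uint32_t cache[MBC_ENTRIES];      /* each entry: (mblock<<1)|bit  */

    static void cache_set(uintptr_t p, unsigned is_heap)
    {
        uintptr_t mblock = p >> MBLOCK_SHIFT;
        cache[mblock & (MBC_ENTRIES-1)] = (uint32_t)((mblock << 1) | is_heap);
    }

    static int cache_lookup(uintptr_t p)     /* 1=heap, 0=not heap, -1=miss  */
    {
        uintptr_t mblock = p >> MBLOCK_SHIFT;
        uint32_t value = cache[mblock & (MBC_ENTRIES-1)]
                            ^ (uint32_t)(mblock << 1);
        if (value == 1) return 1;            /* tag matches, heap bit set    */
        if (value == 0) return 0;            /* tag matches, heap bit clear  */
        return -1;                           /* a different mblock is cached */
    }

    int main(void)
    {
        cache_set(0x42100000u, 1);                 /* mark one mblock as heap */
        printf("%d\n", cache_lookup(0x42112345u)); /* same mblock     -> 1    */
        printf("%d\n", cache_lookup(0x99900000u)); /* untouched slot  -> -1   */
        return 0;
    }
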
diff --git a/rts/sm/MBlock.c b/rts/sm/MBlock.c
index f626e1f43b..35a11bf589 100644
--- a/rts/sm/MBlock.c
+++ b/rts/sm/MBlock.c
@@ -23,9 +23,320 @@ W_ mblocks_allocated = 0;
W_ mpc_misses = 0;
/* -----------------------------------------------------------------------------
- The MBlock Map: provides our implementation of HEAP_ALLOCED()
+ The MBlock Map: provides our implementation of HEAP_ALLOCED() and the
+ utilities to walk the actually allocated (and thus accessible without
+ risk of segfault) heap
-------------------------------------------------------------------------- */
+/*
+ There are two different cases here: either we use "large address
+ space" (which really means two-step allocation), so we have to
+ manage which memory is good (= accessible without fear of segfault)
+ and which is not owned by us, or we use the older method and get
+ good memory straight from the system.
+
+ Both code paths need to provide:
+
+ void *getFirstMBlock(void ** state)
+ return the first (lowest address) mblock
+ that was actually committed
+
+ void *getNextMBlock(void ** state, void * mblock)
+ return the first (lowest address) mblock
+ that was committed, after the given one
+
+ For both these calls, @state is an in-out parameter that points to
+ an opaque state threading the calls together. The calls should only
+ be used in an iterative fashion. Pass NULL if @state is not
+ interesting, or pass a pointer to NULL if you don't have a state.
+
+ void *getCommittedMBlocks(nat n)
+ return @n new mblocks, ready to be used (reserved and committed)
+
+ void decommitMBlocks(char *addr, nat n)
+ release memory for @n mblocks, starting at the given address
+
+ void releaseFreeMemory()
+ potentially release any address space that was associated
+ with recently decommitted blocks
+*/
+
+#ifdef USE_LARGE_ADDRESS_SPACE
+
+// Large address space means we use two-step allocation: reserve
+// something large upfront, and then commit as needed
+// (This is normally only useful on 64-bit, where we can assume
+// that reserving 1TB is possible)
+//
+// There is no block map in this case, but there is a free list
+// of blocks that were committed and decommitted at least once,
+// which we use to choose which block to commit next in the already
+// reserved space.
+//
+// We cannot let the OS choose it as we do in the
+// non-large-address-space case, because when committing we need to
+// know the exact address upfront.
+//
+// The free list is coalesced and ordered, which means that
+// allocate and free are worst-case O(n), but benchmarks have shown
+// that this is not a significant problem, because large (>=2MB)
+// allocations are infrequent and their time is mostly insignificant
+// compared to the time to use that memory.
+//
+// The free list is stored in malloc()'d memory, unlike the other free
+// lists in BlockAlloc.c which are stored in block descriptors,
+// because we cannot touch the contents of decommitted mblocks.
+
+typedef struct free_list {
+ struct free_list *prev;
+ struct free_list *next;
+ W_ address;
+ W_ size;
+} free_list;
+
+static free_list *free_list_head;
+static W_ mblock_high_watermark;
+W_ mblock_address_space_begin = 0;
+
+static void *getAllocatedMBlock(free_list **start_iter, W_ startingAt)
+{
+ free_list *iter;
+ W_ p = startingAt;
+
+ for (iter = *start_iter; iter != NULL; iter = iter->next)
+ {
+ if (p < iter->address)
+ break;
+
+ if (p == iter->address)
+ p += iter->size;
+ }
+
+ *start_iter = iter;
+
+ if (p >= mblock_high_watermark)
+ return NULL;
+
+ return (void*)p;
+}
+
+void * getFirstMBlock(void **state STG_UNUSED)
+{
+ free_list *fake_state;
+ free_list **casted_state;
+
+ if (state)
+ casted_state = (free_list**)state;
+ else
+ casted_state = &fake_state;
+
+ *casted_state = free_list_head;
+ return getAllocatedMBlock(casted_state, mblock_address_space_begin);
+}
+
+void * getNextMBlock(void **state STG_UNUSED, void *mblock)
+{
+ free_list *fake_state = free_list_head;
+ free_list **casted_state;
+
+ if (state)
+ casted_state = (free_list**)state;
+ else
+ casted_state = &fake_state;
+
+ return getAllocatedMBlock(casted_state, (W_)mblock + MBLOCK_SIZE);
+}
+
+static void *getReusableMBlocks(nat n)
+{
+ struct free_list *iter;
+ W_ size = MBLOCK_SIZE * (W_)n;
+
+ for (iter = free_list_head; iter != NULL; iter = iter->next) {
+ void *addr;
+
+ if (iter->size < size)
+ continue;
+
+ addr = (void*)iter->address;
+ iter->address += size;
+ iter->size -= size;
+ if (iter->size == 0) {
+ struct free_list *prev, *next;
+
+ prev = iter->prev;
+ next = iter->next;
+ if (prev == NULL) {
+ ASSERT(free_list_head == iter);
+ free_list_head = next;
+ } else {
+ prev->next = next;
+ }
+ if (next != NULL) {
+ next->prev = prev;
+ }
+ stgFree(iter);
+ }
+
+ osCommitMemory(addr, size);
+ return addr;
+ }
+
+ return NULL;
+}
+
+static void *getFreshMBlocks(nat n)
+{
+ W_ size = MBLOCK_SIZE * (W_)n;
+ void *addr = (void*)mblock_high_watermark;
+
+ if (mblock_high_watermark + size >
+ mblock_address_space_begin + MBLOCK_SPACE_SIZE)
+ {
+ // whoa, 1 TB of heap?
+ errorBelch("out of memory");
+ stg_exit(EXIT_HEAPOVERFLOW);
+ }
+
+ osCommitMemory(addr, size);
+ mblock_high_watermark += size;
+ return addr;
+}
+
+static void *getCommittedMBlocks(nat n)
+{
+ void *p;
+
+ p = getReusableMBlocks(n);
+ if (p == NULL) {
+ p = getFreshMBlocks(n);
+ }
+
+ ASSERT(p != NULL && p != (void*)-1);
+ return p;
+}
+
+static void decommitMBlocks(char *addr, nat n)
+{
+ struct free_list *iter, *prev;
+ W_ size = MBLOCK_SIZE * (W_)n;
+ W_ address = (W_)addr;
+
+ osDecommitMemory(addr, size);
+
+ prev = NULL;
+ for (iter = free_list_head; iter != NULL; iter = iter->next)
+ {
+ prev = iter;
+
+ if (iter->address + iter->size < address)
+ continue;
+
+ if (iter->address + iter->size == address) {
+ iter->size += size;
+
+ if (address + size == mblock_high_watermark) {
+ mblock_high_watermark -= iter->size;
+ if (iter->prev) {
+ iter->prev->next = NULL;
+ } else {
+ ASSERT(iter == free_list_head);
+ free_list_head = NULL;
+ }
+ stgFree(iter);
+ return;
+ }
+
+ if (iter->next &&
+ iter->next->address == iter->address + iter->size) {
+ struct free_list *next;
+
+ next = iter->next;
+ iter->size += next->size;
+ iter->next = next->next;
+
+ if (iter->next) {
+ iter->next->prev = iter;
+
+ /* We don't need to consolidate more */
+ ASSERT(iter->next->address > iter->address + iter->size);
+ }
+
+ stgFree(next);
+ }
+ return;
+ } else if (address + size == iter->address) {
+ iter->address = address;
+ iter->size += size;
+
+ /* We don't need to consolidate backwards
+ (because otherwise it would have been handled by
+ the previous iteration) */
+ if (iter->prev) {
+ ASSERT(iter->prev->address + iter->prev->size < iter->address);
+ }
+ return;
+ } else {
+ struct free_list *new_iter;
+
+ /* All other cases have been handled */
+ ASSERT(iter->address > address + size);
+
+ new_iter = stgMallocBytes(sizeof(struct free_list), "freeMBlocks");
+ new_iter->address = address;
+ new_iter->size = size;
+ new_iter->next = iter;
+ new_iter->prev = iter->prev;
+ if (new_iter->prev) {
+ new_iter->prev->next = new_iter;
+ } else {
+ ASSERT(iter == free_list_head);
+ free_list_head = new_iter;
+ }
+ iter->prev = new_iter;
+ return;
+ }
+ }
+
+ /* We're past the last free list entry, so we must
+ be the highest allocation so far
+ */
+ ASSERT(address + size <= mblock_high_watermark);
+
+ /* Fast path the case of releasing high or all memory */
+ if (address + size == mblock_high_watermark) {
+ mblock_high_watermark -= size;
+ } else {
+ struct free_list *new_iter;
+
+ new_iter = stgMallocBytes(sizeof(struct free_list), "freeMBlocks");
+ new_iter->address = address;
+ new_iter->size = size;
+ new_iter->next = NULL;
+ new_iter->prev = prev;
+ if (new_iter->prev) {
+ ASSERT(new_iter->prev->next == NULL);
+ new_iter->prev->next = new_iter;
+ } else {
+ ASSERT(free_list_head == NULL);
+ free_list_head = new_iter;
+ }
+ }
+}
+
+void releaseFreeMemory(void)
+{
+ // This function exists for releasing address space
+ // on Windows 32 bit
+ //
+ // Do nothing if USE_LARGE_ADDRESS_SPACE, we never want
+ // to release address space
+
+ debugTrace(DEBUG_gc, "mblock_high_watermark: %p\n", mblock_high_watermark);
+}
+
+#else // !USE_LARGE_ADDRESS_SPACE
+
#if SIZEOF_VOID_P == 4
StgWord8 mblock_map[MBLOCK_MAP_SIZE]; // initially all zeros
@@ -108,6 +419,7 @@ setHeapAlloced(void *p, StgWord8 i)
mblock_cache[entry_no] = (mblock << 1) + i;
}
}
+
#endif
static void
@@ -130,7 +442,7 @@ void * mapEntryToMBlock(nat i)
return (void *)((StgWord)i << MBLOCK_SHIFT);
}
-void * getFirstMBlock(void)
+void * getFirstMBlock(void **state STG_UNUSED)
{
nat i;
@@ -140,7 +452,7 @@ void * getFirstMBlock(void)
return NULL;
}
-void * getNextMBlock(void *mblock)
+void * getNextMBlock(void **state STG_UNUSED, void *mblock)
{
nat i;
@@ -152,7 +464,7 @@ void * getNextMBlock(void *mblock)
#elif SIZEOF_VOID_P == 8
-void * getNextMBlock(void *p)
+void * getNextMBlock(void **state STG_UNUSED, void *p)
{
MBlockMap *map;
nat off, j;
@@ -189,7 +501,7 @@ void * getNextMBlock(void *p)
return NULL;
}
-void * getFirstMBlock(void)
+void * getFirstMBlock(void **state STG_UNUSED)
{
MBlockMap *map = mblock_maps[0];
nat line_no, off;
@@ -210,7 +522,38 @@ void * getFirstMBlock(void)
return NULL;
}
-#endif // SIZEOF_VOID_P
+#endif // SIZEOF_VOID_P == 8
+
+static void *getCommittedMBlocks(nat n)
+{
+ // The OS layer returns committed memory directly
+ void *ret = osGetMBlocks(n);
+ nat i;
+
+ // fill in the table
+ for (i = 0; i < n; i++) {
+ markHeapAlloced( (StgWord8*)ret + i * MBLOCK_SIZE );
+ }
+
+ return ret;
+}
+
+static void decommitMBlocks(void *p, nat n)
+{
+ nat i;
+
+ osFreeMBlocks(p, n);
+
+ for (i = 0; i < n; i++) {
+ markHeapUnalloced( (StgWord8*)p + i * MBLOCK_SIZE );
+ }
+}
+
+void releaseFreeMemory(void)
+{
+ osReleaseFreeMemory();
+}
+
+#endif /* !USE_LARGE_ADDRESS_SPACE */
/* -----------------------------------------------------------------------------
Allocate new mblock(s)
@@ -228,18 +571,12 @@ getMBlock(void)
void *
getMBlocks(nat n)
{
- nat i;
void *ret;
- ret = osGetMBlocks(n);
+ ret = getCommittedMBlocks(n);
debugTrace(DEBUG_gc, "allocated %d megablock(s) at %p",n,ret);
- // fill in the table
- for (i = 0; i < n; i++) {
- markHeapAlloced( (StgWord8*)ret + i * MBLOCK_SIZE );
- }
-
mblocks_allocated += n;
peak_mblocks_allocated = stg_max(peak_mblocks_allocated, mblocks_allocated);
@@ -249,17 +586,11 @@ getMBlocks(nat n)
void
freeMBlocks(void *addr, nat n)
{
- nat i;
-
debugTrace(DEBUG_gc, "freeing %d megablock(s) at %p",n,addr);
mblocks_allocated -= n;
- for (i = 0; i < n; i++) {
- markHeapUnalloced( (StgWord8*)addr + i * MBLOCK_SIZE );
- }
-
- osFreeMBlocks(addr, n);
+ decommitMBlocks(addr, n);
}
void
@@ -267,6 +598,22 @@ freeAllMBlocks(void)
{
debugTrace(DEBUG_gc, "freeing all megablocks");
+#ifdef USE_LARGE_ADDRESS_SPACE
+ {
+ struct free_list *iter, *next;
+
+ for (iter = free_list_head; iter != NULL; iter = next)
+ {
+ next = iter->next;
+ stgFree(iter);
+ }
+ }
+
+ osReleaseHeapMemory();
+
+ mblock_address_space_begin = (W_)-1;
+ mblock_high_watermark = (W_)-1;
+#else
osFreeAllMBlocks();
#if SIZEOF_VOID_P == 8
@@ -276,13 +623,23 @@ freeAllMBlocks(void)
}
stgFree(mblock_maps);
#endif
+
+#endif
}
void
initMBlocks(void)
{
osMemInit();
-#if SIZEOF_VOID_P == 8
+
+#ifdef USE_LARGE_ADDRESS_SPACE
+ {
+ void *addr = osReserveHeapMemory();
+
+ mblock_address_space_begin = (W_)addr;
+ mblock_high_watermark = (W_)addr;
+ }
+#elif SIZEOF_VOID_P == 8
memset(mblock_cache,0xff,sizeof(mblock_cache));
#endif
}
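
Both the large-address-space and the block-map implementations of the walker
keep the calling pattern used by osFreeAllMBlocks and reportUnmarkedBlocks
above: getFirstMBlock/getNextMBlock thread an opaque state so the walk does
not rescan from scratch. A toy standalone sketch of that contract
(toy_getFirstMBlock/toy_getNextMBlock are illustrative, not the RTS walker):

    #include <stddef.h>
    #include <stdio.h>

    static void *blocks[] = {                /* pretend committed mblocks */
        (void*)0x100000, (void*)0x200000, (void*)0x300000
    };

    static void *toy_getFirstMBlock(void **state)
    {
        *state = (void*)(size_t)0;           /* iteration state: next index */
        return blocks[0];
    }

    static void *toy_getNextMBlock(void **state, void *mblock)
    {
        size_t next = (size_t)*state + 1;
        (void)mblock;                        /* the toy keys off the state  */
        if (next >= sizeof(blocks)/sizeof(blocks[0]))
            return NULL;
        *state = (void*)next;
        return blocks[next];
    }

    int main(void)
    {
        void *state;
        void *mblock;

        for (mblock = toy_getFirstMBlock(&state);
             mblock != NULL;
             mblock = toy_getNextMBlock(&state, mblock)) {
            printf("committed mblock at %p\n", mblock);
        }
        return 0;
    }
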
diff --git a/rts/sm/OSMem.h b/rts/sm/OSMem.h
index db704fc78b..9a6ccdd7ec 100644
--- a/rts/sm/OSMem.h
+++ b/rts/sm/OSMem.h
@@ -20,6 +20,47 @@ W_ getPageSize (void);
StgWord64 getPhysicalMemorySize (void);
void setExecutable (void *p, W_ len, rtsBool exec);
+#ifdef USE_LARGE_ADDRESS_SPACE
+
+/*
+ If "large address space" is enabled, we allocate memory in two
+ steps: first we request some address space, and then we request some
+ memory in it. This allows us to ask for much more address space than
+ we will ever need, which keeps everything nice and contiguous.
+*/
+
+// Reserve the large address space blob, and return the address that
+// the OS has chosen for it. It is not safe to access the memory
+// pointed to by the return value, until that memory is committed
+// using osCommitMemory().
+//
+// This function is called once when the block allocator is initialized.
+void *osReserveHeapMemory(void);
+
+// Commit (allocate memory for) a piece of address space, which must
+// be within the previously reserved space. After this call, it is safe
+// to access @p up to @len bytes.
+//
+// There is no guarantee on the contents of the memory pointed to by
+// @p, in particular it must not be assumed to contain all zeros.
+void osCommitMemory(void *p, W_ len);
+
+// Decommit (release backing memory for) a piece of address space,
+// which must be within the previously reserved space and must have
+// been previously committed. After this call, it is again unsafe to
+// access @p (up to @len bytes), but there is no guarantee that the
+// memory will be released to the system (as far as e.g. RSS statistics
+// from top are concerned).
+void osDecommitMemory(void *p, W_ len);
+
+// Release the address space previously obtained and undo the effects of
+// osReserveHeapMemory
+//
+// This function is called once, when the block allocator is deinitialized
+// before the program terminates.
+void osReleaseHeapMemory(void);
+#endif
+
#include "EndPrivate.h"
#endif /* SM_OSMEM_H */
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index c4a699e59a..e7a8401145 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -637,7 +637,8 @@ checkStaticObjects ( StgClosure* static_objects )
StgClosure *p = static_objects;
StgInfoTable *info;
- while (p != END_OF_STATIC_LIST) {
+ while (p != END_OF_STATIC_OBJECT_LIST) {
+ p = UNTAG_STATIC_LIST_PTR(p);
checkClosure(p);
info = get_itbl(p);
switch (info->type) {
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index a8f0ab037f..dfad0bef58 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -1672,7 +1672,7 @@ scavenge_capability_mut_lists (Capability *cap)
static void
scavenge_static(void)
{
- StgClosure* p;
+ StgClosure *flagged_p, *p;
const StgInfoTable *info;
debugTrace(DEBUG_gc, "scavenging static objects");
@@ -1690,10 +1690,11 @@ scavenge_static(void)
* be more stuff on this list after each evacuation...
* (static_objects is a global)
*/
- p = gct->static_objects;
- if (p == END_OF_STATIC_LIST) {
+ flagged_p = gct->static_objects;
+ if (flagged_p == END_OF_STATIC_OBJECT_LIST) {
break;
}
+ p = UNTAG_STATIC_LIST_PTR(flagged_p);
ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
info = get_itbl(p);
@@ -1708,7 +1709,7 @@ scavenge_static(void)
*/
gct->static_objects = *STATIC_LINK(info,p);
*STATIC_LINK(info,p) = gct->scavenged_static_objects;
- gct->scavenged_static_objects = p;
+ gct->scavenged_static_objects = flagged_p;
switch (info -> type) {
@@ -2066,7 +2067,7 @@ loop:
work_to_do = rtsFalse;
// scavenge static objects
- if (major_gc && gct->static_objects != END_OF_STATIC_LIST) {
+ if (major_gc && gct->static_objects != END_OF_STATIC_OBJECT_LIST) {
IF_DEBUG(sanity, checkStaticObjects(gct->static_objects));
scavenge_static();
}
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 6e9b0634b7..65f5b70c21 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -175,9 +175,9 @@ initStorage (void)
generations[0].max_blocks = 0;
- dyn_caf_list = (StgIndStatic*)END_OF_STATIC_LIST;
- debug_caf_list = (StgIndStatic*)END_OF_STATIC_LIST;
- revertible_caf_list = (StgIndStatic*)END_OF_STATIC_LIST;
+ dyn_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
+ debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
+ revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
/* initialise the allocate() interface */
large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
@@ -427,7 +427,7 @@ newCAF(StgRegTable *reg, StgIndStatic *caf)
ACQUIRE_SM_LOCK; // dyn_caf_list is global, locked by sm_mutex
caf->static_link = (StgClosure*)dyn_caf_list;
- dyn_caf_list = caf;
+ dyn_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
RELEASE_SM_LOCK;
}
else
@@ -484,7 +484,7 @@ StgInd* newRetainedCAF (StgRegTable *reg, StgIndStatic *caf)
ACQUIRE_SM_LOCK;
caf->static_link = (StgClosure*)revertible_caf_list;
- revertible_caf_list = caf;
+ revertible_caf_list = (StgIndStatic*)((StgWord)caf | STATIC_FLAG_LIST);
RELEASE_SM_LOCK;
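
Tagging the CAF list heads works because closures are at least word-aligned,
so the low two bits of their addresses are always zero and can carry
STATIC_FLAG_LIST. A standalone sketch of the tag/untag round trip
(tag_list_ptr/untag_list_ptr are illustrative stand-ins, not the RTS macros):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define STATIC_BITS      3u
    #define STATIC_FLAG_LIST 3u

    static uintptr_t tag_list_ptr(void *p)
    {
        return (uintptr_t)p | STATIC_FLAG_LIST;
    }

    static void *untag_list_ptr(uintptr_t w)
    {
        return (void*)(w & ~(uintptr_t)STATIC_BITS);
    }

    int main(void)
    {
        static uint64_t some_closure;        /* stand-in for a static closure */

        /* word alignment guarantees the low two address bits are clear */
        assert(((uintptr_t)&some_closure & STATIC_BITS) == 0);

        uintptr_t tagged = tag_list_ptr(&some_closure);
        printf("tag bits:   %lu\n", (unsigned long)(tagged & STATIC_BITS));          /* 3 */
        printf("round trip: %d\n", untag_list_ptr(tagged) == (void*)&some_closure);  /* 1 */
        return 0;
    }
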
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index a4421db3f2..d0094b60fb 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -133,12 +133,59 @@ W_ calcLiveWords (void);
extern bdescr *exec_block;
-#define END_OF_STATIC_LIST ((StgClosure*)1)
-
void move_STACK (StgStack *src, StgStack *dest);
/* -----------------------------------------------------------------------------
- CAF lists
+ Note [STATIC_LINK fields]
+
+ The low 2 bits of the static link field have the following meaning:
+
+ 00 we haven't seen this static object before
+
+ 01/10 if it equals static_flag, then we saw it in this GC, otherwise
+ we saw it in the previous GC.
+
+ 11 ignore during GC. This value is used in two ways
+ - When we put CAFs on a list (see Note [CAF lists])
+ - a static constructor that was determined to have no CAF
+ references at compile time is given this value, so we
+ don't traverse it during GC
+
+ This choice of values is quite deliberate, because it means we can
+ decide whether a static object should be traversed during GC using a
+ single test:
+
+ bits = link_field & 3;
+ if ((bits | prev_static_flag) != 3) { ... }
+
+ -------------------------------------------------------------------------- */
+
+#define STATIC_BITS 3
+
+#define STATIC_FLAG_A 1
+#define STATIC_FLAG_B 2
+#define STATIC_FLAG_LIST 3
+
+#define END_OF_CAF_LIST ((StgClosure*)STATIC_FLAG_LIST)
+
+// The previous and current values of the static flag. These flip
+// between STATIC_FLAG_A and STATIC_FLAG_B at each major GC.
+extern nat prev_static_flag, static_flag;
+
+// In the chain of static objects built up during GC, all the link
+// fields are tagged with the current static_flag value. How to mark
+// the end of the chain? It must be a special value so that we can
+// tell it is the end of the chain, but note that we're going to store
+// this value in the link field of a static object, which means that
+// during the NEXT GC we should treat it like any other object that
+// has not been visited during this GC. Therefore, we use static_flag
+// as the sentinel value.
+#define END_OF_STATIC_OBJECT_LIST ((StgClosure*)(StgWord)static_flag)
+
+#define UNTAG_STATIC_LIST_PTR(p) ((StgClosure*)((StgWord)(p) & ~STATIC_BITS))
+
+/* -----------------------------------------------------------------------------
+ Note [CAF lists]
dyn_caf_list (CAFs chained through static_link)
This is a chain of all CAFs in the program, used for
@@ -154,6 +201,10 @@ void move_STACK (StgStack *src, StgStack *dest);
A chain of CAFs in object code loaded with the RTS linker.
These CAFs can be reverted to their unevaluated state using
revertCAFs.
+
+ Pointers in these lists are tagged with STATIC_FLAG_LIST, so when
+ traversing the list remember to untag each pointer with
+ UNTAG_STATIC_LIST_PTR().
--------------------------------------------------------------------------- */
extern StgIndStatic * dyn_caf_list;
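
The truth table behind the single test in Note [STATIC_LINK fields] can be
replayed in a few lines; should_traverse is an illustrative helper, with the
constants copied from the Note (current flag B, so the previous flag is A):

    #include <stdio.h>

    #define STATIC_BITS      3
    #define STATIC_FLAG_A    1
    #define STATIC_FLAG_B    2
    #define STATIC_FLAG_LIST 3

    /* 1 if the object should be traversed in this GC, 0 otherwise */
    static int should_traverse(unsigned link_field, unsigned prev_static_flag)
    {
        unsigned bits = link_field & STATIC_BITS;
        return (bits | prev_static_flag) != 3;
    }

    int main(void)
    {
        unsigned prev = STATIC_FLAG_A;   /* current GC uses STATIC_FLAG_B */

        printf("never seen (00):         %d\n", should_traverse(0, prev));                /* 1 */
        printf("seen in previous GC (A): %d\n", should_traverse(STATIC_FLAG_A, prev));    /* 1 */
        printf("seen in this GC (B):     %d\n", should_traverse(STATIC_FLAG_B, prev));    /* 0 */
        printf("ignore / CAF list (11):  %d\n", should_traverse(STATIC_FLAG_LIST, prev)); /* 0 */
        return 0;
    }
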
diff --git a/rts/win32/OSMem.c b/rts/win32/OSMem.c
index afa5113638..716171b3fc 100644
--- a/rts/win32/OSMem.c
+++ b/rts/win32/OSMem.c
@@ -8,6 +8,7 @@
#include "Rts.h"
#include "sm/OSMem.h"
+#include "sm/HeapAlloc.h"
#include "RtsUtils.h"
#if HAVE_WINDOWS_H
@@ -28,7 +29,11 @@ typedef struct block_rec_ {
/* allocs are kept in ascending order, and are the memory regions as
returned by the OS as we need to have matching VirtualAlloc and
- VirtualFree calls. */
+ VirtualFree calls.
+
+ If USE_LARGE_ADDRESS_SPACE is defined, this list will contain only
+ one element.
+*/
static alloc_rec* allocs = NULL;
/* free_blocks are kept in ascending order, and adjacent blocks are merged */
@@ -207,12 +212,9 @@ osGetMBlocks(nat n) {
return ret;
}
-void osFreeMBlocks(char *addr, nat n)
+static void decommitBlocks(char *addr, W_ nBytes)
{
alloc_rec *p;
- W_ nBytes = (W_)n * MBLOCK_SIZE;
-
- insertFree(addr, nBytes);
p = allocs;
while ((p != NULL) && (addr >= (p->base + p->size))) {
@@ -243,6 +245,14 @@ void osFreeMBlocks(char *addr, nat n)
}
}
+void osFreeMBlocks(char *addr, nat n)
+{
+ W_ nBytes = (W_)n * MBLOCK_SIZE;
+
+ insertFree(addr, nBytes);
+ decommitBlocks(addr, nBytes);
+}
+
void osReleaseFreeMemory(void)
{
alloc_rec *prev_a, *a;
@@ -414,3 +424,60 @@ void setExecutable (void *p, W_ len, rtsBool exec)
stg_exit(EXIT_FAILURE);
}
}
+
+#ifdef USE_LARGE_ADDRESS_SPACE
+
+static void* heap_base = NULL;
+
+void *osReserveHeapMemory (void)
+{
+ void *start;
+
+ heap_base = VirtualAlloc(NULL, MBLOCK_SPACE_SIZE + MBLOCK_SIZE,
+ MEM_RESERVE, PAGE_READWRITE);
+ if (heap_base == NULL) {
+ if (GetLastError() == ERROR_NOT_ENOUGH_MEMORY) {
+ errorBelch("out of memory");
+ } else {
+ sysErrorBelch(
+ "osReserveHeapMemory: VirtualAlloc MEM_RESERVE %llu bytes failed",
+ MBLOCK_SPACE_SIZE + MBLOCK_SIZE);
+ }
+ stg_exit(EXIT_FAILURE);
+ }
+
+ // VirtualFree MEM_RELEASE must always match a
+ // previous MEM_RESERVE call, in address and size,
+ // so we necessarily leak some address space here,
+ // before and after the aligned area.
+ // It is not a huge problem because we never commit
+ // that memory.
+ start = MBLOCK_ROUND_UP(heap_base);
+
+ return start;
+}
+
+void osCommitMemory (void *at, W_ size)
+{
+ void *temp;
+ temp = VirtualAlloc(at, size, MEM_COMMIT, PAGE_READWRITE);
+ if (temp == NULL) {
+ sysErrorBelch("osCommitMemory: VirtualAlloc MEM_COMMIT failed");
+ stg_exit(EXIT_FAILURE);
+ }
+}
+
+void osDecommitMemory (void *at, W_ size)
+{
+ if (!VirtualFree(at, size, MEM_DECOMMIT)) {
+ sysErrorBelch("osDecommitMemory: VirtualFree MEM_DECOMMIT failed");
+ stg_exit(EXIT_FAILURE);
+ }
+}
+
+void osReleaseHeapMemory (void)
+{
+ VirtualFree(heap_base, 0, MEM_RELEASE);
+}
+
+#endif
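
For comparison with the POSIX side, the Win32 reserve/commit lifecycle used
above looks like this in isolation: a minimal standalone sketch of the
VirtualAlloc/VirtualFree calls, not the RTS code.

    #include <windows.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        SIZE_T reserved  = (SIZE_T)1 << 30;    /* reserve 1 GB of address space */
        SIZE_T committed = 1 << 20;            /* commit the first 1 MB of it   */

        /* Reserve: address space only, no backing memory yet */
        void *base = VirtualAlloc(NULL, reserved, MEM_RESERVE, PAGE_READWRITE);
        if (base == NULL) {
            printf("reserve failed: %lu\n", GetLastError());
            return 1;
        }

        /* Commit: back part of the reservation with real memory */
        void *p = VirtualAlloc(base, committed, MEM_COMMIT, PAGE_READWRITE);
        if (p == NULL) {
            printf("commit failed: %lu\n", GetLastError());
            return 1;
        }

        memset(p, 0, committed);               /* safe: committed region */

        /* Decommit: drop backing memory, keep the address range reserved */
        VirtualFree(p, committed, MEM_DECOMMIT);

        /* Release: must name the base of the reservation, with size 0 */
        VirtualFree(base, 0, MEM_RELEASE);
        return 0;
    }
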