Diffstat (limited to 'rts')
-rw-r--r-- | rts/ClosureFlags.c | 5
-rw-r--r-- | rts/LdvProfile.c | 1
-rw-r--r-- | rts/PrimOps.cmm | 131
-rw-r--r-- | rts/Printer.c | 9
-rw-r--r-- | rts/ProfHeap.c | 23
-rw-r--r-- | rts/RetainerProfile.c | 1
-rw-r--r-- | rts/RtsStartup.c | 1
-rw-r--r-- | rts/RtsSymbols.c | 9
-rw-r--r-- | rts/StgMiscClosures.cmm | 12
-rw-r--r-- | rts/sm/BlockAlloc.c | 1
-rw-r--r-- | rts/sm/CNF.c | 1352
-rw-r--r-- | rts/sm/CNF.h | 71
-rw-r--r-- | rts/sm/Compact.c | 1
-rw-r--r-- | rts/sm/Evac.c | 130
-rw-r--r-- | rts/sm/GC.c | 40
-rw-r--r-- | rts/sm/Sanity.c | 49
-rw-r--r-- | rts/sm/Scav.c | 9
-rw-r--r-- | rts/sm/Storage.c | 12
18 files changed, 1842 insertions, 15 deletions
diff --git a/rts/ClosureFlags.c b/rts/ClosureFlags.c index cd2c7e1435..b2359107a5 100644 --- a/rts/ClosureFlags.c +++ b/rts/ClosureFlags.c @@ -84,9 +84,10 @@ StgWord16 closure_flags[] = { [SMALL_MUT_ARR_PTRS_CLEAN] = (_HNF| _NS| _MUT|_UPT ), [SMALL_MUT_ARR_PTRS_DIRTY] = (_HNF| _NS| _MUT|_UPT ), [SMALL_MUT_ARR_PTRS_FROZEN0] = (_HNF| _NS| _MUT|_UPT ), - [SMALL_MUT_ARR_PTRS_FROZEN] = (_HNF| _NS| _UPT ) + [SMALL_MUT_ARR_PTRS_FROZEN] = (_HNF| _NS| _UPT ), + [COMPACT_NFDATA] = (_HNF| _NS ), }; -#if N_CLOSURE_TYPES != 64 +#if N_CLOSURE_TYPES != 65 #error Closure types changed: update ClosureFlags.c! #endif diff --git a/rts/LdvProfile.c b/rts/LdvProfile.c index 428078bb40..26ead95061 100644 --- a/rts/LdvProfile.c +++ b/rts/LdvProfile.c @@ -142,6 +142,7 @@ processHeapClosureForDead( const StgClosure *c ) case RET_BIG: // others case INVALID_OBJECT: + case COMPACT_NFDATA: default: barf("Invalid object in processHeapClosureForDead(): %d", info->type); return 0; diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index b82eebe07f..60d8106983 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1917,6 +1917,137 @@ stg_deRefStablePtrzh ( P_ sp ) } /* ----------------------------------------------------------------------------- + CompactNFData primitives + + See Note [Compact Normal Forms] + ------------------------------------------------------------------------- */ + +stg_compactNewzh ( W_ size ) +{ + P_ str; + + again: MAYBE_GC(again); + + ("ptr" str) = ccall compactNew(MyCapability() "ptr", size); + return (str); +} + +stg_compactAppendzh ( P_ str, P_ val , W_ share) +{ + P_ root; + + again: MAYBE_GC(again); + + ("ptr" root) = ccall compactAppend(MyCapability() "ptr", str "ptr", val "ptr", share); + return (root); +} + +stg_compactResizzezh ( P_ str, W_ new_size ) +{ + again: MAYBE_GC(again); + + ccall compactResize(MyCapability() "ptr", str "ptr", new_size); + return (); +} + +stg_compactContainszh ( P_ str, P_ val ) +{ + W_ rval; + + (rval) = ccall compactContains(str "ptr", val "ptr"); + return (rval); +} + +stg_compactContainsAnyzh ( P_ val ) +{ + W_ rval; + + (rval) = ccall compactContains(0 "ptr", val "ptr"); + return (rval); +} + +stg_compactGetFirstBlockzh ( P_ str ) +{ + /* W_, not P_, because it is not a gc pointer */ + W_ block; + W_ bd; + W_ size; + + block = str - SIZEOF_StgCompactNFDataBlock::W_; + ASSERT (StgCompactNFDataBlock_owner(block) == str); + + bd = Bdescr(str); + size = bdescr_free(bd) - bdescr_start(bd); + ASSERT (size <= TO_W_(bdescr_blocks(bd)) * BLOCK_SIZE); + + return (block, size); +} + +stg_compactGetNextBlockzh ( P_ str, W_ block ) +{ + /* str is a pointer to the closure holding the Compact# + it is there primarily to keep everything reachable from + the GC: by having it on the stack of type P_, the GC will + see all the blocks as live (any pointer in the Compact# + keeps it alive), and will not collect the block + We don't run a GC inside this primop, but it could + happen right after, or we could be preempted. 
+ + str is also useful for debugging, as it can be casted + to a useful C struct from the gdb command line and all + blocks can be inspected + */ + W_ bd; + W_ next_block; + W_ size; + + next_block = StgCompactNFDataBlock_next(block); + + if (next_block == 0::W_) { + return (0::W_, 0::W_); + } + + ASSERT (StgCompactNFDataBlock_owner(next_block) == str || + StgCompactNFDataBlock_owner(next_block) == NULL); + + bd = Bdescr(next_block); + size = bdescr_free(bd) - bdescr_start(bd); + ASSERT (size <= TO_W_(bdescr_blocks(bd)) * BLOCK_SIZE); + + return (next_block, size); +} + +stg_compactAllocateBlockzh ( W_ size, W_ previous ) +{ + W_ actual_block; + + again: MAYBE_GC(again); + + ("ptr" actual_block) = ccall compactAllocateBlock(MyCapability(), + size, + previous "ptr"); + + return (actual_block); +} + +stg_compactFixupPointerszh ( W_ first_block, W_ root ) +{ + W_ str; + P_ gcstr; + W_ ok; + + str = first_block + SIZEOF_StgCompactNFDataBlock::W_; + (ok) = ccall compactFixupPointers (str "ptr", root "ptr"); + + // Now we can let the GC know about str, because it was linked + // into the generation list and the book-keeping pointers are + // guaranteed to be valid + // (this is true even if the fixup phase failed) + gcstr = str; + return (gcstr, ok); +} + +/* ----------------------------------------------------------------------------- Bytecode object primitives ------------------------------------------------------------------------- */ diff --git a/rts/Printer.c b/rts/Printer.c index 1ee1c6c4b3..678922500c 100644 --- a/rts/Printer.c +++ b/rts/Printer.c @@ -386,6 +386,12 @@ printClosure( const StgClosure *obj ) break; #endif + case COMPACT_NFDATA: + debugBelch("COMPACT_NFDATA(size=%" FMT_Word ")\n", + (W_)((StgCompactNFData *)obj)->totalDataW * sizeof(W_)); + break; + + default: //barf("printClosure %d",get_itbl(obj)->type); debugBelch("*** printClosure: unknown type %d ****\n", @@ -873,7 +879,8 @@ const char *closure_type_names[] = { [ATOMICALLY_FRAME] = "ATOMICALLY_FRAME", [CATCH_RETRY_FRAME] = "CATCH_RETRY_FRAME", [CATCH_STM_FRAME] = "CATCH_STM_FRAME", - [WHITEHOLE] = "WHITEHOLE" + [WHITEHOLE] = "WHITEHOLE", + [COMPACT_NFDATA] = "COMPACT_NFDATA" }; const char * diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c index 664ee50d70..956a250747 100644 --- a/rts/ProfHeap.c +++ b/rts/ProfHeap.c @@ -940,6 +940,24 @@ static void heapProfObject(Census *census, StgClosure *p, size_t size, } } +// Compact objects require special handling code because they +// are not stored consecutively in memory (rather, each object +// is a list of objects), and that would break the while loop +// below. But we know that each block holds at most one object +// so we don't need the loop. +// +// See Note [Compact Normal Forms] for details. +static void +heapCensusCompactList(Census *census, bdescr *bd) +{ + for (; bd != NULL; bd = bd->link) { + StgCompactNFDataBlock *block = (StgCompactNFDataBlock*)bd->start; + StgCompactNFData *str = block->owner; + heapProfObject(census, (StgClosure*)str, + compact_nfdata_full_sizeW(str), rtsTrue); + } +} + /* ----------------------------------------------------------------------------- * Code to perform a heap census. 
* -------------------------------------------------------------------------- */ @@ -1116,6 +1134,10 @@ heapCensusChain( Census *census, bdescr *bd ) size = sizeofW(StgTRecChunk); break; + case COMPACT_NFDATA: + barf("heapCensus, found compact object in the wrong list"); + break; + default: barf("heapCensus, unknown object: %d", info->type); } @@ -1153,6 +1175,7 @@ void heapCensus (Time t) // Are we interested in large objects? might be // confusing to include the stack in a heap profile. heapCensusChain( census, generations[g].large_objects ); + heapCensusCompactList ( census, generations[g].compact_objects ); for (n = 0; n < n_capabilities; n++) { ws = &gc_threads[n]->gens[g]; diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c index 3fe0f8bf9a..6cd9c89b83 100644 --- a/rts/RetainerProfile.c +++ b/rts/RetainerProfile.c @@ -451,6 +451,7 @@ push( StgClosure *c, retainer c_child_r, StgClosure **first_child ) case CONSTR_0_1: case CONSTR_0_2: case ARR_WORDS: + case COMPACT_NFDATA: *first_child = NULL; return; diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c index 0aa3b28b2e..123fb9b64a 100644 --- a/rts/RtsStartup.c +++ b/rts/RtsStartup.c @@ -35,6 +35,7 @@ #include "FileLock.h" #include "LinkerInternals.h" #include "LibdwPool.h" +#include "sm/CNF.h" #if defined(PROFILING) # include "ProfHeap.h" diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index e66b4d81cb..ed9bdfb808 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -422,6 +422,15 @@ SymI_HasProto(stg_catchSTMzh) \ SymI_HasProto(stg_checkzh) \ SymI_HasProto(stg_clearCCSzh) \ + SymI_HasProto(stg_compactNewzh) \ + SymI_HasProto(stg_compactAppendzh) \ + SymI_HasProto(stg_compactResizzezh) \ + SymI_HasProto(stg_compactContainszh) \ + SymI_HasProto(stg_compactContainsAnyzh) \ + SymI_HasProto(stg_compactGetFirstBlockzh) \ + SymI_HasProto(stg_compactGetNextBlockzh) \ + SymI_HasProto(stg_compactAllocateBlockzh) \ + SymI_HasProto(stg_compactFixupPointerszh) \ SymI_HasProto(closure_flags) \ SymI_HasProto(cmp_thread) \ SymI_HasProto(createAdjustor) \ diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index 905f81ec2e..6c1edf70b5 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -614,6 +614,18 @@ INFO_TABLE_CONSTR(stg_MVAR_TSO_QUEUE,2,0,0,PRIM,"MVAR_TSO_QUEUE","MVAR_TSO_QUEUE { foreign "C" barf("MVAR_TSO_QUEUE object entered!") never returns; } /* ---------------------------------------------------------------------------- + COMPACT_NFDATA (a blob of data in NF with no outgoing pointers) + + Just return immediately because the structure is in NF already + ------------------------------------------------------------------------- */ + +INFO_TABLE( stg_COMPACT_NFDATA, 0, 0, COMPACT_NFDATA, "COMPACT_NFDATA", "COMPACT_NFDATA") + () +{ + return (); +} + +/* ---------------------------------------------------------------------------- CHARLIKE and INTLIKE closures. 
These are static representations of Chars and small Ints, so that diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c index 6c2e96414e..c729c1874f 100644 --- a/rts/sm/BlockAlloc.c +++ b/rts/sm/BlockAlloc.c @@ -795,6 +795,7 @@ countAllocdBlocks(bdescr *bd) W_ n; for (n=0; bd != NULL; bd=bd->link) { n += bd->blocks; + // hack for megablock groups: see (*1) above if (bd->blocks > BLOCKS_PER_MBLOCK) { n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK) diff --git a/rts/sm/CNF.c b/rts/sm/CNF.c new file mode 100644 index 0000000000..3c681c2ee2 --- /dev/null +++ b/rts/sm/CNF.c @@ -0,0 +1,1352 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team 1998-2014 + * + * GC support for immutable non-GCed structures, also known as Compact + * Normal Forms (CNF for short). This provides the RTS support for + * the 'compact' package and the Data.Compact module. + * + * ---------------------------------------------------------------------------*/ + +#define _GNU_SOURCE + +#include "PosixSource.h" +#include <string.h> +#include "Rts.h" +#include "RtsUtils.h" + +#include "Capability.h" +#include "GC.h" +#include "Storage.h" +#include "CNF.h" +#include "Hash.h" +#include "HeapAlloc.h" +#include "BlockAlloc.h" + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_LIMITS_H +#include <limits.h> +#endif +#include <dlfcn.h> +#include <endian.h> + +/** + * Note [Compact Normal Forms] + * + * A Compact Normal Form, is at its essence a chain of memory blocks (multiple + * of block allocator blocks) containing other closures inside. + * + * Each block starts with a header, of type StgCompactNFDataBlock, that points + * to the first and to the next block in the chain. Right after the header + * in the first block we have a closure of type StgCompactNFData, which holds + * compact-wide metadata. This closure is the Compact# that Cmm and Haskell + * see, and it's mostly a regular Haskell closure. + * + * Blocks are appended to the chain automatically as needed, or manually with a + * compactResize() call, which also adjust the size of automatically appended + * blocks. + * + * Objects can be appended to the block currently marked to the nursery, or any + * of the later blocks if the nursery block is too full to fit the entire + * object. For each block in the chain (which can be multiple block allocator + * blocks), we use the bdescr of its beginning to store how full it is. + * After an object is appended, it is scavenged for any outgoing pointers, + * and all pointed to objects are appended, recursively, in a manner similar + * to copying GC (further discussion in the note [Appending to a Compact]) + * + * We also flag each bdescr in each block allocator block of a compact + * (including those there were obtained as second or later from a single + * allocGroup(n) call) with the BF_COMPACT. This allows the GC to quickly + * realize that a given pointer is in a compact region, and trigger the + * CNF path. + * + * These two facts combined mean that in any compact block where some object + * begins bdescrs must be valid. For this simplicity this is achieved by + * restricting the maximum size of a compact block to 252 block allocator + * blocks (so that the total with the bdescr is one megablock). + * + * Compacts as a whole live in special list in each generation, where the + * list is held through the bd->link field of the bdescr of the StgCompactNFData + * closure (as for large objects). 
They live in a different list than large + * objects because the operation to free them is different (all blocks in + * a compact must be freed individually), and stats/sanity behavior are + * slightly different. This is also the reason that compact allocates memory + * using a special function instead of just calling allocate(). + * + * Compacts are also suitable for network or disk serialization, and to + * that extent they support a pointer fixup operation, which adjusts pointers + * from a previous layout of the chain in memory to the new allocation. + * This works by constructing a temporary binary search table (in the C heap) + * of the old block addresses (which are known from the block header), and + * then searching for each pointer in the table, and adjusting it. + * It relies on ABI compatibility and static linking (or no ASLR) because it + * does not attempt to reconstruct info tables, and uses info tables to detect + * pointers. In practice this means only the exact same binary should be + * used. + */ + +typedef enum { + ALLOCATE_APPEND, + ALLOCATE_NEW, + ALLOCATE_IMPORT_NEW, + ALLOCATE_IMPORT_APPEND, +} AllocateOp; + +static StgCompactNFDataBlock * +compactAllocateBlockInternal(Capability *cap, + StgWord aligned_size, + StgCompactNFDataBlock *first, + AllocateOp operation) +{ + StgCompactNFDataBlock *self; + bdescr *block, *head; + uint32_t n_blocks; + generation *g; + + n_blocks = aligned_size / BLOCK_SIZE; + + // Attempting to allocate an object larger than maxHeapSize + // should definitely be disallowed. (bug #1791) + if ((RtsFlags.GcFlags.maxHeapSize > 0 && + n_blocks >= RtsFlags.GcFlags.maxHeapSize) || + n_blocks >= HS_INT32_MAX) // avoid overflow when + // calling allocGroup() below + { + heapOverflow(); + // heapOverflow() doesn't exit (see #2592), but we aren't + // in a position to do a clean shutdown here: we + // either have to allocate the memory or exit now. + // Allocating the memory would be bad, because the user + // has requested that we not exceed maxHeapSize, so we + // just exit. 
+ stg_exit(EXIT_HEAPOVERFLOW); + } + + // It is imperative that first is the first block in the compact + // (or NULL if the compact does not exist yet) + // because the evacuate code does not update the generation of + // blocks other than the first (so we would get the statistics + // wrong and crash in Sanity) + if (first != NULL) { + block = Bdescr((P_)first); + g = block->gen; + } else { + g = g0; + } + + ACQUIRE_SM_LOCK; + block = allocGroup(n_blocks); + switch (operation) { + case ALLOCATE_NEW: + ASSERT (first == NULL); + ASSERT (g == g0); + dbl_link_onto(block, &g0->compact_objects); + g->n_compact_blocks += block->blocks; + g->n_new_large_words += aligned_size / sizeof(StgWord); + break; + + case ALLOCATE_IMPORT_NEW: + dbl_link_onto(block, &g0->compact_blocks_in_import); + /* fallthrough */ + case ALLOCATE_IMPORT_APPEND: + ASSERT (first == NULL); + ASSERT (g == g0); + g->n_compact_blocks_in_import += block->blocks; + g->n_new_large_words += aligned_size / sizeof(StgWord); + break; + + case ALLOCATE_APPEND: + g->n_compact_blocks += block->blocks; + if (g == g0) + g->n_new_large_words += aligned_size / sizeof(StgWord); + break; + + default: +#ifdef DEBUG + ASSERT(!"code should not be reached"); +#else + __builtin_unreachable(); +#endif + } + RELEASE_SM_LOCK; + + cap->total_allocated += aligned_size / sizeof(StgWord); + + self = (StgCompactNFDataBlock*) block->start; + self->self = self; + self->next = NULL; + + head = block; + initBdescr(head, g, g); + head->flags = BF_COMPACT; + for (block = head + 1, n_blocks --; n_blocks > 0; block++, n_blocks--) { + block->link = head; + block->blocks = 0; + block->flags = BF_COMPACT; + } + + return self; +} + +static inline StgCompactNFDataBlock * +compactGetFirstBlock(StgCompactNFData *str) +{ + return (StgCompactNFDataBlock*) ((W_)str - sizeof(StgCompactNFDataBlock)); +} + +static inline StgCompactNFData * +firstBlockGetCompact(StgCompactNFDataBlock *block) +{ + return (StgCompactNFData*) ((W_)block + sizeof(StgCompactNFDataBlock)); +} + +static void +freeBlockChain(StgCompactNFDataBlock *block) +{ + StgCompactNFDataBlock *next; + bdescr *bd; + + for ( ; block; block = next) { + next = block->next; + bd = Bdescr((StgPtr)block); + ASSERT((bd->flags & BF_EVACUATED) == 0); + freeGroup(bd); + } +} + +void +compactFree(StgCompactNFData *str) +{ + StgCompactNFDataBlock *block; + + block = compactGetFirstBlock(str); + freeBlockChain(block); +} + +void +compactMarkKnown(StgCompactNFData *str) +{ + bdescr *bd; + StgCompactNFDataBlock *block; + + block = compactGetFirstBlock(str); + for ( ; block; block = block->next) { + bd = Bdescr((StgPtr)block); + bd->flags |= BF_KNOWN; + } +} + +StgWord +countCompactBlocks(bdescr *outer) +{ + StgCompactNFDataBlock *block; + W_ count; + + count = 0; + while (outer) { + bdescr *inner; + + block = (StgCompactNFDataBlock*)(outer->start); + do { + inner = Bdescr((P_)block); + ASSERT (inner->flags & BF_COMPACT); + + count += inner->blocks; + block = block->next; + } while(block); + + outer = outer->link; + } + + return count; +} + +StgCompactNFData * +compactNew (Capability *cap, StgWord size) +{ + StgWord aligned_size; + StgCompactNFDataBlock *block; + StgCompactNFData *self; + bdescr *bd; + + aligned_size = BLOCK_ROUND_UP(size + sizeof(StgCompactNFDataBlock) + + sizeof(StgCompactNFDataBlock)); + if (aligned_size >= BLOCK_SIZE * BLOCKS_PER_MBLOCK) + aligned_size = BLOCK_SIZE * BLOCKS_PER_MBLOCK; + + block = compactAllocateBlockInternal(cap, aligned_size, NULL, + ALLOCATE_NEW); + + self = firstBlockGetCompact(block); + 
SET_INFO((StgClosure*)self, &stg_COMPACT_NFDATA_info); + self->totalDataW = aligned_size / sizeof(StgWord); + self->autoBlockW = aligned_size / sizeof(StgWord); + self->nursery = block; + self->last = block; + + block->owner = self; + + bd = Bdescr((P_)block); + bd->free = (StgPtr)((W_)self + sizeof(StgCompactNFData)); + ASSERT (bd->free == (StgPtr)self + sizeofW(StgCompactNFData)); + + self->totalW = bd->blocks * BLOCK_SIZE_W; + + return self; +} + +static StgCompactNFDataBlock * +compactAppendBlock (Capability *cap, + StgCompactNFData *str, + StgWord aligned_size) +{ + StgCompactNFDataBlock *block; + bdescr *bd; + + block = compactAllocateBlockInternal(cap, aligned_size, + compactGetFirstBlock(str), + ALLOCATE_APPEND); + block->owner = str; + block->next = NULL; + + ASSERT (str->last->next == NULL); + str->last->next = block; + str->last = block; + if (str->nursery == NULL) + str->nursery = block; + str->totalDataW += aligned_size / sizeof(StgWord); + + bd = Bdescr((P_)block); + bd->free = (StgPtr)((W_)block + sizeof(StgCompactNFDataBlock)); + ASSERT (bd->free == (StgPtr)block + sizeofW(StgCompactNFDataBlock)); + + str->totalW += bd->blocks * BLOCK_SIZE_W; + + return block; +} + +void +compactResize (Capability *cap, StgCompactNFData *str, StgWord new_size) +{ + StgWord aligned_size; + + aligned_size = BLOCK_ROUND_UP(new_size + sizeof(StgCompactNFDataBlock)); + if (aligned_size >= BLOCK_SIZE * BLOCKS_PER_MBLOCK) + aligned_size = BLOCK_SIZE * BLOCKS_PER_MBLOCK; + + str->autoBlockW = aligned_size / sizeof(StgWord); + + compactAppendBlock(cap, str, aligned_size); +} + +/* Note [Appending to a Compact] + + This is a simple reimplementation of the copying GC. + One could be tempted to reuse the actual GC code here, but he + would quickly find out that it would bring all the generational + GC complexity for no need at all. + + Plus, we don't need to scavenge/evacuate all kinds of weird + objects here, just constructors and primitives. Thunks are + expected to be evaluated before appending by the API layer + (in Haskell, above the primop which is implemented here). + Also, we have a different policy for large objects: instead + of relinking to the new large object list, we fully copy + them inside the compact and scavenge them normally. + + Note that if we allowed thunks and lazy evaluation the compact + would be a mutable object, which would create all sorts of + GC problems (besides, evaluating a thunk could exaust the + compact space or yield an invalid object, and we would have + no way to signal that to the user) + + Just like the real evacuate/scavenge pairs, we need to handle + object loops. We would want to use the same strategy of rewriting objects + with forwarding pointer, but in a real GC, at the end the + blocks from the old space are dropped (dropping all forwarding + pointers at the same time), which we can't do here as we don't + know all pointers to the objects being evacuated. Also, in parallel + we don't know which other threads are evaluating the thunks + that we just corrupted at the same time. + + So instead we use a hash table of "visited" objects, and add + the pointer as we copy it. To reduce the overhead, we also offer + a version of the API that does not preserve sharing (TODO). 
+ + You might be tempted to replace the objects with StdInd to + the object in the compact, but you would be wrong: the haskell + code assumes that objects in the heap only become more evaluated + (thunks to blackholes to inds to actual objects), and in + particular it assumes that if a pointer is tagged the object + is directly referenced and the values can be read directly, + without entering the closure. + + FIXME: any better idea than the hash table? +*/ + +static void +unroll_memcpy(StgPtr to, StgPtr from, StgWord size) +{ + for (; size > 0; size--) + *(to++) = *(from++); +} + +static rtsBool +allocate_in_compact (StgCompactNFDataBlock *block, StgWord sizeW, StgPtr *at) +{ + bdescr *bd; + StgPtr top; + StgPtr free; + + bd = Bdescr((StgPtr)block); + top = bd->start + BLOCK_SIZE_W * bd->blocks; + if (bd->free + sizeW > top) + return rtsFalse; + + free = bd->free; + bd->free += sizeW; + *at = free; + + return rtsTrue; +} + +static rtsBool +block_is_full (StgCompactNFDataBlock *block) +{ + bdescr *bd; + StgPtr top; + StgWord sizeW; + + bd = Bdescr((StgPtr)block); + top = bd->start + BLOCK_SIZE_W * bd->blocks; + + // We consider a block full if we could not fit + // an entire closure with 7 payload items + // (this leaves a slop of 64 bytes at most, but + // it avoids leaving a block almost empty to fit + // a large byte array, while at the same time + // it avoids trying to allocate a large closure + // in a chain of almost empty blocks) + sizeW = sizeofW(StgHeader) + 7; + return (bd->free + sizeW > top); +} + +static inline StgWord max(StgWord a, StgWord b) +{ + if (a > b) + return a; + else + return b; +} + +static rtsBool +allocate_loop (Capability *cap, + StgCompactNFData *str, + StgWord sizeW, + StgPtr *at) +{ + StgCompactNFDataBlock *block; + StgWord next_size; + + // try the nursery first + retry: + if (str->nursery != NULL) { + if (allocate_in_compact(str->nursery, sizeW, at)) + return rtsTrue; + + if (block_is_full (str->nursery)) { + str->nursery = str->nursery->next; + goto retry; + } + + // try subsequent blocks + block = str->nursery->next; + while (block != NULL) { + if (allocate_in_compact(block, sizeW, at)) + return rtsTrue; + + block = block->next; + } + } + + next_size = max(str->autoBlockW * sizeof(StgWord), + BLOCK_ROUND_UP(sizeW * sizeof(StgWord))); + if (next_size >= BLOCKS_PER_MBLOCK * BLOCK_SIZE) + next_size = BLOCKS_PER_MBLOCK * BLOCK_SIZE; + if (next_size < sizeW * sizeof(StgWord) + sizeof(StgCompactNFDataBlock)) + return rtsFalse; + + block = compactAppendBlock(cap, str, next_size); + ASSERT (str->nursery != NULL); + return allocate_in_compact(block, sizeW, at); +} + +static void +copy_tag (Capability *cap, + StgCompactNFData *str, + HashTable *hash, + StgClosure **p, + StgClosure *from, + StgWord tag) +{ + StgPtr to; + StgWord sizeW; + + sizeW = closure_sizeW(from); + + if (!allocate_loop(cap, str, sizeW, &to)) { + barf("Failed to copy object in compact, object too large\n"); + return; + } + + // unroll memcpy for small sizes because we can + // benefit of known alignment + // (32 extracted from my magic hat) + if (sizeW < 32) + unroll_memcpy(to, (StgPtr)from, sizeW); + else + memcpy(to, from, sizeW * sizeof(StgWord)); + + if (hash != NULL) + insertHashTable(hash, (StgWord)from, to); + + *p = TAG_CLOSURE(tag, (StgClosure*)to); +} + +STATIC_INLINE rtsBool +object_in_compact (StgCompactNFData *str, StgClosure *p) +{ + bdescr *bd; + + if (!HEAP_ALLOCED(p)) + return rtsFalse; + + bd = Bdescr((P_)p); + return (bd->flags & BF_COMPACT) != 0 && + objectGetCompact(p) == 
str; +} + +static void +simple_evacuate (Capability *cap, + StgCompactNFData *str, + HashTable *hash, + StgClosure **p) +{ + StgWord tag; + StgClosure *from; + void *already; + + from = *p; + tag = GET_CLOSURE_TAG(from); + from = UNTAG_CLOSURE(from); + + // If the object referenced is already in this compact + // (for example by reappending an object that was obtained + // by compactGetRoot) then do nothing + if (object_in_compact(str, from)) + return; + + switch (get_itbl(from)->type) { + case BLACKHOLE: + // If tag == 0, the indirectee is the TSO that claimed the tag + // + // Not useful and not NFData + from = ((StgInd*)from)->indirectee; + if (GET_CLOSURE_TAG(from) == 0) { + debugBelch("Claimed but not updated BLACKHOLE in Compact," + " not normal form"); + return; + } + + *p = from; + return simple_evacuate(cap, str, hash, p); + + case IND: + case IND_STATIC: + // follow chains of indirections, don't evacuate them + from = ((StgInd*)from)->indirectee; + *p = from; + // Evac.c uses a goto, but let's rely on a smart compiler + // and get readable code instead + return simple_evacuate(cap, str, hash, p); + + default: + // This object was evacuated already, return the existing + // pointer + if (hash != NULL && + (already = lookupHashTable (hash, (StgWord)from))) { + *p = TAG_CLOSURE(tag, (StgClosure*)already); + return; + } + + copy_tag(cap, str, hash, p, from, tag); + } +} + +static void +simple_scavenge_mut_arr_ptrs (Capability *cap, + StgCompactNFData *str, + HashTable *hash, + StgMutArrPtrs *a) +{ + StgPtr p, q; + + p = (StgPtr)&a->payload[0]; + q = (StgPtr)&a->payload[a->ptrs]; + for (; p < q; p++) { + simple_evacuate(cap, str, hash, (StgClosure**)p); + } +} + +static void +simple_scavenge_block (Capability *cap, + StgCompactNFData *str, + StgCompactNFDataBlock *block, + HashTable *hash, + StgPtr p) +{ + const StgInfoTable *info; + bdescr *bd = Bdescr((P_)block); + + while (p < bd->free) { + ASSERT (LOOKS_LIKE_CLOSURE_PTR(p)); + info = get_itbl((StgClosure*)p); + + switch (info->type) { + case CONSTR_1_0: + simple_evacuate(cap, str, hash, &((StgClosure*)p)->payload[0]); + case CONSTR_0_1: + p += sizeofW(StgClosure) + 1; + break; + + case CONSTR_2_0: + simple_evacuate(cap, str, hash, &((StgClosure*)p)->payload[1]); + case CONSTR_1_1: + simple_evacuate(cap, str, hash, &((StgClosure*)p)->payload[0]); + case CONSTR_0_2: + p += sizeofW(StgClosure) + 2; + break; + + case CONSTR: + case PRIM: + case CONSTR_NOCAF_STATIC: + case CONSTR_STATIC: + { + StgPtr end; + + end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs; + for (p = (P_)((StgClosure *)p)->payload; p < end; p++) { + simple_evacuate(cap, str, hash, (StgClosure **)p); + } + p += info->layout.payload.nptrs; + break; + } + + case ARR_WORDS: + p += arr_words_sizeW((StgArrBytes*)p); + break; + + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + simple_scavenge_mut_arr_ptrs(cap, str, hash, (StgMutArrPtrs*)p); + p += mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); + break; + + case SMALL_MUT_ARR_PTRS_FROZEN: + case SMALL_MUT_ARR_PTRS_FROZEN0: + { + uint32_t i; + StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs*)p; + + for (i = 0; i < arr->ptrs; i++) + simple_evacuate(cap, str, hash, &arr->payload[i]); + + p += sizeofW(StgSmallMutArrPtrs) + arr->ptrs; + break; + } + + case IND: + case BLACKHOLE: + case IND_STATIC: + // They get shortcircuited by simple_evaluate() + barf("IND/BLACKHOLE in Compact"); + break; + + default: + barf("Invalid non-NFData closure in Compact\n"); + } + } +} + +static void +scavenge_loop (Capability *cap, + 
StgCompactNFData *str, + StgCompactNFDataBlock *first_block, + HashTable *hash, + StgPtr p) +{ + // Scavenge the first block + simple_scavenge_block(cap, str, first_block, hash, p); + + // Note: simple_scavenge_block can change str->last, which + // changes this check, in addition to iterating through + while (first_block != str->last) { + // we can't allocate in blocks that were already scavenged + // so push the nursery forward + if (str->nursery == first_block) + str->nursery = str->nursery->next; + + first_block = first_block->next; + simple_scavenge_block(cap, str, first_block, hash, + (P_)first_block + sizeofW(StgCompactNFDataBlock)); + } +} + +#ifdef DEBUG +static rtsBool +objectIsWHNFData (StgClosure *what) +{ + switch (get_itbl(what)->type) { + case CONSTR: + case CONSTR_1_0: + case CONSTR_0_1: + case CONSTR_2_0: + case CONSTR_1_1: + case CONSTR_0_2: + case CONSTR_STATIC: + case CONSTR_NOCAF_STATIC: + case ARR_WORDS: + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + case SMALL_MUT_ARR_PTRS_FROZEN: + case SMALL_MUT_ARR_PTRS_FROZEN0: + return rtsTrue; + + case IND: + case BLACKHOLE: + return objectIsWHNFData(UNTAG_CLOSURE(((StgInd*)what)->indirectee)); + + default: + return rtsFalse; + } +} + +static rtsBool +verify_mut_arr_ptrs (StgCompactNFData *str, + StgMutArrPtrs *a) +{ + StgPtr p, q; + + p = (StgPtr)&a->payload[0]; + q = (StgPtr)&a->payload[a->ptrs]; + for (; p < q; p++) { + if (!object_in_compact(str, UNTAG_CLOSURE(*(StgClosure**)p))) + return rtsFalse; + } + + return rtsTrue; +} + +static rtsBool +verify_consistency_block (StgCompactNFData *str, StgCompactNFDataBlock *block) +{ + bdescr *bd; + StgPtr p; + const StgInfoTable *info; + StgClosure *q; + + p = (P_)firstBlockGetCompact(block); + bd = Bdescr((P_)block); + while (p < bd->free) { + q = (StgClosure*)p; + + if (!LOOKS_LIKE_CLOSURE_PTR(q)) + return rtsFalse; + + info = get_itbl(q); + switch (info->type) { + case CONSTR_1_0: + if (!object_in_compact(str, UNTAG_CLOSURE(q->payload[0]))) + return rtsFalse; + case CONSTR_0_1: + p += sizeofW(StgClosure) + 1; + break; + + case CONSTR_2_0: + if (!object_in_compact(str, UNTAG_CLOSURE(q->payload[1]))) + return rtsFalse; + case CONSTR_1_1: + if (!object_in_compact(str, UNTAG_CLOSURE(q->payload[0]))) + return rtsFalse; + case CONSTR_0_2: + p += sizeofW(StgClosure) + 2; + break; + + case CONSTR: + case PRIM: + case CONSTR_STATIC: + case CONSTR_NOCAF_STATIC: + { + uint32_t i; + + for (i = 0; i < info->layout.payload.ptrs; i++) + if (!object_in_compact(str, UNTAG_CLOSURE(q->payload[i]))) + return rtsFalse; + + p += sizeofW(StgClosure) + info->layout.payload.ptrs + + info->layout.payload.nptrs; + break; + } + + case ARR_WORDS: + p += arr_words_sizeW((StgArrBytes*)p); + break; + + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + if (!verify_mut_arr_ptrs(str, (StgMutArrPtrs*)p)) + return rtsFalse; + p += mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); + break; + + case SMALL_MUT_ARR_PTRS_FROZEN: + case SMALL_MUT_ARR_PTRS_FROZEN0: + { + uint32_t i; + StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs*)p; + + for (i = 0; i < arr->ptrs; i++) + if (!object_in_compact(str, UNTAG_CLOSURE(arr->payload[i]))) + return rtsFalse; + + p += sizeofW(StgSmallMutArrPtrs) + arr->ptrs; + break; + } + + case COMPACT_NFDATA: + p += sizeofW(StgCompactNFData); + break; + + default: + return rtsFalse; + } + } + + return rtsTrue; +} + +static rtsBool +verify_consistency_loop (StgCompactNFData *str) +{ + StgCompactNFDataBlock *block; + + block = compactGetFirstBlock(str); + do { + if 
(!verify_consistency_block(str, block)) + return rtsFalse; + block = block->next; + } while (block && block->owner); + + return rtsTrue; +} +#endif + + +StgPtr +compactAppend (Capability *cap, + StgCompactNFData *str, + StgClosure *what, + StgWord share) +{ + StgClosure *root; + StgClosure *tagged_root; + HashTable *hash; + StgCompactNFDataBlock *evaced_block; + + ASSERT(objectIsWHNFData(UNTAG_CLOSURE(what))); + + tagged_root = what; + simple_evacuate(cap, str, NULL, &tagged_root); + + root = UNTAG_CLOSURE(tagged_root); + evaced_block = objectGetCompactBlock(root); + + if (share) { + hash = allocHashTable (); + insertHashTable(hash, (StgWord)UNTAG_CLOSURE(what), root); + } else + hash = NULL; + + scavenge_loop(cap, str, evaced_block, hash, (P_)root); + + if (share) + freeHashTable(hash, NULL); + + ASSERT(verify_consistency_loop(str)); + + return (StgPtr)tagged_root; +} + +StgWord +compactContains (StgCompactNFData *str, StgPtr what) +{ + bdescr *bd; + + // This check is the reason why this needs to be + // implemented in C instead of (possibly faster) Cmm + if (!HEAP_ALLOCED (what)) + return 0; + + // Note that we don't care about tags, they are eaten + // away by the Bdescr operation anyway + bd = Bdescr((P_)what); + return (bd->flags & BF_COMPACT) != 0 && + (str == NULL || objectGetCompact((StgClosure*)what) == str); +} + +StgCompactNFDataBlock * +compactAllocateBlock(Capability *cap, + StgWord size, + StgCompactNFDataBlock *previous) +{ + StgWord aligned_size; + StgCompactNFDataBlock *block; + bdescr *bd; + + aligned_size = BLOCK_ROUND_UP(size); + + // We do not link the new object into the generation ever + // - we cannot let the GC know about this object until we're done + // importing it and we have fixed up all info tables and stuff + // + // but we do update n_compact_blocks, otherwise memInventory() + // in Sanity will think we have a memory leak, because it compares + // the blocks he knows about with the blocks obtained by the + // block allocator + // (if by chance a memory leak does happen due to a bug somewhere + // else, memInventory will also report that all compact blocks + // associated with this compact are leaked - but they are not really, + // we have a pointer to them and we're not losing track of it, it's + // just we can't use the GC until we're done with the import) + // + // (That btw means that the high level import code must be careful + // not to lose the pointer, so don't use the primops directly + // unless you know what you're doing!) + + // Other trickery: we pass NULL as first, which means our blocks + // are always in generation 0 + // This is correct because the GC has never seen the blocks so + // it had no chance of promoting them + + block = compactAllocateBlockInternal(cap, aligned_size, NULL, + previous != NULL ? 
ALLOCATE_IMPORT_APPEND : ALLOCATE_IMPORT_NEW); + if (previous != NULL) + previous->next = block; + + bd = Bdescr((P_)block); + bd->free = (P_)((W_)bd->start + size); + + return block; +} + +STATIC_INLINE rtsBool +any_needs_fixup(StgCompactNFDataBlock *block) +{ + // ->next pointers are always valid, even if some blocks were + // not allocated where we want them, because compactAllocateAt() + // will take care to adjust them + + do { + if (block->self != block) + return rtsTrue; + block = block->next; + } while (block && block->owner); + + return rtsFalse; +} + +#ifdef DEBUG +static void +spew_failing_pointer(StgWord *fixup_table, uint32_t count, StgWord address) +{ + uint32_t i; + StgWord key, value; + StgCompactNFDataBlock *block; + bdescr *bd; + StgWord size; + + debugBelch("Failed to adjust 0x%lx. Block dump follows...\n", + address); + + for (i = 0; i < count; i++) { + key = fixup_table [2 * i]; + value = fixup_table [2 * i + 1]; + + block = (StgCompactNFDataBlock*)value; + bd = Bdescr((P_)block); + size = (W_)bd->free - (W_)bd->start; + + debugBelch("%d: was 0x%lx-0x%lx, now 0x%lx-0x%lx\n", i, + key, key+size, value, value+size); + } +} +#endif + +STATIC_INLINE StgCompactNFDataBlock * +find_pointer(StgWord *fixup_table, uint32_t count, StgClosure *q) +{ + StgWord address = (W_)q; + uint32_t a, b, c; + StgWord key, value; + bdescr *bd; + + a = 0; + b = count; + while (a < b-1) { + c = (a+b)/2; + + key = fixup_table[c * 2]; + value = fixup_table[c * 2 + 1]; + + if (key > address) + b = c; + else + a = c; + } + + // three cases here: 0, 1 or 2 blocks to check + for ( ; a < b; a++) { + key = fixup_table[a * 2]; + value = fixup_table[a * 2 + 1]; + + if (key > address) + goto fail; + + bd = Bdescr((P_)value); + + if (key + bd->blocks * BLOCK_SIZE <= address) + goto fail; + + return (StgCompactNFDataBlock*)value; + } + + fail: + // We should never get here + +#ifdef DEBUG + spew_failing_pointer(fixup_table, count, address); +#endif + return NULL; +} + +static rtsBool +fixup_one_pointer(StgWord *fixup_table, uint32_t count, StgClosure **p) +{ + StgWord tag; + StgClosure *q; + StgCompactNFDataBlock *block; + + q = *p; + tag = GET_CLOSURE_TAG(q); + q = UNTAG_CLOSURE(q); + + block = find_pointer(fixup_table, count, q); + if (block == NULL) + return rtsFalse; + if (block == block->self) + return rtsTrue; + + q = (StgClosure*)((W_)q - (W_)block->self + (W_)block); + *p = TAG_CLOSURE(tag, q); + + return rtsTrue; +} + +static rtsBool +fixup_mut_arr_ptrs (StgWord *fixup_table, + uint32_t count, + StgMutArrPtrs *a) +{ + StgPtr p, q; + + p = (StgPtr)&a->payload[0]; + q = (StgPtr)&a->payload[a->ptrs]; + for (; p < q; p++) { + if (!fixup_one_pointer(fixup_table, count, (StgClosure**)p)) + return rtsFalse; + } + + return rtsTrue; +} + +static rtsBool +fixup_block(StgCompactNFDataBlock *block, StgWord *fixup_table, uint32_t count) +{ + const StgInfoTable *info; + bdescr *bd; + StgPtr p; + + bd = Bdescr((P_)block); + p = bd->start + sizeofW(StgCompactNFDataBlock); + while (p < bd->free) { + ASSERT (LOOKS_LIKE_CLOSURE_PTR(p)); + info = get_itbl((StgClosure*)p); + + switch (info->type) { + case CONSTR_1_0: + if (!fixup_one_pointer(fixup_table, count, + &((StgClosure*)p)->payload[0])) + return rtsFalse; + case CONSTR_0_1: + p += sizeofW(StgClosure) + 1; + break; + + case CONSTR_2_0: + if (!fixup_one_pointer(fixup_table, count, + &((StgClosure*)p)->payload[1])) + return rtsFalse; + case CONSTR_1_1: + if (!fixup_one_pointer(fixup_table, count, + &((StgClosure*)p)->payload[0])) + return rtsFalse; + case 
CONSTR_0_2: + p += sizeofW(StgClosure) + 2; + break; + + case CONSTR: + case PRIM: + case CONSTR_STATIC: + case CONSTR_NOCAF_STATIC: + { + StgPtr end; + + end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs; + for (p = (P_)((StgClosure *)p)->payload; p < end; p++) { + if (!fixup_one_pointer(fixup_table, count, (StgClosure **)p)) + return rtsFalse; + } + p += info->layout.payload.nptrs; + break; + } + + case ARR_WORDS: + p += arr_words_sizeW((StgArrBytes*)p); + break; + + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + fixup_mut_arr_ptrs(fixup_table, count, (StgMutArrPtrs*)p); + p += mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); + break; + + case SMALL_MUT_ARR_PTRS_FROZEN: + case SMALL_MUT_ARR_PTRS_FROZEN0: + { + uint32_t i; + StgSmallMutArrPtrs *arr = (StgSmallMutArrPtrs*)p; + + for (i = 0; i < arr->ptrs; i++) { + if (!fixup_one_pointer(fixup_table, count, + &arr->payload[i])) + return rtsFalse; + } + + p += sizeofW(StgSmallMutArrPtrs) + arr->ptrs; + break; + } + + case COMPACT_NFDATA: + if (p == (bd->start + sizeofW(StgCompactNFDataBlock))) { + // Ignore the COMPACT_NFDATA header + // (it will be fixed up later) + p += sizeofW(StgCompactNFData); + break; + } + + // fall through + + default: + debugBelch("Invalid non-NFData closure (type %d) in Compact\n", + info->type); + return rtsFalse; + } + } + + return rtsTrue; +} + +static int +cmp_fixup_table_item (const void *e1, const void *e2) +{ + const StgWord *w1 = e1; + const StgWord *w2 = e2; + + return *w1 - *w2; +} + +static StgWord * +build_fixup_table (StgCompactNFDataBlock *block, uint32_t *pcount) +{ + uint32_t count; + StgCompactNFDataBlock *tmp; + StgWord *table; + + count = 0; + tmp = block; + do { + count++; + tmp = tmp->next; + } while(tmp && tmp->owner); + + table = stgMallocBytes(sizeof(StgWord) * 2 * count, "build_fixup_table"); + + count = 0; + do { + table[count * 2] = (W_)block->self; + table[count * 2 + 1] = (W_)block; + count++; + block = block->next; + } while(block && block->owner); + + qsort(table, count, sizeof(StgWord) * 2, cmp_fixup_table_item); + + *pcount = count; + return table; +} + +static rtsBool +fixup_loop(StgCompactNFDataBlock *block, StgClosure **proot) +{ + StgWord *table; + rtsBool ok; + uint32_t count; + + table = build_fixup_table (block, &count); + + do { + if (!fixup_block(block, table, count)) { + ok = rtsFalse; + goto out; + } + + block = block->next; + } while(block && block->owner); + + ok = fixup_one_pointer(table, count, proot); + + out: + stgFree(table); + return ok; +} + +static void +fixup_early(StgCompactNFData *str, StgCompactNFDataBlock *block) +{ + StgCompactNFDataBlock *last; + + do { + last = block; + block = block->next; + } while(block); + + str->last = last; +} + +static void +fixup_late(StgCompactNFData *str, StgCompactNFDataBlock *block) +{ + StgCompactNFDataBlock *nursery; + bdescr *bd; + StgWord totalW; + StgWord totalDataW; + + nursery = block; + totalW = 0; + totalDataW = 0; + do { + block->self = block; + + bd = Bdescr((P_)block); + totalW += bd->blocks * BLOCK_SIZE_W; + + if (block->owner != NULL) { + if (bd->free != bd->start) + nursery = block; + block->owner = str; + totalDataW += bd->blocks * BLOCK_SIZE_W; + } + + block = block->next; + } while(block); + + str->nursery = nursery; + str->totalW = totalW; + str->totalDataW = totalDataW; +} + +static StgClosure * +maybe_fixup_internal_pointers (StgCompactNFDataBlock *block, + StgClosure *root) +{ + rtsBool ok; + StgClosure **proot; + + // Check for fast path + if (!any_needs_fixup(block)) + return root; + + 
debugBelch("Compact imported at the wrong address, will fix up" + " internal pointers\n"); + + // I am PROOT! + proot = &root; + + ok = fixup_loop(block, proot); + if (!ok) + *proot = NULL; + + return *proot; +} + +StgPtr +compactFixupPointers(StgCompactNFData *str, + StgClosure *root) +{ + StgCompactNFDataBlock *block; + bdescr *bd; + StgWord total_blocks; + + block = compactGetFirstBlock(str); + + fixup_early(str, block); + + root = maybe_fixup_internal_pointers(block, root); + + // Do the late fixup even if we did not fixup all + // internal pointers, we need that for GC and Sanity + fixup_late(str, block); + + // Now we're ready to let the GC, Sanity, the profiler + // etc. know about this object + bd = Bdescr((P_)block); + + total_blocks = str->totalW / BLOCK_SIZE_W; + + ACQUIRE_SM_LOCK; + ASSERT (bd->gen == g0); + ASSERT (g0->n_compact_blocks_in_import >= total_blocks); + g0->n_compact_blocks_in_import -= total_blocks; + g0->n_compact_blocks += total_blocks; + dbl_link_remove(bd, &g0->compact_blocks_in_import); + dbl_link_onto(bd, &g0->compact_objects); + RELEASE_SM_LOCK; + +#if DEBUG + if (root) + verify_consistency_loop(str); +#endif + + return (StgPtr)root; +} diff --git a/rts/sm/CNF.h b/rts/sm/CNF.h new file mode 100644 index 0000000000..b34d9c96c1 --- /dev/null +++ b/rts/sm/CNF.h @@ -0,0 +1,71 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team 1998-2014 + * + * GC support for immutable non-GCed structures + * + * Documentation on the architecture of the Garbage Collector can be + * found in the online commentary: + * + * http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC + * + * ---------------------------------------------------------------------------*/ + +#ifndef SM_CNF_H +#define SM_CNF_H + +#include "BeginPrivate.h" + +void initCompact (void); +void exitCompact (void); + +StgCompactNFData *compactNew (Capability *cap, + StgWord size); +StgPtr compactAppend(Capability *cap, + StgCompactNFData *str, + StgClosure *what, + StgWord share); +void compactResize(Capability *cap, + StgCompactNFData *str, + StgWord new_size); +void compactFree (StgCompactNFData *str); +void compactMarkKnown(StgCompactNFData *str); +StgWord compactContains(StgCompactNFData *str, + StgPtr what); +StgWord countCompactBlocks(bdescr *outer); + +StgCompactNFDataBlock *compactAllocateBlock(Capability *cap, + StgWord size, + StgCompactNFDataBlock *previous); +StgPtr compactFixupPointers(StgCompactNFData *str, + StgClosure *root); + +INLINE_HEADER StgCompactNFDataBlock *objectGetCompactBlock (StgClosure *closure); +INLINE_HEADER StgCompactNFDataBlock *objectGetCompactBlock (StgClosure *closure) +{ + bdescr *object_block, *head_block; + + object_block = Bdescr((StgPtr)closure); + + ASSERT ((object_block->flags & BF_COMPACT) != 0); + + if (object_block->blocks == 0) + head_block = object_block->link; + else + head_block = object_block; + + ASSERT ((head_block->flags & BF_COMPACT) != 0); + + return (StgCompactNFDataBlock*)(head_block->start); +} + +INLINE_HEADER StgCompactNFData *objectGetCompact (StgClosure *closure); +INLINE_HEADER StgCompactNFData *objectGetCompact (StgClosure *closure) +{ + StgCompactNFDataBlock *block = objectGetCompactBlock (closure); + return block->owner; +} + +#include "EndPrivate.h" + +#endif // SM_COMPACT_H diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c index ec178e91ef..3528fabb7b 100644 --- a/rts/sm/Compact.c +++ b/rts/sm/Compact.c @@ -470,6 +470,7 @@ update_fwd_large( bdescr *bd ) switch (info->type) { case 
ARR_WORDS: + case COMPACT_NFDATA: // nothing to follow continue; diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index e53461de63..1f9c5cc8cd 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -25,6 +25,7 @@ #include "Prelude.h" #include "Trace.h" #include "LdvProfile.h" +#include "CNF.h" #if defined(PROF_SPIN) && defined(THREADED_RTS) && defined(PARALLEL_GC) StgWord64 whitehole_spin = 0; @@ -245,7 +246,7 @@ copy(StgClosure **p, const StgInfoTable *info, This just consists of removing the object from the (doubly-linked) gen->large_objects list, and linking it on to the (singly-linked) - gen->new_large_objects list, from where it will be scavenged later. + gct->todo_large_objects list, from where it will be scavenged later. Convention: bd->flags has BF_EVACUATED set for a large object that has been evacuated, or unset otherwise. @@ -305,12 +306,13 @@ evacuate_large(StgPtr p) bd->flags |= BF_EVACUATED; initBdescr(bd, new_gen, new_gen->to); - // If this is a block of pinned objects, we don't have to scan - // these objects, because they aren't allowed to contain any + // If this is a block of pinned or compact objects, we don't have to scan + // these objects, because they aren't allowed to contain any outgoing // pointers. For these blocks, we skip the scavenge stage and put // them straight on the scavenged_large_objects list. if (bd->flags & BF_PINNED) { ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS); + if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); } dbl_link_onto(bd, &new_gen->scavenged_large_objects); new_gen->n_scavenged_large_blocks += bd->blocks; @@ -356,6 +358,110 @@ evacuate_static_object (StgClosure **link_field, StgClosure *q) } /* ---------------------------------------------------------------------------- + Evacuate an object inside a CompactNFData + + Don't actually evacuate the object. Instead, evacuate the structure + (which is a large object, so it is just relinked onto the new list + of large objects of the generation). + + It is assumed that objects in the struct live in the same generation + as the struct itself all the time. + ------------------------------------------------------------------------- */ +STATIC_INLINE void +evacuate_compact (StgPtr p) +{ + StgCompactNFData *str; + bdescr *bd; + generation *gen, *new_gen; + uint32_t gen_no, new_gen_no; + + str = objectGetCompact((StgClosure*)p); + ASSERT(get_itbl((StgClosure*)str)->type == COMPACT_NFDATA); + + bd = Bdescr((StgPtr)str); + gen_no = bd->gen_no; + + // already evacuated? (we're about to do the same check, + // but we avoid taking the spin-lock) + if (bd->flags & BF_EVACUATED) { + /* Don't forget to set the gct->failed_to_evac flag if we didn't get + * the desired destination (see comments in evacuate()). + */ + if (gen_no < gct->evac_gen_no) { + gct->failed_to_evac = rtsTrue; + TICK_GC_FAILED_PROMOTION(); + } + return; + } + + gen = bd->gen; + gen_no = bd->gen_no; + ACQUIRE_SPIN_LOCK(&gen->sync); + + // already evacuated? + if (bd->flags & BF_EVACUATED) { + /* Don't forget to set the gct->failed_to_evac flag if we didn't get + * the desired destination (see comments in evacuate()). 
+ */ + if (gen_no < gct->evac_gen_no) { + gct->failed_to_evac = rtsTrue; + TICK_GC_FAILED_PROMOTION(); + } + RELEASE_SPIN_LOCK(&gen->sync); + return; + } + + // remove from large_object list + if (bd->u.back) { + bd->u.back->link = bd->link; + } else { // first object in the list + gen->compact_objects = bd->link; + } + if (bd->link) { + bd->link->u.back = bd->u.back; + } + + /* link it on to the evacuated compact object list of the destination gen + */ + new_gen_no = bd->dest_no; + + if (new_gen_no < gct->evac_gen_no) { + if (gct->eager_promotion) { + new_gen_no = gct->evac_gen_no; + } else { + gct->failed_to_evac = rtsTrue; + } + } + + new_gen = &generations[new_gen_no]; + + // Note: for speed we only update the generation of the first block here + // This means that bdescr of subsequent blocks will think they are in + // the wrong generation + // (This should not be a problem because there is no code that checks + // for that - the only code touching the generation of the block is + // in the GC, and that should never see blocks other than the first) + bd->flags |= BF_EVACUATED; + initBdescr(bd, new_gen, new_gen->to); + + if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); } + dbl_link_onto(bd, &new_gen->live_compact_objects); + new_gen->n_live_compact_blocks += str->totalW / BLOCK_SIZE_W; + if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync); } + + RELEASE_SPIN_LOCK(&gen->sync); + + // Note: the object did not move in memory, because it lives + // in pinned (BF_COMPACT) allocation, so we do not need to rewrite it + // or muck with forwarding pointers + // Also there is no tag to worry about on the struct (tags are used + // for constructors and functions, but a struct is neither). There + // might be a tag on the object pointer, but again we don't change + // the pointer because we don't move the object so we don't need to + // rewrite the tag. +} + +/* ---------------------------------------------------------------------------- Evacuate This is called (eventually) for every live object in the system. @@ -459,8 +565,7 @@ loop: bd = Bdescr((P_)q); - if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED)) != 0) { - + if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED | BF_COMPACT)) != 0) { // pointer into to-space: just return it. It might be a pointer // into a generation that we aren't collecting (> N), or it // might just be a pointer into to-space. The latter doesn't @@ -478,6 +583,15 @@ loop: return; } + // Check for compact before checking for large, this allows doing the + // right thing for objects that are half way in the middle of the first + // block of a compact (and would be treated as large objects even though + // they are not) + if (bd->flags & BF_COMPACT) { + evacuate_compact((P_)q); + return; + } + /* evacuate large objects by re-linking them onto a different list. 
*/ if (bd->flags & BF_LARGE) { @@ -735,6 +849,12 @@ loop: copy(p,info,q,sizeofW(StgTRecChunk),gen_no); return; + case COMPACT_NFDATA: + // CompactNFData objects are at least one block plus the header + // so they are larger than the large_object_threshold (80% of + // block size) and never copied by value + barf("evacuate: compact nfdata is not large"); + return; default: barf("evacuate: strange closure type %d", (int)(INFO_PTR_TO_STRUCT(info)->type)); } diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 54798719a4..7796f30965 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -47,6 +47,7 @@ #include "RaiseAsync.h" #include "Stable.h" #include "CheckUnload.h" +#include "CNF.h" #include <string.h> // for memset() #include <unistd.h> @@ -592,6 +593,23 @@ GarbageCollect (uint32_t collect_gen, gen->n_large_blocks = gen->n_scavenged_large_blocks; gen->n_large_words = countOccupied(gen->large_objects); gen->n_new_large_words = 0; + + /* COMPACT_NFDATA. The currently live compacts are chained + * to live_compact_objects, quite like large objects. And + * objects left on the compact_objects list are dead. + * + * We don't run a simple freeChain because want to give the + * CNF module some chance to free memory that freeChain would + * not see (namely blocks appended to a CNF through a compactResize). + * + * See Note [Compact Normal Forms] for details. + */ + for (bd = gen->compact_objects; bd; bd = next) { + next = bd->link; + compactFree(((StgCompactNFDataBlock*)bd->start)->owner); + } + gen->compact_objects = gen->live_compact_objects; + gen->n_compact_blocks = gen->n_live_compact_blocks; } else // for generations > N { @@ -605,15 +623,27 @@ GarbageCollect (uint32_t collect_gen, gen->n_large_words += bd->free - bd->start; } + // And same for compacts + for (bd = gen->live_compact_objects; bd; bd = next) { + next = bd->link; + dbl_link_onto(bd, &gen->compact_objects); + } + // add the new blocks we promoted during this GC gen->n_large_blocks += gen->n_scavenged_large_blocks; + gen->n_compact_blocks += gen->n_live_compact_blocks; } ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); ASSERT(countOccupied(gen->large_objects) == gen->n_large_words); + // We can run the same assertion on compact objects because there + // is memory "the GC doesn't see" (directly), but which is still + // accounted in gen->n_compact_blocks gen->scavenged_large_objects = NULL; gen->n_scavenged_large_blocks = 0; + gen->live_compact_objects = NULL; + gen->n_live_compact_blocks = 0; // Count "live" data live_words += genLiveWords(gen); @@ -1207,6 +1237,8 @@ prepare_collected_gen (generation *gen) // initialise the large object queues. ASSERT(gen->scavenged_large_objects == NULL); ASSERT(gen->n_scavenged_large_blocks == 0); + ASSERT(gen->live_compact_objects == NULL); + ASSERT(gen->n_live_compact_blocks == 0); // grab all the partial blocks stashed in the gc_thread workspaces and // move them to the old_blocks list of this gen. 
@@ -1246,6 +1278,11 @@ prepare_collected_gen (generation *gen) bd->flags &= ~BF_EVACUATED; } + // mark the compact objects as from-space + for (bd = gen->compact_objects; bd; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; + } + // for a compacted generation, we need to allocate the bitmap if (gen->mark) { StgWord bitmap_size; // in bytes @@ -1472,7 +1509,8 @@ resize_generations (void) words = oldest_gen->n_words; } live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W + - oldest_gen->n_large_blocks; + oldest_gen->n_large_blocks + + oldest_gen->n_compact_blocks; // default max size for all generations except zero size = stg_max(live * RtsFlags.GcFlags.oldGenFactor, diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index 62d53e046d..6f6b15c4e8 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -28,6 +28,7 @@ #include "Printer.h" #include "Arena.h" #include "RetainerProfile.h" +#include "CNF.h" /* ----------------------------------------------------------------------------- Forward decls. @@ -424,7 +425,7 @@ checkClosure( const StgClosure* p ) } default: - barf("checkClosure (closure type %d)", info->type); + barf("checkClosure (closure type %d)", info->type); } } @@ -485,6 +486,37 @@ checkLargeObjects(bdescr *bd) } static void +checkCompactObjects(bdescr *bd) +{ + // Compact objects are similar to large objects, + // but they have a StgCompactNFDataBlock at the beginning, + // before the actual closure + + for ( ; bd != NULL; bd = bd->link) { + StgCompactNFDataBlock *block, *last; + StgCompactNFData *str; + StgWord totalW; + + ASSERT (bd->flags & BF_COMPACT); + + block = (StgCompactNFDataBlock*)bd->start; + str = block->owner; + ASSERT ((W_)str == (W_)block + sizeof(StgCompactNFDataBlock)); + + totalW = 0; + for ( ; block ; block = block->next) { + last = block; + ASSERT (block->owner == str); + + totalW += Bdescr((P_)block)->blocks * BLOCK_SIZE_W; + } + + ASSERT (str->totalW == totalW); + ASSERT (str->last == last); + } +} + +static void checkSTACK (StgStack *stack) { StgPtr sp = stack->sp; @@ -715,6 +747,7 @@ static void checkGeneration (generation *gen, } checkLargeObjects(gen->large_objects); + checkCompactObjects(gen->compact_objects); } /* Full heap sanity check. 
*/ @@ -744,6 +777,14 @@ void checkSanity (rtsBool after_gc, rtsBool major_gc) } } +static void +markCompactBlocks(bdescr *bd) +{ + for (; bd != NULL; bd = bd->link) { + compactMarkKnown(((StgCompactNFDataBlock*)bd->start)->owner); + } +} + // If memInventory() calculates that we have a memory leak, this // function will try to find the block(s) that are leaking by marking // all the ones that we know about, and search through memory to find @@ -764,6 +805,7 @@ findMemoryLeak (void) } markBlocks(generations[g].blocks); markBlocks(generations[g].large_objects); + markCompactBlocks(generations[g].compact_objects); } for (i = 0; i < n_nurseries; i++) { @@ -833,8 +875,11 @@ genBlocks (generation *gen) { ASSERT(countBlocks(gen->blocks) == gen->n_blocks); ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); + ASSERT(countCompactBlocks(gen->compact_objects) == gen->n_compact_blocks); + ASSERT(countCompactBlocks(gen->compact_blocks_in_import) == gen->n_compact_blocks_in_import); return gen->n_blocks + gen->n_old_blocks + - countAllocdBlocks(gen->large_objects); + countAllocdBlocks(gen->large_objects) + + gen->n_compact_blocks + gen->n_compact_blocks_in_import; } void diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index 18a30d3bdf..1549df5021 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -795,6 +795,13 @@ scavenge_block (bdescr *bd) break; } + case COMPACT_NFDATA: + // CompactNFData blocks live in compact lists, which we don't + // scavenge, because there nothing to scavenge in them + // so we should never ever see them + barf("scavenge: found unexpected Compact structure"); + break; + default: barf("scavenge: unimplemented/strange closure type %d @ %p", info->type, p); @@ -1953,7 +1960,7 @@ scavenge_large (gen_workspace *ws) // take this object *off* the large objects list and put it on // the scavenged large objects list. This is so that we can - // treat new_large_objects as a stack and push new objects on + // treat todo_large_objects as a stack and push new objects on // the front when evacuating. ws->todo_large_objects = bd->link; diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 7c41f8c64b..3f8889658f 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -100,8 +100,12 @@ initGeneration (generation *gen, int g) gen->n_large_blocks = 0; gen->n_large_words = 0; gen->n_new_large_words = 0; + gen->compact_objects = NULL; + gen->n_compact_blocks = 0; gen->scavenged_large_objects = NULL; gen->n_scavenged_large_blocks = 0; + gen->live_compact_objects = NULL; + gen->n_live_compact_blocks = 0; gen->mark = 0; gen->compact = 0; gen->bitmap = NULL; @@ -1208,12 +1212,13 @@ W_ countOccupied (bdescr *bd) W_ genLiveWords (generation *gen) { - return gen->n_words + gen->n_large_words; + return gen->n_words + gen->n_large_words + + gen->n_compact_blocks * BLOCK_SIZE_W; } W_ genLiveBlocks (generation *gen) { - return gen->n_blocks + gen->n_large_blocks; + return gen->n_blocks + gen->n_large_blocks + gen->n_compact_blocks; } W_ gcThreadLiveWords (uint32_t i, uint32_t g) @@ -1266,7 +1271,8 @@ calcNeeded (rtsBool force_major, memcount *blocks_needed) gen = &generations[g]; blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?) - + gen->n_large_blocks; + + gen->n_large_blocks + + gen->n_compact_blocks; // we need at least this much space needed += blocks; |
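
The block-chain layout described in Note [Compact Normal Forms] in rts/sm/CNF.c can be summarised with a small standalone helper. This is only an illustrative sketch, not part of the patch: dumpCompactChain is a hypothetical debugging aid, but it uses only fields and helpers that the patch itself relies on (StgCompactNFDataBlock's next/owner fields, Bdescr, bd->free and bd->start), and it mirrors the arithmetic in compactGetFirstBlock() and stg_compactGetNextBlockzh.

/* Illustrative sketch (not part of the patch): walk a compact's block chain
 * and report how many words each block currently holds, mirroring the
 * pointer arithmetic in compactGetFirstBlock() / stg_compactGetNextBlockzh.
 * Assumes the usual RTS includes used by CNF.c.
 */
#include "Rts.h"
#include "sm/CNF.h"

static void
dumpCompactChain (StgCompactNFData *str)
{
    /* The first StgCompactNFDataBlock header sits immediately before the
     * StgCompactNFData closure, exactly as compactGetFirstBlock() computes. */
    StgCompactNFDataBlock *block =
        (StgCompactNFDataBlock*)((W_)str - sizeof(StgCompactNFDataBlock));

    while (block != NULL) {
        bdescr *bd = Bdescr((P_)block);
        /* bd->free marks the end of the data appended so far in this block */
        W_ usedW = bd->free - bd->start;

        debugBelch("compact block %p: owner %p, %" FMT_Word " words used\n",
                   (void*)block, (void*)block->owner, usedW);

        block = block->next;   /* a NULL ->next terminates the chain */
    }
}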
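
The pointer fixup for serialized compacts (find_pointer() and fixup_one_pointer() in CNF.c) boils down to a binary search over a table of (old block address, new block address) pairs sorted by old address, followed by shifting the pointer by the block's displacement. The following is a deliberately simplified, self-contained sketch of that scheme: FixupEntry, fixup() and the fixed block_size parameter are inventions for illustration only; the real code reads each block's extent from its bdescr and also preserves pointer tags.

/* Simplified sketch of the fixup-table scheme used by compactFixupPointers():
 * the table maps the address each block had when it was serialized (recorded
 * in block->self) to the address it now lives at. A pointer into the old
 * layout is remapped by locating its block and adding the displacement.
 */
#include <stddef.h>
#include <stdint.h>

typedef struct { uintptr_t old_addr, new_addr; } FixupEntry;

static uintptr_t
fixup (const FixupEntry *table, size_t count, size_t block_size, uintptr_t p)
{
    size_t lo = 0, hi = count;

    /* binary search for the last entry with old_addr <= p */
    while (lo + 1 < hi) {
        size_t mid = (lo + hi) / 2;
        if (table[mid].old_addr > p)
            hi = mid;
        else
            lo = mid;
    }

    if (count == 0
        || p < table[lo].old_addr
        || p >= table[lo].old_addr + block_size)
        return 0;   /* p does not point into any known block */

    return p - table[lo].old_addr + table[lo].new_addr;
}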