Diffstat (limited to 'rts/sm/Storage.c')
-rw-r--r--   rts/sm/Storage.c   126
1 file changed, 74 insertions(+), 52 deletions(-)
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index ffaed5f17c..dcc5b3a3c7 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -197,11 +197,7 @@ initStorage (void)
 
 #if defined(THREADED_RTS)
   initSpinLock(&gc_alloc_block_sync);
-#if defined(PROF_SPIN)
-  whitehole_spin = 0;
 #endif
-#endif
-
   N = 0;
 
   for (n = 0; n < n_numa_nodes; n++) {
@@ -224,6 +220,7 @@ initStorage (void)
 void storageAddCapabilities (uint32_t from, uint32_t to)
 {
     uint32_t n, g, i, new_n_nurseries;
+    nursery *old_nurseries;
 
     if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
         new_n_nurseries = to;
@@ -233,6 +230,7 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
             stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
     }
 
+    old_nurseries = nurseries;
     if (from > 0) {
         nurseries = stgReallocBytes(nurseries,
                                     new_n_nurseries * sizeof(struct nursery_),
@@ -244,8 +242,9 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
 
     // we've moved the nurseries, so we have to update the rNursery
     // pointers from the Capabilities.
-    for (i = 0; i < to; i++) {
-        capabilities[i]->r.rNursery = &nurseries[i];
+    for (i = 0; i < from; i++) {
+        uint32_t index = capabilities[i]->r.rNursery - old_nurseries;
+        capabilities[i]->r.rNursery = &nurseries[index];
     }
 
     /* The allocation area.  Policy: keep the allocation area
@@ -307,21 +306,21 @@ freeStorage (bool free_heap)
 
    The entry code for every CAF does the following:
 
-      - calls newCaf, which builds a CAF_BLACKHOLE on the heap and atomically
+      - calls newCAF, which builds a CAF_BLACKHOLE on the heap and atomically
        updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE
 
-      - if newCaf returns zero, it re-enters the CAF (see Note [atomic
+      - if newCAF returns zero, it re-enters the CAF (see Note [atomic
        CAF entry])
 
       - pushes an update frame pointing to the CAF_BLACKHOLE
 
-   Why do we build an BLACKHOLE in the heap rather than just updating
+   Why do we build a BLACKHOLE in the heap rather than just updating
    the thunk directly?  It's so that we only need one kind of update
    frame - otherwise we'd need a static version of the update frame
    too, and various other parts of the RTS that deal with update
    frames would also need special cases for static update frames.
 
-   newCaf() does the following:
+   newCAF() does the following:
 
       - atomically locks the CAF (see [atomic CAF entry])
 
@@ -339,7 +338,7 @@ freeStorage (bool free_heap)
    ------------------
    Note [atomic CAF entry]
 
-   With THREADED_RTS, newCaf() is required to be atomic (see
+   With THREADED_RTS, newCAF() is required to be atomic (see
    #5558).  This is because if two threads happened to enter the same
    CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
    and so the normal threadPaused() machinery for detecting duplicate
@@ -359,7 +358,7 @@ freeStorage (bool free_heap)
       - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.
 
-   To do this, we use an additional CAF list.  When newCaf() is
+   To do this, we use an additional CAF list.  When newCAF() is
    called on a dynamically-loaded CAF, we add it to the CAF list
    instead of the old-generation mutable list, and save away its old
    info pointer (in caf->saved_info) for later reversion.
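Before the allocator hunks below, it is worth spelling out the storageAddCapabilities fix above: stgReallocBytes may move the nurseries array, so the patch saves the old base pointer (old_nurseries) and rebuilds each existing capability's rNursery from its old index, instead of resetting every capability up to `to` to &nurseries[i]. The following is a minimal sketch of that rebase-after-realloc idiom; all names in it (slot_t, user_t, grow_slots) are invented for illustration and are not RTS code.

/* Illustrative sketch only (not RTS code).  Each user holds a pointer into a
 * heap-allocated array; when the array is reallocated and possibly moved,
 * the pointer is rebuilt from its old index against the saved old base,
 * mirroring what storageAddCapabilities now does with rNursery. */
#include <stdlib.h>

typedef struct { long data; } slot_t;    /* stands in for struct nursery_ */
typedef struct { slot_t *cur; } user_t;  /* stands in for a Capability    */

static slot_t *grow_slots(slot_t *slots, size_t new_n,
                          user_t *users, size_t n_users)
{
    slot_t *old_slots = slots;                  /* save the old base first */
    slot_t *new_slots = realloc(slots, new_n * sizeof *new_slots);
    if (new_slots == NULL) return NULL;         /* caller handles failure  */

    for (size_t i = 0; i < n_users; i++) {
        size_t index = (size_t)(users[i].cur - old_slots); /* old index    */
        users[i].cur = &new_slots[index];                  /* rebase       */
    }
    return new_slots;
}

As in the patch, the index is recovered from the pre-realloc base pointer. The remaining hunks of the commit continue below.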
@@ -796,6 +795,20 @@ move_STACK (StgStack *src, StgStack *dest)
     dest->sp = (StgPtr)dest->sp + diff;
 }
 
+STATIC_INLINE void
+accountAllocation(Capability *cap, W_ n)
+{
+    TICK_ALLOC_HEAP_NOCTR(WDS(n));
+    CCS_ALLOC(cap->r.rCCCS,n);
+    if (cap->r.rCurrentTSO != NULL) {
+        // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
+        ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
+                     (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
+                      - n*sizeof(W_)));
+    }
+
+}
+
 /* -----------------------------------------------------------------------------
    StgPtr allocate (Capability *cap, W_ n)
 
@@ -812,21 +825,37 @@ move_STACK (StgStack *src, StgStack *dest)
    that operation fails, then the whole process will be killed.
    -------------------------------------------------------------------------- */
 
+/*
+ * Allocate some n words of heap memory; terminating
+ * on heap overflow
+ */
 StgPtr
 allocate (Capability *cap, W_ n)
 {
+    StgPtr p = allocateMightFail(cap, n);
+    if (p == NULL) {
+        reportHeapOverflow();
+        // heapOverflow() doesn't exit (see #2592), but we aren't
+        // in a position to do a clean shutdown here: we
+        // either have to allocate the memory or exit now.
+        // Allocating the memory would be bad, because the user
+        // has requested that we not exceed maxHeapSize, so we
+        // just exit.
+        stg_exit(EXIT_HEAPOVERFLOW);
+    }
+    return p;
+}
+
+/*
+ * Allocate some n words of heap memory; returning NULL
+ * on heap overflow
+ */
+StgPtr
+allocateMightFail (Capability *cap, W_ n)
+{
     bdescr *bd;
     StgPtr p;
 
-    TICK_ALLOC_HEAP_NOCTR(WDS(n));
-    CCS_ALLOC(cap->r.rCCCS,n);
-    if (cap->r.rCurrentTSO != NULL) {
-        // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
-        ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
-                     (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
-                      - n*sizeof(W_)));
-    }
-
     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
         // The largest number of words such that
         // the computation of req_blocks will not overflow.
@@ -845,16 +874,12 @@ allocate (Capability *cap, W_ n)
             req_blocks >= HS_INT32_MAX)   // avoid overflow when
                                           // calling allocGroup() below
         {
-            reportHeapOverflow();
-            // heapOverflow() doesn't exit (see #2592), but we aren't
-            // in a position to do a clean shutdown here: we
-            // either have to allocate the memory or exit now.
-            // Allocating the memory would be bad, because the user
-            // has requested that we not exceed maxHeapSize, so we
-            // just exit.
-            stg_exit(EXIT_HEAPOVERFLOW);
+            return NULL;
         }
 
+        // Only credit allocation after we've passed the size check above
+        accountAllocation(cap, n);
+
         ACQUIRE_SM_LOCK
         bd = allocGroupOnNode(cap->node,req_blocks);
         dbl_link_onto(bd, &g0->large_objects);
@@ -870,6 +895,7 @@ allocate (Capability *cap, W_ n)
 
     /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 
+    accountAllocation(cap, n);
     bd = cap->r.rCurrentAlloc;
     if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 
@@ -955,7 +981,8 @@ allocate (Capability *cap, W_ n)
    to pinned ByteArrays, not scavenging is ok.
 
    This function is called by newPinnedByteArray# which immediately
-   fills the allocated memory with a MutableByteArray#.
+   fills the allocated memory with a MutableByteArray#. Note that
+   this returns NULL on heap overflow.
    ------------------------------------------------------------------------- */
 
 StgPtr
@@ -967,20 +994,16 @@ allocatePinned (Capability *cap, W_ n)
     // If the request is for a large object, then allocate()
     // will give us a pinned object anyway.
     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
-        p = allocate(cap, n);
-        Bdescr(p)->flags |= BF_PINNED;
-        return p;
-    }
-
-    TICK_ALLOC_HEAP_NOCTR(WDS(n));
-    CCS_ALLOC(cap->r.rCCCS,n);
-    if (cap->r.rCurrentTSO != NULL) {
-        // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
-        ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
-                     (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
-                      - n*sizeof(W_)));
+        p = allocateMightFail(cap, n);
+        if (p == NULL) {
+            return NULL;
+        } else {
+            Bdescr(p)->flags |= BF_PINNED;
+            return p;
+        }
     }
 
+    accountAllocation(cap, n);
     bd = cap->pinned_object_block;
 
     // If we don't have a block of pinned objects yet, or the current
@@ -1135,7 +1158,7 @@ dirty_MVAR(StgRegTable *reg, StgClosure *p)
  * -------------------------------------------------------------------------- */
 
 /* -----------------------------------------------------------------------------
- * [Note allocation accounting]
+ * Note [allocation accounting]
  *
  * - When cap->r.rCurrentNusery moves to a new block in the nursery,
  *   we add the size of the used portion of the previous block to
@@ -1241,16 +1264,15 @@ W_ gcThreadLiveBlocks (uint32_t i, uint32_t g)
  * to store bitmaps and the mark stack.  Note: blocks_needed does not
  * include the blocks in the nursery.
  *
- * Assume: all data currently live will remain live.  Generationss
+ * Assume: all data currently live will remain live.  Generations
 * that will be collected next time will therefore need twice as many
 * blocks since all the data will be copied.
 */
 extern W_
 calcNeeded (bool force_major, memcount *blocks_needed)
 {
-    W_ needed = 0, blocks;
-    uint32_t g, N;
-    generation *gen;
+    W_ needed = 0;
+    uint32_t N;
 
     if (force_major) {
         N = RtsFlags.GcFlags.generations - 1;
@@ -1258,12 +1280,12 @@ calcNeeded (bool force_major, memcount *blocks_needed)
         N = 0;
     }
 
-    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-        gen = &generations[g];
+    for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) {
+        generation *gen = &generations[g];
 
-        blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
-               + gen->n_large_blocks
-               + gen->n_compact_blocks;
+        W_ blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
+                  + gen->n_large_blocks
+                  + gen->n_compact_blocks;
 
         // we need at least this much space
        needed += blocks;
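The heart of the commit is the split of allocate into a thin wrapper plus allocateMightFail: the new function returns NULL on heap overflow, and it only calls accountAllocation (ticky counters, cost-centre accounting, and the TSO's alloc_limit) once the size check has passed, while allocate keeps its old never-returns-NULL contract by calling reportHeapOverflow and stg_exit itself; allocatePinned likewise forwards NULL to its caller. Below is a hypothetical caller sketch, assuming the usual RTS headers and that allocateMightFail's prototype is visible (the accompanying header change falls outside this diffstat, which is limited to Storage.c); tryAllocWords is an invented name, not something this patch adds.

/* Hypothetical usage sketch, not part of this patch.  Code that can recover
 * from heap overflow calls allocateMightFail() and checks for NULL; code
 * that cannot keeps using allocate(), which still terminates the process. */
#include "Rts.h"   /* assumed to bring StgPtr, W_ and Capability into scope */

static StgPtr tryAllocWords (Capability *cap, W_ n)
{
    StgPtr p = allocateMightFail(cap, n);
    if (p == NULL) {
        // Heap overflow: report failure to the caller (which might, for
        // example, raise an exception in the current thread) instead of
        // exiting the whole process the way allocate() does.
        return NULL;
    }
    return p;
}

Deferring accountAllocation until after the overflow check also means a rejected oversized request is never charged against the thread's allocation limit, per the "Only credit allocation after we've passed the size check above" comment in the diff.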