Diffstat (limited to 'rts/sm/Storage.c')
-rw-r--r--   rts/sm/Storage.c   126
1 file changed, 74 insertions(+), 52 deletions(-)
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index ffaed5f17c..dcc5b3a3c7 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -197,11 +197,7 @@ initStorage (void)
 
 #if defined(THREADED_RTS)
   initSpinLock(&gc_alloc_block_sync);
-#if defined(PROF_SPIN)
-  whitehole_spin = 0;
 #endif
-#endif
-
   N = 0;
 
   for (n = 0; n < n_numa_nodes; n++) {
@@ -224,6 +220,7 @@ initStorage (void)
 void storageAddCapabilities (uint32_t from, uint32_t to)
 {
     uint32_t n, g, i, new_n_nurseries;
+    nursery *old_nurseries;
 
     if (RtsFlags.GcFlags.nurseryChunkSize == 0) {
         new_n_nurseries = to;
@@ -233,6 +230,7 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
             stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize);
     }
 
+    old_nurseries = nurseries;
     if (from > 0) {
         nurseries = stgReallocBytes(nurseries,
                                     new_n_nurseries * sizeof(struct nursery_),
@@ -244,8 +242,9 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
 
     // we've moved the nurseries, so we have to update the rNursery
     // pointers from the Capabilities.
-    for (i = 0; i < to; i++) {
-        capabilities[i]->r.rNursery = &nurseries[i];
+    for (i = 0; i < from; i++) {
+        uint32_t index = capabilities[i]->r.rNursery - old_nurseries;
+        capabilities[i]->r.rNursery = &nurseries[index];
     }
 
     /* The allocation area.  Policy: keep the allocation area
@@ -307,21 +306,21 @@ freeStorage (bool free_heap)
 
    The entry code for every CAF does the following:
 
-      - calls newCaf, which builds a CAF_BLACKHOLE on the heap and atomically
+      - calls newCAF, which builds a CAF_BLACKHOLE on the heap and atomically
        updates the CAF with IND_STATIC pointing to the CAF_BLACKHOLE
 
-      - if newCaf returns zero, it re-enters the CAF (see Note [atomic
+      - if newCAF returns zero, it re-enters the CAF (see Note [atomic
        CAF entry])
 
       - pushes an update frame pointing to the CAF_BLACKHOLE
 
-   Why do we build an BLACKHOLE in the heap rather than just updating
+   Why do we build a BLACKHOLE in the heap rather than just updating
    the thunk directly?  It's so that we only need one kind of update
    frame - otherwise we'd need a static version of the update frame
    too, and various other parts of the RTS that deal with update
    frames would also need special cases for static update frames.
 
-   newCaf() does the following:
+   newCAF() does the following:
 
       - atomically locks the CAF (see [atomic CAF entry])
 
@@ -339,7 +338,7 @@ freeStorage (bool free_heap)
    ------------------
    Note [atomic CAF entry]
 
-   With THREADED_RTS, newCaf() is required to be atomic (see
+   With THREADED_RTS, newCAF() is required to be atomic (see
    #5558).  This is because if two threads happened to enter the same
    CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
    and so the normal threadPaused() machinery for detecting duplicate
@@ -359,7 +358,7 @@ freeStorage (bool free_heap)
       - we must be able to *revert* CAFs that have been evaluated, to
        their pre-evaluated form.
 
-   To do this, we use an additional CAF list.  When newCaf() is
+   To do this, we use an additional CAF list.  When newCAF() is
    called on a dynamically-loaded CAF, we add it to the CAF list
    instead of the old-generation mutable list, and save away its old
    info pointer (in caf->saved_info) for later reversion.
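Before the allocator hunks below, it is worth spelling out the storageAddCapabilities fix above: stgReallocBytes may move the nurseries array, so the patch saves the old base pointer (old_nurseries) and rebuilds each existing capability's rNursery from its old index, instead of resetting every capability up to `to` to &nurseries[i]. The following is a minimal sketch of that rebase-after-realloc idiom; all names in it (slot_t, user_t, grow_slots) are invented for illustration and are not RTS code.

/* Illustrative sketch only (not RTS code).  Each user holds a pointer into a
 * heap-allocated array; when the array is reallocated and possibly moved,
 * the pointer is rebuilt from its old index against the saved old base,
 * mirroring what storageAddCapabilities now does with rNursery. */
#include <stdlib.h>

typedef struct { long data; } slot_t;    /* stands in for struct nursery_ */
typedef struct { slot_t *cur; } user_t;  /* stands in for a Capability    */

static slot_t *grow_slots(slot_t *slots, size_t new_n,
                          user_t *users, size_t n_users)
{
    slot_t *old_slots = slots;                  /* save the old base first */
    slot_t *new_slots = realloc(slots, new_n * sizeof *new_slots);
    if (new_slots == NULL) return NULL;         /* caller handles failure  */

    for (size_t i = 0; i < n_users; i++) {
        size_t index = (size_t)(users[i].cur - old_slots); /* old index    */
        users[i].cur = &new_slots[index];                  /* rebase       */
    }
    return new_slots;
}

As in the patch, the index is recovered from the pre-realloc base pointer. The remaining hunks of the commit continue below.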
@@ -796,6 +795,20 @@ move_STACK (StgStack *src, StgStack *dest)
     dest->sp = (StgPtr)dest->sp + diff;
 }
 
+STATIC_INLINE void
+accountAllocation(Capability *cap, W_ n)
+{
+    TICK_ALLOC_HEAP_NOCTR(WDS(n));
+    CCS_ALLOC(cap->r.rCCCS,n);
+    if (cap->r.rCurrentTSO != NULL) {
+        // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
+        ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
+                     (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
+                      - n*sizeof(W_)));
+    }
+
+}
+
 /* -----------------------------------------------------------------------------
    StgPtr allocate (Capability *cap, W_ n)
 
@@ -812,21 +825,37 @@ move_STACK (StgStack *src, StgStack *dest)
    that operation fails, then the whole process will be killed.
    -------------------------------------------------------------------------- */
 
+/*
+ * Allocate some n words of heap memory; terminating
+ * on heap overflow
+ */
 StgPtr
 allocate (Capability *cap, W_ n)
 {
+    StgPtr p = allocateMightFail(cap, n);
+    if (p == NULL) {
+        reportHeapOverflow();
+        // heapOverflow() doesn't exit (see #2592), but we aren't
+        // in a position to do a clean shutdown here: we
+        // either have to allocate the memory or exit now.
+        // Allocating the memory would be bad, because the user
+        // has requested that we not exceed maxHeapSize, so we
+        // just exit.
+        stg_exit(EXIT_HEAPOVERFLOW);
+    }
+    return p;
+}
+
+/*
+ * Allocate some n words of heap memory; returning NULL
+ * on heap overflow
+ */
+StgPtr
+allocateMightFail (Capability *cap, W_ n)
+{
     bdescr *bd;
     StgPtr p;
 
-    TICK_ALLOC_HEAP_NOCTR(WDS(n));
-    CCS_ALLOC(cap->r.rCCCS,n);
-    if (cap->r.rCurrentTSO != NULL) {
-        // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_)
-        ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
-                     (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
-                      - n*sizeof(W_)));
-    }
-
     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
         // The largest number of words such that
         // the computation of req_blocks will not overflow.
@@ -845,16 +874,12 @@ allocate (Capability *cap, W_ n)
             req_blocks >= HS_INT32_MAX)   // avoid overflow when
                                           // calling allocGroup() below
         {
-            reportHeapOverflow();
-            // heapOverflow() doesn't exit (see #2592), but we aren't
-            // in a position to do a clean shutdown here: we
-            // either have to allocate the memory or exit now.
-            // Allocating the memory would be bad, because the user
-            // has requested that we not exceed maxHeapSize, so we
-            // just exit.
-            stg_exit(EXIT_HEAPOVERFLOW);
+            return NULL;
         }
 
+        // Only credit allocation after we've passed the size check above
+        accountAllocation(cap, n);
+
         ACQUIRE_SM_LOCK
         bd = allocGroupOnNode(cap->node,req_blocks);
         dbl_link_onto(bd, &g0->large_objects);
@@ -870,6 +895,7 @@ allocate (Capability *cap, W_ n)
 
     /* small allocation (<LARGE_OBJECT_THRESHOLD) */
 
+    accountAllocation(cap, n);
     bd = cap->r.rCurrentAlloc;
     if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
 
@@ -955,7 +981,8 @@ allocate (Capability *cap, W_ n)
    to pinned ByteArrays, not scavenging is ok.
 
    This function is called by newPinnedByteArray# which immediately
-   fills the allocated memory with a MutableByteArray#.
+   fills the allocated memory with a MutableByteArray#. Note that
+   this returns NULL on heap overflow.
    ------------------------------------------------------------------------- */
 
 StgPtr
@@ -967,20 +994,16 @@ allocatePinned (Capability *cap, W_ n)
     // If the request is for a large object, then allocate()
     // will give us a pinned object anyway.
     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
-        p = allocate(cap, n);
-        Bdescr(p)->flags |= BF_PINNED;
-        return p;
-    }
-
-    TICK_ALLOC_HEAP_NOCTR(WDS(n));
-    CCS_ALLOC(cap->r.rCCCS,n);
-    if (cap->r.rCurrentTSO != NULL) {
-        // cap->r.rCurrentTSO->alloc_limit -= n*sizeof(W_);
-        ASSIGN_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit),
-                     (PK_Int64((W_*)&(cap->r.rCurrentTSO->alloc_limit))
-                      - n*sizeof(W_)));
+        p = allocateMightFail(cap, n);
+        if (p == NULL) {
+            return NULL;
+        } else {
+            Bdescr(p)->flags |= BF_PINNED;
+            return p;
+        }
     }
 
+    accountAllocation(cap, n);
     bd = cap->pinned_object_block;
 
     // If we don't have a block of pinned objects yet, or the current
@@ -1135,7 +1158,7 @@ dirty_MVAR(StgRegTable *reg, StgClosure *p)
  * -------------------------------------------------------------------------- */
 
 /* -----------------------------------------------------------------------------
- * [Note allocation accounting]
+ * Note [allocation accounting]
  *
  * - When cap->r.rCurrentNusery moves to a new block in the nursery,
  *   we add the size of the used portion of the previous block to
@@ -1241,16 +1264,15 @@ W_ gcThreadLiveBlocks (uint32_t i, uint32_t g)
  * to store bitmaps and the mark stack.  Note: blocks_needed does not
  * include the blocks in the nursery.
  *
- * Assume: all data currently live will remain live.  Generationss
+ * Assume: all data currently live will remain live.  Generations
 * that will be collected next time will therefore need twice as many
 * blocks since all the data will be copied.
 */
 extern W_
 calcNeeded (bool force_major, memcount *blocks_needed)
 {
-    W_ needed = 0, blocks;
-    uint32_t g, N;
-    generation *gen;
+    W_ needed = 0;
+    uint32_t N;
 
     if (force_major) {
         N = RtsFlags.GcFlags.generations - 1;
@@ -1258,12 +1280,12 @@ calcNeeded (bool force_major, memcount *blocks_needed)
         N = 0;
     }
 
-    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-        gen = &generations[g];
+    for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) {
+        generation *gen = &generations[g];
 
-        blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
-               + gen->n_large_blocks
-               + gen->n_compact_blocks;
+        W_ blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
+                  + gen->n_large_blocks
+                  + gen->n_compact_blocks;
 
         // we need at least this much space
        needed += blocks;
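The heart of the commit is the split of allocate into a thin wrapper plus allocateMightFail: the new function returns NULL on heap overflow, and it only calls accountAllocation (ticky counters, cost-centre accounting, and the TSO's alloc_limit) once the size check has passed, while allocate keeps its old never-returns-NULL contract by calling reportHeapOverflow and stg_exit itself; allocatePinned likewise forwards NULL to its caller. Below is a hypothetical caller sketch, assuming the usual RTS headers and that allocateMightFail's prototype is visible (the accompanying header change falls outside this diffstat, which is limited to Storage.c); tryAllocWords is an invented name, not something this patch adds.

/* Hypothetical usage sketch, not part of this patch.  Code that can recover
 * from heap overflow calls allocateMightFail() and checks for NULL; code
 * that cannot keeps using allocate(), which still terminates the process. */
#include "Rts.h"   /* assumed to bring StgPtr, W_ and Capability into scope */

static StgPtr tryAllocWords (Capability *cap, W_ n)
{
    StgPtr p = allocateMightFail(cap, n);
    if (p == NULL) {
        // Heap overflow: report failure to the caller (which might, for
        // example, raise an exception in the current thread) instead of
        // exiting the whole process the way allocate() does.
        return NULL;
    }
    return p;
}

Deferring accountAllocation until after the overflow check also means a rejected oversized request is never charged against the thread's allocation limit, per the "Only credit allocation after we've passed the size check above" comment in the diff.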