Diffstat (limited to 'rts/PrimOps.cmm')
-rw-r--r-- | rts/PrimOps.cmm | 399
1 file changed, 191 insertions(+), 208 deletions(-)
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 3d4bea433d..a5d8553e94 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -30,6 +30,7 @@ import pthread_mutex_lock;
 import pthread_mutex_unlock;
 #endif
 import CLOSURE base_ControlziExceptionziBase_nestedAtomically_closure;
+import CLOSURE base_GHCziIOziException_heapOverflow_closure;
 import EnterCriticalSection;
 import LeaveCriticalSection;
 import CLOSURE ghczmprim_GHCziTypes_False_closure;
@@ -62,7 +63,10 @@ stg_newByteArrayzh ( W_ n )
     payload_words = ROUNDUP_BYTES_TO_WDS(n);
     words = BYTES_TO_WDS(SIZEOF_StgArrBytes) + payload_words;
-    ("ptr" p) = ccall allocate(MyCapability() "ptr",words);
+    ("ptr" p) = ccall allocateMightFail(MyCapability() "ptr", words);
+    if (p == NULL) {
+        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
+    }
     TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
     SET_HDR(p, stg_ARR_WORDS_info, CCCS);
     StgArrBytes_bytes(p) = n;
@@ -92,6 +96,9 @@ stg_newPinnedByteArrayzh ( W_ n )
     words = ROUNDUP_BYTES_TO_WDS(bytes);
     ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
+    if (p == NULL) {
+        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
+    }
     TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);

     /* Now we need to move p forward so that the payload is aligned
@@ -130,6 +137,9 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
     words = ROUNDUP_BYTES_TO_WDS(bytes);
     ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
+    if (p == NULL) {
+        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
+    }
     TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);

     /* Now we need to move p forward so that the payload is aligned
@@ -150,8 +160,9 @@ stg_isByteArrayPinnedzh ( gcptr ba )
     // Pinned byte arrays live in blocks with the BF_PINNED flag set.
     // We also consider BF_LARGE objects to be immovable. See #13894.
     // See the comment in Storage.c:allocatePinned.
+    // We also consider BF_COMPACT objects to be immovable. See #14900.
     flags = TO_W_(bdescr_flags(bd));
-    return (flags & (BF_PINNED | BF_LARGE) != 0);
+    return (flags & (BF_PINNED | BF_LARGE | BF_COMPACT) != 0);
 }

 stg_isMutableByteArrayPinnedzh ( gcptr mba )
@@ -240,17 +251,20 @@ stg_newArrayzh ( W_ n /* words */, gcptr init )
     // number of words.
     size = n + mutArrPtrsCardWords(n);
     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
-    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
+    ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
+    if (arr == NULL) {
+        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
+    }
     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
     StgMutArrPtrs_ptrs(arr) = n;
     StgMutArrPtrs_size(arr) = size;

-    // Initialise all elements of the the array with the value in R2
+    // Initialise all elements of the array with the value in R2
     p = arr + SIZEOF_StgMutArrPtrs;
 for:
-    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
+    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) (likely: True) {
         W_[p] = init;
         p = p + WDS(1);
         goto for;
     }
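
Note: with allocate swapped for allocateMightFail, an oversized request now raises a catchable HeapOverflow exception instead of aborting the RTS. A minimal illustrative sketch (assuming the primitive package's Data.Primitive.ByteArray, whose newByteArray sits on top of newByteArray#):

    import Control.Exception
    import Data.Primitive.ByteArray (newByteArray)

    main :: IO ()
    main = do
      r <- try (newByteArray maxBound)   -- deliberately absurd request
      case r of
        Left HeapOverflow -> putStrLn "caught HeapOverflow"
        Left e            -> throwIO e   -- some other AsyncException
        Right _           -> putStrLn "allocation somehow succeeded"
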
@@ -261,28 +275,15 @@
 stg_unsafeThawArrayzh ( gcptr arr )
 {
-    // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
-    //
-    // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
-    // normally doesn't.  However, when we freeze a MUT_ARR_PTRS, we leave
-    // it on the mutable list for the GC to remove (removing something from
-    // the mutable list is not easy).
-    //
-    // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
-    // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
-    // to indicate that it is still on the mutable list.
-    //
-    // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
-    // either it is on a mut_list, or it isn't.  We adopt the convention that
-    // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
-    // and MUT_ARR_PTRS_FROZEN otherwise.  In fact it wouldn't matter if
-    // we put it on the mutable list more than once, but it would get scavenged
-    // multiple times during GC, which would be unnecessarily slow.
-    //
-    if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN0_info) {
+    // A MUT_ARR_PTRS always lives on a mut_list, but a MUT_ARR_PTRS_FROZEN
+    // doesn't. To decide whether to add the thawed array to a mut_list we check
+    // the info table. MUT_ARR_PTRS_FROZEN_DIRTY means it's already on a
+    // mut_list so no need to add it again. MUT_ARR_PTRS_FROZEN_CLEAN means it's
+    // not and we should add it to a mut_list.
+    if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN_DIRTY_info) {
         SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
+        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE():
         recordMutable(arr);
-        // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
         return (arr);
     } else {
         SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info);
@@ -312,7 +313,7 @@ stg_copyMutableArrayArrayzh ( gcptr src, W_ src_off, gcptr dst, W_ dst_off, W_ n

 stg_cloneArrayzh ( gcptr src, W_ offset, W_ n )
 {
-    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
+    cloneArray(stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, src, offset, n)
 }

 stg_cloneMutableArrayzh ( gcptr src, W_ offset, W_ n )
@@ -323,7 +324,7 @@ stg_cloneMutableArrayzh ( gcptr src, W_ offset, W_ n )
 // We have to escape the "z" in the name.
 stg_freezzeArrayzh ( gcptr src, W_ offset, W_ n )
 {
-    cloneArray(stg_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
+    cloneArray(stg_MUT_ARR_PTRS_FROZEN_CLEAN_info, src, offset, n)
 }

 stg_thawArrayzh ( gcptr src, W_ offset, W_ n )
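
Note: unsafeThawArray# reuses the heap object in place, which is why the code above must decide whether the array is already on a mut_list. Through the primitive package (an assumption, not part of this patch) the round trip looks like:

    import Control.Monad.ST (runST)
    import Data.Primitive.Array

    bumpHead :: Array Int -> Array Int
    bumpHead arr = runST $ do
      ma <- unsafeThawArray arr     -- O(1): no copy; info table flips to DIRTY
      x  <- readArray ma 0
      writeArray ma 0 (x + 1)
      unsafeFreezeArray ma          -- back to a FROZEN_* info table
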
@@ -366,7 +367,10 @@ stg_newArrayArrayzh ( W_ n /* words */ )
     // number of words.
     size = n + mutArrPtrsCardWords(n);
     words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size;
-    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
+    ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
+    if (arr == NULL) {
+        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
+    }
     TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0);

     SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
@@ -376,7 +380,7 @@ stg_newArrayArrayzh ( W_ n /* words */ )
     // Initialise all elements of the array with a pointer to the new array
     p = arr + SIZEOF_StgMutArrPtrs;
 for:
-    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) {
+    if (p < arr + SIZEOF_StgMutArrPtrs + WDS(n)) (likely: True) {
         W_[p] = arr;
         p = p + WDS(1);
         goto for;
@@ -398,16 +402,19 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )
 again: MAYBE_GC(again);

     words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n;
-    ("ptr" arr) = ccall allocate(MyCapability() "ptr",words);
+    ("ptr" arr) = ccall allocateMightFail(MyCapability() "ptr",words);
+    if (arr == NULL) {
+        jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
+    }
     TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0);

     SET_HDR(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info, CCCS);
     StgSmallMutArrPtrs_ptrs(arr) = n;

-    // Initialise all elements of the the array with the value in R2
+    // Initialise all elements of the array with the value in R2
     p = arr + SIZEOF_StgSmallMutArrPtrs;
 for:
-    if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) {
+    if (p < arr + SIZEOF_StgSmallMutArrPtrs + WDS(n)) (likely: True) {
         W_[p] = init;
         p = p + WDS(1);
         goto for;
@@ -419,7 +426,7 @@ stg_newSmallArrayzh ( W_ n /* words */, gcptr init )

 stg_unsafeThawSmallArrayzh ( gcptr arr )
 {
     // See stg_unsafeThawArrayzh
-    if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN0_info) {
+    if (StgHeader_info(arr) != stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info) {
         SET_INFO(arr, stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
         recordMutable(arr);
         // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
@@ -432,7 +439,7 @@ stg_unsafeThawSmallArrayzh ( gcptr arr )

 stg_cloneSmallArrayzh ( gcptr src, W_ offset, W_ n )
 {
-    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
+    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info, src, offset, n)
 }

 stg_cloneSmallMutableArrayzh ( gcptr src, W_ offset, W_ n )
@@ -443,7 +450,7 @@ stg_cloneSmallMutableArrayzh ( gcptr src, W_ offset, W_ n )
 // We have to escape the "z" in the name.
 stg_freezzeSmallArrayzh ( gcptr src, W_ offset, W_ n )
 {
-    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_info, src, offset, n)
+    cloneSmallArray(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info, src, offset, n)
 }

 stg_thawSmallArrayzh ( gcptr src, W_ offset, W_ n )
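
Note: newSmallArray# seeds every slot with the init value, which is what the initialisation loop above implements. A hedged sketch via Data.Primitive.SmallArray (primitive package, assumed here):

    import Control.Monad.ST (runST)
    import Data.Primitive.SmallArray

    threeChars :: SmallArray Char
    threeChars = runST $ do
      ma <- newSmallArray 3 'x'     -- all three slots start as 'x'
      writeSmallArray ma 1 'y'
      unsafeFreezeSmallArray ma
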
@@ -557,9 +564,9 @@ stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
 #endif
 }

-stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
+stg_atomicModifyMutVar2zh ( gcptr mv, gcptr f )
 {
-    W_ z, x, y, r, h;
+    W_ z, x, y, h;

     /* If x is the current contents of the MutVar#, then
        We want to make the new contents point to
@@ -568,13 +575,12 @@ stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )

        and the return value is

-       (sel_1 (f x))
+       (# x, (f x) #)

        obviously we can share (f x).

        z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
        y = [stg_sel_0 z]   (max (HS + 1) MIN_UPD_SIZE)
-       r = [stg_sel_1 z]   (max (HS + 1) MIN_UPD_SIZE)
     */

 #if defined(MIN_UPD_SIZE) && MIN_UPD_SIZE > 1
@@ -593,7 +599,7 @@ stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
 #define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
 #endif

-#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
+#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE)

     HP_CHK_GEN_TICKY(SIZE);

@@ -611,13 +617,6 @@ stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
     LDV_RECORD_CREATE(y);
     StgThunk_payload(y,0) = z;

-    TICK_ALLOC_THUNK_1();
-    CCCS_ALLOC(THUNK_1_SIZE);
-    r = y - THUNK_1_SIZE;
-    SET_HDR(r, stg_sel_1_upd_info, CCCS);
-    LDV_RECORD_CREATE(r);
-    StgThunk_payload(r,0) = z;
-
 retry:
     x = StgMutVar_var(mv);
     StgThunk_payload(z,1) = x;
@@ -632,15 +631,66 @@ stg_atomicModifyMutVarzh ( gcptr mv, gcptr f )
         ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
     }

-    return (r);
+    return (x,z);
+}
+
+stg_atomicModifyMutVarzuzh ( gcptr mv, gcptr f )
+{
+    W_ z, x, h;
+
+    /* If x is the current contents of the MutVar#, then
+       We want to make the new contents point to
+
+       (f x)
+
+       and the return value is
+
+       (# x, (f x) #)
+
+        obviously we can share (f x).
+
+        z = [stg_ap_2 f x]  (max (HS + 2) MIN_UPD_SIZE)
+    */
+
+#if defined(MIN_UPD_SIZE) && MIN_UPD_SIZE > 2
+#define THUNK_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
+#define TICK_ALLOC_THUNK() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
+#else
+#define THUNK_SIZE (SIZEOF_StgThunkHeader + WDS(2))
+#define TICK_ALLOC_THUNK() TICK_ALLOC_UP_THK(WDS(2),0)
+#endif
+
+    HP_CHK_GEN_TICKY(THUNK_SIZE);
+
+    TICK_ALLOC_THUNK();
+    CCCS_ALLOC(THUNK_SIZE);
+    z = Hp - THUNK_SIZE + WDS(1);
+    SET_HDR(z, stg_ap_2_upd_info, CCCS);
+    LDV_RECORD_CREATE(z);
+    StgThunk_payload(z,0) = f;
+
+retry:
+    x = StgMutVar_var(mv);
+    StgThunk_payload(z,1) = x;
+#if defined(THREADED_RTS)
+    (h) = prim %cmpxchgW(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, z);
+    if (h != x) { goto retry; }
+#else
+    StgMutVar_var(mv) = z;
+#endif
+
+    if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
+        ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
+    }
+
+    return (x,z);
 }

+
 /* -----------------------------------------------------------------------------
    Weak Pointer Primitives
    -------------------------------------------------------------------------- */

-STRING(stg_weak_msg,"New weak pointer at %p\n")
-
 stg_mkWeakzh ( gcptr key,
                gcptr value,
                gcptr finalizer /* or stg_NO_FINALIZER_closure */ )
@@ -663,7 +713,7 @@ stg_mkWeakzh ( gcptr key,
         Capability_weak_ptr_list_tl(MyCapability()) = w;
     }

-    IF_DEBUG(weak, ccall debugBelch(stg_weak_msg,w));
+    IF_DEBUG(weak, ccall debugBelch("New weak pointer at %p\n",w));

     return (w);
 }
@@ -673,8 +723,6 @@
 stg_mkWeakNoFinalizzerzh ( gcptr key, gcptr value )
 {
     jump stg_mkWeakzh (key, value, stg_NO_FINALIZER_closure);
 }

-STRING(stg_cfinalizer_msg,"Adding a finalizer to %p\n")
-
 stg_addCFinalizzerToWeakzh ( W_ fptr,   // finalizer
                              W_ ptr,
                              W_ flag,   // has environment (0 or 1)
@@ -708,7 +756,7 @@ stg_addCFinalizzerToWeakzh ( W_ fptr,   // finalizer

     recordMutable(w);

-    IF_DEBUG(weak, ccall debugBelch(stg_cfinalizer_msg,w));
+    IF_DEBUG(weak, ccall debugBelch("Adding a finalizer to %p\n",w));

     return (1);
 }
@@ -924,11 +972,6 @@ stg_yieldzh ()
     jump stg_yield_noregs();
 }

-stg_myThreadIdzh ()
-{
-    return (CurrentTSO);
-}
-
 stg_labelThreadzh ( gcptr threadid, W_ addr )
 {
 #if defined(DEBUG) || defined(TRACING) || defined(DTRACE)
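
Note: the renamed primop returns both the old contents and the shared thunk of (f x), so the extra stg_sel_1 thunk disappears. A sketch of a possible wrapper; the Haskell-side type (f returning a pair whose first component becomes the new contents) is inferred from the C-- above, so treat it as an assumption:

    {-# LANGUAGE MagicHash, UnboxedTuples #-}
    import GHC.Exts (atomicModifyMutVar2#)
    import GHC.IO (IO(..))
    import GHC.IORef (IORef(..))
    import GHC.STRef (STRef(..))

    -- Replace the contents with f x and hand back the *old* value.
    fetchAndModify :: IORef a -> (a -> a) -> IO a
    fetchAndModify (IORef (STRef var)) f = IO $ \s ->
      case atomicModifyMutVar2# var (\x -> (f x, ())) s of
        (# s', old, _pair #) -> (# s', old #)
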
@@ -1004,6 +1047,7 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,
                                  alt_code))
     return (P_ ret)
 {
+    unwind Sp = Sp + SIZEOF_StgCatchRetryFrame;
     W_ r;
     gcptr trec, outer, arg;

@@ -1041,11 +1085,10 @@ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME,

 // Atomically frame ------------------------------------------------------------

 // This must match StgAtomicallyFrame in Closures.h
-#define ATOMICALLY_FRAME_FIELDS(w_,p_,info_ptr,p1,p2,code,next,result) \
+#define ATOMICALLY_FRAME_FIELDS(w_,p_,info_ptr,p1,p2,code,result)      \
     w_ info_ptr,                                                       \
     PROF_HDR_FIELDS(w_,p1,p2)                                          \
     p_ code,                                                           \
-    p_ next,                                                           \
     p_ result
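
Note: stg_catch_retry_frame is the frame behind STM's orElse#: if the first branch retries, the frame aborts the nested transaction and runs the alternative. At the library level (stm package):

    import Control.Concurrent.STM

    takeEither :: TVar (Maybe a) -> TVar (Maybe a) -> STM a
    takeEither va vb = takeFrom va `orElse` takeFrom vb
      where
        takeFrom v = readTVar v
                 >>= maybe retry (\x -> writeTVar v Nothing >> pure x)
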
@@ -1054,67 +1097,36 @@ INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME,
                ATOMICALLY_FRAME_FIELDS(W_,P_,
                                        info_ptr, p1, p2,
                                        code,
-                                       next_invariant,
                                        frame_result))
     return (P_ result) // value returned to the frame
 {
     W_ valid;
-    gcptr trec, outer, next_invariant, q;
+    gcptr trec, outer, q;

     trec   = StgTSO_trec(CurrentTSO);
     outer  = StgTRecHeader_enclosing_trec(trec);

-    if (outer == NO_TREC) {
-        /* First time back at the atomically frame -- pick up invariants */
-        ("ptr" next_invariant) =
-            ccall stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr");
-        frame_result = result;
+    /* Back at the atomically frame */
+    frame_result = result;

+    /* try to commit */
+    (valid) = ccall stmCommitTransaction(MyCapability() "ptr", trec "ptr");
+    if (valid != 0) {
+        /* Transaction was valid: commit succeeded */
+        StgTSO_trec(CurrentTSO) = NO_TREC;
+        return (frame_result);
     } else {
-        /* Second/subsequent time back at the atomically frame -- abort the
-         * tx that's checking the invariant and move on to the next one */
-        StgTSO_trec(CurrentTSO) = outer;
-        StgInvariantCheckQueue_my_execution(next_invariant) = trec;
-        ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
-        /* Don't free trec -- it's linked from q and will be stashed in the
-         * invariant if we eventually commit. */
-        next_invariant =
-           StgInvariantCheckQueue_next_queue_entry(next_invariant);
-        trec = outer;
-    }
-
-    if (next_invariant != END_INVARIANT_CHECK_QUEUE) {
-        /* We can't commit yet: another invariant to check */
-        ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", trec "ptr");
+        /* Transaction was not valid: try again */
+        ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr",
+                                                 NO_TREC "ptr");
         StgTSO_trec(CurrentTSO) = trec;
-        q = StgInvariantCheckQueue_invariant(next_invariant);
+        jump stg_ap_v_fast
+            // push the StgAtomicallyFrame again: the code generator is
+            // clever enough to only assign the fields that have changed.
             (ATOMICALLY_FRAME_FIELDS(,,info_ptr,p1,p2,
-                                     code,next_invariant,frame_result))
-            (StgAtomicInvariant_code(q));
-
-    } else {
-
-        /* We've got no more invariants to check, try to commit */
-        (valid) = ccall stmCommitTransaction(MyCapability() "ptr", trec "ptr");
-        if (valid != 0) {
-            /* Transaction was valid: commit succeeded */
-            StgTSO_trec(CurrentTSO) = NO_TREC;
-            return (frame_result);
-        } else {
-            /* Transaction was not valid: try again */
-            ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr",
-                                                     NO_TREC "ptr");
-            StgTSO_trec(CurrentTSO) = trec;
-            next_invariant = END_INVARIANT_CHECK_QUEUE;
-
-            jump stg_ap_v_fast
-                // push the StgAtomicallyFrame again: the code generator is
-                // clever enough to only assign the fields that have changed.
-                (ATOMICALLY_FRAME_FIELDS(,,info_ptr,p1,p2,
-                                         code,next_invariant,frame_result))
-                (code);
-        }
+                                     code,frame_result))
+            (code);
     }
 }
@@ -1124,7 +1136,6 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
                ATOMICALLY_FRAME_FIELDS(W_,P_,
                                        info_ptr, p1, p2,
                                        code,
-                                       next_invariant,
                                        frame_result))
     return (/* no return values */)
 {
@@ -1136,7 +1147,7 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
         /* Previous attempt is still valid: no point trying again yet */
         jump stg_block_noregs
             (ATOMICALLY_FRAME_FIELDS(,,info_ptr, p1, p2,
-                                     code,next_invariant,frame_result))
+                                     code,frame_result))
             ();
     } else {
         /* Previous attempt is no longer valid: try again */
@@ -1146,7 +1157,7 @@ INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME,
         // change the frame header to stg_atomically_frame_info
         jump stg_ap_v_fast
             (ATOMICALLY_FRAME_FIELDS(,,stg_atomically_frame_info, p1, p2,
-                                     code,next_invariant,frame_result))
+                                     code,frame_result))
             (code);
     }
 }
@@ -1197,7 +1208,7 @@
 stg_atomicallyzh (P_ stm)
 {
     P_ old_trec;
     P_ new_trec;
-    P_ code, next_invariant, frame_result;
+    P_ code, frame_result;

     // stmStartTransaction may allocate
     MAYBE_GC_P(stg_atomicallyzh, stm);
@@ -1212,7 +1223,6 @@ stg_atomicallyzh (P_ stm)
     }

     code = stm;
-    next_invariant = END_INVARIANT_CHECK_QUEUE;
     frame_result = NO_TREC;

     /* Start the memory transcation */
@@ -1221,7 +1231,7 @@ stg_atomicallyzh (P_ stm)

     jump stg_ap_v_fast
         (ATOMICALLY_FRAME_FIELDS(,,stg_atomically_frame_info, CCCS, 0,
-                                 code,next_invariant,frame_result))
+                                 code,frame_result))
         (stm);
 }
@@ -1324,16 +1334,6 @@ retry_pop_stack:
     // We've reached the ATOMICALLY_FRAME: attempt to wait
     ASSERT(frame_type == ATOMICALLY_FRAME);
-    if (outer != NO_TREC) {
-        // We called retry while checking invariants, so abort the current
-        // invariant check (merging its TVar accesses into the parents read
-        // set so we'll wait on them)
-        ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr");
-        ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr");
-        trec = outer;
-        StgTSO_trec(CurrentTSO) = trec;
-        outer = StgTRecHeader_enclosing_trec(trec);
-    }
     ASSERT(outer == NO_TREC);

     (r) = ccall stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr");
@@ -1353,20 +1353,6 @@ retry_pop_stack:
     }
 }

-stg_checkzh (P_ closure /* STM a */)
-{
-    W_ trec;
-
-    MAYBE_GC_P (stg_checkzh, closure);
-
-    trec = StgTSO_trec(CurrentTSO);
-    ccall stmAddInvariantToCheck(MyCapability() "ptr",
-                                 trec "ptr",
-                                 closure "ptr");
-    return ();
-}
-
-
 stg_newTVarzh (P_ init)
 {
     W_ tv;
@@ -1741,7 +1727,7 @@ loop:

     ccall tryWakeupThread(MyCapability() "ptr", tso);

-    // If it was an readMVar, then we can still do work,
+    // If it was a readMVar, then we can still do work,
     // so loop back. (XXX: This could take a while)
     if (why_blocked == BlockedOnMVarRead) {
         q = StgMVarTSOQueue_link(q);
@@ -1822,7 +1808,7 @@ loop:

     ccall tryWakeupThread(MyCapability() "ptr", tso);

-    // If it was an readMVar, then we can still do work,
+    // If it was a readMVar, then we can still do work,
     // so loop back. (XXX: This could take a while)
     if (why_blocked == BlockedOnMVarRead) {
         q = StgMVarTSOQueue_link(q);
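
Note: with the invariant machinery (stg_checkzh, stmGetInvariantsToCheck, the invariant-check queue) removed, the commit path collapses to a single validate-or-retry step, and conditions are expressed inside the transactions themselves, e.g. with stm's check (essentially \b -> unless b retry):

    import Control.Concurrent.STM

    withdraw :: TVar Int -> Int -> STM ()
    withdraw acct n = do
      bal <- readTVar acct
      check (bal >= n)            -- retries until the balance suffices
      writeTVar acct (bal - n)
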
@@ -2004,70 +1990,44 @@ stg_mkApUpd0zh ( P_ bco )

 stg_unpackClosurezh ( P_ closure )
 {
-    W_ clos, info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
-    clos = UNTAG(closure);
-    info = %GET_STD_INFO(clos);
-
-    // Some closures have non-standard layout, so we omit those here.
-    W_ type;
-    type = TO_W_(%INFO_TYPE(info));
-    switch [0 .. N_CLOSURE_TYPES] type {
-    case THUNK_SELECTOR : {
-        ptrs = 1;
-        nptrs = 0;
-        goto out;
-    }
-    case THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1,
-         THUNK_0_2, THUNK_STATIC, AP, PAP, AP_STACK, BCO : {
-        ptrs = 0;
-        nptrs = 0;
-        goto out;
-    }
-    default: {
-        ptrs  = TO_W_(%INFO_PTRS(info));
-        nptrs = TO_W_(%INFO_NPTRS(info));
-        goto out;
-    }}
-
-out:
-    W_ ptrs_arr_sz, ptrs_arr_cards, nptrs_arr_sz;
-    nptrs_arr_sz = SIZEOF_StgArrBytes + WDS(nptrs);
-    ptrs_arr_cards = mutArrPtrsCardWords(ptrs);
-    ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards);
-
-    ALLOC_PRIM_P (ptrs_arr_sz + nptrs_arr_sz, stg_unpackClosurezh, closure);
-
-    ptrs_arr  = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
-    nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
-
-    SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, CCCS);
-    StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
-    StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards;
+    W_ info, ptrs, nptrs, p, ptrs_arr, dat_arr;
+    info = %GET_STD_INFO(UNTAG(closure));

-    p = 0;
+    ptrs  = TO_W_(%INFO_PTRS(info));
+    nptrs = TO_W_(%INFO_NPTRS(info));

-write_ptrs:
-    if(p < ptrs) {
-        W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
-        p = p + 1;
-        goto write_ptrs;
-    }
-    /* We can leave the card table uninitialised, since the array is
-       allocated in the nursery.  The GC will fill it in if/when the array
-       is promoted. */
+    W_ clos;
+    clos = UNTAG(closure);

-    SET_HDR(nptrs_arr, stg_ARR_WORDS_info, CCCS);
-    StgArrBytes_bytes(nptrs_arr) = WDS(nptrs);
-    p = 0;
+    W_ len;
+    // The array returned is the raw data for the entire closure.
+    // The length is variable based upon the closure type, ptrs, and non-ptrs
+    (len) = foreign "C" heap_view_closureSize(clos "ptr");
+
+    W_ ptrs_arr_sz, ptrs_arr_cards, dat_arr_sz;
+    dat_arr_sz = SIZEOF_StgArrBytes + WDS(len);

-write_nptrs:
-    if(p < nptrs) {
-        W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
+    ALLOC_PRIM_P (dat_arr_sz, stg_unpackClosurezh, closure);
+
+    dat_arr = Hp - dat_arr_sz + WDS(1);
+
+
+    SET_HDR(dat_arr, stg_ARR_WORDS_info, CCCS);
+    StgArrBytes_bytes(dat_arr) = WDS(len);
+    p = 0;
+for:
+    if(p < len) {
+        W_[BYTE_ARR_CTS(dat_arr) + WDS(p)] = W_[clos + WDS(p)];
         p = p + 1;
-        goto write_nptrs;
+        goto for;
     }

-    return (info, ptrs_arr, nptrs_arr);
+    W_ ptrArray;
+
+    // Follow the pointers
+    ("ptr" ptrArray) = foreign "C" heap_view_closurePtrs(MyCapability() "ptr", clos "ptr");
+
+    return (info, dat_arr, ptrArray);
 }
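
Note: unpackClosure# now hands back the info pointer, one flat ByteArray# holding the closure's raw words, and an Array# of the pointers found by heap_view_closurePtrs. The unboxed-tuple type sketched here is an assumption based on the new return statement:

    {-# LANGUAGE MagicHash, UnboxedTuples #-}
    import GHC.Exts

    -- Raw size of a closure's data, in machine words (assuming 8-byte words).
    closureWords :: a -> Int
    closureWords x = case unpackClosure# x of
      (# _info, dat, _ptrs #) -> I# (sizeofByteArray# dat `quotInt#` 8#)
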
@@ -2118,8 +2078,6 @@ stg_waitWritezh ( W_ fd )
 #endif
 }

-
-STRING(stg_delayzh_malloc_str, "stg_delayzh")
 stg_delayzh ( W_ us_delay )
 {
 #if defined(mingw32_HOST_OS)
@@ -2140,7 +2098,7 @@ stg_delayzh ( W_ us_delay )

     /* could probably allocate this on the heap instead */
     ("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult,
-                                        stg_delayzh_malloc_str);
+                                        "stg_delayzh");
     (reqID) = ccall addDelayRequest(us_delay);
     StgAsyncIOResult_reqID(ares)   = reqID;
     StgAsyncIOResult_len(ares)     = 0;
@@ -2185,7 +2143,6 @@ while:

 #if defined(mingw32_HOST_OS)
-STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh")
 stg_asyncReadzh ( W_ fd, W_ is_sock, W_ len, W_ buf )
 {
     W_ ares;
@@ -2200,7 +2157,7 @@ stg_asyncReadzh ( W_ fd, W_ is_sock, W_ len, W_ buf )

     /* could probably allocate this on the heap instead */
     ("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult,
-                                        stg_asyncReadzh_malloc_str);
+                                        "stg_asyncReadzh");
     (reqID) = ccall addIORequest(fd, 0/*FALSE*/,is_sock,len,buf "ptr");
     StgAsyncIOResult_reqID(ares)   = reqID;
     StgAsyncIOResult_len(ares)     = 0;
@@ -2211,7 +2168,6 @@ stg_asyncReadzh ( W_ fd, W_ is_sock, W_ len, W_ buf )
 #endif
 }

-STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh")
 stg_asyncWritezh ( W_ fd, W_ is_sock, W_ len, W_ buf )
 {
     W_ ares;
@@ -2225,7 +2181,7 @@ stg_asyncWritezh ( W_ fd, W_ is_sock, W_ len, W_ buf )
     StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;

     ("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult,
-                                        stg_asyncWritezh_malloc_str);
+                                        "stg_asyncWritezh");
     (reqID) = ccall addIORequest(fd, 1/*TRUE*/,is_sock,len,buf "ptr");

     StgAsyncIOResult_reqID(ares)   = reqID;
@@ -2237,7 +2193,6 @@ stg_asyncWritezh ( W_ fd, W_ is_sock, W_ len, W_ buf )
 #endif
 }

-STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh")
 stg_asyncDoProczh ( W_ proc, W_ param )
 {
     W_ ares;
@@ -2252,7 +2207,7 @@ stg_asyncDoProczh ( W_ proc, W_ param )

     /* could probably allocate this on the heap instead */
     ("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult,
-                                        stg_asyncDoProczh_malloc_str);
+                                        "stg_asyncDoProczh");
     (reqID) = ccall addDoProcRequest(proc "ptr",param "ptr");
     StgAsyncIOResult_reqID(ares)   = reqID;
     StgAsyncIOResult_len(ares)     = 0;
@@ -2358,7 +2313,7 @@ stg_noDuplicatezh /* no arg list: explicit stack layout */

 stg_getApStackValzh ( P_ ap_stack, W_ offset )
 {
-    if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) {
+    if (%INFO_PTR(UNTAG(ap_stack)) == stg_AP_STACK_info) {
         return (1,StgAP_STACK_payload(ap_stack,offset));
     } else {
         return (0,ap_stack);
@@ -2450,6 +2405,14 @@ stg_traceEventzh ( W_ msg )
     return ();
 }

+stg_traceBinaryEventzh ( W_ msg, W_ len )
+{
+#if defined(TRACING) || defined(DEBUG)
+    ccall traceUserBinaryMsg(MyCapability() "ptr", msg "ptr", len);
+#endif
+    return ();
+}
+
 // Same code as stg_traceEventzh above but a different kind of event
 // Before changing this code, read the comments in the impl above
 stg_traceMarkerzh ( W_ msg )
@@ -2475,3 +2438,23 @@ stg_traceMarkerzh ( W_ msg )

     return ();
 }
+
+stg_getThreadAllocationCounterzh ()
+{
+    // Account for the allocation in the current block
+    W_ offset;
+    offset = Hp - bdescr_start(CurrentNursery);
+    return (StgTSO_alloc_limit(CurrentTSO) - TO_I64(offset));
+}
+
+stg_setThreadAllocationCounterzh ( I64 counter )
+{
+    // Allocation in the current block will be subtracted by
+    // getThreadAllocationCounter#, so we have to offset any existing
+    // allocation here. See also openNursery/closeNursery in
+    // compiler/codeGen/StgCmmForeign.hs.
+    W_ offset;
+    offset = Hp - bdescr_start(CurrentNursery);
+    StgTSO_alloc_limit(CurrentTSO) = counter + TO_I64(offset);
+    return ();
+}
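
Note: these two primops back GHC.Conc's getAllocationCounter/setAllocationCounter; the counter counts down as the thread allocates. A small sketch measuring an action's allocation:

    import Data.Int (Int64)
    import GHC.Conc (getAllocationCounter, setAllocationCounter)

    allocatedBy :: IO a -> IO (a, Int64)
    allocatedBy act = do
      setAllocationCounter maxBound
      r <- act
      n <- getAllocationCounter
      pure (r, maxBound - n)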