commit     aad1f803471fff1f187b0a0784dc38a867379847
author     Ben Gamari <ben@smart-cactus.org>   2019-12-01 14:31:53 -0500
committer  Ben Gamari <ben@smart-cactus.org>   2020-10-30 00:41:14 -0400
tree       ad5bdae47bd9d8d6b1bf7e015736110989814326 /rts
parent     3d0f033c5109ae0131b81e6b9aaf2a04ebeba1f4

rts/GC: Use atomics
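
This patch replaces bare loads and stores of GC-shared state (and most
write_barrier() calls) with explicit memory-ordering operations:
RELAXED_LOAD/RELAXED_STORE, ACQUIRE_LOAD/RELEASE_STORE,
SEQ_CST_LOAD/SEQ_CST_STORE and NONATOMIC_ADD, plus direct
__atomic_fetch_or/__atomic_fetch_add calls for read-modify-write updates of
flag words and counters. As a reading aid, here is a sketch of how such
macros plausibly map onto the GCC/Clang __atomic builtins that the patch
also calls directly. These definitions are assumptions for illustration:
the authoritative ones live in GHC's SMP.h header, not in this diff.

    /* Assumed stand-ins for the ordering macros used throughout this diff.
     * GHC's real definitions live in its SMP.h header; this sketch maps
     * them onto the GCC/Clang __atomic builtins. */
    #define RELAXED_LOAD(ptr)       __atomic_load_n(ptr, __ATOMIC_RELAXED)
    #define RELAXED_STORE(ptr,val)  __atomic_store_n(ptr, val, __ATOMIC_RELAXED)
    #define ACQUIRE_LOAD(ptr)       __atomic_load_n(ptr, __ATOMIC_ACQUIRE)
    #define RELEASE_STORE(ptr,val)  __atomic_store_n(ptr, val, __ATOMIC_RELEASE)
    #define SEQ_CST_LOAD(ptr)       __atomic_load_n(ptr, __ATOMIC_SEQ_CST)
    #define SEQ_CST_STORE(ptr,val)  __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST)
    /* Single-writer counter bump: no atomic RMW needed, but the load and
     * store must not tear, hence relaxed atomics rather than a plain ++. */
    #define NONATOMIC_ADD(ptr,val)  RELAXED_STORE(ptr, RELAXED_LOAD(ptr) + (val))
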
Diffstat (limited to 'rts')

 rts/Capability.h  |   6
 rts/Schedule.c    |   4
 rts/sm/Evac.c     | 139
 rts/sm/GC.c       |  73
 rts/sm/GCAux.c    |   2
 rts/sm/GCUtils.c  |  13
 rts/sm/GCUtils.h  |   4
 rts/sm/MarkWeak.c |   2
 rts/sm/Scav.c     | 111
 rts/sm/Storage.h  |   7

 10 files changed, 189 insertions(+), 172 deletions(-)
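
Most of the Evac.c changes below instantiate one pattern: a GC thread copies
a closure into to-space, then publishes it by overwriting the source
closure's info pointer with a forwarding pointer via a release store; any
thread that later reads the info pointer with an acquire load and finds a
forwarding pointer is then guaranteed to also see the fully copied payload.
The following self-contained toy model shows that pairing under stated
assumptions — all types and helpers here are illustrative inventions
mirroring MK_FORWARDING_PTR/IS_FORWARDING_PTR/UN_FORWARDING_PTR, not the
RTS's real definitions:

    #include <stdint.h>
    #include <stdio.h>

    #define RELEASE_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_RELEASE)
    #define ACQUIRE_LOAD(ptr)      __atomic_load_n(ptr, __ATOMIC_ACQUIRE)

    /* Toy closure: the low bit of the info word marks "forwarded". */
    typedef struct Closure {
        uintptr_t info;                 /* info pointer or forwarding pointer */
        uintptr_t payload[1];
    } Closure;

    #define MK_FWD(to) ((uintptr_t)(to) | 1)
    #define IS_FWD(w)  ((w) & 1)
    #define UN_FWD(w)  ((Closure *)((w) & ~(uintptr_t)1))

    /* Writer: copy the payload first, then publish with a release store,
     * so a reader who observes the forwarding pointer also sees the copy. */
    static void evacuate_one(Closure *src, Closure *to, uintptr_t info)
    {
        to->info = info;
        to->payload[0] = src->payload[0];
        RELEASE_STORE(&src->info, MK_FWD(to));
    }

    /* Reader: the acquire load pairs with the release store above. */
    static Closure *follow(Closure *q)
    {
        uintptr_t w = ACQUIRE_LOAD(&q->info);
        return IS_FWD(w) ? UN_FWD(w) : q;
    }

    int main(void)
    {
        Closure to = {0};
        Closure src = { .info = 42, .payload = {7} };
        evacuate_one(&src, &to, 42);
        printf("forwarded payload: %lu\n",
               (unsigned long)follow(&src)->payload[0]);
        return 0;
    }

Single-threaded this is trivial; the point is that under concurrency the
release store forbids the payload copy from being reordered after the
publication of the forwarding pointer — which is exactly why the diff can
drop the explicit write_barrier() calls in copy_tag_nolock and copy.
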
diff --git a/rts/Capability.h b/rts/Capability.h
index 87ec53801a..c79786883c 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -419,14 +419,16 @@ recordMutableCap (const StgClosure *p, Capability *cap, uint32_t gen)
     // ASSERT(cap->running_task == myTask());
     // NO: assertion is violated by performPendingThrowTos()
     bd = cap->mut_lists[gen];
-    if (bd->free >= bd->start + BLOCK_SIZE_W) {
+    if (RELAXED_LOAD(&bd->free) >= bd->start + BLOCK_SIZE_W) {
         bdescr *new_bd;
         new_bd = allocBlockOnNode_lock(cap->node);
         new_bd->link = bd;
+        new_bd->free = new_bd->start;
         bd = new_bd;
         cap->mut_lists[gen] = bd;
     }
-    *bd->free++ = (StgWord)p;
+    RELAXED_STORE(bd->free, (StgWord) p);
+    NONATOMIC_ADD(&bd->free, 1);
 }

 EXTERN_INLINE void
diff --git a/rts/Schedule.c b/rts/Schedule.c
index 41d0dba953..89b1574d01 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -433,7 +433,7 @@ run_thread:
     cap->interrupt = 0;
     cap->in_haskell = true;
-    cap->idle = 0;
+    RELAXED_STORE(&cap->idle, false);

     dirty_TSO(cap,t);
     dirty_STACK(cap,t->stackobj);
@@ -1780,7 +1780,7 @@ scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS,
     debugTrace(DEBUG_sched, "%d idle caps", n_idle_caps);

     for (i=0; i < n_capabilities; i++) {
-        capabilities[i]->idle++;
+        NONATOMIC_ADD(&capabilities[i]->idle, 1);
     }

     // For all capabilities participating in this GC, wait until
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 0ece06016a..b324a59179 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -171,7 +171,11 @@ copy_tag(StgClosure **p, const StgInfoTable *info,
 #endif
         return evacuate(p); // does the failed_to_evac stuff
     } else {
-        *p = TAG_CLOSURE(tag,(StgClosure*)to);
+        // This doesn't need to have RELEASE ordering since we are guaranteed
+        // to scavenge the to-space object on the current core therefore
+        // no-one else will follow this pointer (FIXME: Is this true in
+        // light of the selector optimization?).
+        RELEASE_STORE(p, TAG_CLOSURE(tag,(StgClosure*)to));
     }
 }
 #else
@@ -206,9 +210,9 @@ copy_tag_nolock(StgClosure **p, const StgInfoTable *info,

     // if somebody else reads the forwarding pointer, we better make
     // sure there's a closure at the end of it.
-    write_barrier();
-    *p = TAG_CLOSURE(tag,(StgClosure*)to);
-    src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to);
+    RELEASE_STORE(p, TAG_CLOSURE(tag,(StgClosure*)to));
+    RELEASE_STORE(&src->header.info, \
+                  (const StgInfoTable *)MK_FORWARDING_PTR(to));

 //  if (to+size+2 < bd->start + BLOCK_SIZE_W) {
 //      __builtin_prefetch(to + size + 2, 1);
@@ -245,7 +249,7 @@ spin:
             goto spin;
     }
     if (IS_FORWARDING_PTR(info)) {
-        src->header.info = (const StgInfoTable *)info;
+        RELEASE_STORE(&src->header.info, (const StgInfoTable *)info);
         evacuate(p); // does the failed_to_evac stuff
         return false;
     }
@@ -261,9 +265,8 @@ spin:
         to[i] = from[i];
     }

-    write_barrier();
-    *p = (StgClosure *)to;
-    src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to);
+    RELEASE_STORE(p, (StgClosure *) to);
+    RELEASE_STORE(&src->header.info, (const StgInfoTable*)MK_FORWARDING_PTR(to));

 #if defined(PROFILING)
     // We store the size of the just evacuated object in the LDV word so that
@@ -306,12 +309,12 @@ evacuate_large(StgPtr p)
   gen_workspace *ws;

   bd = Bdescr(p);
-  gen = bd->gen;
-  gen_no = bd->gen_no;
+  gen = RELAXED_LOAD(&bd->gen);
+  gen_no = RELAXED_LOAD(&bd->gen_no);
   ACQUIRE_SPIN_LOCK(&gen->sync);

   // already evacuated?
-  if (bd->flags & BF_EVACUATED) {
+  if (RELAXED_LOAD(&bd->flags) & BF_EVACUATED) {
     /* Don't forget to set the gct->failed_to_evac flag if we didn't get
      * the desired destination (see comments in evacuate()).
      */
@@ -344,9 +347,9 @@ evacuate_large(StgPtr p)
   ws = &gct->gens[new_gen_no];
   new_gen = &generations[new_gen_no];

-  bd->flags |= BF_EVACUATED;
+  __atomic_fetch_or(&bd->flags, BF_EVACUATED, __ATOMIC_ACQ_REL);
   if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving && new_gen == oldest_gen)) {
-      bd->flags |= BF_NONMOVING;
+      __atomic_fetch_or(&bd->flags, BF_NONMOVING, __ATOMIC_ACQ_REL);
   }
   initBdescr(bd, new_gen, new_gen->to);

@@ -354,7 +357,7 @@ evacuate_large(StgPtr p)
   // these objects, because they aren't allowed to contain any outgoing
   // pointers.  For these blocks, we skip the scavenge stage and put
   // them straight on the scavenged_large_objects list.
-  if (bd->flags & BF_PINNED) {
+  if (RELAXED_LOAD(&bd->flags) & BF_PINNED) {
       ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS);

       if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); }
@@ -389,7 +392,7 @@ evacuate_static_object (StgClosure **link_field, StgClosure *q)
         return;
     }

-    StgWord link = (StgWord)*link_field;
+    StgWord link = RELAXED_LOAD((StgWord*) link_field);

     // See Note [STATIC_LINK fields] for how the link field bits work
     if (((link & STATIC_BITS) | prev_static_flag) != 3) {
@@ -435,7 +438,7 @@ evacuate_compact (StgPtr p)
     bd = Bdescr((StgPtr)str);
     gen_no = bd->gen_no;

-    if (bd->flags & BF_NONMOVING) {
+    if (RELAXED_LOAD(&bd->flags) & BF_NONMOVING) {
         // We may have evacuated the block to the nonmoving generation. If so
         // we need to make sure it is added to the mark queue since the only
         // reference to it may be from the moving heap.
@@ -500,7 +503,7 @@ evacuate_compact (StgPtr p)
     // in the GC, and that should never see blocks other than the first)
     bd->flags |= BF_EVACUATED;
     if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving && new_gen == oldest_gen)) {
-        bd->flags |= BF_NONMOVING;
+        __atomic_fetch_or(&bd->flags, BF_NONMOVING, __ATOMIC_RELAXED);
     }
     initBdescr(bd, new_gen, new_gen->to);

@@ -581,7 +584,7 @@ evacuate(StgClosure **p)
   const StgInfoTable *info;
   StgWord tag;

-  q = *p;
+  q = RELAXED_LOAD(p);

 loop:
   /* The tag and the pointer are split, to be merged after evacing */
@@ -638,10 +641,11 @@ loop:

   bd = Bdescr((P_)q);

-  if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED | BF_COMPACT | BF_NONMOVING)) != 0) {
+  uint16_t flags = RELAXED_LOAD(&bd->flags);
+  if ((flags & (BF_LARGE | BF_MARKED | BF_EVACUATED | BF_COMPACT | BF_NONMOVING)) != 0) {
      // Pointer to non-moving heap. Non-moving heap is collected using
      // mark-sweep so this object should be marked and then retained in sweep.
-      if (RTS_UNLIKELY(bd->flags & BF_NONMOVING)) {
+      if (RTS_UNLIKELY(RELAXED_LOAD(&bd->flags) & BF_NONMOVING)) {
          // NOTE: large objects in nonmoving heap are also marked with
          // BF_NONMOVING. Those are moved to scavenged_large_objects list in
          // mark phase.
@@ -656,11 +660,11 @@ loop:
      // happen often, but allowing it makes certain things a bit
      // easier; e.g. scavenging an object is idempotent, so it's OK to
      // have an object on the mutable list multiple times.
-      if (bd->flags & BF_EVACUATED) {
+      if (flags & BF_EVACUATED) {
          // We aren't copying this object, so we have to check
          // whether it is already in the target generation.  (this is
          // the write barrier).
-          if (bd->gen_no < gct->evac_gen_no) {
+          if (RELAXED_LOAD(&bd->gen_no) < gct->evac_gen_no) {
              gct->failed_to_evac = true;
              TICK_GC_FAILED_PROMOTION();
          }
@@ -671,20 +675,20 @@ loop:
      // right thing for objects that are half way in the middle of the first
      // block of a compact (and would be treated as large objects even though
      // they are not)
-      if (bd->flags & BF_COMPACT) {
+      if (flags & BF_COMPACT) {
          evacuate_compact((P_)q);
          return;
      }

      /* evacuate large objects by re-linking them onto a different list.
       */
-      if (bd->flags & BF_LARGE) {
+      if (flags & BF_LARGE) {
          evacuate_large((P_)q);

          // We may have evacuated the block to the nonmoving generation. If so
          // we need to make sure it is added to the mark queue since the only
          // reference to it may be from the moving heap.
-          if (major_gc && bd->flags & BF_NONMOVING && !deadlock_detect_gc) {
+          if (major_gc && flags & BF_NONMOVING && !deadlock_detect_gc) {
              markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
          }
          return;
@@ -702,7 +706,7 @@ loop:

   gen_no = bd->dest_no;

-  info = q->header.info;
+  info = ACQUIRE_LOAD(&q->header.info);
   if (IS_FORWARDING_PTR(info))
   {
     /* Already evacuated, just return the forwarding address.
@@ -722,9 +726,12 @@ loop:
      * check if gen is too low.
      */
       StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info);
-      *p = TAG_CLOSURE(tag,e);
+      RELAXED_STORE(p, TAG_CLOSURE(tag,e));
       if (gen_no < gct->evac_gen_no) {  // optimisation
-          if (Bdescr((P_)e)->gen_no < gct->evac_gen_no) {
+          // The ACQUIRE here is necessary to ensure that we see gen_no if the
+          // evacuated object lives in a block newly-allocated by a GC thread on
+          // another core.
+          if (ACQUIRE_LOAD(&Bdescr((P_)e)->gen_no) < gct->evac_gen_no) {
              gct->failed_to_evac = true;
              TICK_GC_FAILED_PROMOTION();
          }
@@ -752,15 +759,17 @@ loop:

      if (info == Czh_con_info &&
          // unsigned, so always true:  (StgChar)w >= MIN_CHARLIKE &&
          (StgChar)w <= MAX_CHARLIKE) {
-          *p = TAG_CLOSURE(tag,
-                           (StgClosure *)CHARLIKE_CLOSURE((StgChar)w)
-                          );
+          RELAXED_STORE(p, \
+                        TAG_CLOSURE(tag, \
+                                    (StgClosure *)CHARLIKE_CLOSURE((StgChar)w)
+                                   ));
      }
      else if (info == Izh_con_info &&
          (StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) {
-          *p = TAG_CLOSURE(tag,
-                           (StgClosure *)INTLIKE_CLOSURE((StgInt)w)
-                          );
+          RELAXED_STORE(p, \
+                        TAG_CLOSURE(tag, \
+                                    (StgClosure *)INTLIKE_CLOSURE((StgInt)w)
+                                   ));
      }
      else {
          copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen_no,tag);
@@ -814,10 +823,10 @@ loop:
          const StgInfoTable *i;
          r = ((StgInd*)q)->indirectee;
          if (GET_CLOSURE_TAG(r) == 0) {
-              i = r->header.info;
+              i = ACQUIRE_LOAD(&r->header.info);
              if (IS_FORWARDING_PTR(i)) {
                  r = (StgClosure *)UN_FORWARDING_PTR(i);
-                  i = r->header.info;
+                  i = ACQUIRE_LOAD(&r->header.info);
              }
              if (i == &stg_TSO_info
                  || i == &stg_WHITEHOLE_info
@@ -842,7 +851,7 @@ loop:
              ASSERT(i != &stg_IND_info);
          }
          q = r;
-          *p = r;
+          RELEASE_STORE(p, r);
          goto loop;
      }

@@ -868,8 +877,8 @@ loop:

   case IND:
     // follow chains of indirections, don't evacuate them
-    q = ((StgInd*)q)->indirectee;
-    *p = q;
+    q = RELAXED_LOAD(&((StgInd*)q)->indirectee);
+    RELAXED_STORE(p, q);
     goto loop;

   case RET_BCO:
@@ -983,11 +992,12 @@ evacuate_BLACKHOLE(StgClosure **p)
     ASSERT(GET_CLOSURE_TAG(q) == 0);

     bd = Bdescr((P_)q);
+    const uint16_t flags = RELAXED_LOAD(&bd->flags);

     // blackholes can't be in a compact
-    ASSERT((bd->flags & BF_COMPACT) == 0);
+    ASSERT((flags & BF_COMPACT) == 0);

-    if (RTS_UNLIKELY(bd->flags & BF_NONMOVING)) {
+    if (RTS_UNLIKELY(RELAXED_LOAD(&bd->flags) & BF_NONMOVING)) {
        if (major_gc && !deadlock_detect_gc)
            markQueuePushClosureGC(&gct->cap->upd_rem_set.queue, q);
        return;
@@ -996,18 +1006,18 @@ evacuate_BLACKHOLE(StgClosure **p)
     // blackholes *can* be in a large object: when raiseAsync() creates an
     // AP_STACK the payload might be large enough to create a large object.
     // See #14497.
-    if (bd->flags & BF_LARGE) {
+    if (flags & BF_LARGE) {
         evacuate_large((P_)q);
         return;
     }
-    if (bd->flags & BF_EVACUATED) {
+    if (flags & BF_EVACUATED) {
         if (bd->gen_no < gct->evac_gen_no) {
             gct->failed_to_evac = true;
             TICK_GC_FAILED_PROMOTION();
         }
         return;
     }
-    if (bd->flags & BF_MARKED) {
+    if (flags & BF_MARKED) {
         if (!is_marked((P_)q,bd)) {
             mark((P_)q,bd);
             push_mark_stack((P_)q);
@@ -1015,13 +1025,13 @@ evacuate_BLACKHOLE(StgClosure **p)
         return;
     }

     gen_no = bd->dest_no;
-    info = q->header.info;
+    info = ACQUIRE_LOAD(&q->header.info);
     if (IS_FORWARDING_PTR(info))
     {
         StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info);
         *p = e;
         if (gen_no < gct->evac_gen_no) {  // optimisation
-            if (Bdescr((P_)e)->gen_no < gct->evac_gen_no) {
+            if (ACQUIRE_LOAD(&Bdescr((P_)e)->gen_no) < gct->evac_gen_no) {
                 gct->failed_to_evac = true;
                 TICK_GC_FAILED_PROMOTION();
             }
@@ -1090,13 +1100,11 @@ unchain_thunk_selectors(StgSelector *p, StgClosure *val)
             // XXX we do not have BLACKHOLEs any more; replace with
             // a THUNK_SELECTOR again.  This will go into a loop if it is
             // entered, and should result in a NonTermination exception.
-            ((StgThunk *)p)->payload[0] = val;
-            write_barrier();
-            SET_INFO((StgClosure *)p, &stg_sel_0_upd_info);
+            RELAXED_STORE(&((StgThunk *)p)->payload[0], val);
+            SET_INFO_RELEASE((StgClosure *)p, &stg_sel_0_upd_info);
         } else {
-            ((StgInd *)p)->indirectee = val;
-            write_barrier();
-            SET_INFO((StgClosure *)p, &stg_IND_info);
+            RELAXED_STORE(&((StgInd *)p)->indirectee, val);
+            SET_INFO_RELEASE((StgClosure *)p, &stg_IND_info);
         }

         // For the purposes of LDV profiling, we have created an
@@ -1143,7 +1151,7 @@ selector_chain:
         // save any space in any case, and updating with an indirection is
         // trickier in a non-collected gen: we would have to update the
         // mutable list.
-        if (bd->flags & (BF_EVACUATED | BF_NONMOVING)) {
+        if (RELAXED_LOAD(&bd->flags) & (BF_EVACUATED | BF_NONMOVING)) {
             unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);
             *q = (StgClosure *)p;
             // shortcut, behave as for:  if (evac) evacuate(q);
@@ -1198,8 +1206,7 @@ selector_chain:
         //     need the write-barrier stuff.
         // - undo the chain we've built to point to p.
         SET_INFO((StgClosure *)p, (const StgInfoTable *)info_ptr);
-        write_barrier();
-        *q = (StgClosure *)p;
+        RELEASE_STORE(q, (StgClosure *) p);
         if (evac) evacuate(q);
         unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);
         return;
@@ -1225,7 +1232,7 @@ selector_loop:
     // from-space during marking, for example.  We rely on the property
     // that evacuate() doesn't mind if it gets passed a to-space pointer.

-    info = (StgInfoTable*)selectee->header.info;
+    info = RELAXED_LOAD((StgInfoTable**) &selectee->header.info);
     if (IS_FORWARDING_PTR(info)) {
         // We don't follow pointers into to-space; the constructor
@@ -1252,7 +1259,7 @@ selector_loop:
                           info->layout.payload.nptrs));

         // Select the right field from the constructor
-        StgClosure *val = selectee->payload[field];
+        StgClosure *val = RELAXED_LOAD(&selectee->payload[field]);

 #if defined(PROFILING)
         // For the purposes of LDV profiling, we have destroyed
@@ -1278,19 +1285,19 @@ selector_loop:
             // evaluating until we find the real value, and then
             // update the whole chain to point to the value.
         val_loop:
-            info_ptr = (StgWord)UNTAG_CLOSURE(val)->header.info;
+            info_ptr = ACQUIRE_LOAD((StgWord*) &UNTAG_CLOSURE(val)->header.info);
             if (!IS_FORWARDING_PTR(info_ptr))
             {
                 info = INFO_PTR_TO_STRUCT((StgInfoTable *)info_ptr);
                 switch (info->type) {
                 case IND:
                 case IND_STATIC:
-                    val = ((StgInd *)val)->indirectee;
+                    val = RELAXED_LOAD(&((StgInd *)val)->indirectee);
                     goto val_loop;
                 case THUNK_SELECTOR:
                     // Use payload to make a list of thunk selectors, to be
                     // used in unchain_thunk_selectors
-                    ((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector;
+                    RELAXED_STORE(&((StgClosure*)p)->payload[0], (StgClosure *)prev_thunk_selector);
                     prev_thunk_selector = p;
                     p = (StgSelector*)val;
                     goto selector_chain;
@@ -1298,7 +1305,7 @@ selector_loop:
                     break;
                 }
             }
-            ((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector;
+            RELAXED_STORE(&((StgClosure*)p)->payload[0], (StgClosure *)prev_thunk_selector);
             prev_thunk_selector = p;

             *q = val;
@@ -1320,22 +1327,22 @@ selector_loop:
       case IND:
       case IND_STATIC:
           // Again, we might need to untag a constructor.
-          selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee );
+          selectee = UNTAG_CLOSURE( RELAXED_LOAD(&((StgInd *)selectee)->indirectee) );
           goto selector_loop;

       case BLACKHOLE:
       {
           StgClosure *r;
           const StgInfoTable *i;
-          r = ((StgInd*)selectee)->indirectee;
+          r = ACQUIRE_LOAD(&((StgInd*)selectee)->indirectee);

           // establish whether this BH has been updated, and is now an
           // indirection, as in evacuate().
           if (GET_CLOSURE_TAG(r) == 0) {
-              i = r->header.info;
+              i = ACQUIRE_LOAD(&r->header.info);
              if (IS_FORWARDING_PTR(i)) {
                  r = (StgClosure *)UN_FORWARDING_PTR(i);
-                  i = r->header.info;
+                  i = RELAXED_LOAD(&r->header.info);
              }
              if (i == &stg_TSO_info
                  || i == &stg_WHITEHOLE_info
@@ -1346,7 +1353,7 @@ selector_loop:
              ASSERT(i != &stg_IND_info);
          }

-          selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee );
+          selectee = UNTAG_CLOSURE( RELAXED_LOAD(&((StgInd *)selectee)->indirectee) );
          goto selector_loop;
      }

diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 3d4a151cd5..5e1975076e 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -537,37 +537,37 @@ GarbageCollect (uint32_t collect_gen,
         const gc_thread* thread;

         for (i=0; i < n_gc_threads; i++) {
-            copied += gc_threads[i]->copied;
+            copied += RELAXED_LOAD(&gc_threads[i]->copied);
         }
         for (i=0; i < n_gc_threads; i++) {
             thread = gc_threads[i];
             if (n_gc_threads > 1) {
                 debugTrace(DEBUG_gc,"thread %d:", i);
                 debugTrace(DEBUG_gc,"   copied           %ld",
-                           thread->copied * sizeof(W_));
+                           RELAXED_LOAD(&thread->copied) * sizeof(W_));
                 debugTrace(DEBUG_gc,"   scanned          %ld",
-                           thread->scanned * sizeof(W_));
+                           RELAXED_LOAD(&thread->scanned) * sizeof(W_));
                 debugTrace(DEBUG_gc,"   any_work         %ld",
-                           thread->any_work);
+                           RELAXED_LOAD(&thread->any_work));
                 debugTrace(DEBUG_gc,"   no_work          %ld",
-                           thread->no_work);
+                           RELAXED_LOAD(&thread->no_work));
                 debugTrace(DEBUG_gc,"   scav_find_work %ld",
-                           thread->scav_find_work);
+                           RELAXED_LOAD(&thread->scav_find_work));

 #if defined(THREADED_RTS) && defined(PROF_SPIN)
-            gc_spin_spin += thread->gc_spin.spin;
-            gc_spin_yield += thread->gc_spin.yield;
-            mut_spin_spin += thread->mut_spin.spin;
-            mut_spin_yield += thread->mut_spin.yield;
+            gc_spin_spin += RELAXED_LOAD(&thread->gc_spin.spin);
+            gc_spin_yield += RELAXED_LOAD(&thread->gc_spin.yield);
+            mut_spin_spin += RELAXED_LOAD(&thread->mut_spin.spin);
+            mut_spin_yield += RELAXED_LOAD(&thread->mut_spin.yield);
 #endif

-            any_work += thread->any_work;
-            no_work += thread->no_work;
-            scav_find_work += thread->scav_find_work;
+            any_work += RELAXED_LOAD(&thread->any_work);
+            no_work += RELAXED_LOAD(&thread->no_work);
+            scav_find_work += RELAXED_LOAD(&thread->scav_find_work);

-            par_max_copied = stg_max(gc_threads[i]->copied, par_max_copied);
+            par_max_copied = stg_max(RELAXED_LOAD(&thread->copied), par_max_copied);
             par_balanced_copied_acc +=
-                stg_min(n_gc_threads * gc_threads[i]->copied, copied);
+                stg_min(n_gc_threads * RELAXED_LOAD(&thread->copied), copied);
         }
     }
     if (n_gc_threads > 1) {
@@ -1130,7 +1130,7 @@ inc_running (void)
 static StgWord
 dec_running (void)
 {
-    ASSERT(gc_running_threads != 0);
+    ASSERT(RELAXED_LOAD(&gc_running_threads) != 0);
     return atomic_dec(&gc_running_threads);
 }

@@ -1140,7 +1140,7 @@ any_work (void)
     int g;
     gen_workspace *ws;

-    gct->any_work++;
+    NONATOMIC_ADD(&gct->any_work, 1);

     write_barrier();

@@ -1173,7 +1173,7 @@ any_work (void)
     }
 #endif

-    gct->no_work++;
+    __atomic_fetch_add(&gct->no_work, 1, __ATOMIC_RELAXED);
 #if defined(THREADED_RTS)
     yieldThread();
 #endif

@@ -1214,7 +1214,7 @@ loop:
     debugTrace(DEBUG_gc, "%d GC threads still running", r);

-    while (gc_running_threads != 0) {
+    while (SEQ_CST_LOAD(&gc_running_threads) != 0) {
         // usleep(1);
         if (any_work()) {
             inc_running();
@@ -1251,7 +1251,7 @@ gcWorkerThread (Capability *cap)
     // measurements more accurate on Linux, perhaps because it syncs
     // the CPU time across the multiple cores. Without this, CPU time
     // is heavily skewed towards GC rather than MUT.
-    gct->wakeup = GC_THREAD_STANDING_BY;
+    SEQ_CST_STORE(&gct->wakeup, GC_THREAD_STANDING_BY);
     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
     ACQUIRE_SPIN_LOCK(&gct->gc_spin);

@@ -1278,7 +1278,7 @@ gcWorkerThread (Capability *cap)
     // Wait until we're told to continue
     RELEASE_SPIN_LOCK(&gct->gc_spin);
-    gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
+    SEQ_CST_STORE(&gct->wakeup, GC_THREAD_WAITING_TO_CONTINUE);
     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
                gct->thread_index);
     stat_endGCWorker (cap, gct);
@@ -1306,7 +1306,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
     while(retry) {
         for (i=0; i < n_threads; i++) {
             if (i == me || idle_cap[i]) continue;
-            if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
+            if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {
                 prodCapability(capabilities[i], cap->running_task);
             }
         }
@@ -1316,7 +1316,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
                 if (i == me || idle_cap[i]) continue;
                 write_barrier();
                 interruptCapability(capabilities[i]);
-                if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
+                if (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY) {
                     retry = true;
                 }
             }
@@ -1373,10 +1373,10 @@ wakeup_gc_threads (uint32_t me USED_IF_THREADS,
         if (i == me || idle_cap[i]) continue;
         inc_running();
         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
-        if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY)
+        if (RELAXED_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_STANDING_BY)
             barf("wakeup_gc_threads");

-        gc_threads[i]->wakeup = GC_THREAD_RUNNING;
+        RELAXED_STORE(&gc_threads[i]->wakeup, GC_THREAD_RUNNING);
         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
     }
@@ -1397,9 +1397,8 @@ shutdown_gc_threads (uint32_t me USED_IF_THREADS,
     for (i=0; i < n_gc_threads; i++) {
         if (i == me || idle_cap[i]) continue;

-        while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) {
+        while (SEQ_CST_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_WAITING_TO_CONTINUE) {
             busy_wait_nop();
-            write_barrier();
         }
     }
 #endif
@@ -1414,10 +1413,10 @@ releaseGCThreads (Capability *cap USED_IF_THREADS, bool idle_cap[])
     uint32_t i;
     for (i=0; i < n_threads; i++) {
         if (i == me || idle_cap[i]) continue;
-        if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
+        if (RELAXED_LOAD(&gc_threads[i]->wakeup) != GC_THREAD_WAITING_TO_CONTINUE)
             barf("releaseGCThreads");

-        gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
+        RELAXED_STORE(&gc_threads[i]->wakeup, GC_THREAD_INACTIVE);
         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
     }
@@ -1433,7 +1432,7 @@ static void
 stash_mut_list (Capability *cap, uint32_t gen_no)
 {
     cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
-    cap->mut_lists[gen_no] = allocBlockOnNode_sync(cap->node);
+    RELEASE_STORE(&cap->mut_lists[gen_no], allocBlockOnNode_sync(cap->node));
 }

 /* ----------------------------------------------------------------------------
@@ -1459,9 +1458,11 @@ prepare_collected_gen (generation *gen)
         // mutable list always has at least one block; this means we can avoid
         // a check for NULL in recordMutable().
         for (i = 0; i < n_capabilities; i++) {
-            freeChain(capabilities[i]->mut_lists[g]);
-            capabilities[i]->mut_lists[g] =
-                allocBlockOnNode(capNoToNumaNode(i));
+            bdescr *old = RELAXED_LOAD(&capabilities[i]->mut_lists[g]);
+            freeChain(old);
+
+            bdescr *new = allocBlockOnNode(capNoToNumaNode(i));
+            RELAXED_STORE(&capabilities[i]->mut_lists[g], new);
         }
     }

@@ -1675,7 +1676,7 @@ collect_pinned_object_blocks (void)
         bdescr *last = NULL;
         if (use_nonmoving && gen == oldest_gen) {
             // Mark objects as belonging to the nonmoving heap
-            for (bdescr *bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
+            for (bdescr *bd = RELAXED_LOAD(&capabilities[n]->pinned_object_blocks); bd != NULL; bd = bd->link) {
                 bd->flags |= BF_NONMOVING;
                 bd->gen = oldest_gen;
                 bd->gen_no = oldest_gen->no;
@@ -1694,8 +1695,8 @@ collect_pinned_object_blocks (void)
             if (gen->large_objects != NULL) {
                 gen->large_objects->u.back = last;
             }
-            gen->large_objects = capabilities[n]->pinned_object_blocks;
-            capabilities[n]->pinned_object_blocks = NULL;
+            g0->large_objects = RELAXED_LOAD(&capabilities[n]->pinned_object_blocks);
+            RELAXED_STORE(&capabilities[n]->pinned_object_blocks, NULL);
         }
     }
 }
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index 11080c1f22..55b4f99596 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -83,7 +83,7 @@ isAlive(StgClosure *p)
         return p;
     }

-    info = q->header.info;
+    info = RELAXED_LOAD(&q->header.info);
     if (IS_FORWARDING_PTR(info)) {
         // alive!
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 02c26ddf5e..d58fdc48ae 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -249,8 +249,8 @@ todo_block_full (uint32_t size, gen_workspace *ws)
         return p;
     }

-    gct->copied += ws->todo_free - bd->free;
-    bd->free = ws->todo_free;
+    gct->copied += ws->todo_free - RELAXED_LOAD(&bd->free);
+    RELAXED_STORE(&bd->free, ws->todo_free);

     ASSERT(bd->u.scan >= bd->start && bd->u.scan <= bd->free);

@@ -330,10 +330,11 @@ alloc_todo_block (gen_workspace *ws, uint32_t size)
             gct->free_blocks = bd->link;
         }
     }
-    // blocks in to-space get the BF_EVACUATED flag.
-    bd->flags = BF_EVACUATED;
-    bd->u.scan = bd->start;
     initBdescr(bd, ws->gen, ws->gen->to);
+    RELAXED_STORE(&bd->u.scan, RELAXED_LOAD(&bd->start));
+    // blocks in to-space get the BF_EVACUATED flag.
+    // RELEASE here to ensure that bd->gen is visible to other cores.
+    RELEASE_STORE(&bd->flags, BF_EVACUATED);
     }

     bd->link = NULL;
@@ -345,7 +346,7 @@ alloc_todo_block (gen_workspace *ws, uint32_t size)
     // See Note [big objects]

     debugTrace(DEBUG_gc, "alloc new todo block %p for gen %d",
-               bd->free, ws->gen->no);
+               RELAXED_LOAD(&bd->free), ws->gen->no);

     return ws->todo_free;
 }
diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h
index a71d6dcb92..798a795deb 100644
--- a/rts/sm/GCUtils.h
+++ b/rts/sm/GCUtils.h
@@ -67,7 +67,9 @@ recordMutableGen_GC (StgClosure *p, uint32_t gen_no)
         bd = new_bd;
         gct->mut_lists[gen_no] = bd;
     }
-    *bd->free++ = (StgWord)p;
+    *bd->free++ = (StgWord) p;
+    // N.B. we are allocating into our Capability-local mut_list, therefore
+    // we don't need an atomic increment.
 }

 #include "EndPrivate.h"
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c
index 3fd4ba6532..b8d120823c 100644
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -414,7 +414,7 @@ markWeakPtrList ( void )
         StgWeak *w, **last_w;

         last_w = &gen->weak_ptr_list;
-        for (w = gen->weak_ptr_list; w != NULL; w = w->link) {
+        for (w = gen->weak_ptr_list; w != NULL; w = RELAXED_LOAD(&w->link)) {
             // w might be WEAK, EVACUATED, or DEAD_WEAK (actually CON_STATIC) here

 #if defined(DEBUG)
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 0a4e76a819..9fe2c6006e 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -203,9 +203,9 @@ scavenge_compact(StgCompactNFData *str)
     gct->eager_promotion = saved_eager;

     if (gct->failed_to_evac) {
-        ((StgClosure *)str)->header.info = &stg_COMPACT_NFDATA_DIRTY_info;
+        RELEASE_STORE(&((StgClosure *)str)->header.info, &stg_COMPACT_NFDATA_DIRTY_info);
     } else {
-        ((StgClosure *)str)->header.info = &stg_COMPACT_NFDATA_CLEAN_info;
+        RELEASE_STORE(&((StgClosure *)str)->header.info, &stg_COMPACT_NFDATA_CLEAN_info);
     }
 }

@@ -466,9 +466,9 @@ scavenge_block (bdescr *bd)
         gct->eager_promotion = saved_eager_promotion;

         if (gct->failed_to_evac) {
-            mvar->header.info = &stg_MVAR_DIRTY_info;
+            RELEASE_STORE(&mvar->header.info, &stg_MVAR_DIRTY_info);
         } else {
-            mvar->header.info = &stg_MVAR_CLEAN_info;
+            RELEASE_STORE(&mvar->header.info, &stg_MVAR_CLEAN_info);
         }
         p += sizeofW(StgMVar);
         break;
@@ -483,9 +483,9 @@ scavenge_block (bdescr *bd)
         gct->eager_promotion = saved_eager_promotion;

         if (gct->failed_to_evac) {
-            tvar->header.info = &stg_TVAR_DIRTY_info;
+            RELEASE_STORE(&tvar->header.info, &stg_TVAR_DIRTY_info);
         } else {
-            tvar->header.info = &stg_TVAR_CLEAN_info;
+            RELEASE_STORE(&tvar->header.info, &stg_TVAR_CLEAN_info);
         }
         p += sizeofW(StgTVar);
         break;
@@ -617,9 +617,9 @@ scavenge_block (bdescr *bd)
         gct->eager_promotion = saved_eager_promotion;

         if (gct->failed_to_evac) {
-            ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_VAR_DIRTY_info);
         } else {
-            ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_VAR_CLEAN_info);
         }
         p += sizeofW(StgMutVar);
         break;
@@ -636,9 +636,9 @@ scavenge_block (bdescr *bd)
         gct->eager_promotion = saved_eager_promotion;

         if (gct->failed_to_evac) {
-            bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+            RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_DIRTY_info);
         } else {
-            bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info;
+            RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_CLEAN_info);
         }
         p += sizeofW(StgBlockingQueue);
         break;
@@ -688,9 +688,9 @@ scavenge_block (bdescr *bd)
         p = scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);

         if (gct->failed_to_evac) {
-            ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_DIRTY_info);
         } else {
-            ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_CLEAN_info);
         }

         gct->eager_promotion = saved_eager_promotion;
@@ -705,9 +705,9 @@ scavenge_block (bdescr *bd)
         p = scavenge_mut_arr_ptrs((StgMutArrPtrs*)p);

         if (gct->failed_to_evac) {
-            ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
         } else {
-            ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info);
         }
         break;
     }
@@ -730,9 +730,9 @@ scavenge_block (bdescr *bd)
         gct->eager_promotion = saved_eager_promotion;

         if (gct->failed_to_evac) {
-            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
         } else {
-            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_SMALL_MUT_ARR_PTRS_CLEAN_info);
         }

         gct->failed_to_evac = true; // always put it on the mutable list.
@@ -751,9 +751,9 @@ scavenge_block (bdescr *bd)
         }

         if (gct->failed_to_evac) {
-            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info);
         } else {
-            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info);
         }
         break;
     }
@@ -836,7 +836,7 @@ scavenge_block (bdescr *bd)

     if (p > bd->free)  {
         gct->copied += ws->todo_free - bd->free;
-        bd->free = p;
+        RELEASE_STORE(&bd->free, p);
     }

     debugTrace(DEBUG_gc, "   scavenged %ld bytes",
@@ -891,9 +891,9 @@ scavenge_mark_stack(void)
             gct->eager_promotion = saved_eager_promotion;

             if (gct->failed_to_evac) {
-                mvar->header.info = &stg_MVAR_DIRTY_info;
+                RELEASE_STORE(&mvar->header.info, &stg_MVAR_DIRTY_info);
             } else {
-                mvar->header.info = &stg_MVAR_CLEAN_info;
+                RELEASE_STORE(&mvar->header.info, &stg_MVAR_CLEAN_info);
             }
             break;
         }
@@ -907,9 +907,9 @@ scavenge_mark_stack(void)
             gct->eager_promotion = saved_eager_promotion;

             if (gct->failed_to_evac) {
-                tvar->header.info = &stg_TVAR_DIRTY_info;
+                RELEASE_STORE(&tvar->header.info, &stg_TVAR_DIRTY_info);
             } else {
-                tvar->header.info = &stg_TVAR_CLEAN_info;
+                RELEASE_STORE(&tvar->header.info, &stg_TVAR_CLEAN_info);
             }
             break;
         }
@@ -1013,9 +1013,9 @@ scavenge_mark_stack(void)
             gct->eager_promotion = saved_eager_promotion;

             if (gct->failed_to_evac) {
-                ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+                RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_VAR_DIRTY_info);
             } else {
-                ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+                RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_VAR_CLEAN_info);
             }
             break;
         }
@@ -1032,9 +1032,9 @@ scavenge_mark_stack(void)
             gct->eager_promotion = saved_eager_promotion;

             if (gct->failed_to_evac) {
-                bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+                RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_DIRTY_info);
             } else {
-                bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info;
+                RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_CLEAN_info);
             }
             break;
         }
@@ -1080,9 +1080,9 @@ scavenge_mark_stack(void)
             scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);

             if (gct->failed_to_evac) {
-                ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+                RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_DIRTY_info);
             } else {
-                ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+                RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_CLEAN_info);
             }

             gct->eager_promotion = saved_eager_promotion;
@@ -1099,9 +1099,9 @@ scavenge_mark_stack(void)
             scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);

             if (gct->failed_to_evac) {
-                ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+                RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
             } else {
-                ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+                RELEASE_STORE(&((StgClosure *) q)->header.info, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info);
             }
             break;
         }
@@ -1126,9 +1126,9 @@ scavenge_mark_stack(void)
             gct->eager_promotion = saved_eager;

             if (gct->failed_to_evac) {
-                ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
+                RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
             } else {
-                ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_CLEAN_info;
+                RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_CLEAN_info);
             }

             gct->failed_to_evac = true; // mutable anyhow.
@@ -1147,9 +1147,9 @@ scavenge_mark_stack(void)
             }

             if (gct->failed_to_evac) {
-                ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+                RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info);
             } else {
-                ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+                RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info);
             }
             break;
         }
@@ -1253,9 +1253,9 @@ scavenge_one(StgPtr p)
         gct->eager_promotion = saved_eager_promotion;

         if (gct->failed_to_evac) {
-            mvar->header.info = &stg_MVAR_DIRTY_info;
+            RELEASE_STORE(&mvar->header.info, &stg_MVAR_DIRTY_info);
         } else {
-            mvar->header.info = &stg_MVAR_CLEAN_info;
+            RELEASE_STORE(&mvar->header.info, &stg_MVAR_CLEAN_info);
         }
         break;
     }
@@ -1269,9 +1269,9 @@ scavenge_one(StgPtr p)
         gct->eager_promotion = saved_eager_promotion;

         if (gct->failed_to_evac) {
-            tvar->header.info = &stg_TVAR_DIRTY_info;
+            RELEASE_STORE(&tvar->header.info, &stg_TVAR_DIRTY_info);
         } else {
-            tvar->header.info = &stg_TVAR_CLEAN_info;
+            RELEASE_STORE(&tvar->header.info, &stg_TVAR_CLEAN_info);
         }
         break;
     }
@@ -1333,9 +1333,9 @@ scavenge_one(StgPtr p)
         gct->eager_promotion = saved_eager_promotion;

         if (gct->failed_to_evac) {
-            ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_MUT_VAR_DIRTY_info);
         } else {
-            ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_MUT_VAR_CLEAN_info);
         }
         break;
     }
@@ -1352,9 +1352,9 @@ scavenge_one(StgPtr p)
         gct->eager_promotion = saved_eager_promotion;

         if (gct->failed_to_evac) {
-            bq->header.info = &stg_BLOCKING_QUEUE_DIRTY_info;
+            RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_DIRTY_info);
         } else {
-            bq->header.info = &stg_BLOCKING_QUEUE_CLEAN_info;
+            RELEASE_STORE(&bq->header.info, &stg_BLOCKING_QUEUE_CLEAN_info);
         }
         break;
     }
@@ -1400,9 +1400,9 @@ scavenge_one(StgPtr p)
         scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);

         if (gct->failed_to_evac) {
-            ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_DIRTY_info);
         } else {
-            ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_CLEAN_info);
         }

         gct->eager_promotion = saved_eager_promotion;
@@ -1417,9 +1417,9 @@ scavenge_one(StgPtr p)
         scavenge_mut_arr_ptrs((StgMutArrPtrs *)p);

         if (gct->failed_to_evac) {
-            ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
         } else {
-            ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_FROZEN_CLEAN_info);
         }
         break;
     }
@@ -1444,9 +1444,9 @@ scavenge_one(StgPtr p)
         gct->eager_promotion = saved_eager;

         if (gct->failed_to_evac) {
-            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_DIRTY_info);
         } else {
-            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_CLEAN_info);
         }

         gct->failed_to_evac = true;
@@ -1465,9 +1465,9 @@ scavenge_one(StgPtr p)
         }

         if (gct->failed_to_evac) {
-            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info;
+            RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY_info);
         } else {
-            ((StgClosure *)q)->header.info = &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info;
+            RELEASE_STORE(&((StgClosure *)q)->header.info, &stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN_info);
         }
         break;
     }
@@ -1653,9 +1653,9 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
                 scavenge_mut_arr_ptrs_marked((StgMutArrPtrs *)p);

                 if (gct->failed_to_evac) {
-                    ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+                    RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_DIRTY_info);
                 } else {
-                    ((StgClosure *)p)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+                    RELEASE_STORE(&((StgClosure *)p)->header.info, &stg_MUT_ARR_PTRS_CLEAN_info);
                 }

                 gct->eager_promotion = saved_eager_promotion;
@@ -1753,8 +1753,9 @@ scavenge_static(void)
         /* Take this object *off* the static_objects list,
         * and put it on the scavenged_static_objects list.
         */
-        gct->static_objects = *STATIC_LINK(info,p);
-        *STATIC_LINK(info,p) = gct->scavenged_static_objects;
+        StgClosure **link = STATIC_LINK(info,p);
+        gct->static_objects = RELAXED_LOAD(link);
+        RELAXED_STORE(link, gct->scavenged_static_objects);
         gct->scavenged_static_objects = flagged_p;

         switch (info -> type) {
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index 8d90c3ba5f..48ddcf35f5 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -72,8 +72,11 @@ bool getNewNursery (Capability *cap);
 INLINE_HEADER
 bool doYouWantToGC(Capability *cap)
 {
+    // This is necessarily approximate since otherwise we would need to take
+    // SM_LOCK to safely look at n_new_large_words.
+    TSAN_ANNOTATE_BENIGN_RACE(&g0->n_new_large_words, "doYouWantToGC(n_new_large_words)");
     return ((cap->r.rCurrentNursery->link == NULL && !getNewNursery(cap)) ||
-            g0->n_new_large_words >= large_alloc_lim);
+            RELAXED_LOAD(&g0->n_new_large_words) >= large_alloc_lim);
 }

 /* -----------------------------------------------------------------------------
@@ -91,7 +94,7 @@ INLINE_HEADER
 void finishedNurseryBlock (Capability *cap, bdescr *bd) {
 }

 INLINE_HEADER void newNurseryBlock (bdescr *bd) {
-    bd->free = bd->start;
+    RELAXED_STORE(&bd->free, bd->start);
 }

 void updateNurseriesStats (void);
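
The GC.c hunks above also convert the leader/worker handshake on
gct->wakeup to SEQ_CST loads and stores, so that the announcement of
GC_THREAD_STANDING_BY and the leader's poll of it are totally ordered with
the surrounding initialisation. The following self-contained toy models
that store/load pairing under stated assumptions — only the state names
are borrowed from the diff; the real code additionally uses spin locks,
prodCapability() and interruptCapability():

    #include <pthread.h>
    #include <stdio.h>

    #define SEQ_CST_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST)
    #define SEQ_CST_LOAD(ptr)      __atomic_load_n(ptr, __ATOMIC_SEQ_CST)

    enum { GC_THREAD_INACTIVE, GC_THREAD_STANDING_BY };

    static int wakeup = GC_THREAD_INACTIVE;

    static void *worker(void *arg)
    {
        (void)arg;
        /* ... per-thread GC initialisation would happen here ... */
        SEQ_CST_STORE(&wakeup, GC_THREAD_STANDING_BY);  /* announce readiness */
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, worker, NULL);
        /* Leader: spin until the worker stands by, as waitForGcThreads does;
         * the RTS calls busy_wait_nop() in this loop. */
        while (SEQ_CST_LOAD(&wakeup) != GC_THREAD_STANDING_BY) { }
        puts("worker standing by");
        pthread_join(t, NULL);
        return 0;
    }

Build with `cc -pthread`. By contrast, single-writer statistics counters in
the same file (any_work, no_work, copied) only get relaxed accesses or
NONATOMIC_ADD: they tolerate stale reads, so no cross-thread ordering is
bought for them.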