summaryrefslogtreecommitdiff
path: root/rts/PrimOps.cmm
diff options
context:
space:
mode:
author: Simon Marlow <marlowsd@gmail.com> 2011-11-28 16:48:43 +0000
committer: Simon Marlow <marlowsd@gmail.com> 2011-11-29 12:21:18 +0000
commit: 50de6034343abc93a7b01daccff34121042c0e7c (patch)
tree: 24496a5fc6bc39c6baaa574608e53c5d76c169f6 /rts/PrimOps.cmm
parent: 1c2b838131134d44004dfdff18c302131478390d (diff)
download: haskell-50de6034343abc93a7b01daccff34121042c0e7c.tar.gz
Make profiling work with multiple capabilities (+RTS -N)
This means that both time and heap profiling work for parallel programs.

Main internal changes:

- CCCS is no longer a global variable; it is now another pseudo-register in the StgRegTable struct. Thus every Capability has its own CCCS.
- There is a new built-in CCS called "IDLE", which records ticks for Capabilities in the idle state. If you profile a single-threaded program with +RTS -N2, you'll see about 50% of time in "IDLE".
- There is appropriate locking in rts/Profiling.c to protect the shared cost-centre-stack data structures.

This patch does enough to get it working, but I have cut one big corner: the cost-centre-stack data structure is still shared amongst all Capabilities, which means that multiple Capabilities will race when updating the "allocations" and "entries" fields of a CCS. Not only does this give unpredictable results, but it also runs very slowly due to cache-line bouncing. It is strongly recommended that you use -fno-prof-count-entries to disable the "entries" count when profiling parallel programs. (I shall add a note to this effect to the docs.)
Diffstat (limited to 'rts/PrimOps.cmm')
-rw-r--r-- rts/PrimOps.cmm 40
1 files changed, 20 insertions, 20 deletions
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 8836d3bfe6..2ca347e803 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -63,7 +63,7 @@ stg_newByteArrayzh
words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
- SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(p) = n;
RET_P(p);
}
@@ -96,7 +96,7 @@ stg_newPinnedByteArrayzh
to BA_ALIGN bytes: */
p = p + ((-p - SIZEOF_StgArrWords) & BA_MASK);
- SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(p) = n;
RET_P(p);
}
@@ -136,7 +136,7 @@ stg_newAlignedPinnedByteArrayzh
<alignment> is a power of 2, which is technically not guaranteed */
p = p + ((-p - SIZEOF_StgArrWords) & (alignment - 1));
- SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(p) = n;
RET_P(p);
}
@@ -157,7 +157,7 @@ stg_newArrayzh
("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2];
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
- SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
+ SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
StgMutArrPtrs_ptrs(arr) = n;
StgMutArrPtrs_size(arr) = size;
@@ -225,7 +225,7 @@ stg_newMutVarzh
ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh);
mv = Hp - SIZEOF_StgMutVar + WDS(1);
- SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
+ SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS);
StgMutVar_var(mv) = R1;
RET_P(mv);
@@ -297,21 +297,21 @@ stg_atomicModifyMutVarzh
TICK_ALLOC_THUNK_2();
CCCS_ALLOC(THUNK_2_SIZE);
z = Hp - THUNK_2_SIZE + WDS(1);
- SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
+ SET_HDR(z, stg_ap_2_upd_info, CCCS);
LDV_RECORD_CREATE(z);
StgThunk_payload(z,0) = f;
TICK_ALLOC_THUNK_1();
CCCS_ALLOC(THUNK_1_SIZE);
y = z - THUNK_1_SIZE;
- SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
+ SET_HDR(y, stg_sel_0_upd_info, CCCS);
LDV_RECORD_CREATE(y);
StgThunk_payload(y,0) = z;
TICK_ALLOC_THUNK_1();
CCCS_ALLOC(THUNK_1_SIZE);
r = y - THUNK_1_SIZE;
- SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
+ SET_HDR(r, stg_sel_1_upd_info, CCCS);
LDV_RECORD_CREATE(r);
StgThunk_payload(r,0) = z;
@@ -353,7 +353,7 @@ stg_mkWeakzh
ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh );
w = Hp - SIZEOF_StgWeak + WDS(1);
- SET_HDR(w, stg_WEAK_info, W_[CCCS]);
+ SET_HDR(w, stg_WEAK_info, CCCS);
// We don't care about cfinalizer here.
// Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or
@@ -397,14 +397,14 @@ stg_mkWeakForeignEnvzh
ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh );
w = Hp - SIZEOF_StgWeak + WDS(1);
- SET_HDR(w, stg_WEAK_info, W_[CCCS]);
+ SET_HDR(w, stg_WEAK_info, CCCS);
payload_words = 4;
words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
- SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(p) = WDS(payload_words);
StgArrWords_payload(p,0) = fptr;
@@ -877,7 +877,7 @@ stg_atomicallyzh
Sp = Sp - SIZEOF_StgAtomicallyFrame;
frame = Sp;
- SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
+ SET_HDR(frame,stg_atomically_frame_info, CCCS);
StgAtomicallyFrame_code(frame) = R1;
StgAtomicallyFrame_result(frame) = NO_TREC;
StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE;
@@ -903,7 +903,7 @@ stg_catchSTMzh
Sp = Sp - SIZEOF_StgCatchSTMFrame;
frame = Sp;
- SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
+ SET_HDR(frame, stg_catch_stm_frame_info, CCCS);
StgCatchSTMFrame_handler(frame) = R2;
StgCatchSTMFrame_code(frame) = R1;
@@ -941,7 +941,7 @@ stg_catchRetryzh
Sp = Sp - SIZEOF_StgCatchRetryFrame;
frame = Sp;
- SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
+ SET_HDR(frame, stg_catch_retry_frame_info, CCCS);
StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
StgCatchRetryFrame_first_code(frame) = R1;
StgCatchRetryFrame_alt_code(frame) = R2;
@@ -1153,7 +1153,7 @@ stg_newMVarzh
ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh );
mvar = Hp - SIZEOF_StgMVar + WDS(1);
- SET_HDR(mvar,stg_MVAR_DIRTY_info,W_[CCCS]);
+ SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS);
// MVARs start dirty: generation 0 has no mutable list
StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure;
StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
@@ -1527,7 +1527,7 @@ stg_makeStableNamezh
*/
if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
- SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
+ SET_HDR(sn_obj, stg_STABLE_NAME_info, CCCS);
StgStableName_sn(sn_obj) = index;
snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
} else {
@@ -1578,7 +1578,7 @@ stg_newBCOzh
ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh );
bco = Hp - bytes + WDS(1);
- SET_HDR(bco, stg_BCO_info, W_[CCCS]);
+ SET_HDR(bco, stg_BCO_info, CCCS);
StgBCO_instrs(bco) = R1;
StgBCO_literals(bco) = R2;
@@ -1617,7 +1617,7 @@ stg_mkApUpd0zh
CCCS_ALLOC(SIZEOF_StgAP);
ap = Hp - SIZEOF_StgAP + WDS(1);
- SET_HDR(ap, stg_AP_info, W_[CCCS]);
+ SET_HDR(ap, stg_AP_info, CCCS);
StgAP_n_args(ap) = HALF_W_(0);
StgAP_fun(ap) = R1;
@@ -1668,7 +1668,7 @@ out:
ptrs_arr = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
- SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, W_[CCCS]);
+ SET_HDR(ptrs_arr, stg_MUT_ARR_PTRS_FROZEN_info, CCCS);
StgMutArrPtrs_ptrs(ptrs_arr) = ptrs;
StgMutArrPtrs_size(ptrs_arr) = ptrs + ptrs_arr_cards;
@@ -1683,7 +1683,7 @@ for:
allocated in the nursery. The GC will fill it in if/when the array
is promoted. */
- SET_HDR(nptrs_arr, stg_ARR_WORDS_info, W_[CCCS]);
+ SET_HDR(nptrs_arr, stg_ARR_WORDS_info, CCCS);
StgArrWords_bytes(nptrs_arr) = WDS(nptrs);
p = 0;
for2: