/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Performing updates.
 *
 * ---------------------------------------------------------------------------*/

#ifndef UPDATES_H
#define UPDATES_H

/* -----------------------------------------------------------------------------
   Updates

   We have two layers of update macros.  The top layer, UPD_IND() and
   friends, performs all the work of an update.  In detail:

      - if the closure being updated is a blocking queue, then all the
        threads waiting on the blocking queue are updated.

      - then the lower-level updateWithIndirection() macro is invoked
        to actually replace the closure with an indirection (see below).

   -------------------------------------------------------------------------- */

#ifdef TICKY_TICKY
# define UPD_IND(updclosure, heapptr) \
    UPD_PERM_IND(updclosure,heapptr)
# define UPD_SPEC_IND(updclosure, ind_info, heapptr, and_then) \
    UPD_PERM_IND(updclosure,heapptr); and_then
#else
# define SEMI ;
# define UPD_IND(updclosure, heapptr) \
    UPD_REAL_IND(updclosure,INFO_PTR(stg_IND_info),heapptr,SEMI)
# define UPD_SPEC_IND(updclosure, ind_info, heapptr, and_then) \
    UPD_REAL_IND(updclosure,ind_info,heapptr,and_then)
#endif

/* These macros have to work in both C and C--, so here's the
 * impedance matching:
 */
#ifdef CMINUSMINUS
#define BLOCK_BEGIN
#define BLOCK_END
#define DECLARE_IPTR(info)  W_ info
#define FCALL               foreign "C"
#define INFO_PTR(info)      info
#define ARG_PTR             "ptr"
#else
#define BLOCK_BEGIN         {
#define BLOCK_END           }
#define DECLARE_IPTR(info)  const StgInfoTable *(info)
#define FCALL               /* nothing */
#define INFO_PTR(info)      &info
#define StgBlockingQueue_blocking_queue(closure) \
    (((StgBlockingQueue *)closure)->blocking_queue)
#define ARG_PTR             /* nothing */
#endif

/* UPD_IND actually does a PERM_IND if TICKY_TICKY is on;
   if you *really* need an IND use UPD_REAL_IND
 */
#define UPD_REAL_IND(updclosure, ind_info, heapptr, and_then)   \
        BLOCK_BEGIN                                             \
        DECLARE_IPTR(info);                                     \
        info = GET_INFO(updclosure);                            \
        updateWithIndirection(ind_info,                         \
                              updclosure,                       \
                              heapptr,                          \
                              and_then);                        \
        BLOCK_END

#if defined(PROFILING) || defined(TICKY_TICKY)
#define UPD_PERM_IND(updclosure, heapptr)       \
        BLOCK_BEGIN                             \
        updateWithPermIndirection(updclosure,   \
                                  heapptr);     \
        BLOCK_END
#endif

#if defined(RTS_SUPPORTS_THREADS)

# ifdef TICKY_TICKY
#  define UPD_IND_NOLOCK(updclosure, heapptr)   \
        BLOCK_BEGIN                             \
        updateWithPermIndirection(updclosure,   \
                                  heapptr);     \
        BLOCK_END
# else
#  define UPD_IND_NOLOCK(updclosure, heapptr)           \
        BLOCK_BEGIN                                      \
        updateWithIndirection(INFO_PTR(stg_IND_info),    \
                              updclosure,                \
                              heapptr,);                 \
        BLOCK_END
# endif

#else
#define UPD_IND_NOLOCK(updclosure,heapptr) UPD_IND(updclosure,heapptr)
#endif /* RTS_SUPPORTS_THREADS */
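/* A minimal sketch (for exposition only, never compiled) of what UPD_IND
 * boils down to once the macros above have been expanded: the updatee is
 * overwritten in place with an indirection to its value, so later entries
 * find the value instead of re-running the thunk.  The names below
 * (DemoInd, demo_IND_info, demo_update) are hypothetical and exist only for
 * this illustration; the real code paths are updateWithIndirection and
 * updateWithPermIndirection further down.
 */
#if 0
typedef struct {
    const void *info;        /* info pointer: identifies the closure kind */
    void       *indirectee;  /* first payload word after the update */
} DemoInd;

extern const void *demo_IND_info;   /* stands in for stg_IND_info */

static void demo_update(DemoInd *updatee, void *value)
{
    /* store the result first, then publish the new info pointer, mirroring
       the order used by updateWithIndirection below */
    updatee->indirectee = value;
    updatee->info       = demo_IND_info;
}
#endif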
/* -----------------------------------------------------------------------------
   Awaken any threads waiting on a blocking queue (BLACKHOLE_BQ).
   -------------------------------------------------------------------------- */

#if defined(PAR)

/*
   In a parallel setup several types of closures might have a blocking queue:
     BLACKHOLE_BQ ... same as in the default concurrent setup; it will be
                      reawakened via calling UPD_IND on that closure after
                      having finished the computation of the graph
     FETCH_ME_BQ  ... a global indirection (FETCH_ME) may be entered by a
                      local TSO, turning it into a FETCH_ME_BQ; it will be
                      reawakened via calling processResume
     RBH          ... a revertible black hole may be entered by another
                      local TSO, putting it onto its blocking queue; since
                      RBHs only exist while the corresponding closure is in
                      transit, they will be reawakened via calling
                      convertToFetchMe (upon processing an ACK message)

   In a parallel setup a blocking queue may contain 3 types of closures:
     TSO           ... as in the default concurrent setup
     BLOCKED_FETCH ... indicating that a TSO on another PE is waiting for the
                       result of the current computation
     CONSTR        ... an RBHSave closure (which contains data ripped out of
                       the closure to make room for a blocking queue; since
                       it only contains data we use the existing type of a
                       CONSTR closure); this closure is the end of a blocking
                       queue for an RBH closure; it only exists in this kind
                       of blocking queue and must be at the end of the queue
*/

extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);

#define DO_AWAKEN_BQ(bqe, node)  STGCALL2(awakenBlockedQueue, bqe, node);

#define AWAKEN_BQ(info,closure)                                         \
        if (info == &stg_BLACKHOLE_BQ_info ||                           \
            info == &stg_FETCH_ME_BQ_info ||                            \
            get_itbl(closure)->type == RBH) {                           \
                DO_AWAKEN_BQ(((StgBlockingQueue *)closure)->blocking_queue, closure); \
        }

#elif defined(GRAN)

extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);

#define DO_AWAKEN_BQ(bq, node)  STGCALL2(awakenBlockedQueue, bq, node);

/* In GranSim we don't have FETCH_ME or FETCH_ME_BQ closures, so they are
   not checked.  The rest of the code is the same as for GUM.
*/
#define AWAKEN_BQ(info,closure)                                         \
        if (info == &stg_BLACKHOLE_BQ_info ||                           \
            get_itbl(closure)->type == RBH) {                           \
                DO_AWAKEN_BQ(((StgBlockingQueue *)closure)->blocking_queue, closure); \
        }

#endif /* GRAN || PAR */

/* -----------------------------------------------------------------------------
   Updates: lower-level macros which update a closure with an
   indirection to another closure.

   There are several variants of this code.

        PROFILING:
   -------------------------------------------------------------------------- */

/* LDV profiling:
 * We call LDV_recordDead_FILL_SLOP_DYNAMIC(p1) regardless of the generation in
 * which p1 resides.
 *
 * Note:
 * Strictly speaking, we do *not* need to call LDV_RECORD_CREATE() for IND and
 * IND_OLDGEN closures, because they are inherently used.  But omitting it
 * would break the invariant that every closure keeps its creation time in the
 * profiling field, so we call LDV_RECORD_CREATE() anyway.
 */

/* In the DEBUG case, we also zero out the slop of the old closure,
 * so that the sanity checker can tell where the next closure is.
 *
 * Two important invariants: we should never try to update a closure
 * to point to itself, and the closure being updated should not
 * already have been updated (the mutable list will get messed up
 * otherwise).
 *
 * NB. We do *not* do this in THREADED_RTS mode, because when we have the
 * possibility of multiple threads entering the same closure, zeroing
 * the slop in one of the threads would have a disastrous effect on
 * the other (seen in the wild!).
 */
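/* A simplified sketch (for exposition only, never compiled) of the slop
 * zeroing that FILL_SLOP below performs in its default case: a thunk with
 * P pointer words and N non-pointer words has P+N payload words, all of
 * which are scrubbed so the DEBUG sanity checker can parse the heap past
 * the (now smaller) indirection.  The names here are hypothetical.
 */
#if 0
static void demo_fill_slop(void **payload, unsigned ptrs, unsigned nptrs)
{
    unsigned i, sz = ptrs + nptrs;   /* payload words, as in FILL_SLOP's
                                        default case */
    for (i = 0; i < sz; i++) {
        payload[i] = 0;              /* leave no stale pointers behind */
    }
}
#endif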
#ifdef CMINUSMINUS

#define FILL_SLOP(p)                                                    \
  W_ inf;                                                               \
  W_ sz;                                                                \
  W_ i;                                                                 \
  inf = %GET_STD_INFO(p);                                               \
  if (%INFO_TYPE(inf) != HALF_W_(THUNK_SELECTOR)                        \
        && %INFO_TYPE(inf) != HALF_W_(BLACKHOLE)                        \
        && %INFO_TYPE(inf) != HALF_W_(CAF_BLACKHOLE)) {                 \
    if (%INFO_TYPE(inf) == HALF_W_(AP_STACK)) {                         \
        sz = StgAP_STACK_size(p) + BYTES_TO_WDS(SIZEOF_StgAP_STACK_NoThunkHdr); \
    } else {                                                            \
        if (%INFO_TYPE(inf) == HALF_W_(AP)) {                           \
            sz = TO_W_(StgAP_n_args(p)) + BYTES_TO_WDS(SIZEOF_StgAP_NoThunkHdr); \
        } else {                                                        \
            sz = TO_W_(%INFO_PTRS(inf)) + TO_W_(%INFO_NPTRS(inf));      \
        }                                                               \
    }                                                                   \
    i = 0;                                                              \
  for:                                                                  \
    if (i < sz) {                                                       \
      StgThunk_payload(p,i) = 0;                                        \
      i = i + 1;                                                        \
      goto for;                                                         \
    }                                                                   \
  }

#else /* !CMINUSMINUS */

INLINE_HEADER void
FILL_SLOP(StgClosure *p)
{
    StgInfoTable *inf = get_itbl(p);
    nat i, sz;

    switch (inf->type) {
    case BLACKHOLE:
    case CAF_BLACKHOLE:
    case THUNK_SELECTOR:
        return;
    case AP:
        sz = ((StgAP *)p)->n_args + sizeofW(StgAP) - sizeofW(StgThunkHeader);
        break;
    case AP_STACK:
        sz = ((StgAP_STACK *)p)->size + sizeofW(StgAP_STACK) - sizeofW(StgThunkHeader);
        break;
    default:
        sz = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
        break;
    }
    for (i = 0; i < sz; i++) {
        ((StgThunk *)p)->payload[i] = 0;
    }
}

#endif /* CMINUSMINUS */

#if !defined(DEBUG) || defined(THREADED_RTS)
#define DEBUG_FILL_SLOP(p) /* do nothing */
#else
#define DEBUG_FILL_SLOP(p) FILL_SLOP(p)
#endif

/* We have two versions of this macro (sadly), one for use in C-- code,
 * and the other for C.
 *
 * The and_then argument is a performance hack so that we can paste in
 * the continuation code directly.  It helps shave a couple of
 * instructions off the common case in the update code, which is
 * worthwhile (the update code is often part of the inner loop).
 * (except that gcc now appears to common up this code again and
 * invert the optimisation.  Grrrr --SDM).
 */
#ifdef CMINUSMINUS

#define generation(n) (W_[generations] + n*SIZEOF_generation)

#define updateWithIndirection(ind_info, p1, p2, and_then)       \
    W_ bd;                                                      \
                                                                \
    DEBUG_FILL_SLOP(p1);                                        \
    LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);                      \
    StgInd_indirectee(p1) = p2;                                 \
    foreign "C" wb() [];                                        \
    bd = Bdescr(p1);                                            \
    if (bdescr_gen_no(bd) != 0 :: CInt) {                       \
      foreign "C" recordMutableCap(p1 "ptr",                    \
                                   MyCapability() "ptr",        \
                                   bdescr_gen_no(bd)) [R1];     \
      SET_INFO(p1, stg_IND_OLDGEN_info);                        \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_OLD_IND();                                       \
      and_then;                                                 \
    } else {                                                    \
      SET_INFO(p1, ind_info);                                   \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_NEW_IND();                                       \
      and_then;                                                 \
    }

#else

#define updateWithIndirection(ind_info, p1, p2, and_then)       \
  {                                                             \
    bdescr *bd;                                                 \
                                                                \
    /* cas(p1, 0, &stg_WHITEHOLE_info); */                      \
    ASSERT( (P_)p1 != (P_)p2 && !closure_IND(p1) );             \
    DEBUG_FILL_SLOP(p1);                                        \
    LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);                      \
    ((StgInd *)p1)->indirectee = p2;                            \
    wb();                                                       \
    bd = Bdescr((P_)p1);                                        \
    if (bd->gen_no != 0) {                                      \
      recordMutableGenLock(p1, &generations[bd->gen_no]);       \
      SET_INFO(p1, &stg_IND_OLDGEN_info);                       \
      TICK_UPD_OLD_IND();                                       \
      and_then;                                                 \
    } else {                                                    \
      SET_INFO(p1, ind_info);                                   \
      LDV_RECORD_CREATE(p1);                                    \
      TICK_UPD_NEW_IND();                                       \
      and_then;                                                 \
    }                                                           \
  }

#endif /* CMINUSMINUS */
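/* A sketch (for exposition only, never compiled) of how the and_then
 * argument is meant to be used: the caller's continuation is pasted
 * textually into both branches of updateWithIndirection, so no separate
 * jump is needed after the update.  The call site below is hypothetical.
 */
#if 0
/* hypothetical caller: 'thunk' has just been evaluated to 'value' */
static StgClosure *demo_finish_update(StgClosure *thunk, StgClosure *value)
{
    /* 'return value' is spliced into both branches of the macro */
    updateWithIndirection(&stg_IND_info, thunk, value, return value);
    return value;   /* not reached; both branches return above */
}
#endif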
/* The permanent indirection version isn't performance critical.  We
 * therefore use an inline C function instead of the C-- macro.
 */
#ifndef CMINUSMINUS
INLINE_HEADER void
updateWithPermIndirection(StgClosure *p1, StgClosure *p2)
{
  bdescr *bd;

  ASSERT( p1 != p2 && !closure_IND(p1) );

  /*
   * @LDV profiling
   * Destroy the old closure.
   * NB: LDV_* stuff cannot mix with ticky-ticky
   */
  LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);

  bd = Bdescr((P_)p1);
  if (bd->gen_no != 0) {
    recordMutableGenLock(p1, &generations[bd->gen_no]);
    ((StgInd *)p1)->indirectee = p2;
    SET_INFO(p1, &stg_IND_OLDGEN_PERM_info);
    /*
     * @LDV profiling
     * We have just created a new closure.
     */
    LDV_RECORD_CREATE(p1);
    TICK_UPD_OLD_PERM_IND();
  } else {
    ((StgInd *)p1)->indirectee = p2;
    SET_INFO(p1, &stg_IND_PERM_info);
    /*
     * @LDV profiling
     * We have just created a new closure.
     */
    LDV_RECORD_CREATE(p1);
    TICK_UPD_NEW_PERM_IND(p1);
  }
}
#endif

#endif /* UPDATES_H */