/* ----------------------------------------------------------------------------- * * (c) The University of Glasgow 2004 * * Application-related bits. * * This file is written in a subset of C--, extended with various * features specific to GHC. It is compiled by GHC directly. For the * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y. * * -------------------------------------------------------------------------- */ #include "Cmm.h" /* ---------------------------------------------------------------------------- * Evaluate a closure and return it. * * There isn't an info table / return address version of stg_ap_0, because * everything being returned is guaranteed evaluated, so it would be a no-op. */ STRING(stg_ap_0_ret_str,"stg_ap_0_ret... ") stg_ap_0_fast ( P_ fun ) { IF_DEBUG(apply, ccall debugBelch(stg_ap_0_ret_str); ccall printClosure(R1 "ptr")); IF_DEBUG(sanity, ccall checkStackFrame(Sp "ptr")); #if !defined(PROFILING) ENTER(fun); #else /* Note [Evaluating functions with profiling] If we evaluate something like let f = {-# SCC "f" #-} g where g is a function, then updating the thunk for f to point to g would be incorrect: we've lost the SCC annotation. In general, when we evaluate a function and the current CCS is different from the one stored in the function, we need to return a function with the correct CCS in it. The mechanism we use to wrap the function is to create a zero-argument PAP as a proxy object to hold the new CCS, and return that. If the closure we evaluated is itself a PAP, we cannot make a nested PAP, so we copy the original PAP and set the CCS in the new PAP to enterFunCCS(pap->header.prof.ccs). */ again: W_ info; W_ untaggedfun; W_ arity; untaggedfun = UNTAG(fun); info = %INFO_PTR(untaggedfun); switch [INVALID_OBJECT .. N_CLOSURE_TYPES] (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { case IND, IND_STATIC: { fun = StgInd_indirectee(fun); goto again; } case BCO: { arity = TO_W_(StgBCO_arity(untaggedfun)); goto dofun; } case FUN, FUN_1_0, FUN_0_1, FUN_2_0, FUN_1_1, FUN_0_2, FUN_STATIC: { arity = TO_W_(StgFunInfoExtra_arity(%FUN_INFO(info))); dofun: if (CCCS == StgHeader_ccs(untaggedfun)) { return (fun); } else { // We're going to build a new PAP, with zero extra // arguments and therefore the same arity as the // original function. In other words, we're using a // zero-argument PAP as an indirection to the // function, so that we can attach a different CCS to // it. HP_CHK_GEN(SIZEOF_StgPAP); TICK_ALLOC_PAP(SIZEOF_StgPAP, 0); // attribute this allocation to the "overhead of profiling" CCS_ALLOC(BYTES_TO_WDS(SIZEOF_StgPAP), CCS_OVERHEAD); P_ pap; pap = Hp - SIZEOF_StgPAP + WDS(1); SET_HDR(pap, stg_PAP_info, CCCS); StgPAP_arity(pap) = arity; StgPAP_fun(pap) = fun; StgPAP_n_args(pap) = 0; return (pap); } } case PAP: { if (CCCS == StgHeader_ccs(untaggedfun)) { return (fun); } else { // We're going to copy this PAP, and put the new CCS in it fun = untaggedfun; W_ size; size = SIZEOF_StgPAP + WDS(TO_W_(StgPAP_n_args(fun))); HP_CHK_GEN(size); TICK_ALLOC_PAP(size, 0); // attribute this allocation to the "overhead of profiling" CCS_ALLOC(BYTES_TO_WDS(SIZEOF_StgPAP), CCS_OVERHEAD); P_ pap; pap = Hp - size + WDS(1); // We'll lose the original PAP, so we should enter its CCS ccall enterFunCCS(BaseReg "ptr", StgHeader_ccs(fun) "ptr"); SET_HDR(pap, stg_PAP_info, CCCS); StgPAP_arity(pap) = StgPAP_arity(fun); StgPAP_n_args(pap) = StgPAP_n_args(fun); StgPAP_fun(pap) = StgPAP_fun(fun); W_ i; i = TO_W_(StgPAP_n_args(fun)); loop: if (i == 0) { return (pap); } i = i - 1; StgPAP_payload(pap,i) = StgPAP_payload(fun,i); goto loop; } } case AP, AP_STACK, BLACKHOLE, WHITEHOLE, THUNK, THUNK_1_0, THUNK_0_1, THUNK_2_0, THUNK_1_1, THUNK_0_2, THUNK_STATIC, THUNK_SELECTOR: { // We have a thunk of some kind, so evaluate it. // The thunk might evaluate to a function, so we have to // come back here again to adjust its CCS if necessary. // Therefore we need to push a stack frame to look at the // function that gets returned (a stg_restore_ccs_eval // frame), and therefore we need a stack check. STK_CHK_GEN(); // We can't use the value of 'info' any more, because if // STK_CHK_GEN() did a GC then the closure we're looking // at may have changed, e.g. a THUNK_SELECTOR may have // been evaluated by the GC. So we reload the info // pointer now. untaggedfun = UNTAG(fun); info = %INFO_PTR(untaggedfun); jump %ENTRY_CODE(info) (stg_restore_cccs_eval_info, CCCS) (untaggedfun); } default: { jump %ENTRY_CODE(info) (UNTAG(fun)); } } #endif } /* ----------------------------------------------------------------------------- Entry Code for a PAP. This entry code is *only* called by one of the stg_ap functions. On entry: Sp points to the remaining arguments on the stack. If the stack check fails, we can just push the PAP on the stack and return to the scheduler. On entry: R1 points to the PAP. The rest of the function's arguments (apart from those that are already in the PAP) are on the stack, starting at Sp(0). R2 contains an info table which describes these arguments, which is used in the event that the stack check in the entry code below fails. The info table is currently one of the stg_ap_*_ret family, as this code is always entered from those functions. The idea is to copy the chunk of stack from the PAP object onto the stack / into registers, and enter the function. -------------------------------------------------------------------------- */ INFO_TABLE(stg_PAP,/*special layout*/0,0,PAP,"PAP","PAP") { ccall barf("PAP object entered!") never returns; } stg_PAP_apply /* no args => explicit stack */ { W_ Words; W_ pap; pap = R1; Words = TO_W_(StgPAP_n_args(pap)); // // Check for stack overflow and bump the stack pointer. // We have a hand-rolled stack check fragment here, because none of // the canned ones suit this situation. // if (Sp - (WDS(Words) + 2/* see ARG_BCO below */) < SpLim) { // there is a return address in R2 in the event of a // stack check failure. The various stg_apply functions arrange // this before calling stg_PAP_entry. Sp_adj(-1); Sp(0) = R2; jump stg_gc_unpt_r1 [R1]; } Sp_adj(-Words); // profiling TICK_ENT_PAP(); LDV_ENTER(pap); #if defined(PROFILING) ccall enterFunCCS(BaseReg "ptr", StgHeader_ccs(pap) "ptr"); #endif // Reload the stack W_ i; W_ p; p = pap + SIZEOF_StgHeader + OFFSET_StgPAP_payload; i = 0; for: if (i < Words) { Sp(i) = W_[p]; p = p + WDS(1); i = i + 1; goto for; } R1 = StgPAP_fun(pap); /* DEBUGGING CODE, ensures that arity 1 and 2 functions are entered tagged if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == 1 ) { if (GETTAG(R1)!=1) { W_[0]=1; } } if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == 2 ) { if (GETTAG(R1)!=2) { W_[0]=1; } } */ // Off we go! TICK_ENT_VIA_NODE(); #if defined(NO_ARG_REGS) jump %GET_ENTRY(UNTAG(R1)) [R1]; #else W_ info; info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN) { jump StgFunInfoExtra_slow_apply(info) [R1]; } if (type == ARG_GEN_BIG) { jump StgFunInfoExtra_slow_apply(info) [R1]; } if (type == ARG_BCO) { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_apply_interp_info; jump stg_yield_to_interpreter []; } jump W_[stg_ap_stack_entries + WDS(TO_W_(StgFunInfoExtra_fun_type(info)))] [R1]; #endif } /* ----------------------------------------------------------------------------- Entry Code for an AP (a PAP with arity zero). The entry code is very similar to a PAP, except there are no further arguments on the stack to worry about, so the stack check is simpler. We must also push an update frame on the stack before applying the function. -------------------------------------------------------------------------- */ INFO_TABLE(stg_AP,/*special layout*/0,0,AP,"AP","AP") /* no args => explicit stack */ { W_ Words; W_ ap; ap = R1; Words = TO_W_(StgAP_n_args(ap)); /* * Check for stack overflow. IMPORTANT: use a _ENTER check here, * because if the check fails, we might end up blackholing this very * closure, in which case we must enter the blackhole on return rather * than continuing to evaluate the now-defunct closure. */ STK_CHK_ENTER(WDS(Words) + SIZEOF_StgUpdateFrame + 2/* see ARG_BCO below */, R1); PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1); Sp = Sp - SIZEOF_StgUpdateFrame - WDS(Words); TICK_ENT_AP(); LDV_ENTER(ap); ENTER_CCS_THUNK(ap); // Reload the stack W_ i; W_ p; p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload; i = 0; for: if (i < Words) { Sp(i) = W_[p]; p = p + WDS(1); i = i + 1; goto for; } R1 = StgAP_fun(ap); // Off we go! TICK_ENT_VIA_NODE(); #if defined(NO_ARG_REGS) jump %GET_ENTRY(UNTAG(R1)) [R1]; #else W_ info; info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN) { jump StgFunInfoExtra_slow_apply(info) [R1]; } if (type == ARG_GEN_BIG) { jump StgFunInfoExtra_slow_apply(info) [R1]; } if (type == ARG_BCO) { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_apply_interp_info; jump stg_yield_to_interpreter []; } jump W_[stg_ap_stack_entries + WDS(TO_W_(StgFunInfoExtra_fun_type(info)))] [R1]; #endif } /* AP_NOUPD is exactly like AP, except that no update frame is pushed. Use for thunks that are guaranteed to be entered once only, such as those generated by the byte-code compiler for inserting breakpoints. */ INFO_TABLE(stg_AP_NOUPD,/*special layout*/0,0,AP,"AP_NOUPD","AP_NOUPD") /* no args => explicit stack */ { W_ Words; W_ ap; ap = R1; Words = TO_W_(StgAP_n_args(ap)); /* * Check for stack overflow. IMPORTANT: use a _ENTER check here, * because if the check fails, we might end up blackholing this very * closure, in which case we must enter the blackhole on return rather * than continuing to evaluate the now-defunct closure. */ STK_CHK_ENTER(WDS(Words) + 2/* see ARG_BCO below */, R1); Sp = Sp - WDS(Words); TICK_ENT_AP(); LDV_ENTER(ap); ENTER_CCS_THUNK(ap); // Reload the stack W_ i; W_ p; p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload; i = 0; for: if (i < Words) { Sp(i) = W_[p]; p = p + WDS(1); i = i + 1; goto for; } R1 = StgAP_fun(ap); // Off we go! TICK_ENT_VIA_NODE(); #if defined(NO_ARG_REGS) jump %GET_ENTRY(UNTAG(R1)) [R1]; #else W_ info; info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN) { jump StgFunInfoExtra_slow_apply(info) [R1]; } if (type == ARG_GEN_BIG) { jump StgFunInfoExtra_slow_apply(info) [R1]; } if (type == ARG_BCO) { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_apply_interp_info; jump stg_yield_to_interpreter []; } jump W_[stg_ap_stack_entries + WDS(TO_W_(StgFunInfoExtra_fun_type(info)))] [R1]; #endif } /* ----------------------------------------------------------------------------- Entry Code for an AP_STACK. Very similar to a PAP and AP. The layout is the same as PAP and AP, except that the payload is a chunk of stack instead of being described by the function's info table. Like an AP, there are no further arguments on the stack to worry about. However, the function closure (ap->fun) does not necessarily point directly to a function, so we have to enter it using stg_ap_0. -------------------------------------------------------------------------- */ /* Note [AP_STACKs must be eagerly blackholed] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Trac #13615 describes a nasty concurrency issue where we can enter into the middle of an ST action multiple times, resulting in duplication of effects. In short, the construction of an AP_STACK allows us to suspend a computation which should not be duplicated. When running with lazy blackholing, we can then enter this AP_STACK multiple times, duplicating the computation with potentially disasterous consequences. For instance, consider the case of a simple ST program which computes a sum using in─place mutation, inplaceSum :: Num a => [a] ─> a inplaceSum xs0 = runST $ do y <─ newSTRef 0 let go [] = readSTRef y go (x : xs) = do modifySTRef y (+x) go xs go xs0 Of course, it is fine if we enter an inplaceSum thunk more than once: the two threads will inhabit different worlds with different STRefs. However, if we suspend some part of inplaceSum (for instance, due to the heap check at the beginning of go) and then multiple threads resume that suspension (as is safe in pure computation) we will have multiple threads concurrently mutating the same STRef. Disaster! Let's consider how this might happen: Consider this situation, ┌─────────┐ ┌───────┐ ┌───────┐ ┌─────────┐ │ TSO 1 │ ╭────→│ go │ │ fun │ │ TSO 2 │ └─────────┘ │ └───────┘ └───────┘ └─────────┘ │ │ ┌─────────┐ │ │ ┌─────────┐ │ │──────╯ ╰──────────────│ │ ├─────────┤ ┌─────────┐ ├─────────┤ │ UPDATE_ │──────────→│ THUNK A │ ╭────────│ UPDATE_ │ │ FRAME │ updatee └─────────┘ │updatee │ FRAME │ ├─────────┤ │ ├─────────┤ │ ... │ │ │ etc. │ ├─────────┤ updatee ┌─────────┐ │ │ UPDATE_ │─────────────────────→│ THUNK B │←───╯ │ FRAME │ └─────────┘ ├─────────┤ │ etc. │ Here we have two threads (TSO 1 and TSO 2) which are in currently pausing (e.g. in threadPaused). Since they are pausing, their stacks are headed by a pointer to the continuation code which we will run on resumption (go and fun, respectively). We also see that there are two thunks on the heap: THUNK A and THUNK B where THUNK B depends upon THUNK A (as in, evaluation of B will force A). We see that thread 1 has THUNK A under evaluation, and both threads have THUNK B under evaluation. As each thread enters threadPaused, threadPaused will walk its stack looking for duplicate computation (see Note [suspend duplicate work], although there is some background described below as well). Let's consider what this check does: Say that TSO 2 begins this check first. The check will walk TSO 2's stack, until it finds the first update frame, which updates THUNK B. Upon finding this frame, it will try to lock THUNK B, replacing it with a BLACKHOLE owned by its TSO. We now have, ┌─────────┐ ┌───────┐ ┌───────┐ ┌─────────┐ │ TSO 1 │ ╭────→│ go │ │ fun │ ╭────────→│ TSO 2 │ └─────────┘ │ └───────┘ └───────┘ │ └─────────┘ │ ↑ ╭─────╯ ┌─────────┐ │ │ │ ┌─────────┐ │ │──────╯ ╰─────────────────│ │ ├─────────┤ updatee ┌─────────┐ │ ├─────────┤ │ UPDATE_ │──────────→│ THUNK A │ │ ╭──────────│ UPDATE_ │ │ FRAME │ └─────────┘ │ │ updatee │ FRAME │ ├─────────┤ │ │ ├─────────┤ │ ... │ owner│ │ │ etc. │ ├─────────┤ updatee ┌────────────┐ │ │ UPDATE_ │──────────────────→│ BLACKHOLE │←─╯ │ FRAME │ └────────────┘ ├─────────┤ │ etc. │ Now consider what happens when TSO 1 runs its duplicate-computation check. Again, we start walking the stack from the top, where we we find the update frame updating THUNK A. We will lock this thunk, replacing it with a BLACKHOLE owned by its TSO. We now have, ┌─────────┐ ┌───────┐ ┌───────┐ ┌─────────┐ │ TSO 1 │←──╮ ╭────→│ go │ │ fun │ ╭────────→│ TSO 2 │ └─────────┘ │ │ └───────┘ └───────┘ │ └─────────┘ │ │ ↑ ╭─────╯ ┌─────────┐ ╰──│─────────╮ │ │ ┌─────────┐ │ │──────╯ │owner ╰─────────────────│ │ ├─────────┤ ┌───────────┐ │ ├─────────┤ │ UPDATE_ │──────────→│ BLACKHOLE │ │ ╭──────────│ UPDATE_ │ │ FRAME │ updatee └───────────┘ │ │ updatee │ FRAME │ ├─────────┤ │ │ ├─────────┤ │ ... │ owner│ │ │ etc. │ ├─────────┤ updatee ┌────────────┐ │ │ UPDATE_ │──────────────────→│ BLACKHOLE │←─╯ │ FRAME │ └────────────┘ ├─────────┤ │ etc. │ Now we will continue walking down TSO 1's stack, next coming across the second update frame, pointing to the now-BLACKHOLE'd THUNK B. At this point threadPaused will correctly conclude that TSO 1 is duplicating a computation being carried out by TSO 2 and attempt to suspend it. The suspension process proceeds by invoking raiseAsync, which walks the stack from the top looking for update frames. For each update frame we take any stack frames preceeding it and construct an AP_STACK heap object from them. We then replace the updatee of the frame with an indirection pointing to the AP_STACK. So, after suspending the first update frame we have, ┌─────────┐ ┌───────┐ ┌───────┐ ┌─────────┐ │ TSO 1 │ ╭────────→│ go │←─╮ │ fun │ ╭───────→│ TSO 2 │ └─────────┘ │ └───────┘ │ └───────┘ │ └─────────┘ │ ┌───────────┐ │ ↑ ╭─────╯ ┌─────────┐ │ │ AP_STACK │ │ │ │ ┌─────────┐ │ │──╯ ├───────────┤ │ ╰────────────────│ │ ├─────────┤ │ │─╯ │ ├─────────┤ │ UPDATE_ │───────╮ └───────────┘ │ ╭──────────│ UPDATE_ │ │ FRAME │updatee│ ↑ │ │ updatee │ FRAME │ ├─────────┤ │ │indirectee │ │ ├─────────┤ │ ... │ ╰→┌───────────┐ │ │ │ etc. │ ├─────────┤updatee │ BLACKHOLE │ │ │ │ UPDATE_ │──╮ └───────────┘ owner│ │ │ FRAME │ │ ┌────────────┐ │ ├─────────┤ ╰───────────────→│ BLACKHOLE │←─╯ │ etc. │ └────────────┘ Finally, we will replace the second update frame with a blackhole so that TSO 1 will block on TSO 2's computation of THUNK B, ┌─────────┐ ┌───────┐ ┌───────┐ ┌─────────┐ │ TSO 1 │ ╭────────→│ go │←─╮ │ fun │ ╭───────→│ TSO 2 │ └─────────┘ │ └───────┘ │ └───────┘ │ └─────────┘ │ ┌───────────┐ │ ↑ ╭─────╯ ┌─────────┐ │ │ AP_STACK │ │ │ │ ┌─────────┐ │ │──╯ ├───────────┤ │ ╰────────────────│ │ ├─────────┤ │ │─╯ │ ├─────────┤ │ UPDATE_ │───────╮ └───────────┘ │ ╭──────────│ UPDATE_ │ │ FRAME │updatee│ ↑ │ │ updatee │ FRAME │ ├─────────┤ │ │indirectee │ │ ├─────────┤ │ ... │ ╰→┌───────────┐ │ │ │ etc. │ ├─────────┤ │ BLACKHOLE │ │ │ │ BLACK │ └───────────┘ owner│ │ │ HOLE │───────────╮ ┌────────────┐ │ ├─────────┤indirectee ╰──────→│ BLACKHOLE │←─╯ │ etc. │ └────────────┘ At first glance there's still nothing terribly alarming here. However, consider what would happen if some other closure held a reference to THUNK A. We would now have leaked an AP_STACK capturing the state of a potentially non-duplicatable computation to heap. Even worse, if two threads had references to THUNK A and both attempted to enter at the same time, they would both succeed if we allowed AP_STACKs to be lazily blackholed. This is the reason why we must be very careful when entering AP_STACKS: they introduce the possibility that we duplicate a computation which could never otherwise be duplicated. For this reason we employ an atomic blackholing strategy when entering AP_STACK closures. */ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK") /* no args => explicit stack */ { W_ Words; W_ ap; ap = R1; Words = StgAP_STACK_size(ap); /* * Check for stack overflow. IMPORTANT: use a _ENTER check here, * because if the check fails, we might end up blackholing this very * closure, in which case we must enter the blackhole on return rather * than continuing to evaluate the now-defunct closure. */ STK_CHK_ENTER(WDS(Words) + SIZEOF_StgUpdateFrame + WDS(AP_STACK_SPLIM), R1); /* ensure there is at least AP_STACK_SPLIM words of headroom available * after unpacking the AP_STACK. See bug #1466 */ PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1); Sp = Sp - SIZEOF_StgUpdateFrame - WDS(Words); /* * It is imperative that we blackhole lest we may duplicate computation which * must not be duplicated. See Note [AP_STACKs must be eagerly blackholed]. */ W_ old_info; (old_info) = prim %cmpxchgW(ap, stg_AP_STACK_info, stg_WHITEHOLE_info); if (old_info != stg_AP_STACK_info) { /* someone else beat us to it */ jump ENTRY_LBL(stg_WHITEHOLE) (ap); } StgInd_indirectee(ap) = CurrentTSO; prim_write_barrier; SET_INFO(ap, __stg_EAGER_BLACKHOLE_info); TICK_ENT_AP(); LDV_ENTER(ap); ENTER_CCS_THUNK(ap); // Reload the stack W_ i; W_ p; p = ap + SIZEOF_StgHeader + OFFSET_StgAP_STACK_payload; i = 0; for: if (i < Words) { Sp(i) = W_[p]; p = p + WDS(1); i = i + 1; goto for; } // Off we go! TICK_ENT_VIA_NODE(); R1 = StgAP_STACK_fun(ap); ENTER_R1(); } /* ----------------------------------------------------------------------------- AP_STACK_NOUPD - exactly like AP_STACK, but doesn't push an update frame. -------------------------------------------------------------------------- */ INFO_TABLE(stg_AP_STACK_NOUPD,/*special layout*/0,0,AP_STACK, "AP_STACK_NOUPD","AP_STACK_NOUPD") /* no args => explicit stack */ { W_ Words; W_ ap; ap = R1; Words = StgAP_STACK_size(ap); /* * Check for stack overflow. IMPORTANT: use a _NP check here, * because if the check fails, we might end up blackholing this very * closure, in which case we must enter the blackhole on return rather * than continuing to evaluate the now-defunct closure. */ STK_CHK_ENTER(WDS(Words) + WDS(AP_STACK_SPLIM), R1); /* ensure there is at least AP_STACK_SPLIM words of headroom available * after unpacking the AP_STACK. See bug #1466 */ Sp = Sp - WDS(Words); TICK_ENT_AP(); LDV_ENTER(ap); ENTER_CCS_THUNK(ap); // Reload the stack W_ i; W_ p; p = ap + SIZEOF_StgHeader + OFFSET_StgAP_STACK_payload; i = 0; for: if (i < Words) { Sp(i) = W_[p]; p = p + WDS(1); i = i + 1; goto for; } // Off we go! TICK_ENT_VIA_NODE(); R1 = StgAP_STACK_fun(ap); ENTER_R1(); }