-rw-r--r--  includes/Cmm.h                 |    7
-rw-r--r--  includes/mkDerivedConstants.c  |    3
-rw-r--r--  includes/rts/storage/GC.h      |   36
-rw-r--r--  rts/Capability.c               |    1
-rw-r--r--  rts/Capability.h               |    3
-rw-r--r--  rts/Interpreter.c              |   20
-rw-r--r--  rts/Linker.c                   |    3
-rw-r--r--  rts/PrimOps.cmm                |   10
-rw-r--r--  rts/ProfHeap.c                 |    2
-rw-r--r--  rts/RaiseAsync.c               |    4
-rw-r--r--  rts/RtsAPI.c                   |   34
-rw-r--r--  rts/STM.c                      |   12
-rw-r--r--  rts/Schedule.c                 |    4
-rw-r--r--  rts/Threads.c                  |    6
-rw-r--r--  rts/Weak.c                     |    2
-rw-r--r--  rts/sm/GC.c                    |   64
-rw-r--r--  rts/sm/MarkWeak.c              |  224
-rw-r--r--  rts/sm/Storage.c               |  304
-rw-r--r--  rts/sm/Storage.h               |   11
19 files changed, 343 insertions, 407 deletions
diff --git a/includes/Cmm.h b/includes/Cmm.h
index aba5c2e36b..59081e2466 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -380,11 +380,12 @@
HP_CHK_GEN(alloc,liveness,reentry); \
TICK_ALLOC_HEAP_NOCTR(alloc);
-// allocateLocal() allocates from the nursery, so we check to see
+// allocate() allocates from the nursery, so we check to see
// whether the nursery is nearly empty in any function that uses
-// allocateLocal() - this includes many of the primops.
+// allocate() - this includes many of the primops.
#define MAYBE_GC(liveness,reentry) \
- if (bdescr_link(CurrentNursery) == NULL || CInt[alloc_blocks] >= CInt[alloc_blocks_lim]) { \
+ if (bdescr_link(CurrentNursery) == NULL || \
+ step_n_large_blocks(StgRegTable_rNursery(BaseReg)) >= CInt[alloc_blocks_lim]) { \
R9 = liveness; \
R10 = reentry; \
HpAlloc = 0; \
diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c
index b6d0106cf1..ddd2e65983 100644
--- a/includes/mkDerivedConstants.c
+++ b/includes/mkDerivedConstants.c
@@ -230,6 +230,7 @@ main(int argc, char *argv[])
field_offset(StgRegTable, rCurrentNursery);
field_offset(StgRegTable, rHpAlloc);
struct_field(StgRegTable, rRet);
+ struct_field(StgRegTable, rNursery);
def_offset("stgEagerBlackholeInfo", FUN_OFFSET(stgEagerBlackholeInfo));
def_offset("stgGCEnter1", FUN_OFFSET(stgGCEnter1));
@@ -249,6 +250,8 @@ main(int argc, char *argv[])
struct_size(generation);
struct_field(generation, mut_list);
+ struct_field(step, n_large_blocks);
+
struct_size(CostCentreStack);
struct_field(CostCentreStack, ccsID);
struct_field(CostCentreStack, mem_alloc);
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h
index aa0531382f..1cd57c9045 100644
--- a/includes/rts/storage/GC.h
+++ b/includes/rts/storage/GC.h
@@ -75,10 +75,6 @@ typedef struct step_ {
// ------------------------------------
// Fields below are used during GC only
- // During GC, if we are collecting this step, blocks and n_blocks
- // are copied into the following two fields. After GC, these blocks
- // are freed.
-
#if defined(THREADED_RTS)
char pad[128]; // make sure the following is
// on a separate cache line.
@@ -89,6 +85,9 @@ typedef struct step_ {
int mark; // mark (not copy)? (old gen only)
int compact; // compact (not sweep)? (old gen only)
+ // During GC, if we are collecting this step, blocks and n_blocks
+ // are copied into the following two fields. After GC, these blocks
+ // are freed.
bdescr * old_blocks; // bdescr of first from-space block
unsigned int n_old_blocks; // number of blocks in from-space
unsigned int live_estimate; // for sweeping: estimate of live data
@@ -125,7 +124,6 @@ typedef struct generation_ {
extern generation * generations;
extern generation * g0;
-extern step * g0s0;
extern generation * oldest_gen;
extern step * all_steps;
extern nat total_steps;
@@ -133,21 +131,14 @@ extern nat total_steps;
/* -----------------------------------------------------------------------------
Generic allocation
- StgPtr allocateInGen(generation *g, nat n)
- Allocates a chunk of contiguous store
- n words long in generation g,
- returning a pointer to the first word.
- Always succeeds.
-
- StgPtr allocate(nat n) Equaivalent to allocateInGen(g0)
-
- StgPtr allocateLocal(Capability *cap, nat n)
+ StgPtr allocate(Capability *cap, nat n)
Allocates memory from the nursery in
the current Capability. This can be
done without taking a global lock,
unlike allocate().
- StgPtr allocatePinned(nat n) Allocates a chunk of contiguous store
+ StgPtr allocatePinned(Capability *cap, nat n)
+ Allocates a chunk of contiguous store
n words long, which is at a fixed
address (won't be moved by GC).
Returns a pointer to the first word.
@@ -163,27 +154,16 @@ extern nat total_steps;
allocatePinned, for the
benefit of the ticky-ticky profiler.
- rtsBool doYouWantToGC(void) Returns True if the storage manager is
- ready to perform a GC, False otherwise.
-
- lnat allocatedBytes(void) Returns the number of bytes allocated
- via allocate() since the last GC.
- Used in the reporting of statistics.
-
-------------------------------------------------------------------------- */
-StgPtr allocate ( lnat n );
-StgPtr allocateInGen ( generation *g, lnat n );
-StgPtr allocateLocal ( Capability *cap, lnat n );
-StgPtr allocatePinned ( lnat n );
-lnat allocatedBytes ( void );
+StgPtr allocate ( Capability *cap, lnat n );
+StgPtr allocatePinned ( Capability *cap, lnat n );
/* memory allocator for executable memory */
void * allocateExec(unsigned int len, void **exec_addr);
void freeExec (void *p);
// Used by GC checks in external .cmm code:
-extern nat alloc_blocks;
extern nat alloc_blocks_lim;
/* -----------------------------------------------------------------------------
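The rest of this diff converts every caller to the entry points documented above: allocate() and allocatePinned() now both take the current Capability and draw from its nursery, so the common case needs no global lock. A minimal sketch of RTS-internal usage, assuming the usual internal headers and macros (Rts.h, SET_HDR, CONSTR_sizeW, CCS_SYSTEM); the helper name is hypothetical and not part of the patch:

    #include "Rts.h"

    /* Hypothetical helper, not in this patch: build a one-word CONSTR-style
     * closure from the current capability's nursery, mirroring the rts_mk*
     * functions further down in this diff. */
    static StgClosure *mk_boxed_word (Capability *cap, StgWord w,
                                      StgInfoTable *con_info)
    {
        StgClosure *p = (StgClosure *)allocate(cap, CONSTR_sizeW(0,1));
        SET_HDR(p, con_info, CCS_SYSTEM);
        p->payload[0] = (StgClosure *)w;
        return p;
    }
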
diff --git a/rts/Capability.c b/rts/Capability.c
index 95050baaa9..0012c24cc8 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -253,6 +253,7 @@ initCapability( Capability *cap, nat i )
cap->free_trec_headers = NO_TREC;
cap->transaction_tokens = 0;
cap->context_switch = 0;
+ cap->pinned_object_block = NULL;
}
/* ---------------------------------------------------------------------------
diff --git a/rts/Capability.h b/rts/Capability.h
index 3f01bf3d88..ff6e368130 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -69,6 +69,9 @@ struct Capability_ {
bdescr **mut_lists;
bdescr **saved_mut_lists; // tmp use during GC
+ // block for allocating pinned objects into
+ bdescr *pinned_object_block;
+
// Context switch flag. We used to have one global flag, now one
// per capability. Locks required : none (conflicts are harmless)
int context_switch;
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 339d4d8f88..5197510766 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -89,7 +89,7 @@
STATIC_INLINE StgPtr
allocate_NONUPD (Capability *cap, int n_words)
{
- return allocateLocal(cap, stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words));
+ return allocate(cap, stg_max(sizeofW(StgHeader)+MIN_PAYLOAD_SIZE, n_words));
}
int rts_stop_next_breakpoint = 0;
@@ -604,7 +604,7 @@ do_apply:
else /* arity > n */ {
// build a new PAP and return it.
StgPAP *new_pap;
- new_pap = (StgPAP *)allocateLocal(cap, PAP_sizeW(pap->n_args + m));
+ new_pap = (StgPAP *)allocate(cap, PAP_sizeW(pap->n_args + m));
SET_HDR(new_pap,&stg_PAP_info,CCCS);
new_pap->arity = pap->arity - n;
new_pap->n_args = pap->n_args + m;
@@ -649,7 +649,7 @@ do_apply:
// build a PAP and return it.
StgPAP *pap;
nat i;
- pap = (StgPAP *)allocateLocal(cap, PAP_sizeW(m));
+ pap = (StgPAP *)allocate(cap, PAP_sizeW(m));
SET_HDR(pap, &stg_PAP_info,CCCS);
pap->arity = arity - n;
pap->fun = obj;
@@ -718,7 +718,7 @@ do_apply:
run_BCO_return:
// Heap check
- if (doYouWantToGC()) {
+ if (doYouWantToGC(cap)) {
Sp--; Sp[0] = (W_)&stg_enter_info;
RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
}
@@ -729,7 +729,7 @@ run_BCO_return:
run_BCO_return_unboxed:
// Heap check
- if (doYouWantToGC()) {
+ if (doYouWantToGC(cap)) {
RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
}
// Stack checks aren't necessary at return points, the stack use
@@ -747,7 +747,7 @@ run_BCO_fun:
);
// Heap check
- if (doYouWantToGC()) {
+ if (doYouWantToGC(cap)) {
Sp -= 2;
Sp[1] = (W_)obj;
Sp[0] = (W_)&stg_apply_interp_info; // placeholder, really
@@ -863,7 +863,7 @@ run_BCO:
// stg_apply_interp_info pointer and a pointer to
// the BCO
size_words = BCO_BITMAP_SIZE(obj) + 2;
- new_aps = (StgAP_STACK *) allocateLocal(cap, AP_STACK_sizeW(size_words));
+ new_aps = (StgAP_STACK *) allocate(cap, AP_STACK_sizeW(size_words));
SET_HDR(new_aps,&stg_AP_STACK_info,CCS_SYSTEM);
new_aps->size = size_words;
new_aps->fun = &stg_dummy_ret_closure;
@@ -1082,7 +1082,7 @@ run_BCO:
case bci_ALLOC_AP: {
StgAP* ap;
int n_payload = BCO_NEXT;
- ap = (StgAP*)allocateLocal(cap, AP_sizeW(n_payload));
+ ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
Sp[-1] = (W_)ap;
ap->n_args = n_payload;
SET_HDR(ap, &stg_AP_info, CCS_SYSTEM/*ToDo*/)
@@ -1093,7 +1093,7 @@ run_BCO:
case bci_ALLOC_AP_NOUPD: {
StgAP* ap;
int n_payload = BCO_NEXT;
- ap = (StgAP*)allocateLocal(cap, AP_sizeW(n_payload));
+ ap = (StgAP*)allocate(cap, AP_sizeW(n_payload));
Sp[-1] = (W_)ap;
ap->n_args = n_payload;
SET_HDR(ap, &stg_AP_NOUPD_info, CCS_SYSTEM/*ToDo*/)
@@ -1105,7 +1105,7 @@ run_BCO:
StgPAP* pap;
int arity = BCO_NEXT;
int n_payload = BCO_NEXT;
- pap = (StgPAP*)allocateLocal(cap, PAP_sizeW(n_payload));
+ pap = (StgPAP*)allocate(cap, PAP_sizeW(n_payload));
Sp[-1] = (W_)pap;
pap->n_args = n_payload;
pap->arity = arity;
diff --git a/rts/Linker.c b/rts/Linker.c
index 2dcd21be43..2412864d4d 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -943,9 +943,8 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_writeTVarzh) \
SymI_HasProto(stg_yieldzh) \
SymI_NeedsProto(stg_interp_constr_entry) \
- SymI_HasProto(alloc_blocks) \
SymI_HasProto(alloc_blocks_lim) \
- SymI_HasProto(allocateLocal) \
+ SymI_HasProto(allocate) \
SymI_HasProto(allocateExec) \
SymI_HasProto(freeExec) \
SymI_HasProto(getAllocations) \
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index ac6de81e1c..5e762b17a3 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -58,7 +58,7 @@ stg_newByteArrayzh
n = R1;
payload_words = ROUNDUP_BYTES_TO_WDS(n);
words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
- ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
+ ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
StgArrWords_words(p) = payload_words;
@@ -85,7 +85,7 @@ stg_newPinnedByteArrayzh
/* Now we convert to a number of words: */
words = ROUNDUP_BYTES_TO_WDS(bytes);
- ("ptr" p) = foreign "C" allocatePinned(words) [];
+ ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
/* Now we need to move p forward so that the payload is aligned
@@ -117,7 +117,7 @@ stg_newAlignedPinnedByteArrayzh
/* Now we convert to a number of words: */
words = ROUNDUP_BYTES_TO_WDS(bytes);
- ("ptr" p) = foreign "C" allocatePinned(words) [];
+ ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
/* Now we need to move p forward so that the payload is aligned
@@ -139,7 +139,7 @@ stg_newArrayzh
MAYBE_GC(R2_PTR,stg_newArrayzh);
words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
- ("ptr" arr) = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
+ ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2];
TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
@@ -356,7 +356,7 @@ stg_mkWeakForeignEnvzh
payload_words = 4;
words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
- ("ptr" p) = foreign "C" allocateLocal(MyCapability() "ptr", words) [];
+ ("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) [];
TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
index 69dd798ac3..b9fc7b309c 100644
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -1086,7 +1086,7 @@ heapCensus( void )
// Traverse the heap, collecting the census info
if (RtsFlags.GcFlags.generations == 1) {
- heapCensusChain( census, g0s0->blocks );
+ heapCensusChain( census, g0->steps[0].blocks );
} else {
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (s = 0; s < generations[g].n_steps; s++) {
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c
index a0f78eeff7..fad28035e6 100644
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -792,7 +792,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
// fun field.
//
words = frame - sp - 1;
- ap = (StgAP_STACK *)allocateLocal(cap,AP_STACK_sizeW(words));
+ ap = (StgAP_STACK *)allocate(cap,AP_STACK_sizeW(words));
ap->size = words;
ap->fun = (StgClosure *)sp[0];
@@ -856,7 +856,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
// we've got an exception to raise, so let's pass it to the
// handler in this frame.
//
- raise = (StgThunk *)allocateLocal(cap,sizeofW(StgThunk)+1);
+ raise = (StgThunk *)allocate(cap,sizeofW(StgThunk)+1);
TICK_ALLOC_SE_THK(1,0);
SET_HDR(raise,&stg_raise_info,cf->header.prof.ccs);
raise->payload[0] = exception;
diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c
index 54d1e75672..c4babca4e8 100644
--- a/rts/RtsAPI.c
+++ b/rts/RtsAPI.c
@@ -28,7 +28,7 @@
HaskellObj
rts_mkChar (Capability *cap, HsChar c)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap, CONSTR_sizeW(0,1));
+ StgClosure *p = (StgClosure *)allocate(cap, CONSTR_sizeW(0,1));
SET_HDR(p, Czh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)(StgChar)c;
return p;
@@ -37,7 +37,7 @@ rts_mkChar (Capability *cap, HsChar c)
HaskellObj
rts_mkInt (Capability *cap, HsInt i)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
SET_HDR(p, Izh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgInt)i;
return p;
@@ -46,7 +46,7 @@ rts_mkInt (Capability *cap, HsInt i)
HaskellObj
rts_mkInt8 (Capability *cap, HsInt8 i)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
SET_HDR(p, I8zh_con_info, CCS_SYSTEM);
/* Make sure we mask out the bits above the lowest 8 */
p->payload[0] = (StgClosure *)(StgInt)i;
@@ -56,7 +56,7 @@ rts_mkInt8 (Capability *cap, HsInt8 i)
HaskellObj
rts_mkInt16 (Capability *cap, HsInt16 i)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
SET_HDR(p, I16zh_con_info, CCS_SYSTEM);
/* Make sure we mask out the relevant bits */
p->payload[0] = (StgClosure *)(StgInt)i;
@@ -66,7 +66,7 @@ rts_mkInt16 (Capability *cap, HsInt16 i)
HaskellObj
rts_mkInt32 (Capability *cap, HsInt32 i)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
SET_HDR(p, I32zh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgInt)i;
return p;
@@ -75,7 +75,7 @@ rts_mkInt32 (Capability *cap, HsInt32 i)
HaskellObj
rts_mkInt64 (Capability *cap, HsInt64 i)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,2));
+ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,2));
SET_HDR(p, I64zh_con_info, CCS_SYSTEM);
ASSIGN_Int64((P_)&(p->payload[0]), i);
return p;
@@ -84,7 +84,7 @@ rts_mkInt64 (Capability *cap, HsInt64 i)
HaskellObj
rts_mkWord (Capability *cap, HsWord i)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
SET_HDR(p, Wzh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)i;
return p;
@@ -94,7 +94,7 @@ HaskellObj
rts_mkWord8 (Capability *cap, HsWord8 w)
{
/* see rts_mkInt* comments */
- StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
SET_HDR(p, W8zh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)(w & 0xff);
return p;
@@ -104,7 +104,7 @@ HaskellObj
rts_mkWord16 (Capability *cap, HsWord16 w)
{
/* see rts_mkInt* comments */
- StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
SET_HDR(p, W16zh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)(w & 0xffff);
return p;
@@ -114,7 +114,7 @@ HaskellObj
rts_mkWord32 (Capability *cap, HsWord32 w)
{
/* see rts_mkInt* comments */
- StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
SET_HDR(p, W32zh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)(w & 0xffffffff);
return p;
@@ -123,7 +123,7 @@ rts_mkWord32 (Capability *cap, HsWord32 w)
HaskellObj
rts_mkWord64 (Capability *cap, HsWord64 w)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,2));
+ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,2));
/* see mk_Int8 comment */
SET_HDR(p, W64zh_con_info, CCS_SYSTEM);
ASSIGN_Word64((P_)&(p->payload[0]), w);
@@ -134,7 +134,7 @@ rts_mkWord64 (Capability *cap, HsWord64 w)
HaskellObj
rts_mkFloat (Capability *cap, HsFloat f)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
SET_HDR(p, Fzh_con_info, CCS_SYSTEM);
ASSIGN_FLT((P_)p->payload, (StgFloat)f);
return p;
@@ -143,7 +143,7 @@ rts_mkFloat (Capability *cap, HsFloat f)
HaskellObj
rts_mkDouble (Capability *cap, HsDouble d)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,sizeofW(StgDouble)));
+ StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,sizeofW(StgDouble)));
SET_HDR(p, Dzh_con_info, CCS_SYSTEM);
ASSIGN_DBL((P_)p->payload, (StgDouble)d);
return p;
@@ -152,7 +152,7 @@ rts_mkDouble (Capability *cap, HsDouble d)
HaskellObj
rts_mkStablePtr (Capability *cap, HsStablePtr s)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap,sizeofW(StgHeader)+1);
+ StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1);
SET_HDR(p, StablePtr_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)s;
return p;
@@ -161,7 +161,7 @@ rts_mkStablePtr (Capability *cap, HsStablePtr s)
HaskellObj
rts_mkPtr (Capability *cap, HsPtr a)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap,sizeofW(StgHeader)+1);
+ StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1);
SET_HDR(p, Ptr_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)a;
return p;
@@ -170,7 +170,7 @@ rts_mkPtr (Capability *cap, HsPtr a)
HaskellObj
rts_mkFunPtr (Capability *cap, HsFunPtr a)
{
- StgClosure *p = (StgClosure *)allocateLocal(cap,sizeofW(StgHeader)+1);
+ StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1);
SET_HDR(p, FunPtr_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)a;
return p;
@@ -197,7 +197,7 @@ rts_apply (Capability *cap, HaskellObj f, HaskellObj arg)
{
StgThunk *ap;
- ap = (StgThunk *)allocateLocal(cap,sizeofW(StgThunk) + 2);
+ ap = (StgThunk *)allocate(cap,sizeofW(StgThunk) + 2);
SET_HDR(ap, (StgInfoTable *)&stg_ap_2_upd_info, CCS_SYSTEM);
ap->payload[0] = f;
ap->payload[1] = arg;
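Outside the RTS, the Capability argument threaded through these rts_mk*/rts_apply calls normally comes from rts_lock(). A minimal usage sketch against the public RtsAPI.h entry points (rts_lock, rts_mkInt, rts_apply, rts_unlock, whose signatures are unchanged by this patch); the function name is hypothetical:

    #include "RtsAPI.h"

    /* Hypothetical usage sketch: acquire a Capability, build a small
     * application closure, and release the Capability again. */
    void build_apply_example (HaskellObj f)
    {
        Capability *cap = rts_lock();
        HaskellObj arg  = rts_mkInt(cap, 42);
        HaskellObj app  = rts_apply(cap, f, arg);
        (void)app;            /* would normally be passed to rts_evalIO etc. */
        rts_unlock(cap);
    }
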
diff --git a/rts/STM.c b/rts/STM.c
index 7921a6750f..ed5a7224ef 100644
--- a/rts/STM.c
+++ b/rts/STM.c
@@ -412,7 +412,7 @@ static void unpark_waiters_on(Capability *cap, StgTVar *s) {
static StgInvariantCheckQueue *new_stg_invariant_check_queue(Capability *cap,
StgAtomicInvariant *invariant) {
StgInvariantCheckQueue *result;
- result = (StgInvariantCheckQueue *)allocateLocal(cap, sizeofW(StgInvariantCheckQueue));
+ result = (StgInvariantCheckQueue *)allocate(cap, sizeofW(StgInvariantCheckQueue));
SET_HDR (result, &stg_INVARIANT_CHECK_QUEUE_info, CCS_SYSTEM);
result -> invariant = invariant;
result -> my_execution = NO_TREC;
@@ -422,7 +422,7 @@ static StgInvariantCheckQueue *new_stg_invariant_check_queue(Capability *cap,
static StgTVarWatchQueue *new_stg_tvar_watch_queue(Capability *cap,
StgClosure *closure) {
StgTVarWatchQueue *result;
- result = (StgTVarWatchQueue *)allocateLocal(cap, sizeofW(StgTVarWatchQueue));
+ result = (StgTVarWatchQueue *)allocate(cap, sizeofW(StgTVarWatchQueue));
SET_HDR (result, &stg_TVAR_WATCH_QUEUE_info, CCS_SYSTEM);
result -> closure = closure;
return result;
@@ -430,7 +430,7 @@ static StgTVarWatchQueue *new_stg_tvar_watch_queue(Capability *cap,
static StgTRecChunk *new_stg_trec_chunk(Capability *cap) {
StgTRecChunk *result;
- result = (StgTRecChunk *)allocateLocal(cap, sizeofW(StgTRecChunk));
+ result = (StgTRecChunk *)allocate(cap, sizeofW(StgTRecChunk));
SET_HDR (result, &stg_TREC_CHUNK_info, CCS_SYSTEM);
result -> prev_chunk = END_STM_CHUNK_LIST;
result -> next_entry_idx = 0;
@@ -440,7 +440,7 @@ static StgTRecChunk *new_stg_trec_chunk(Capability *cap) {
static StgTRecHeader *new_stg_trec_header(Capability *cap,
StgTRecHeader *enclosing_trec) {
StgTRecHeader *result;
- result = (StgTRecHeader *) allocateLocal(cap, sizeofW(StgTRecHeader));
+ result = (StgTRecHeader *) allocate(cap, sizeofW(StgTRecHeader));
SET_HDR (result, &stg_TREC_HEADER_info, CCS_SYSTEM);
result -> enclosing_trec = enclosing_trec;
@@ -1175,7 +1175,7 @@ void stmAddInvariantToCheck(Capability *cap,
// 1. Allocate an StgAtomicInvariant, set last_execution to NO_TREC
// to signal that this is a new invariant in the current atomic block
- invariant = (StgAtomicInvariant *) allocateLocal(cap, sizeofW(StgAtomicInvariant));
+ invariant = (StgAtomicInvariant *) allocate(cap, sizeofW(StgAtomicInvariant));
TRACE("%p : stmAddInvariantToCheck allocated invariant=%p", trec, invariant);
SET_HDR (invariant, &stg_ATOMIC_INVARIANT_info, CCS_SYSTEM);
invariant -> code = code;
@@ -1657,7 +1657,7 @@ void stmWriteTVar(Capability *cap,
StgTVar *stmNewTVar(Capability *cap,
StgClosure *new_value) {
StgTVar *result;
- result = (StgTVar *)allocateLocal(cap, sizeofW(StgTVar));
+ result = (StgTVar *)allocate(cap, sizeofW(StgTVar));
SET_HDR (result, &stg_TVAR_info, CCS_SYSTEM);
result -> current_value = new_value;
result -> first_watch_queue_entry = END_STM_WATCH_QUEUE;
diff --git a/rts/Schedule.c b/rts/Schedule.c
index bb36f9be07..3ae1fe0a1b 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -2262,7 +2262,7 @@ threadStackOverflow(Capability *cap, StgTSO *tso)
"increasing stack size from %ld words to %d.",
(long)tso->stack_size, new_stack_size);
- dest = (StgTSO *)allocateLocal(cap,new_tso_size);
+ dest = (StgTSO *)allocate(cap,new_tso_size);
TICK_ALLOC_TSO(new_stack_size,0);
/* copy the TSO block and the old stack into the new area */
@@ -2533,7 +2533,7 @@ raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception)
// Only create raise_closure if we need to.
if (raise_closure == NULL) {
raise_closure =
- (StgThunk *)allocateLocal(cap,sizeofW(StgThunk)+1);
+ (StgThunk *)allocate(cap,sizeofW(StgThunk)+1);
SET_HDR(raise_closure, &stg_raise_info, CCCS);
raise_closure->payload[0] = exception;
}
diff --git a/rts/Threads.c b/rts/Threads.c
index 3b209ea95b..9867c1c50e 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -63,7 +63,7 @@ createThread(Capability *cap, nat size)
}
size = round_to_mblocks(size);
- tso = (StgTSO *)allocateLocal(cap, size);
+ tso = (StgTSO *)allocate(cap, size);
stack_size = size - TSO_STRUCT_SIZEW;
TICK_ALLOC_TSO(stack_size, 0);
@@ -102,8 +102,8 @@ createThread(Capability *cap, nat size)
*/
ACQUIRE_LOCK(&sched_mutex);
tso->id = next_thread_id++; // while we have the mutex
- tso->global_link = g0s0->threads;
- g0s0->threads = tso;
+ tso->global_link = cap->r.rNursery->threads;
+ cap->r.rNursery->threads = tso;
RELEASE_LOCK(&sched_mutex);
// ToDo: report the stack size in the event?
diff --git a/rts/Weak.c b/rts/Weak.c
index f5c3a62bda..f5e918a921 100644
--- a/rts/Weak.c
+++ b/rts/Weak.c
@@ -120,7 +120,7 @@ scheduleFinalizers(Capability *cap, StgWeak *list)
debugTrace(DEBUG_weak, "weak: batching %d finalizers", n);
- arr = (StgMutArrPtrs *)allocateLocal(cap, sizeofW(StgMutArrPtrs) + n);
+ arr = (StgMutArrPtrs *)allocate(cap, sizeofW(StgMutArrPtrs) + n);
TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_info, CCS_SYSTEM);
arr->ptrs = n;
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 0593bd7ff3..3f556ab36e 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -425,9 +425,9 @@ SET_GCT(gc_threads[0]);
// g0s0->old_blocks is the old nursery
// g0s0->blocks is to-space from the previous GC
if (RtsFlags.GcFlags.generations == 1) {
- if (g0s0->blocks != NULL) {
- freeChain(g0s0->blocks);
- g0s0->blocks = NULL;
+ if (g0->steps[0].blocks != NULL) {
+ freeChain(g0->steps[0].blocks);
+ g0->steps[0].blocks = NULL;
}
}
@@ -646,18 +646,13 @@ SET_GCT(gc_threads[0]);
/* LARGE OBJECTS. The current live large objects are chained on
* scavenged_large, having been moved during garbage
- * collection from large_objects. Any objects left on
+ * collection from large_objects. Any objects left on the
* large_objects list are therefore dead, so we free them here.
*/
- for (bd = stp->large_objects; bd != NULL; bd = next) {
- next = bd->link;
- freeGroup(bd);
- bd = next;
- }
-
+ freeChain(stp->large_objects);
stp->large_objects = stp->scavenged_large_objects;
stp->n_large_blocks = stp->n_scavenged_large_blocks;
-
+ ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
}
else // for older generations...
{
@@ -672,6 +667,7 @@ SET_GCT(gc_threads[0]);
// add the new blocks we promoted during this GC
stp->n_large_blocks += stp->n_scavenged_large_blocks;
+ ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
}
}
}
@@ -685,18 +681,19 @@ SET_GCT(gc_threads[0]);
// Free the small objects allocated via allocate(), since this will
// all have been copied into G0S1 now.
if (RtsFlags.GcFlags.generations > 1) {
- if (g0s0->blocks != NULL) {
- freeChain(g0s0->blocks);
- g0s0->blocks = NULL;
+ if (g0->steps[0].blocks != NULL) {
+ freeChain(g0->steps[0].blocks);
+ g0->steps[0].blocks = NULL;
}
- g0s0->n_blocks = 0;
- g0s0->n_words = 0;
+ g0->steps[0].n_blocks = 0;
+ g0->steps[0].n_words = 0;
}
- alloc_blocks = 0;
alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
// Start a new pinned_object_block
- pinned_object_block = NULL;
+ for (n = 0; n < n_capabilities; n++) {
+ capabilities[n].pinned_object_block = NULL;
+ }
// Free the mark stack.
if (mark_stack_top_bd != NULL) {
@@ -932,14 +929,23 @@ initGcThreads (void)
void
freeGcThreads (void)
{
+ nat s;
if (gc_threads != NULL) {
#if defined(THREADED_RTS)
nat i;
- for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
+ for (i = 0; i < n_capabilities; i++) {
+ for (s = 0; s < total_steps; s++)
+ {
+ freeWSDeque(gc_threads[i]->steps[s].todo_q);
+ }
stgFree (gc_threads[i]);
}
stgFree (gc_threads);
#else
+ for (s = 0; s < total_steps; s++)
+ {
+ freeWSDeque(gc_threads[0]->steps[s].todo_q);
+ }
stgFree (gc_threads);
#endif
gc_threads = NULL;
@@ -1230,8 +1236,21 @@ init_collected_gen (nat g, nat n_threads)
}
}
+ if (g == 0) {
+ for (i = 0; i < n_capabilities; i++) {
+ stp = &nurseries[i];
+ stp->old_threads = stp->threads;
+ stp->threads = END_TSO_QUEUE;
+ }
+ }
+
for (s = 0; s < generations[g].n_steps; s++) {
+ // generation 0, step 0 doesn't need to-space, unless -G1
+ if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
+ continue;
+ }
+
stp = &generations[g].steps[s];
ASSERT(stp->gen_no == g);
@@ -1240,11 +1259,6 @@ init_collected_gen (nat g, nat n_threads)
stp->old_threads = stp->threads;
stp->threads = END_TSO_QUEUE;
- // generation 0, step 0 doesn't need to-space
- if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
- continue;
- }
-
// deprecate the existing blocks
stp->old_blocks = stp->blocks;
stp->n_old_blocks = stp->n_blocks;
@@ -1642,7 +1656,7 @@ resize_nursery (void)
* performance we get from 3L bytes, reducing to the same
* performance at 2L bytes.
*/
- blocks = g0s0->n_blocks;
+ blocks = generations[0].steps[0].n_blocks;
if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c
index 4f0a7a451b..2f5964f529 100644
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -21,6 +21,7 @@
#include "Trace.h"
#include "Schedule.h"
#include "Weak.h"
+#include "Storage.h"
/* -----------------------------------------------------------------------------
Weak Pointers
@@ -82,6 +83,9 @@ StgTSO *resurrected_threads;
// List of blocked threads found to have pending throwTos
StgTSO *exception_threads;
+static void resurrectUnreachableThreads (step *stp);
+static rtsBool tidyThreadList (step *stp);
+
void
initWeakForGC(void)
{
@@ -182,85 +186,23 @@ traverseWeakPtrList(void)
return rtsTrue;
case WeakThreads:
- /* Now deal with the all_threads list, which behaves somewhat like
+ /* Now deal with the step->threads lists, which behave somewhat like
* the weak ptr list. If we discover any threads that are about to
* become garbage, we wake them up and administer an exception.
*/
- {
- StgTSO *t, *tmp, *next, **prev;
- nat g, s;
- step *stp;
+ {
+ nat g, s, n;
- // Traverse thread lists for generations we collected...
- for (g = 0; g <= N; g++) {
- for (s = 0; s < generations[g].n_steps; s++) {
- stp = &generations[g].steps[s];
-
- prev = &stp->old_threads;
-
- for (t = stp->old_threads; t != END_TSO_QUEUE; t = next) {
-
- tmp = (StgTSO *)isAlive((StgClosure *)t);
-
- if (tmp != NULL) {
- t = tmp;
- }
-
- ASSERT(get_itbl(t)->type == TSO);
- if (t->what_next == ThreadRelocated) {
- next = t->_link;
- *prev = next;
- continue;
- }
-
- next = t->global_link;
-
- // This is a good place to check for blocked
- // exceptions. It might be the case that a thread is
- // blocked on delivering an exception to a thread that
- // is also blocked - we try to ensure that this
- // doesn't happen in throwTo(), but it's too hard (or
- // impossible) to close all the race holes, so we
- // accept that some might get through and deal with
- // them here. A GC will always happen at some point,
- // even if the system is otherwise deadlocked.
- //
- // If an unreachable thread has blocked
- // exceptions, we really want to perform the
- // blocked exceptions rather than throwing
- // BlockedIndefinitely exceptions. This is the
- // only place we can discover such threads.
- // The target thread might even be
- // ThreadFinished or ThreadKilled. Bugs here
- // will only be seen when running on a
- // multiprocessor.
- if (t->blocked_exceptions != END_TSO_QUEUE) {
- if (tmp == NULL) {
- evacuate((StgClosure **)&t);
- flag = rtsTrue;
- }
- t->global_link = exception_threads;
- exception_threads = t;
- *prev = next;
- continue;
- }
-
- if (tmp == NULL) {
- // not alive (yet): leave this thread on the
- // old_all_threads list.
- prev = &(t->global_link);
- }
- else {
- // alive
- *prev = next;
-
- // move this thread onto the correct threads list.
- step *new_step;
- new_step = Bdescr((P_)t)->step;
- t->global_link = new_step->threads;
- new_step->threads = t;
- }
- }
+ // Traverse thread lists for generations we collected...
+ for (n = 0; n < n_capabilities; n++) {
+ if (tidyThreadList(&nurseries[n])) {
+ flag = rtsTrue;
+ }
+ }
+ for (g = 0; g <= N; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ if (tidyThreadList(&generations[g].steps[s])) {
+ flag = rtsTrue;
}
}
}
@@ -272,36 +214,18 @@ traverseWeakPtrList(void)
/* And resurrect any threads which were about to become garbage.
*/
{
- nat g, s;
- step *stp;
- StgTSO *t, *tmp, *next;
+ nat g, s, n;
+ for (n = 0; n < n_capabilities; n++) {
+ resurrectUnreachableThreads(&nurseries[n]);
+ }
for (g = 0; g <= N; g++) {
for (s = 0; s < generations[g].n_steps; s++) {
- stp = &generations[g].steps[s];
-
- for (t = stp->old_threads; t != END_TSO_QUEUE; t = next) {
- next = t->global_link;
-
- // ThreadFinished and ThreadComplete: we have to keep
- // these on the all_threads list until they
- // become garbage, because they might get
- // pending exceptions.
- switch (t->what_next) {
- case ThreadKilled:
- case ThreadComplete:
- continue;
- default:
- tmp = t;
- evacuate((StgClosure **)&tmp);
- tmp->global_link = resurrected_threads;
- resurrected_threads = tmp;
- }
- }
+ resurrectUnreachableThreads(&generations[g].steps[s]);
}
}
}
-
+
/* Finally, we can update the blackhole_queue. This queue
* simply strings together TSOs blocked on black holes, it is
* not intended to keep anything alive. Hence, we do not follow
@@ -316,15 +240,113 @@ traverseWeakPtrList(void)
ASSERT(*pt != NULL);
}
}
-
+
weak_stage = WeakDone; // *now* we're done,
return rtsTrue; // but one more round of scavenging, please
-
+ }
+
default:
barf("traverse_weak_ptr_list");
return rtsTrue;
}
+}
+
+ static void resurrectUnreachableThreads (step *stp)
+{
+ StgTSO *t, *tmp, *next;
+
+ for (t = stp->old_threads; t != END_TSO_QUEUE; t = next) {
+ next = t->global_link;
+
+ // ThreadFinished and ThreadComplete: we have to keep
+ // these on the all_threads list until they
+ // become garbage, because they might get
+ // pending exceptions.
+ switch (t->what_next) {
+ case ThreadKilled:
+ case ThreadComplete:
+ continue;
+ default:
+ tmp = t;
+ evacuate((StgClosure **)&tmp);
+ tmp->global_link = resurrected_threads;
+ resurrected_threads = tmp;
+ }
+ }
+}
+
+static rtsBool tidyThreadList (step *stp)
+{
+ StgTSO *t, *tmp, *next, **prev;
+ rtsBool flag = rtsFalse;
+ prev = &stp->old_threads;
+
+ for (t = stp->old_threads; t != END_TSO_QUEUE; t = next) {
+
+ tmp = (StgTSO *)isAlive((StgClosure *)t);
+
+ if (tmp != NULL) {
+ t = tmp;
+ }
+
+ ASSERT(get_itbl(t)->type == TSO);
+ if (t->what_next == ThreadRelocated) {
+ next = t->_link;
+ *prev = next;
+ continue;
+ }
+
+ next = t->global_link;
+
+ // This is a good place to check for blocked
+ // exceptions. It might be the case that a thread is
+ // blocked on delivering an exception to a thread that
+ // is also blocked - we try to ensure that this
+ // doesn't happen in throwTo(), but it's too hard (or
+ // impossible) to close all the race holes, so we
+ // accept that some might get through and deal with
+ // them here. A GC will always happen at some point,
+ // even if the system is otherwise deadlocked.
+ //
+ // If an unreachable thread has blocked
+ // exceptions, we really want to perform the
+ // blocked exceptions rather than throwing
+ // BlockedIndefinitely exceptions. This is the
+ // only place we can discover such threads.
+ // The target thread might even be
+ // ThreadFinished or ThreadKilled. Bugs here
+ // will only be seen when running on a
+ // multiprocessor.
+ if (t->blocked_exceptions != END_TSO_QUEUE) {
+ if (tmp == NULL) {
+ evacuate((StgClosure **)&t);
+ flag = rtsTrue;
+ }
+ t->global_link = exception_threads;
+ exception_threads = t;
+ *prev = next;
+ continue;
+ }
+
+ if (tmp == NULL) {
+ // not alive (yet): leave this thread on the
+ // old_all_threads list.
+ prev = &(t->global_link);
+ }
+ else {
+ // alive
+ *prev = next;
+
+ // move this thread onto the correct threads list.
+ step *new_step;
+ new_step = Bdescr((P_)t)->step;
+ t->global_link = new_step->threads;
+ new_step->threads = t;
+ }
+ }
+
+ return flag;
}
/* -----------------------------------------------------------------------------
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 73ef53f036..5d371b9bf1 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -40,16 +40,14 @@ StgClosure *caf_list = NULL;
StgClosure *revertible_caf_list = NULL;
rtsBool keepCAFs;
-bdescr *pinned_object_block; /* allocate pinned objects into this block */
-nat alloc_blocks; /* number of allocate()d blocks since GC */
-nat alloc_blocks_lim; /* approximate limit on alloc_blocks */
+nat alloc_blocks_lim; /* GC if n_large_blocks in any nursery
+ * reaches this. */
static bdescr *exec_block;
generation *generations = NULL; /* all the generations */
generation *g0 = NULL; /* generation 0, for convenience */
generation *oldest_gen = NULL; /* oldest generation, for convenience */
-step *g0s0 = NULL; /* generation 0, step 0, for convenience */
nat total_steps = 0;
step *all_steps = NULL; /* single array of steps */
@@ -143,14 +141,6 @@ initStorage( void )
* sizeof(struct generation_),
"initStorage: gens");
- /* allocate all the steps into an array. It is important that we do
- it this way, because we need the invariant that two step pointers
- can be directly compared to see which is the oldest.
- Remember that the last generation has only one step. */
- total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
- all_steps = stgMallocBytes(total_steps * sizeof(struct step_),
- "initStorage: steps");
-
/* Initialise all generations */
for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
gen = &generations[g];
@@ -166,6 +156,14 @@ initStorage( void )
g0 = &generations[0];
oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
+ /* allocate all the steps into an array. It is important that we do
+ it this way, because we need the invariant that two step pointers
+ can be directly compared to see which is the oldest.
+ Remember that the last generation has only one step. */
+ total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
+ all_steps = stgMallocBytes(total_steps * sizeof(struct step_),
+ "initStorage: steps");
+
/* Allocate step structures in each generation */
if (RtsFlags.GcFlags.generations > 1) {
/* Only for multiple-generations */
@@ -187,11 +185,7 @@ initStorage( void )
g0->steps = all_steps;
}
-#ifdef THREADED_RTS
n_nurseries = n_capabilities;
-#else
- n_nurseries = 1;
-#endif
nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
"initStorage: nurseries");
@@ -231,7 +225,6 @@ initStorage( void )
}
generations[0].max_blocks = 0;
- g0s0 = &generations[0].steps[0];
/* The allocation area. Policy: keep the allocation area
* small to begin with, even if we have a large suggested heap
@@ -246,7 +239,6 @@ initStorage( void )
revertible_caf_list = NULL;
/* initialise the allocate() interface */
- alloc_blocks = 0;
alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
exec_block = NULL;
@@ -274,7 +266,7 @@ exitStorage (void)
void
freeStorage (void)
{
- stgFree(g0s0); // frees all the steps
+ stgFree(all_steps); // frees all the steps
stgFree(generations);
freeAllMBlocks();
#if defined(THREADED_RTS)
@@ -423,7 +415,6 @@ allocNursery (step *stp, bdescr *tail, nat blocks)
static void
assignNurseriesToCapabilities (void)
{
-#ifdef THREADED_RTS
nat i;
for (i = 0; i < n_nurseries; i++) {
@@ -431,11 +422,6 @@ assignNurseriesToCapabilities (void)
capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
capabilities[i].r.rCurrentAlloc = NULL;
}
-#else /* THREADED_RTS */
- MainCapability.r.rNursery = &nurseries[0];
- MainCapability.r.rCurrentNursery = nurseries[0].blocks;
- MainCapability.r.rCurrentAlloc = NULL;
-#endif
}
static void
@@ -469,6 +455,10 @@ resetNurseries( void )
ASSERT(bd->step == stp);
IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
}
+ // these large objects are dead, since we have just GC'd
+ freeChain(stp->large_objects);
+ stp->large_objects = NULL;
+ stp->n_large_blocks = 0;
}
assignNurseriesToCapabilities();
}
@@ -481,6 +471,7 @@ countNurseryBlocks (void)
for (i = 0; i < n_nurseries; i++) {
blocks += nurseries[i].n_blocks;
+ blocks += nurseries[i].n_large_blocks;
}
return blocks;
}
@@ -565,129 +556,46 @@ move_TSO (StgTSO *src, StgTSO *dest)
}
/* -----------------------------------------------------------------------------
- The allocate() interface
-
- allocateInGen() function allocates memory directly into a specific
- generation. It always succeeds, and returns a chunk of memory n
- words long. n can be larger than the size of a block if necessary,
- in which case a contiguous block group will be allocated.
-
- allocate(n) is equivalent to allocateInGen(g0).
+ split N blocks off the front of the given bdescr, returning the
+ new block group. We add the remainder to the large_blocks list
+ in the same step as the original block.
-------------------------------------------------------------------------- */
-StgPtr
-allocateInGen (generation *g, lnat n)
-{
- step *stp;
- bdescr *bd;
- StgPtr ret;
-
- ACQUIRE_SM_LOCK;
-
- TICK_ALLOC_HEAP_NOCTR(n);
- CCS_ALLOC(CCCS,n);
-
- stp = &g->steps[0];
-
- if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_))
- {
- lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
-
- // Attempting to allocate an object larger than maxHeapSize
- // should definitely be disallowed. (bug #1791)
- if (RtsFlags.GcFlags.maxHeapSize > 0 &&
- req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
- heapOverflow();
- // heapOverflow() doesn't exit (see #2592), but we aren't
- // in a position to do a clean shutdown here: we
- // either have to allocate the memory or exit now.
- // Allocating the memory would be bad, because the user
- // has requested that we not exceed maxHeapSize, so we
- // just exit.
- stg_exit(EXIT_HEAPOVERFLOW);
- }
-
- bd = allocGroup(req_blocks);
- dbl_link_onto(bd, &stp->large_objects);
- stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
- alloc_blocks += bd->blocks;
- initBdescr(bd, stp);
- bd->flags = BF_LARGE;
- bd->free = bd->start + n;
- ret = bd->start;
- }
- else
- {
- // small allocation (<LARGE_OBJECT_THRESHOLD) */
- bd = stp->blocks;
- if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
- bd = allocBlock();
- initBdescr(bd, stp);
- bd->flags = 0;
- bd->link = stp->blocks;
- stp->blocks = bd;
- stp->n_blocks++;
- alloc_blocks++;
- }
- ret = bd->free;
- bd->free += n;
- }
-
- RELEASE_SM_LOCK;
-
- return ret;
-}
-
-StgPtr
-allocate (lnat n)
-{
- return allocateInGen(g0,n);
-}
-
-lnat
-allocatedBytes( void )
-{
- lnat allocated;
-
- allocated = alloc_blocks * BLOCK_SIZE_W;
- if (pinned_object_block != NULL) {
- allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
- pinned_object_block->free;
- }
-
- return allocated;
-}
-
-// split N blocks off the front of the given bdescr, returning the
-// new block group. We treat the remainder as if it
-// had been freshly allocated in generation 0.
bdescr *
splitLargeBlock (bdescr *bd, nat blocks)
{
bdescr *new_bd;
+ ACQUIRE_SM_LOCK;
+
+ ASSERT(countBlocks(bd->step->large_objects) == bd->step->n_large_blocks);
+
// subtract the original number of blocks from the counter first
bd->step->n_large_blocks -= bd->blocks;
new_bd = splitBlockGroup (bd, blocks);
-
- dbl_link_onto(new_bd, &g0s0->large_objects);
- g0s0->n_large_blocks += new_bd->blocks;
- initBdescr(new_bd, g0s0);
- new_bd->flags = BF_LARGE;
+ initBdescr(new_bd, bd->step);
+ new_bd->flags = BF_LARGE | (bd->flags & BF_EVACUATED);
+ // if new_bd is in an old generation, we have to set BF_EVACUATED
new_bd->free = bd->free;
+ dbl_link_onto(new_bd, &bd->step->large_objects);
+
ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W);
// add the new number of blocks to the counter. Due to the gaps
- // for block descriptor, new_bd->blocks + bd->blocks might not be
+ // for block descriptors, new_bd->blocks + bd->blocks might not be
// equal to the original bd->blocks, which is why we do it this way.
- bd->step->n_large_blocks += bd->blocks;
+ bd->step->n_large_blocks += bd->blocks + new_bd->blocks;
+
+ ASSERT(countBlocks(bd->step->large_objects) == bd->step->n_large_blocks);
+
+ RELEASE_SM_LOCK;
return new_bd;
}
/* -----------------------------------------------------------------------------
- allocateLocal()
+ allocate()
This allocates memory in the current thread - it is intended for
use primarily from STG-land where we have a Capability. It is
@@ -700,13 +608,38 @@ splitLargeBlock (bdescr *bd, nat blocks)
-------------------------------------------------------------------------- */
StgPtr
-allocateLocal (Capability *cap, lnat n)
+allocate (Capability *cap, lnat n)
{
bdescr *bd;
StgPtr p;
+ step *stp;
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
- return allocateInGen(g0,n);
+ lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
+
+ // Attempting to allocate an object larger than maxHeapSize
+ // should definitely be disallowed. (bug #1791)
+ if (RtsFlags.GcFlags.maxHeapSize > 0 &&
+ req_blocks >= RtsFlags.GcFlags.maxHeapSize) {
+ heapOverflow();
+ // heapOverflow() doesn't exit (see #2592), but we aren't
+ // in a position to do a clean shutdown here: we
+ // either have to allocate the memory or exit now.
+ // Allocating the memory would be bad, because the user
+ // has requested that we not exceed maxHeapSize, so we
+ // just exit.
+ stg_exit(EXIT_HEAPOVERFLOW);
+ }
+
+ stp = &nurseries[cap->no];
+
+ bd = allocGroup(req_blocks);
+ dbl_link_onto(bd, &stp->large_objects);
+ stp->n_large_blocks += bd->blocks; // might be larger than req_blocks
+ initBdescr(bd, stp);
+ bd->flags = BF_LARGE;
+ bd->free = bd->start + n;
+ return bd->start;
}
/* small allocation (<LARGE_OBJECT_THRESHOLD) */
@@ -731,10 +664,8 @@ allocateLocal (Capability *cap, lnat n)
RELEASE_SM_LOCK;
initBdescr(bd, cap->r.rNursery);
bd->flags = 0;
- // NO: alloc_blocks++;
- // calcAllocated() uses the size of the nursery, and we've
- // already bumpted nursery->n_blocks above. We'll GC
- // pretty quickly now anyway, because MAYBE_GC() will
+ // If we had to allocate a new block, then we'll GC
+ // pretty quickly now, because MAYBE_GC() will
// notice that CurrentNursery->link is NULL.
} else {
// we have a block in the nursery: take it and put
@@ -778,39 +709,41 @@ allocateLocal (Capability *cap, lnat n)
------------------------------------------------------------------------- */
StgPtr
-allocatePinned( lnat n )
+allocatePinned (Capability *cap, lnat n)
{
StgPtr p;
- bdescr *bd = pinned_object_block;
+ bdescr *bd;
+ step *stp;
// If the request is for a large object, then allocate()
// will give us a pinned object anyway.
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
- p = allocate(n);
+ p = allocate(cap, n);
Bdescr(p)->flags |= BF_PINNED;
return p;
}
- ACQUIRE_SM_LOCK;
-
TICK_ALLOC_HEAP_NOCTR(n);
CCS_ALLOC(CCCS,n);
+ bd = cap->pinned_object_block;
+
// If we don't have a block of pinned objects yet, or the current
// one isn't large enough to hold the new object, allocate a new one.
if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
- pinned_object_block = bd = allocBlock();
- dbl_link_onto(bd, &g0s0->large_objects);
- g0s0->n_large_blocks++;
- initBdescr(bd, g0s0);
+ ACQUIRE_SM_LOCK
+ cap->pinned_object_block = bd = allocBlock();
+ RELEASE_SM_LOCK
+ stp = &nurseries[cap->no];
+ dbl_link_onto(bd, &stp->large_objects);
+ stp->n_large_blocks++;
+ initBdescr(bd, stp);
bd->flags = BF_PINNED | BF_LARGE;
bd->free = bd->start;
- alloc_blocks++;
}
p = bd->free;
bd->free += n;
- RELEASE_SM_LOCK;
return p;
}
@@ -900,14 +833,11 @@ calcAllocated( void )
{
nat allocated;
bdescr *bd;
+ nat i;
- allocated = allocatedBytes();
- allocated += countNurseryBlocks() * BLOCK_SIZE_W;
+ allocated = countNurseryBlocks() * BLOCK_SIZE_W;
- {
-#ifdef THREADED_RTS
- nat i;
- for (i = 0; i < n_nurseries; i++) {
+ for (i = 0; i < n_capabilities; i++) {
Capability *cap;
for ( bd = capabilities[i].r.rCurrentNursery->link;
bd != NULL; bd = bd->link ) {
@@ -919,18 +849,10 @@ calcAllocated( void )
allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
- cap->r.rCurrentNursery->free;
}
- }
-#else
- bdescr *current_nursery = MainCapability.r.rCurrentNursery;
-
- for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
- allocated -= BLOCK_SIZE_W;
- }
- if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
- allocated -= (current_nursery->start + BLOCK_SIZE_W)
- - current_nursery->free;
- }
-#endif
+ if (cap->pinned_object_block != NULL) {
+ allocated -= (cap->pinned_object_block->start + BLOCK_SIZE_W) -
+ cap->pinned_object_block->free;
+ }
}
total_allocated += allocated;
@@ -947,16 +869,12 @@ calcLiveBlocks(void)
lnat live = 0;
step *stp;
- if (RtsFlags.GcFlags.generations == 1) {
- return g0s0->n_large_blocks + g0s0->n_blocks;
- }
-
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (s = 0; s < generations[g].n_steps; s++) {
/* approximate amount of live data (doesn't take into account slop
* at end of each block).
*/
- if (g == 0 && s == 0) {
+ if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
continue;
}
stp = &generations[g].steps[s];
@@ -988,14 +906,10 @@ calcLiveWords(void)
lnat live;
step *stp;
- if (RtsFlags.GcFlags.generations == 1) {
- return g0s0->n_words + countOccupied(g0s0->large_objects);
- }
-
live = 0;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (s = 0; s < generations[g].n_steps; s++) {
- if (g == 0 && s == 0) continue;
+ if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) continue;
stp = &generations[g].steps[s];
live += stp->n_words + countOccupied(stp->large_objects);
}
@@ -1384,32 +1298,28 @@ checkSanity( void )
{
nat g, s;
- if (RtsFlags.GcFlags.generations == 1) {
- checkHeap(g0s0->blocks);
- checkLargeObjects(g0s0->large_objects);
- } else {
-
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- for (s = 0; s < generations[g].n_steps; s++) {
- if (g == 0 && s == 0) { continue; }
- ASSERT(countBlocks(generations[g].steps[s].blocks)
- == generations[g].steps[s].n_blocks);
- ASSERT(countBlocks(generations[g].steps[s].large_objects)
- == generations[g].steps[s].n_large_blocks);
- checkHeap(generations[g].steps[s].blocks);
- checkLargeObjects(generations[g].steps[s].large_objects);
- }
- }
-
- for (s = 0; s < n_nurseries; s++) {
- ASSERT(countBlocks(nurseries[s].blocks)
- == nurseries[s].n_blocks);
- ASSERT(countBlocks(nurseries[s].large_objects)
- == nurseries[s].n_large_blocks);
- }
-
- checkFreeListSanity();
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
+ continue;
+ }
+ ASSERT(countBlocks(generations[g].steps[s].blocks)
+ == generations[g].steps[s].n_blocks);
+ ASSERT(countBlocks(generations[g].steps[s].large_objects)
+ == generations[g].steps[s].n_large_blocks);
+ checkHeap(generations[g].steps[s].blocks);
+ checkLargeObjects(generations[g].steps[s].large_objects);
+ }
+ }
+
+ for (s = 0; s < n_nurseries; s++) {
+ ASSERT(countBlocks(nurseries[s].blocks)
+ == nurseries[s].n_blocks);
+ ASSERT(countBlocks(nurseries[s].large_objects)
+ == nurseries[s].n_large_blocks);
}
+
+ checkFreeListSanity();
#if defined(THREADED_RTS)
// check the stacks too in threaded mode, because we don't do a
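The new allocatePinned() is plain bump allocation inside a per-capability pinned block: if the current block is missing or too full, take a fresh block, put it on the nursery's large_objects list, and bump bd->free. A self-contained model of that pattern in portable C (malloc stands in for allocBlock() and the bdescr machinery; all names here are hypothetical, not RTS code):

    #include <stdlib.h>
    #include <stdio.h>

    #define BLOCK_WORDS 4096

    typedef struct block_ {            /* stand-in for bdescr */
        size_t *start, *free;
        struct block_ *link;
    } block;

    typedef struct {                   /* stand-in for Capability */
        block *pinned_object_block;
    } cap_model;

    /* Bump-allocate n words of pinned storage for this capability, starting
     * a fresh block when the current one is missing or too full -- the same
     * shape as the new allocatePinned(cap, n) above. */
    static size_t *alloc_pinned_model (cap_model *cap, size_t n)
    {
        block *bd = cap->pinned_object_block;
        if (bd == NULL || bd->free + n > bd->start + BLOCK_WORDS) {
            bd = malloc(sizeof(block));
            bd->start = bd->free = malloc(BLOCK_WORDS * sizeof(size_t));
            /* in the RTS the old block stays on the nursery's large_objects
             * list; here we just keep it reachable via the link field */
            bd->link = cap->pinned_object_block;
            cap->pinned_object_block = bd;
        }
        size_t *p = bd->free;
        bd->free += n;
        return p;
    }

    int main (void)
    {
        cap_model cap = { NULL };
        size_t *p = alloc_pinned_model(&cap, 8);
        size_t *q = alloc_pinned_model(&cap, 8);
        printf("%p %p\n", (void*)p, (void*)q);   /* q is 8 words past p */
        return 0;
    }
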
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index 5ddcbdcdc2..30bdf54a1d 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -9,6 +9,8 @@
#ifndef SM_STORAGE_H
#define SM_STORAGE_H
+#include "Capability.h"
+
BEGIN_RTS_PRIVATE
/* -----------------------------------------------------------------------------
@@ -23,12 +25,11 @@ void freeStorage(void);
Storage manager state
-------------------------------------------------------------------------- */
-extern bdescr * pinned_object_block;
-
INLINE_HEADER rtsBool
-doYouWantToGC( void )
+doYouWantToGC( Capability *cap )
{
- return (alloc_blocks >= alloc_blocks_lim);
+ return (cap->r.rCurrentNursery->link == NULL ||
+ cap->r.rNursery->n_large_blocks >= alloc_blocks_lim);
}
/* for splitting blocks groups in two */
@@ -120,6 +121,8 @@ void dirty_MVAR(StgRegTable *reg, StgClosure *p);
Nursery manipulation
-------------------------------------------------------------------------- */
+extern step *nurseries;
+
void resetNurseries ( void );
void resizeNurseries ( nat blocks );
void resizeNurseriesFixed ( nat blocks );
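
With the global alloc_blocks counter gone, the heap check is entirely per capability: a GC is wanted when the capability is on the last block of its nursery, or when the nursery's large-object block count reaches alloc_blocks_lim. A minimal sketch of how interpreter-style code uses allocate() together with doYouWantToGC() (hypothetical helper; the real call sites are in rts/Interpreter.c above):

    /* Hypothetical sketch: allocate from the capability's nursery and report
     * whether the caller should return to the scheduler for a GC. */
    static StgPtr allocate_checked (Capability *cap, lnat n_words, rtsBool *want_gc)
    {
        StgPtr p = allocate(cap, n_words);   /* always succeeds */
        *want_gc = doYouWantToGC(cap);       /* nursery nearly empty, or too many large blocks */
        return p;
    }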