summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2011-02-02 12:30:49 +0000
committerSimon Marlow <marlowsd@gmail.com>2011-02-02 12:30:49 +0000
commitbef3da1e26639303fccbf26c312d2833eedb486e (patch)
treeb1c8b91568eec0e0ba2ea448981d0e3f17eba741
parentc226846642a2201762ee34f6f71b42f13bc46629 (diff)
downloadhaskell-bef3da1e26639303fccbf26c312d2833eedb486e.tar.gz
A small GC optimisation
Store the *number* of the destination generation in the Bdescr struct, so that in evacuate() we don't have to deref gen to get it. This is another improvement ported over from my GC branch.
-rw-r--r--includes/rts/storage/Block.h43
-rw-r--r--includes/rts/storage/GC.h6
-rw-r--r--rts/Arena.c2
-rw-r--r--rts/sm/Evac.c97
-rw-r--r--rts/sm/GC.c8
-rw-r--r--rts/sm/GCThread.h2
-rw-r--r--rts/sm/MarkWeak.c2
-rw-r--r--rts/sm/Scav.c26
8 files changed, 105 insertions, 81 deletions
diff --git a/includes/rts/storage/Block.h b/includes/rts/storage/Block.h
index f39b99c0dc..c73c9af90a 100644
--- a/includes/rts/storage/Block.h
+++ b/includes/rts/storage/Block.h
@@ -46,26 +46,45 @@
* on a 32-bit machine.
*/
+// Note: fields marked with [READ ONLY] must not be modified by the
+// client of the block allocator API. All other fields can be
+// freely modified.
+
#ifndef CMINUSMINUS
typedef struct bdescr_ {
- StgPtr start; /* start addr of memory */
- StgPtr free; /* first free byte of memory */
- struct bdescr_ *link; /* used for chaining blocks together */
+
+ StgPtr start; // [READ ONLY] start addr of memory
+
+ StgPtr free; // first free byte of memory.
+ // NB. during use this value should lie
+ // between start and start + blocks *
+ // BLOCK_SIZE. Values outside this
+ // range are reserved for use by the
+ // block allocator. In particular, the
+ // value (StgPtr)(-1) is used to
+ // indicate that a block is unallocated.
+
+ struct bdescr_ *link; // used for chaining blocks together
+
union {
- struct bdescr_ *back; /* used (occasionally) for doubly-linked lists*/
- StgWord *bitmap;
- StgPtr scan; /* scan pointer for copying GC */
+ struct bdescr_ *back; // used (occasionally) for doubly-linked lists
+ StgWord *bitmap; // bitmap for marking GC
+ StgPtr scan; // scan pointer for copying GC
} u;
- struct generation_ *gen; /* generation */
- struct generation_ *dest; /* destination gen */
+ struct generation_ *gen; // generation
+
+ StgWord16 gen_no; // gen->no, cached
+ StgWord16 dest_no; // number of destination generation
+ StgWord16 _pad1;
+
+ StgWord16 flags; // block flags, see below
- StgWord32 blocks; /* no. of blocks (if grp head, 0 otherwise) */
+ StgWord32 blocks; // [READ ONLY] no. of blocks in a group
+ // (if group head, 0 otherwise)
- StgWord16 gen_no;
- StgWord16 flags; /* block flags, see below */
#if SIZEOF_VOID_P == 8
- StgWord32 _padding[2];
+ StgWord32 _padding[3];
#else
StgWord32 _padding[0];
#endif
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h
index c412c96617..7cee670303 100644
--- a/includes/rts/storage/GC.h
+++ b/includes/rts/storage/GC.h
@@ -194,9 +194,9 @@ extern rtsBool keepCAFs;
INLINE_HEADER void initBdescr(bdescr *bd, generation *gen, generation *dest)
{
- bd->gen = gen;
- bd->gen_no = gen->no;
- bd->dest = dest;
+ bd->gen = gen;
+ bd->gen_no = gen->no;
+ bd->dest_no = dest->no;
}
#endif /* RTS_STORAGE_GC_H */
diff --git a/rts/Arena.c b/rts/Arena.c
index 6d7a65b0f3..653eb69706 100644
--- a/rts/Arena.c
+++ b/rts/Arena.c
@@ -86,7 +86,7 @@ arenaAlloc( Arena *arena, size_t size )
bd->gen_no = 0;
bd->gen = NULL;
- bd->dest = NULL;
+ bd->dest_no = 0;
bd->flags = 0;
bd->free = bd->start;
bd->link = arena->current;
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 9a5f27887e..18ace21b9e 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -51,7 +51,7 @@ STATIC_INLINE void evacuate_large(StgPtr p);
-------------------------------------------------------------------------- */
STATIC_INLINE StgPtr
-alloc_for_copy (nat size, generation *gen)
+alloc_for_copy (nat size, nat gen_no)
{
StgPtr to;
gen_workspace *ws;
@@ -61,17 +61,16 @@ alloc_for_copy (nat size, generation *gen)
* evacuate to an older generation, adjust it here (see comment
* by evacuate()).
*/
- if (gen < gct->evac_gen) {
+ if (gen_no < gct->evac_gen_no) {
if (gct->eager_promotion) {
- gen = gct->evac_gen;
+ gen_no = gct->evac_gen_no;
} else {
gct->failed_to_evac = rtsTrue;
}
}
- ws = &gct->gens[gen->no];
- // this compiles to a single mem access to gen->abs_no only
-
+ ws = &gct->gens[gen_no]; // zero memory references here
+
/* chain a new block onto the to-space for the destination gen if
* necessary.
*/
@@ -91,12 +90,12 @@ alloc_for_copy (nat size, generation *gen)
STATIC_INLINE GNUC_ATTR_HOT void
copy_tag(StgClosure **p, const StgInfoTable *info,
- StgClosure *src, nat size, generation *gen, StgWord tag)
+ StgClosure *src, nat size, nat gen_no, StgWord tag)
{
StgPtr to, from;
nat i;
- to = alloc_for_copy(size,gen);
+ to = alloc_for_copy(size,gen_no);
from = (StgPtr)src;
to[0] = (W_)info;
@@ -133,12 +132,12 @@ copy_tag(StgClosure **p, const StgInfoTable *info,
#if defined(PARALLEL_GC)
STATIC_INLINE void
copy_tag_nolock(StgClosure **p, const StgInfoTable *info,
- StgClosure *src, nat size, generation *gen, StgWord tag)
+ StgClosure *src, nat size, nat gen_no, StgWord tag)
{
StgPtr to, from;
nat i;
- to = alloc_for_copy(size,gen);
+ to = alloc_for_copy(size,gen_no);
from = (StgPtr)src;
to[0] = (W_)info;
@@ -170,7 +169,7 @@ copy_tag_nolock(StgClosure **p, const StgInfoTable *info,
*/
static rtsBool
copyPart(StgClosure **p, StgClosure *src, nat size_to_reserve,
- nat size_to_copy, generation *gen)
+ nat size_to_copy, nat gen_no)
{
StgPtr to, from;
nat i;
@@ -194,7 +193,7 @@ spin:
info = (W_)src->header.info;
#endif
- to = alloc_for_copy(size_to_reserve, gen);
+ to = alloc_for_copy(size_to_reserve, gen_no);
from = (StgPtr)src;
to[0] = info;
@@ -222,9 +221,9 @@ spin:
/* Copy wrappers that don't tag the closure after copying */
STATIC_INLINE GNUC_ATTR_HOT void
copy(StgClosure **p, const StgInfoTable *info,
- StgClosure *src, nat size, generation *gen)
+ StgClosure *src, nat size, nat gen_no)
{
- copy_tag(p,info,src,size,gen,0);
+ copy_tag(p,info,src,size,gen_no,0);
}
/* -----------------------------------------------------------------------------
@@ -243,10 +242,12 @@ evacuate_large(StgPtr p)
{
bdescr *bd;
generation *gen, *new_gen;
+ nat gen_no, new_gen_no;
gen_workspace *ws;
bd = Bdescr(p);
gen = bd->gen;
+ gen_no = bd->gen_no;
ACQUIRE_SPIN_LOCK(&gen->sync_large_objects);
// already evacuated?
@@ -254,7 +255,7 @@ evacuate_large(StgPtr p)
/* Don't forget to set the gct->failed_to_evac flag if we didn't get
* the desired destination (see comments in evacuate()).
*/
- if (gen < gct->evac_gen) {
+ if (gen_no < gct->evac_gen_no) {
gct->failed_to_evac = rtsTrue;
TICK_GC_FAILED_PROMOTION();
}
@@ -274,16 +275,18 @@ evacuate_large(StgPtr p)
/* link it on to the evacuated large object list of the destination gen
*/
- new_gen = bd->dest;
- if (new_gen < gct->evac_gen) {
+ new_gen_no = bd->dest_no;
+
+ if (new_gen_no < gct->evac_gen_no) {
if (gct->eager_promotion) {
- new_gen = gct->evac_gen;
+ new_gen_no = gct->evac_gen_no;
} else {
gct->failed_to_evac = rtsTrue;
}
}
- ws = &gct->gens[new_gen->no];
+ ws = &gct->gens[new_gen_no];
+ new_gen = &generations[new_gen_no];
bd->flags |= BF_EVACUATED;
initBdescr(bd, new_gen, new_gen->to);
@@ -352,7 +355,7 @@ REGPARM1 GNUC_ATTR_HOT void
evacuate(StgClosure **p)
{
bdescr *bd = NULL;
- generation *gen;
+ nat gen_no;
StgClosure *q;
const StgInfoTable *info;
StgWord tag;
@@ -475,7 +478,7 @@ loop:
// We aren't copying this object, so we have to check
// whether it is already in the target generation. (this is
// the write barrier).
- if (bd->gen < gct->evac_gen) {
+ if (bd->gen_no < gct->evac_gen_no) {
gct->failed_to_evac = rtsTrue;
TICK_GC_FAILED_PROMOTION();
}
@@ -499,7 +502,7 @@ loop:
return;
}
- gen = bd->dest;
+ gen_no = bd->dest_no;
info = q->header.info;
if (IS_FORWARDING_PTR(info))
@@ -522,8 +525,8 @@ loop:
*/
StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info);
*p = TAG_CLOSURE(tag,e);
- if (gen < gct->evac_gen) { // optimisation
- if (Bdescr((P_)e)->gen < gct->evac_gen) {
+ if (gen_no < gct->evac_gen_no) { // optimisation
+ if (Bdescr((P_)e)->gen_no < gct->evac_gen_no) {
gct->failed_to_evac = rtsTrue;
TICK_GC_FAILED_PROMOTION();
}
@@ -540,7 +543,7 @@ loop:
case MUT_VAR_DIRTY:
case MVAR_CLEAN:
case MVAR_DIRTY:
- copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen);
+ copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen_no);
return;
// For ints and chars of low value, save space by replacing references to
@@ -552,7 +555,7 @@ loop:
case CONSTR_0_1:
{
#if defined(__PIC__) && defined(mingw32_HOST_OS)
- copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag);
+ copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen_no,tag);
#else
StgWord w = (StgWord)q->payload[0];
if (info == Czh_con_info &&
@@ -569,7 +572,7 @@ loop:
);
}
else {
- copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag);
+ copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen_no,tag);
}
#endif
return;
@@ -578,12 +581,12 @@ loop:
case FUN_0_1:
case FUN_1_0:
case CONSTR_1_0:
- copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag);
+ copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen_no,tag);
return;
case THUNK_1_0:
case THUNK_0_1:
- copy(p,info,q,sizeofW(StgThunk)+1,gen);
+ copy(p,info,q,sizeofW(StgThunk)+1,gen_no);
return;
case THUNK_1_1:
@@ -592,7 +595,7 @@ loop:
#ifdef NO_PROMOTE_THUNKS
#error bitrotted
#endif
- copy(p,info,q,sizeofW(StgThunk)+2,gen);
+ copy(p,info,q,sizeofW(StgThunk)+2,gen_no);
return;
case FUN_1_1:
@@ -600,21 +603,21 @@ loop:
case FUN_0_2:
case CONSTR_1_1:
case CONSTR_2_0:
- copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen,tag);
+ copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen_no,tag);
return;
case CONSTR_0_2:
- copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen,tag);
+ copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen_no,tag);
return;
case THUNK:
- copy(p,info,q,thunk_sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen);
+ copy(p,info,q,thunk_sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen_no);
return;
case FUN:
case IND_PERM:
case CONSTR:
- copy_tag_nolock(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen,tag);
+ copy_tag_nolock(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen_no,tag);
return;
case BLACKHOLE:
@@ -632,7 +635,7 @@ loop:
|| i == &stg_WHITEHOLE_info
|| i == &stg_BLOCKING_QUEUE_CLEAN_info
|| i == &stg_BLOCKING_QUEUE_DIRTY_info) {
- copy(p,info,q,sizeofW(StgInd),gen);
+ copy(p,info,q,sizeofW(StgInd),gen_no);
return;
}
ASSERT(i != &stg_IND_info);
@@ -646,11 +649,11 @@ loop:
case WEAK:
case PRIM:
case MUT_PRIM:
- copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen);
+ copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen_no);
return;
case BCO:
- copy(p,info,q,bco_sizeW((StgBCO *)q),gen);
+ copy(p,info,q,bco_sizeW((StgBCO *)q),gen_no);
return;
case THUNK_SELECTOR:
@@ -678,20 +681,20 @@ loop:
barf("evacuate: stack frame at %p\n", q);
case PAP:
- copy(p,info,q,pap_sizeW((StgPAP*)q),gen);
+ copy(p,info,q,pap_sizeW((StgPAP*)q),gen_no);
return;
case AP:
- copy(p,info,q,ap_sizeW((StgAP*)q),gen);
+ copy(p,info,q,ap_sizeW((StgAP*)q),gen_no);
return;
case AP_STACK:
- copy(p,info,q,ap_stack_sizeW((StgAP_STACK*)q),gen);
+ copy(p,info,q,ap_stack_sizeW((StgAP_STACK*)q),gen_no);
return;
case ARR_WORDS:
// just copy the block
- copy(p,info,q,arr_words_sizeW((StgArrWords *)q),gen);
+ copy(p,info,q,arr_words_sizeW((StgArrWords *)q),gen_no);
return;
case MUT_ARR_PTRS_CLEAN:
@@ -699,11 +702,11 @@ loop:
case MUT_ARR_PTRS_FROZEN:
case MUT_ARR_PTRS_FROZEN0:
// just copy the block
- copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),gen);
+ copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),gen_no);
return;
case TSO:
- copy(p,info,q,sizeofW(StgTSO),gen);
+ copy(p,info,q,sizeofW(StgTSO),gen_no);
return;
case STACK:
@@ -718,7 +721,7 @@ loop:
rtsBool mine;
mine = copyPart(p,(StgClosure *)stack, stack_sizeW(stack),
- sizeofW(StgStack), gen);
+ sizeofW(StgStack), gen_no);
if (mine) {
new_stack = (StgStack *)*p;
move_STACK(stack, new_stack);
@@ -732,7 +735,7 @@ loop:
}
case TREC_CHUNK:
- copy(p,info,q,sizeofW(StgTRecChunk),gen);
+ copy(p,info,q,sizeofW(StgTRecChunk),gen_no);
return;
default:
@@ -836,7 +839,7 @@ selector_chain:
unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p);
*q = (StgClosure *)p;
// shortcut, behave as for: if (evac) evacuate(q);
- if (evac && bd->gen < gct->evac_gen) {
+ if (evac && bd->gen_no < gct->evac_gen_no) {
gct->failed_to_evac = rtsTrue;
TICK_GC_FAILED_PROMOTION();
}
@@ -1075,7 +1078,7 @@ bale_out:
// check whether it was updated in the meantime.
*q = (StgClosure *)p;
if (evac) {
- copy(q,(const StgInfoTable *)info_ptr,(StgClosure *)p,THUNK_SELECTOR_sizeW(),bd->dest);
+ copy(q,(const StgInfoTable *)info_ptr,(StgClosure *)p,THUNK_SELECTOR_sizeW(),bd->dest_no);
}
unchain_thunk_selectors(prev_thunk_selector, *q);
return;
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 38ae1a7996..c9908359b8 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -344,11 +344,11 @@ SET_GCT(gc_threads[0]);
}
// follow roots from the CAF list (used by GHCi)
- gct->evac_gen = 0;
+ gct->evac_gen_no = 0;
markCAFs(mark_root, gct);
// follow all the roots that the application knows about.
- gct->evac_gen = 0;
+ gct->evac_gen_no = 0;
markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
rtsTrue/*prune sparks*/);
@@ -1062,7 +1062,7 @@ gcWorkerThread (Capability *cap)
#endif
// Every thread evacuates some roots.
- gct->evac_gen = 0;
+ gct->evac_gen_no = 0;
markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
rtsTrue/*prune sparks*/);
scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
@@ -1399,7 +1399,7 @@ init_gc_thread (gc_thread *t)
t->scavenged_static_objects = END_OF_STATIC_LIST;
t->scan_bd = NULL;
t->mut_lists = capabilities[t->thread_index].mut_lists;
- t->evac_gen = 0;
+ t->evac_gen_no = 0;
t->failed_to_evac = rtsFalse;
t->eager_promotion = rtsTrue;
t->thunk_selector_depth = 0;
diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h
index c45eb1bdec..62dd1fb73a 100644
--- a/rts/sm/GCThread.h
+++ b/rts/sm/GCThread.h
@@ -146,7 +146,7 @@ typedef struct gc_thread_ {
// --------------------
// evacuate flags
- generation *evac_gen; // Youngest generation that objects
+ nat evac_gen_no; // Youngest generation that objects
// should be evacuated to in
// evacuate(). (Logically an
// argument to evacuate, but it's
diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c
index 72f0ade797..f4b576ac73 100644
--- a/rts/sm/MarkWeak.c
+++ b/rts/sm/MarkWeak.c
@@ -110,7 +110,7 @@ traverseWeakPtrList(void)
/* doesn't matter where we evacuate values/finalizers to, since
* these pointers are treated as roots (iff the keys are alive).
*/
- gct->evac_gen = 0;
+ gct->evac_gen_no = 0;
last_w = &old_weak_ptr_list;
for (w = old_weak_ptr_list; w != NULL; w = next_w) {
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index d7e16eaf2a..9ac152af53 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -362,11 +362,11 @@ scavenge_fun_srt(const StgInfoTable *info)
/* -----------------------------------------------------------------------------
Scavenge a block from the given scan pointer up to bd->free.
- evac_gen is set by the caller to be either zero (for a step in a
+ evac_gen_no is set by the caller to be either zero (for a step in a
generation < N) or G where G is the generation of the step being
scavenged.
- We sometimes temporarily change evac_gen back to zero if we're
+ We sometimes temporarily change evac_gen_no back to zero if we're
scavenging a mutable object where eager promotion isn't such a good
idea.
-------------------------------------------------------------------------- */
@@ -383,7 +383,7 @@ scavenge_block (bdescr *bd)
bd->start, bd->gen_no, bd->u.scan);
gct->scan_bd = bd;
- gct->evac_gen = bd->gen;
+ gct->evac_gen_no = bd->gen_no;
saved_eager_promotion = gct->eager_promotion;
gct->failed_to_evac = rtsFalse;
@@ -754,7 +754,7 @@ scavenge_mark_stack(void)
StgInfoTable *info;
rtsBool saved_eager_promotion;
- gct->evac_gen = oldest_gen;
+ gct->evac_gen_no = oldest_gen->no;
saved_eager_promotion = gct->eager_promotion;
while ((p = pop_mark_stack())) {
@@ -1041,8 +1041,8 @@ scavenge_mark_stack(void)
if (gct->failed_to_evac) {
gct->failed_to_evac = rtsFalse;
- if (gct->evac_gen) {
- recordMutableGen_GC((StgClosure *)q, gct->evac_gen->no);
+ if (gct->evac_gen_no) {
+ recordMutableGen_GC((StgClosure *)q, gct->evac_gen_no);
}
}
} // while (p = pop_mark_stack())
@@ -1340,8 +1340,10 @@ void
scavenge_mutable_list(bdescr *bd, generation *gen)
{
StgPtr p, q;
+ nat gen_no;
- gct->evac_gen = gen;
+ gen_no = gen->no;
+ gct->evac_gen_no = gen_no;
for (; bd != NULL; bd = bd->link) {
for (q = bd->start; q < bd->free; q++) {
p = (StgPtr)*q;
@@ -1376,7 +1378,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
//
switch (get_itbl((StgClosure *)p)->type) {
case MUT_ARR_PTRS_CLEAN:
- recordMutableGen_GC((StgClosure *)p,gen->no);
+ recordMutableGen_GC((StgClosure *)p,gen_no);
continue;
case MUT_ARR_PTRS_DIRTY:
{
@@ -1394,7 +1396,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
gct->eager_promotion = saved_eager_promotion;
gct->failed_to_evac = rtsFalse;
- recordMutableGen_GC((StgClosure *)p,gen->no);
+ recordMutableGen_GC((StgClosure *)p,gen_no);
continue;
}
default:
@@ -1404,7 +1406,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen)
if (scavenge_one(p)) {
// didn't manage to promote everything, so put the
// object back on the list.
- recordMutableGen_GC((StgClosure *)p,gen->no);
+ recordMutableGen_GC((StgClosure *)p,gen_no);
}
}
}
@@ -1446,7 +1448,7 @@ scavenge_static(void)
/* Always evacuate straight to the oldest generation for static
* objects */
- gct->evac_gen = oldest_gen;
+ gct->evac_gen_no = oldest_gen->no;
/* keep going until we've scavenged all the objects on the linked
list... */
@@ -1741,7 +1743,7 @@ scavenge_large (gen_workspace *ws)
bdescr *bd;
StgPtr p;
- gct->evac_gen = ws->gen;
+ gct->evac_gen_no = ws->gen->no;
bd = ws->todo_large_objects;