diff options
author | Ömer Sinan Ağacan <omeragacan@gmail.com> | 2019-10-24 12:11:59 +0300 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-01-25 05:20:27 -0500 |
commit | 0e57d8a106a61cac11bacb43633b8b4af12d7fdb (patch) | |
tree | 315f1d0bb78331ca338ee194ec1773f0c4bfaf29 /rts/sm | |
parent | be910728d78fdf2ee800828ecdc8a11bd64fdad0 (diff) | |
download | haskell-0e57d8a106a61cac11bacb43633b8b4af12d7fdb.tar.gz |
Fix chaining tagged and untagged ptrs in compacting GC
Currently compacting GC has the invariant that in a chain all fields are tagged
the same. However this does not really hold: root pointers are not tagged, so
when we thread a root we initialize a chain without a tag. When the pointed
objects is evaluated and we have more pointers to it from the heap, we then add
*tagged* fields to the chain (because pointers to it from the heap are tagged),
ending up chaining fields with different tags (pointers from roots are NOT
tagged, pointers from heap are). This breaks the invariant and as a result
compacting GC turns tagged pointers into non-tagged.
This later causes problem in the generated code where we do reads assuming that
the pointer is aligned, e.g.
0x7(%rax) -- assumes that pointer is tagged 1
which causes misaligned reads. This caused #17088.
We fix this using the "pointer tagging for large families" patch (#14373,
!1742):
- With the pointer tagging patch the GC can know what the tagged pointer to a
CONSTR should be (previously we'd need to know the family size -- large
families are always tagged 1, small families are tagged depending on the
constructor).
- Since we now know what the tags should be we no longer need to store the
pointer tag in the info table pointers when forming chains in the compacting
GC.
As a result we no longer need to tag pointers in chains with 1/2 depending on
whether the field points to an info table pointer, or to another field: an info
table pointer is always tagged 0, everything else in the chain is tagged 1. The
lost tags in pointers can be retrieved by looking at the info table.
Finally, instead of using tag 1 for fields and tag 0 for info table pointers, we
use two different tags for fields:
- 1 for fields that have untagged pointers
- 2 for fields that have tagged pointers
When unchaining we then look at the pointer to a field, and depending on its tag
we either leave a tagged pointer or an untagged pointer in the field.
This allows chaining untagged and tagged fields together in compacting GC.
Fixes #17088
Nofib results
-------------
Binaries are smaller because of smaller `Compact.c` code.
make mode=fast EXTRA_RUNTEST_OPTS="-cachegrind" EXTRA_HC_OPTS="-with-rtsopts=-c" NoFibRuns=1
--------------------------------------------------------------------------------
Program Size Allocs Instrs Reads Writes
--------------------------------------------------------------------------------
CS -0.3% 0.0% +0.0% +0.0% +0.0%
CSD -0.3% 0.0% +0.0% +0.0% +0.0%
FS -0.3% 0.0% +0.0% -0.0% -0.0%
S -0.3% 0.0% +5.4% +0.8% +3.9%
VS -0.3% 0.0% +0.0% -0.0% -0.0%
VSD -0.3% 0.0% -0.0% -0.0% -0.2%
VSM -0.3% 0.0% +0.0% +0.0% +0.0%
anna -0.1% 0.0% +0.0% +0.0% +0.0%
ansi -0.3% 0.0% +0.1% +0.0% +0.0%
atom -0.2% 0.0% +0.0% +0.0% +0.0%
awards -0.2% 0.0% +0.0% 0.0% -0.0%
banner -0.3% 0.0% +0.0% +0.0% +0.0%
bernouilli -0.3% 0.0% +0.1% +0.0% +0.0%
binary-trees -0.2% 0.0% +0.0% 0.0% +0.0%
boyer -0.3% 0.0% +0.2% +0.0% +0.0%
boyer2 -0.2% 0.0% +0.2% +0.1% +0.0%
bspt -0.2% 0.0% +0.0% +0.0% +0.0%
cacheprof -0.2% 0.0% +0.0% +0.0% +0.0%
calendar -0.3% 0.0% +0.0% +0.0% +0.0%
cichelli -0.3% 0.0% +1.1% +0.2% +0.5%
circsim -0.2% 0.0% +0.0% -0.0% -0.0%
clausify -0.3% 0.0% +0.0% -0.0% -0.0%
comp_lab_zift -0.2% 0.0% +0.0% +0.0% +0.0%
compress -0.3% 0.0% +0.0% +0.0% +0.0%
compress2 -0.3% 0.0% +0.0% -0.0% -0.0%
constraints -0.3% 0.0% +0.2% +0.1% +0.1%
cryptarithm1 -0.3% 0.0% +0.0% -0.0% 0.0%
cryptarithm2 -0.3% 0.0% +0.0% +0.0% +0.0%
cse -0.3% 0.0% +0.0% +0.0% +0.0%
digits-of-e1 -0.3% 0.0% +0.0% +0.0% +0.0%
digits-of-e2 -0.3% 0.0% +0.0% +0.0% -0.0%
dom-lt -0.2% 0.0% +0.0% +0.0% +0.0%
eliza -0.2% 0.0% +0.0% +0.0% +0.0%
event -0.3% 0.0% +0.1% +0.0% -0.0%
exact-reals -0.2% 0.0% +0.0% +0.0% +0.0%
exp3_8 -0.3% 0.0% +0.0% +0.0% +0.0%
expert -0.2% 0.0% +0.0% +0.0% +0.0%
fannkuch-redux -0.3% 0.0% -0.0% -0.0% -0.0%
fasta -0.3% 0.0% +0.0% +0.0% +0.0%
fem -0.2% 0.0% +0.1% +0.0% +0.0%
fft -0.2% 0.0% +0.0% -0.0% -0.0%
fft2 -0.2% 0.0% +0.0% -0.0% +0.0%
fibheaps -0.3% 0.0% +0.0% -0.0% -0.0%
fish -0.3% 0.0% +0.0% +0.0% +0.0%
fluid -0.2% 0.0% +0.4% +0.1% +0.1%
fulsom -0.2% 0.0% +0.0% +0.0% +0.0%
gamteb -0.2% 0.0% +0.1% +0.0% +0.0%
gcd -0.3% 0.0% +0.0% +0.0% +0.0%
gen_regexps -0.3% 0.0% +0.0% -0.0% -0.0%
genfft -0.3% 0.0% +0.0% +0.0% +0.0%
gg -0.2% 0.0% +0.7% +0.3% +0.2%
grep -0.2% 0.0% +0.0% +0.0% +0.0%
hidden -0.2% 0.0% +0.0% +0.0% +0.0%
hpg -0.2% 0.0% +0.1% +0.0% +0.0%
ida -0.3% 0.0% +0.0% +0.0% +0.0%
infer -0.2% 0.0% +0.0% -0.0% -0.0%
integer -0.3% 0.0% +0.0% +0.0% +0.0%
integrate -0.2% 0.0% +0.0% +0.0% +0.0%
k-nucleotide -0.2% 0.0% +0.0% +0.0% -0.0%
kahan -0.3% 0.0% -0.0% -0.0% -0.0%
knights -0.3% 0.0% +0.0% -0.0% -0.0%
lambda -0.3% 0.0% +0.0% -0.0% -0.0%
last-piece -0.3% 0.0% +0.0% +0.0% +0.0%
lcss -0.3% 0.0% +0.0% +0.0% 0.0%
life -0.3% 0.0% +0.0% -0.0% -0.0%
lift -0.2% 0.0% +0.0% +0.0% +0.0%
linear -0.2% 0.0% +0.0% +0.0% +0.0%
listcompr -0.3% 0.0% +0.0% +0.0% +0.0%
listcopy -0.3% 0.0% +0.0% +0.0% +0.0%
maillist -0.3% 0.0% +0.0% -0.0% -0.0%
mandel -0.2% 0.0% +0.0% +0.0% +0.0%
mandel2 -0.3% 0.0% +0.0% +0.0% +0.0%
mate -0.2% 0.0% +0.0% +0.0% +0.0%
minimax -0.3% 0.0% +0.0% +0.0% +0.0%
mkhprog -0.2% 0.0% +0.0% +0.0% +0.0%
multiplier -0.3% 0.0% +0.0% -0.0% -0.0%
n-body -0.2% 0.0% -0.0% -0.0% -0.0%
nucleic2 -0.2% 0.0% +0.0% +0.0% +0.0%
para -0.2% 0.0% +0.0% -0.0% -0.0%
paraffins -0.3% 0.0% +0.0% -0.0% -0.0%
parser -0.2% 0.0% +0.0% +0.0% +0.0%
parstof -0.2% 0.0% +0.8% +0.2% +0.2%
pic -0.2% 0.0% +0.1% -0.1% -0.1%
pidigits -0.3% 0.0% +0.0% +0.0% +0.0%
power -0.2% 0.0% +0.0% -0.0% -0.0%
pretty -0.3% 0.0% -0.0% -0.0% -0.1%
primes -0.3% 0.0% +0.0% +0.0% -0.0%
primetest -0.2% 0.0% +0.0% -0.0% -0.0%
prolog -0.3% 0.0% +0.0% -0.0% -0.0%
puzzle -0.3% 0.0% +0.0% +0.0% +0.0%
queens -0.3% 0.0% +0.0% +0.0% +0.0%
reptile -0.2% 0.0% +0.2% +0.1% +0.0%
reverse-complem -0.3% 0.0% +0.0% +0.0% +0.0%
rewrite -0.3% 0.0% +0.0% -0.0% -0.0%
rfib -0.2% 0.0% +0.0% +0.0% -0.0%
rsa -0.2% 0.0% +0.0% +0.0% +0.0%
scc -0.3% 0.0% -0.0% -0.0% -0.1%
sched -0.3% 0.0% +0.0% +0.0% +0.0%
scs -0.2% 0.0% +0.1% +0.0% +0.0%
simple -0.2% 0.0% +3.4% +1.0% +1.8%
solid -0.2% 0.0% +0.0% +0.0% +0.0%
sorting -0.3% 0.0% +0.0% +0.0% +0.0%
spectral-norm -0.2% 0.0% -0.0% -0.0% -0.0%
sphere -0.2% 0.0% +0.0% +0.0% +0.0%
symalg -0.2% 0.0% +0.0% +0.0% +0.0%
tak -0.3% 0.0% +0.0% +0.0% -0.0%
transform -0.2% 0.0% +0.2% +0.1% +0.1%
treejoin -0.3% 0.0% +0.2% -0.0% -0.1%
typecheck -0.3% 0.0% +0.0% +0.0% +0.0%
veritas -0.1% 0.0% +0.0% +0.0% +0.0%
wang -0.2% 0.0% +0.0% -0.0% -0.0%
wave4main -0.3% 0.0% +0.0% -0.0% -0.0%
wheel-sieve1 -0.3% 0.0% +0.0% -0.0% -0.0%
wheel-sieve2 -0.3% 0.0% +0.0% -0.0% -0.0%
x2n1 -0.3% 0.0% +0.0% +0.0% +0.0%
--------------------------------------------------------------------------------
Min -0.3% 0.0% -0.0% -0.1% -0.2%
Max -0.1% 0.0% +5.4% +1.0% +3.9%
Geometric Mean -0.3% -0.0% +0.1% +0.0% +0.1%
--------------------------------------------------------------------------------
Program Size Allocs Instrs Reads Writes
--------------------------------------------------------------------------------
circsim -0.2% 0.0% +1.6% +0.4% +0.7%
constraints -0.3% 0.0% +4.3% +1.5% +2.3%
fibheaps -0.3% 0.0% +3.5% +1.2% +1.3%
fulsom -0.2% 0.0% +3.6% +1.2% +1.8%
gc_bench -0.3% 0.0% +4.1% +1.3% +2.3%
hash -0.3% 0.0% +6.6% +2.2% +3.6%
lcss -0.3% 0.0% +0.7% +0.2% +0.7%
mutstore1 -0.3% 0.0% +4.8% +1.4% +2.8%
mutstore2 -0.3% 0.0% +3.4% +1.0% +1.7%
power -0.2% 0.0% +2.7% +0.6% +1.9%
spellcheck -0.3% 0.0% +1.1% +0.4% +0.4%
--------------------------------------------------------------------------------
Min -0.3% 0.0% +0.7% +0.2% +0.4%
Max -0.2% 0.0% +6.6% +2.2% +3.6%
Geometric Mean -0.3% +0.0% +3.3% +1.0% +1.8%
Metric changes
--------------
While it sounds ridiculous, this change causes increased allocations in
the following tests. We concluded that this change can't cause a
difference in allocations and decided to land this patch. Fluctuations
in "bytes allocated" metric is tracked in #17686.
Metric Increase:
Naperian
T10547
T12150
T12234
T12425
T13035
T5837
T6048
Diffstat (limited to 'rts/sm')
-rw-r--r-- | rts/sm/Compact.c | 162 |
1 files changed, 95 insertions, 67 deletions
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c index cd82944abd..1193fd765c 100644 --- a/rts/sm/Compact.c +++ b/rts/sm/Compact.c @@ -37,37 +37,35 @@ /* ---------------------------------------------------------------------------- Threading / unthreading pointers. - The basic idea here is to chain together all the fields pointing at - a particular object, with the root of the chain in the object's - info table field. The original contents of the info pointer goes - at the end of the chain. - - Adding a new field to the chain is a matter of swapping the - contents of the field with the contents of the object's info table - field. - - To unthread the chain, we walk down it updating all the fields on - the chain with the new location of the object. We stop when we - reach the info pointer at the end. - - The main difficulty here is that we need to be able to identify the - info pointer at the end of the chain. We can't use the low bits of - the pointer for this; they are already being used for - pointer-tagging. What's more, we need to retain the - pointer-tagging tag bits on each pointer during the - threading/unthreading process. - - Our solution is as follows: - - an info pointer (chain length zero) is identified by having tag 0 - - in a threaded chain of length > 0: - - the pointer-tagging tag bits are attached to the info pointer - - the first entry in the chain has tag 1 - - second and subsequent entries in the chain have tag 2 - - This exploits the fact that the tag on each pointer to a given - closure is normally the same (if they are not the same, then - presumably the tag is not essential and it therefore doesn't matter - if we throw away some of the tags). + The basic idea here is to chain together all the fields pointing at a + particular object, with the root of the chain in the object's info table + field. The original contents of the info pointer goes at the end of the + chain. + + Adding a new field to the chain is a matter of swapping the contents of the + field with the contents of the object's info table field: + + *field, **field = **field, field + + To unthread the chain, we walk down it updating all the fields on the chain + with the new location of the object. We stop when we reach the info pointer + at the end. + + The main difficulty here is that not all pointers to the same object are + tagged: pointers from roots (e.g. mut_lists) are not tagged, but pointers + from mutators are. So when unthreading a chain we need to distinguish a field + that had a tagged pointer from a field that had an untagged pointer. + + Our solution is as follows: when chaining a field, if the field is NOT + tagged then we tag the pointer to the field with 1. I.e. + + *field, **field = **field, field + 1 + + If the field is tagged then we tag to the pointer to it with 2. + + When unchaining we look at the tag in the pointer to the field, if it's 1 + then we write an untagged pointer to "free" to it, otherwise we tag the + pointer. ------------------------------------------------------------------------- */ STATIC_INLINE W_ @@ -82,10 +80,54 @@ GET_PTR_TAG(W_ p) return p & TAG_MASK; } +static W_ +get_iptr_tag(StgInfoTable *iptr) +{ + const StgInfoTable *info = INFO_PTR_TO_STRUCT(iptr); + switch (info->type) { + case CONSTR: + case CONSTR_1_0: + case CONSTR_0_1: + case CONSTR_2_0: + case CONSTR_1_1: + case CONSTR_0_2: + case CONSTR_NOCAF: + { + W_ con_tag = info->srt + 1; + if (con_tag > TAG_MASK) { + return TAG_MASK; + } else { + return con_tag; + } + } + + case FUN: + case FUN_1_0: + case FUN_0_1: + case FUN_2_0: + case FUN_1_1: + case FUN_0_2: + case FUN_STATIC: + { + const StgFunInfoTable *fun_itbl = FUN_INFO_PTR_TO_STRUCT(iptr); + W_ arity = fun_itbl->f.arity; + if (arity <= TAG_MASK) { + return arity; + } else { + return 0; + } + } + + default: + return 0; + } +} + STATIC_INLINE void thread (StgClosure **p) { StgClosure *q0 = *p; + bool q0_tagged = GET_CLOSURE_TAG(q0) != 0; P_ q = (P_)UNTAG_CLOSURE(q0); // It doesn't look like a closure at the moment, because the info @@ -98,21 +140,8 @@ thread (StgClosure **p) if (bd->flags & BF_MARKED) { W_ iptr = *q; - switch (GET_PTR_TAG(iptr)) - { - case 0: - // this is the info pointer; we are creating a new chain. - // save the original tag at the end of the chain. - *p = (StgClosure *)((W_)iptr + GET_CLOSURE_TAG(q0)); - *q = (W_)p + 1; - break; - case 1: - case 2: - // this is a chain of length 1 or more - *p = (StgClosure *)iptr; - *q = (W_)p + 2; - break; - } + *p = (StgClosure *)iptr; + *q = (W_)p + 1 + (q0_tagged ? 1 : 0); } } } @@ -128,7 +157,7 @@ thread_root (void *user STG_UNUSED, StgClosure **p) STATIC_INLINE void thread_ (void *p) { thread((StgClosure **)p); } STATIC_INLINE void -unthread( P_ p, W_ free ) +unthread( const P_ p, W_ free, W_ tag ) { W_ q = *p; loop: @@ -136,20 +165,21 @@ loop: { case 0: // nothing to do; the chain is length zero + *p = q; return; case 1: { P_ q0 = (P_)(q-1); - W_ r = *q0; // r is the info ptr, tagged with the pointer-tag + W_ r = *q0; *q0 = free; - *p = (W_)UNTAG_PTR(r); - return; + q = r; + goto loop; } case 2: { P_ q0 = (P_)(q-2); W_ r = *q0; - *q0 = free; + *q0 = free + tag; q = r; goto loop; } @@ -162,7 +192,7 @@ loop: // The info pointer is also tagged with the appropriate pointer tag // for this closure, which should be attached to the pointer // subsequently passed to unthread(). -STATIC_INLINE W_ +STATIC_INLINE StgInfoTable* get_threaded_info( P_ p ) { W_ q = (W_)GET_INFO(UNTAG_CLOSURE((StgClosure *)p)); @@ -172,16 +202,13 @@ loop: { case 0: ASSERT(LOOKS_LIKE_INFO_PTR(q)); - return q; + return (StgInfoTable*)q; case 1: - { - W_ r = *(P_)(q-1); - ASSERT(LOOKS_LIKE_INFO_PTR((W_)UNTAG_CONST_CLOSURE((StgClosure *)r))); - return r; - } case 2: - q = *(P_)(q-2); + { + q = *(P_)(UNTAG_PTR(q)); goto loop; + } default: barf("get_threaded_info"); } @@ -353,8 +380,7 @@ thread_stack(P_ p, P_ stack_end) { StgRetFun *ret_fun = (StgRetFun *)p; StgFunInfoTable *fun_info = - FUN_INFO_PTR_TO_STRUCT((StgInfoTable *)UNTAG_PTR( - get_threaded_info((P_)ret_fun->fun))); + FUN_INFO_PTR_TO_STRUCT(get_threaded_info((P_)ret_fun->fun)); // *before* threading it! thread(&ret_fun->fun); p = thread_arg_block(fun_info, ret_fun->payload); @@ -372,7 +398,7 @@ STATIC_INLINE P_ thread_PAP_payload (StgClosure *fun, StgClosure **payload, W_ size) { StgFunInfoTable *fun_info = - FUN_INFO_PTR_TO_STRUCT((StgInfoTable *)UNTAG_PTR(get_threaded_info((P_)fun))); + FUN_INFO_PTR_TO_STRUCT(get_threaded_info((P_)fun)); ASSERT(fun_info->i.type != PAP); P_ p = (P_)payload; @@ -762,8 +788,8 @@ update_fwd_compact( bdescr *blocks ) // ToDo: one possible avenue of attack is to use the fact // that if (p&BLOCK_MASK) >= (free&BLOCK_MASK), then we // definitely have enough room. Also see bug #1147. - W_ iptr = get_threaded_info(p); - StgInfoTable *info = INFO_PTR_TO_STRUCT((StgInfoTable *)UNTAG_PTR(iptr)); + StgInfoTable *iptr = get_threaded_info(p); + StgInfoTable *info = INFO_PTR_TO_STRUCT(iptr); P_ q = p; @@ -783,7 +809,8 @@ update_fwd_compact( bdescr *blocks ) ASSERT(!is_marked(q+1,bd)); } - unthread(q,(W_)free + GET_PTR_TAG(iptr)); + StgWord iptr_tag = get_iptr_tag(iptr); + unthread(q, (W_)free, iptr_tag); free += size; } } @@ -819,8 +846,9 @@ update_bkwd_compact( generation *gen ) free_blocks++; } - W_ iptr = get_threaded_info(p); - unthread(p, (W_)free + GET_PTR_TAG(iptr)); + StgInfoTable *iptr = get_threaded_info(p); + StgWord iptr_tag = get_iptr_tag(iptr); + unthread(p, (W_)free, iptr_tag); ASSERT(LOOKS_LIKE_INFO_PTR((W_)((StgClosure *)p)->header.info)); const StgInfoTable *info = get_itbl((StgClosure *)p); W_ size = closure_sizeW_((StgClosure *)p,info); |