author     Ömer Sinan Ağacan <omeragacan@gmail.com>    2019-10-24 12:11:59 +0300
committer  Marge Bot <ben+marge-bot@smart-cactus.org>  2020-01-25 05:20:27 -0500
commit     0e57d8a106a61cac11bacb43633b8b4af12d7fdb
tree       315f1d0bb78331ca338ee194ec1773f0c4bfaf29 /rts
parent     be910728d78fdf2ee800828ecdc8a11bd64fdad0
Fix chaining tagged and untagged ptrs in compacting GC
Currently the compacting GC has the invariant that in a chain all fields are
tagged the same. However this does not really hold: root pointers are not
tagged, so when we thread a root we initialize a chain without a tag. When the
pointed-to object is evaluated and we have more pointers to it from the heap,
we then add *tagged* fields to the chain (because pointers to it from the heap
are tagged), ending up chaining fields with different tags (pointers from
roots are NOT tagged, pointers from the heap are).

This breaks the invariant, and as a result the compacting GC turns tagged
pointers into untagged ones. That later causes problems in the generated code,
where we do reads assuming a particular tag, e.g. 0x7(%rax) assumes the
pointer is tagged 1, which causes misaligned reads. This caused #17088.

We fix this using the "pointer tagging for large families" patch (#14373,
!1742):

- With the pointer tagging patch the GC can know what the tagged pointer to a
  CONSTR should be (previously we'd need to know the family size -- large
  families are always tagged 1, small families are tagged depending on the
  constructor).

- Since we now know what the tags should be, we no longer need to store the
  pointer tag in the info table pointers when forming chains in the compacting
  GC.

  As a result we no longer need to tag pointers in chains with 1/2 depending
  on whether the field points to an info table pointer or to another field: an
  info table pointer is always tagged 0, everything else in the chain is
  tagged 1. The lost tags in pointers can be retrieved by looking at the info
  table.

Finally, instead of using tag 1 for fields and tag 0 for info table pointers,
we use two different tags for fields:

- 1 for fields that hold untagged pointers
- 2 for fields that hold tagged pointers

When unchaining we then look at the pointer to a field, and depending on its
tag we either leave a tagged pointer or an untagged pointer in the field.

This allows chaining untagged and tagged fields together in the compacting GC.

Fixes #17088
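As a concrete illustration of the scheme above, here is a minimal,
self-contained C sketch of the chain encoding: a field that held an untagged
pointer is linked in with tag 1, a field that held a tagged pointer with tag
2, and the saved info pointer at the end of the chain carries tag 0. This is
not the RTS code; the word type, the TAG_MASK value, the helper names
(thread_field, unthread_chain) and the toy two-word "object" are invented for
the example.

    /* Toy model of threading/unthreading with per-field tags (not RTS code). */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uintptr_t W;
    #define TAG_MASK 7                     /* 3 tag bits on a 64-bit target */
    #define GET_TAG(w)  ((w) & TAG_MASK)
    #define UNTAG(w)    ((w) & ~(W)TAG_MASK)

    /* Link one field into the chain rooted at the object's header word. */
    static void thread_field(W *field, W *header)
    {
        W old = *field;                    /* possibly tagged pointer to the object */
        int was_tagged = GET_TAG(old) != 0;
        *field  = *header;                 /* field now holds the previous chain head */
        *header = (W)field + (was_tagged ? 2 : 1);
    }

    /* Walk the chain, writing the object's new address into every field and
     * re-applying the pointer tag only where tag 2 says the field was tagged. */
    static void unthread_chain(W *header, W new_addr, W ptr_tag)
    {
        W q = *header;
        while (GET_TAG(q) != 0) {          /* tag 0 marks the saved info pointer */
            W *field = (W *)UNTAG(q);
            W next = *field;
            *field = (GET_TAG(q) == 2) ? new_addr + ptr_tag : new_addr;
            q = next;
        }
        *header = q;                       /* restore the original info pointer */
    }

    int main(void)
    {
        W fake_info  = 0x1000;             /* stands in for an info pointer */
        W object[2]  = { fake_info, 0 };   /* header + one payload word */
        W root_field = (W)&object[0];      /* untagged, like a pointer from a root */
        W heap_field = (W)&object[0] + 1;  /* tagged 1, like a pointer from the heap */

        thread_field(&root_field, &object[0]);
        thread_field(&heap_field, &object[0]);

        unthread_chain(&object[0], 0x2000, /*ptr_tag=*/1);
        assert(root_field == 0x2000);      /* stays untagged */
        assert(heap_field == 0x2001);      /* tag restored */
        assert(object[0]  == fake_info);   /* original info pointer is back */
        printf("ok\n");
        return 0;
    }

Running the sketch prints "ok": the root field comes back untagged, the heap
field gets its tag restored (here supplied as ptr_tag, which the real patch
recovers from the info table), and the header ends up holding the original
info pointer again.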
Nofib results
-------------

Binaries are smaller because of smaller `Compact.c` code.

  make mode=fast EXTRA_RUNTEST_OPTS="-cachegrind" EXTRA_HC_OPTS="-with-rtsopts=-c" NoFibRuns=1

--------------------------------------------------------------------------------
Program                  Size   Allocs   Instrs    Reads   Writes
--------------------------------------------------------------------------------
CS                      -0.3%     0.0%    +0.0%    +0.0%    +0.0%
CSD                     -0.3%     0.0%    +0.0%    +0.0%    +0.0%
FS                      -0.3%     0.0%    +0.0%    -0.0%    -0.0%
S                       -0.3%     0.0%    +5.4%    +0.8%    +3.9%
VS                      -0.3%     0.0%    +0.0%    -0.0%    -0.0%
VSD                     -0.3%     0.0%    -0.0%    -0.0%    -0.2%
VSM                     -0.3%     0.0%    +0.0%    +0.0%    +0.0%
anna                    -0.1%     0.0%    +0.0%    +0.0%    +0.0%
ansi                    -0.3%     0.0%    +0.1%    +0.0%    +0.0%
atom                    -0.2%     0.0%    +0.0%    +0.0%    +0.0%
awards                  -0.2%     0.0%    +0.0%     0.0%    -0.0%
banner                  -0.3%     0.0%    +0.0%    +0.0%    +0.0%
bernouilli              -0.3%     0.0%    +0.1%    +0.0%    +0.0%
binary-trees            -0.2%     0.0%    +0.0%     0.0%    +0.0%
boyer                   -0.3%     0.0%    +0.2%    +0.0%    +0.0%
boyer2                  -0.2%     0.0%    +0.2%    +0.1%    +0.0%
bspt                    -0.2%     0.0%    +0.0%    +0.0%    +0.0%
cacheprof               -0.2%     0.0%    +0.0%    +0.0%    +0.0%
calendar                -0.3%     0.0%    +0.0%    +0.0%    +0.0%
cichelli                -0.3%     0.0%    +1.1%    +0.2%    +0.5%
circsim                 -0.2%     0.0%    +0.0%    -0.0%    -0.0%
clausify                -0.3%     0.0%    +0.0%    -0.0%    -0.0%
comp_lab_zift           -0.2%     0.0%    +0.0%    +0.0%    +0.0%
compress                -0.3%     0.0%    +0.0%    +0.0%    +0.0%
compress2               -0.3%     0.0%    +0.0%    -0.0%    -0.0%
constraints             -0.3%     0.0%    +0.2%    +0.1%    +0.1%
cryptarithm1            -0.3%     0.0%    +0.0%    -0.0%     0.0%
cryptarithm2            -0.3%     0.0%    +0.0%    +0.0%    +0.0%
cse                     -0.3%     0.0%    +0.0%    +0.0%    +0.0%
digits-of-e1            -0.3%     0.0%    +0.0%    +0.0%    +0.0%
digits-of-e2            -0.3%     0.0%    +0.0%    +0.0%    -0.0%
dom-lt                  -0.2%     0.0%    +0.0%    +0.0%    +0.0%
eliza                   -0.2%     0.0%    +0.0%    +0.0%    +0.0%
event                   -0.3%     0.0%    +0.1%    +0.0%    -0.0%
exact-reals             -0.2%     0.0%    +0.0%    +0.0%    +0.0%
exp3_8                  -0.3%     0.0%    +0.0%    +0.0%    +0.0%
expert                  -0.2%     0.0%    +0.0%    +0.0%    +0.0%
fannkuch-redux          -0.3%     0.0%    -0.0%    -0.0%    -0.0%
fasta                   -0.3%     0.0%    +0.0%    +0.0%    +0.0%
fem                     -0.2%     0.0%    +0.1%    +0.0%    +0.0%
fft                     -0.2%     0.0%    +0.0%    -0.0%    -0.0%
fft2                    -0.2%     0.0%    +0.0%    -0.0%    +0.0%
fibheaps                -0.3%     0.0%    +0.0%    -0.0%    -0.0%
fish                    -0.3%     0.0%    +0.0%    +0.0%    +0.0%
fluid                   -0.2%     0.0%    +0.4%    +0.1%    +0.1%
fulsom                  -0.2%     0.0%    +0.0%    +0.0%    +0.0%
gamteb                  -0.2%     0.0%    +0.1%    +0.0%    +0.0%
gcd                     -0.3%     0.0%    +0.0%    +0.0%    +0.0%
gen_regexps             -0.3%     0.0%    +0.0%    -0.0%    -0.0%
genfft                  -0.3%     0.0%    +0.0%    +0.0%    +0.0%
gg                      -0.2%     0.0%    +0.7%    +0.3%    +0.2%
grep                    -0.2%     0.0%    +0.0%    +0.0%    +0.0%
hidden                  -0.2%     0.0%    +0.0%    +0.0%    +0.0%
hpg                     -0.2%     0.0%    +0.1%    +0.0%    +0.0%
ida                     -0.3%     0.0%    +0.0%    +0.0%    +0.0%
infer                   -0.2%     0.0%    +0.0%    -0.0%    -0.0%
integer                 -0.3%     0.0%    +0.0%    +0.0%    +0.0%
integrate               -0.2%     0.0%    +0.0%    +0.0%    +0.0%
k-nucleotide            -0.2%     0.0%    +0.0%    +0.0%    -0.0%
kahan                   -0.3%     0.0%    -0.0%    -0.0%    -0.0%
knights                 -0.3%     0.0%    +0.0%    -0.0%    -0.0%
lambda                  -0.3%     0.0%    +0.0%    -0.0%    -0.0%
last-piece              -0.3%     0.0%    +0.0%    +0.0%    +0.0%
lcss                    -0.3%     0.0%    +0.0%    +0.0%     0.0%
life                    -0.3%     0.0%    +0.0%    -0.0%    -0.0%
lift                    -0.2%     0.0%    +0.0%    +0.0%    +0.0%
linear                  -0.2%     0.0%    +0.0%    +0.0%    +0.0%
listcompr               -0.3%     0.0%    +0.0%    +0.0%    +0.0%
listcopy                -0.3%     0.0%    +0.0%    +0.0%    +0.0%
maillist                -0.3%     0.0%    +0.0%    -0.0%    -0.0%
mandel                  -0.2%     0.0%    +0.0%    +0.0%    +0.0%
mandel2                 -0.3%     0.0%    +0.0%    +0.0%    +0.0%
mate                    -0.2%     0.0%    +0.0%    +0.0%    +0.0%
minimax                 -0.3%     0.0%    +0.0%    +0.0%    +0.0%
mkhprog                 -0.2%     0.0%    +0.0%    +0.0%    +0.0%
multiplier              -0.3%     0.0%    +0.0%    -0.0%    -0.0%
n-body                  -0.2%     0.0%    -0.0%    -0.0%    -0.0%
nucleic2                -0.2%     0.0%    +0.0%    +0.0%    +0.0%
para                    -0.2%     0.0%    +0.0%    -0.0%    -0.0%
paraffins               -0.3%     0.0%    +0.0%    -0.0%    -0.0%
parser                  -0.2%     0.0%    +0.0%    +0.0%    +0.0%
parstof                 -0.2%     0.0%    +0.8%    +0.2%    +0.2%
pic                     -0.2%     0.0%    +0.1%    -0.1%    -0.1%
pidigits                -0.3%     0.0%    +0.0%    +0.0%    +0.0%
power                   -0.2%     0.0%    +0.0%    -0.0%    -0.0%
pretty                  -0.3%     0.0%    -0.0%    -0.0%    -0.1%
primes                  -0.3%     0.0%    +0.0%    +0.0%    -0.0%
primetest               -0.2%     0.0%    +0.0%    -0.0%    -0.0%
prolog                  -0.3%     0.0%    +0.0%    -0.0%    -0.0%
puzzle                  -0.3%     0.0%    +0.0%    +0.0%    +0.0%
queens                  -0.3%     0.0%    +0.0%    +0.0%    +0.0%
reptile                 -0.2%     0.0%    +0.2%    +0.1%    +0.0%
reverse-complem         -0.3%     0.0%    +0.0%    +0.0%    +0.0%
rewrite                 -0.3%     0.0%    +0.0%    -0.0%    -0.0%
rfib                    -0.2%     0.0%    +0.0%    +0.0%    -0.0%
rsa                     -0.2%     0.0%    +0.0%    +0.0%    +0.0%
scc                     -0.3%     0.0%    -0.0%    -0.0%    -0.1%
sched                   -0.3%     0.0%    +0.0%    +0.0%    +0.0%
scs                     -0.2%     0.0%    +0.1%    +0.0%    +0.0%
simple                  -0.2%     0.0%    +3.4%    +1.0%    +1.8%
solid                   -0.2%     0.0%    +0.0%    +0.0%    +0.0%
sorting                 -0.3%     0.0%    +0.0%    +0.0%    +0.0%
spectral-norm           -0.2%     0.0%    -0.0%    -0.0%    -0.0%
sphere                  -0.2%     0.0%    +0.0%    +0.0%    +0.0%
symalg                  -0.2%     0.0%    +0.0%    +0.0%    +0.0%
tak                     -0.3%     0.0%    +0.0%    +0.0%    -0.0%
transform               -0.2%     0.0%    +0.2%    +0.1%    +0.1%
treejoin                -0.3%     0.0%    +0.2%    -0.0%    -0.1%
typecheck               -0.3%     0.0%    +0.0%    +0.0%    +0.0%
veritas                 -0.1%     0.0%    +0.0%    +0.0%    +0.0%
wang                    -0.2%     0.0%    +0.0%    -0.0%    -0.0%
wave4main               -0.3%     0.0%    +0.0%    -0.0%    -0.0%
wheel-sieve1            -0.3%     0.0%    +0.0%    -0.0%    -0.0%
wheel-sieve2            -0.3%     0.0%    +0.0%    -0.0%    -0.0%
x2n1                    -0.3%     0.0%    +0.0%    +0.0%    +0.0%
--------------------------------------------------------------------------------
Min                     -0.3%     0.0%    -0.0%    -0.1%    -0.2%
Max                     -0.1%     0.0%    +5.4%    +1.0%    +3.9%
Geometric Mean          -0.3%    -0.0%    +0.1%    +0.0%    +0.1%

--------------------------------------------------------------------------------
Program                  Size   Allocs   Instrs    Reads   Writes
--------------------------------------------------------------------------------
circsim                 -0.2%     0.0%    +1.6%    +0.4%    +0.7%
constraints             -0.3%     0.0%    +4.3%    +1.5%    +2.3%
fibheaps                -0.3%     0.0%    +3.5%    +1.2%    +1.3%
fulsom                  -0.2%     0.0%    +3.6%    +1.2%    +1.8%
gc_bench                -0.3%     0.0%    +4.1%    +1.3%    +2.3%
hash                    -0.3%     0.0%    +6.6%    +2.2%    +3.6%
lcss                    -0.3%     0.0%    +0.7%    +0.2%    +0.7%
mutstore1               -0.3%     0.0%    +4.8%    +1.4%    +2.8%
mutstore2               -0.3%     0.0%    +3.4%    +1.0%    +1.7%
power                   -0.2%     0.0%    +2.7%    +0.6%    +1.9%
spellcheck              -0.3%     0.0%    +1.1%    +0.4%    +0.4%
--------------------------------------------------------------------------------
Min                     -0.3%     0.0%    +0.7%    +0.2%    +0.4%
Max                     -0.2%     0.0%    +6.6%    +2.2%    +3.6%
Geometric Mean          -0.3%    +0.0%    +3.3%    +1.0%    +1.8%
--------------------------------------------------------------------------------

Metric changes
--------------

While it sounds ridiculous, this change causes increased allocations in the
following tests. We concluded that this change can't cause a difference in
allocations and decided to land this patch. Fluctuations in the "bytes
allocated" metric are tracked in #17686.

Metric Increase:
    Naperian
    T10547
    T12150
    T12234
    T12425
    T13035
    T5837
    T6048
Diffstat (limited to 'rts')
-rw-r--r--   rts/sm/Compact.c   162
1 files changed, 95 insertions, 67 deletions
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index cd82944abd..1193fd765c 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -37,37 +37,35 @@
/* ----------------------------------------------------------------------------
Threading / unthreading pointers.
- The basic idea here is to chain together all the fields pointing at
- a particular object, with the root of the chain in the object's
- info table field. The original contents of the info pointer goes
- at the end of the chain.
-
- Adding a new field to the chain is a matter of swapping the
- contents of the field with the contents of the object's info table
- field.
-
- To unthread the chain, we walk down it updating all the fields on
- the chain with the new location of the object. We stop when we
- reach the info pointer at the end.
-
- The main difficulty here is that we need to be able to identify the
- info pointer at the end of the chain. We can't use the low bits of
- the pointer for this; they are already being used for
- pointer-tagging. What's more, we need to retain the
- pointer-tagging tag bits on each pointer during the
- threading/unthreading process.
-
- Our solution is as follows:
- - an info pointer (chain length zero) is identified by having tag 0
- - in a threaded chain of length > 0:
- - the pointer-tagging tag bits are attached to the info pointer
- - the first entry in the chain has tag 1
- - second and subsequent entries in the chain have tag 2
-
- This exploits the fact that the tag on each pointer to a given
- closure is normally the same (if they are not the same, then
- presumably the tag is not essential and it therefore doesn't matter
- if we throw away some of the tags).
+ The basic idea here is to chain together all the fields pointing at a
+ particular object, with the root of the chain in the object's info table
+ field. The original contents of the info pointer goes at the end of the
+ chain.
+
+ Adding a new field to the chain is a matter of swapping the contents of the
+ field with the contents of the object's info table field:
+
+ *field, **field = **field, field
+
+ To unthread the chain, we walk down it updating all the fields on the chain
+ with the new location of the object. We stop when we reach the info pointer
+ at the end.
+
+ The main difficulty here is that not all pointers to the same object are
+ tagged: pointers from roots (e.g. mut_lists) are not tagged, but pointers
+ from mutators are. So when unthreading a chain we need to distinguish a field
+ that had a tagged pointer from a field that had an untagged pointer.
+
+ Our solution is as follows: when chaining a field, if the field is NOT
+ tagged then we tag the pointer to the field with 1. I.e.
+
+ *field, **field = **field, field + 1
+
+ If the field is tagged then we tag the pointer to it with 2.
+
+ When unchaining we look at the tag on the pointer to the field: if it's 1
+ we write an untagged pointer to "free" into the field, otherwise we write
+ a tagged pointer.
------------------------------------------------------------------------- */
STATIC_INLINE W_
@@ -82,10 +80,54 @@ GET_PTR_TAG(W_ p)
return p & TAG_MASK;
}
+static W_
+get_iptr_tag(StgInfoTable *iptr)
+{
+ const StgInfoTable *info = INFO_PTR_TO_STRUCT(iptr);
+ switch (info->type) {
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_NOCAF:
+ {
+ W_ con_tag = info->srt + 1;
+ if (con_tag > TAG_MASK) {
+ return TAG_MASK;
+ } else {
+ return con_tag;
+ }
+ }
+
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+ case FUN_STATIC:
+ {
+ const StgFunInfoTable *fun_itbl = FUN_INFO_PTR_TO_STRUCT(iptr);
+ W_ arity = fun_itbl->f.arity;
+ if (arity <= TAG_MASK) {
+ return arity;
+ } else {
+ return 0;
+ }
+ }
+
+ default:
+ return 0;
+ }
+}
+
STATIC_INLINE void
thread (StgClosure **p)
{
StgClosure *q0 = *p;
+ bool q0_tagged = GET_CLOSURE_TAG(q0) != 0;
P_ q = (P_)UNTAG_CLOSURE(q0);
// It doesn't look like a closure at the moment, because the info
@@ -98,21 +140,8 @@ thread (StgClosure **p)
if (bd->flags & BF_MARKED)
{
W_ iptr = *q;
- switch (GET_PTR_TAG(iptr))
- {
- case 0:
- // this is the info pointer; we are creating a new chain.
- // save the original tag at the end of the chain.
- *p = (StgClosure *)((W_)iptr + GET_CLOSURE_TAG(q0));
- *q = (W_)p + 1;
- break;
- case 1:
- case 2:
- // this is a chain of length 1 or more
- *p = (StgClosure *)iptr;
- *q = (W_)p + 2;
- break;
- }
+ *p = (StgClosure *)iptr;
+ *q = (W_)p + 1 + (q0_tagged ? 1 : 0);
}
}
}
@@ -128,7 +157,7 @@ thread_root (void *user STG_UNUSED, StgClosure **p)
STATIC_INLINE void thread_ (void *p) { thread((StgClosure **)p); }
STATIC_INLINE void
-unthread( P_ p, W_ free )
+unthread( const P_ p, W_ free, W_ tag )
{
W_ q = *p;
loop:
@@ -136,20 +165,21 @@ loop:
{
case 0:
// nothing to do; the chain is length zero
+ *p = q;
return;
case 1:
{
P_ q0 = (P_)(q-1);
- W_ r = *q0; // r is the info ptr, tagged with the pointer-tag
+ W_ r = *q0;
*q0 = free;
- *p = (W_)UNTAG_PTR(r);
- return;
+ q = r;
+ goto loop;
}
case 2:
{
P_ q0 = (P_)(q-2);
W_ r = *q0;
- *q0 = free;
+ *q0 = free + tag;
q = r;
goto loop;
}
@@ -162,7 +192,7 @@ loop:
// The info pointer is also tagged with the appropriate pointer tag
// for this closure, which should be attached to the pointer
// subsequently passed to unthread().
-STATIC_INLINE W_
+STATIC_INLINE StgInfoTable*
get_threaded_info( P_ p )
{
W_ q = (W_)GET_INFO(UNTAG_CLOSURE((StgClosure *)p));
@@ -172,16 +202,13 @@ loop:
{
case 0:
ASSERT(LOOKS_LIKE_INFO_PTR(q));
- return q;
+ return (StgInfoTable*)q;
case 1:
- {
- W_ r = *(P_)(q-1);
- ASSERT(LOOKS_LIKE_INFO_PTR((W_)UNTAG_CONST_CLOSURE((StgClosure *)r)));
- return r;
- }
case 2:
- q = *(P_)(q-2);
+ {
+ q = *(P_)(UNTAG_PTR(q));
goto loop;
+ }
default:
barf("get_threaded_info");
}
@@ -353,8 +380,7 @@ thread_stack(P_ p, P_ stack_end)
{
StgRetFun *ret_fun = (StgRetFun *)p;
StgFunInfoTable *fun_info =
- FUN_INFO_PTR_TO_STRUCT((StgInfoTable *)UNTAG_PTR(
- get_threaded_info((P_)ret_fun->fun)));
+ FUN_INFO_PTR_TO_STRUCT(get_threaded_info((P_)ret_fun->fun));
// *before* threading it!
thread(&ret_fun->fun);
p = thread_arg_block(fun_info, ret_fun->payload);
@@ -372,7 +398,7 @@ STATIC_INLINE P_
thread_PAP_payload (StgClosure *fun, StgClosure **payload, W_ size)
{
StgFunInfoTable *fun_info =
- FUN_INFO_PTR_TO_STRUCT((StgInfoTable *)UNTAG_PTR(get_threaded_info((P_)fun)));
+ FUN_INFO_PTR_TO_STRUCT(get_threaded_info((P_)fun));
ASSERT(fun_info->i.type != PAP);
P_ p = (P_)payload;
@@ -762,8 +788,8 @@ update_fwd_compact( bdescr *blocks )
// ToDo: one possible avenue of attack is to use the fact
// that if (p&BLOCK_MASK) >= (free&BLOCK_MASK), then we
// definitely have enough room. Also see bug #1147.
- W_ iptr = get_threaded_info(p);
- StgInfoTable *info = INFO_PTR_TO_STRUCT((StgInfoTable *)UNTAG_PTR(iptr));
+ StgInfoTable *iptr = get_threaded_info(p);
+ StgInfoTable *info = INFO_PTR_TO_STRUCT(iptr);
P_ q = p;
@@ -783,7 +809,8 @@ update_fwd_compact( bdescr *blocks )
ASSERT(!is_marked(q+1,bd));
}
- unthread(q,(W_)free + GET_PTR_TAG(iptr));
+ StgWord iptr_tag = get_iptr_tag(iptr);
+ unthread(q, (W_)free, iptr_tag);
free += size;
}
}
@@ -819,8 +846,9 @@ update_bkwd_compact( generation *gen )
free_blocks++;
}
- W_ iptr = get_threaded_info(p);
- unthread(p, (W_)free + GET_PTR_TAG(iptr));
+ StgInfoTable *iptr = get_threaded_info(p);
+ StgWord iptr_tag = get_iptr_tag(iptr);
+ unthread(p, (W_)free, iptr_tag);
ASSERT(LOOKS_LIKE_INFO_PTR((W_)((StgClosure *)p)->header.info));
const StgInfoTable *info = get_itbl((StgClosure *)p);
W_ size = closure_sizeW_((StgClosure *)p,info);
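The tag-recovery rule implemented by get_iptr_tag above can be summarised with
a small stand-alone model. This is a sketch, not the RTS code: the function
names constr_tag and fun_tag are invented, TAG_MASK is assumed to be 7 (a
64-bit target), and the zero-based constructor tag is passed in directly
rather than read out of the info table as the patch does.

    /* Toy model of the tag recovery performed by get_iptr_tag (not RTS code). */
    #include <assert.h>
    #include <stdint.h>

    #define TAG_MASK 7   /* 3 tag bits on a 64-bit target */

    /* Constructors: tags start at 1; large families are capped at TAG_MASK. */
    static uintptr_t constr_tag(unsigned zero_based_con_tag)
    {
        uintptr_t t = (uintptr_t)zero_based_con_tag + 1;
        return t > TAG_MASK ? TAG_MASK : t;
    }

    /* Functions: the arity is the tag if it fits in the tag bits, else 0. */
    static uintptr_t fun_tag(unsigned arity)
    {
        return arity <= TAG_MASK ? arity : 0;
    }

    int main(void)
    {
        assert(constr_tag(0)   == 1);        /* first constructor of a family */
        assert(constr_tag(6)   == 7);        /* seventh constructor */
        assert(constr_tag(100) == TAG_MASK); /* large family: capped at TAG_MASK */
        assert(fun_tag(3)  == 3);
        assert(fun_tag(12) == 0);            /* arity too big for the tag bits */
        return 0;
    }

Because the expected tag can always be recomputed like this from the info
table, unthread() only needs to know whether a field originally held a tagged
pointer (tag 2) or an untagged one (tag 1); the tag value itself no longer has
to be carried around in the chain.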