summaryrefslogtreecommitdiff
path: root/rts/PrimOps.cmm
diff options
context:
space:
mode:
authorDaniel Gröber <dxld@darkboxed.org>2020-03-17 07:15:38 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-04-14 23:31:01 -0400
commit41230e2601703df0233860be3f7d53f3a01bdbe5 (patch)
tree195ff7429279efb4af74f6c9dfa2ef8cdbb772fe /rts/PrimOps.cmm
parent7b41f21bbfa9e266ba6654b08c3f9fec549c8bca (diff)
downloadhaskell-41230e2601703df0233860be3f7d53f3a01bdbe5.tar.gz
Zero out pinned block alignment slop when profiling
The heap profiler currently cannot traverse pinned blocks because of alignment slop. This used to just be a minor annoyance as the whole block is accounted into a special cost center rather than the respective object's CCS, cf. #7275. However for the new root profiler we would like to be able to visit _every_ closure on the heap. We need to do this so we can get rid of the current 'flip' bit hack in the heap traversal code. Since info pointers are always non-zero we can in principle skip all the slop in the profiler if we can rely on it being zeroed. This assumption caused problems in the past though, commit a586b33f8e ("rts: Correct handling of LARGE ARR_WORDS in LDV profiler"), part of !1118, tried to use the same trick for BF_LARGE objects but neglected to take into account that shrink*Array# functions don't ensure that slop is zeroed when not compiling with profiling. Later, commit 0c114c6599 ("Handle large ARR_WORDS in heap census (fix #17572)") had to work around the problems this caused. To avoid such issues in the future we will only be assuming slop is zeroed when profiling is on. This commit also reduces the amount of slop we introduce in the first place by calculating the needed alignment before doing the allocation for small objects where we know the next available address. For large objects we don't know how much alignment we'll have to do yet since those details are hidden behind the allocateMightFail function so there we continue to allocate the maximum additional words we'll need to do the alignment. So we don't have to duplicate all this logic in the cmm code we pull it into the RTS allocatePinned function instead. Metric Decrease: T7257 haddock.Cabal haddock.base
Diffstat (limited to 'rts/PrimOps.cmm')
-rw-r--r--rts/PrimOps.cmm21
1 files changed, 3 insertions, 18 deletions
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index e1f6fab93a..57d7b9cea9 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -89,22 +89,15 @@ stg_newPinnedByteArrayzh ( W_ n )
/* When we actually allocate memory, we need to allow space for the
header: */
bytes = bytes + SIZEOF_StgArrBytes;
- /* And we want to align to BA_ALIGN bytes, so we need to allow space
- to shift up to BA_ALIGN - 1 bytes: */
- bytes = bytes + BA_ALIGN - 1;
/* Now we convert to a number of words: */
words = ROUNDUP_BYTES_TO_WDS(bytes);
- ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
+ ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words, BA_ALIGN, SIZEOF_StgArrBytes);
if (p == NULL) {
jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
- /* Now we need to move p forward so that the payload is aligned
- to BA_ALIGN bytes: */
- p = p + ((-p - SIZEOF_StgArrBytes) & BA_MASK);
-
/* No write barrier needed since this is a new allocation. */
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;
@@ -121,7 +114,7 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
/* we always supply at least word-aligned memory, so there's no
need to allow extra space for alignment if the requirement is less
than a word. This also prevents mischief with alignment == 0. */
- if (alignment <= SIZEOF_W) { alignment = 1; }
+ if (alignment <= SIZEOF_W) { alignment = SIZEOF_W; }
bytes = n;
@@ -131,23 +124,15 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
/* When we actually allocate memory, we need to allow space for the
header: */
bytes = bytes + SIZEOF_StgArrBytes;
- /* And we want to align to <alignment> bytes, so we need to allow space
- to shift up to <alignment - 1> bytes: */
- bytes = bytes + alignment - 1;
/* Now we convert to a number of words: */
words = ROUNDUP_BYTES_TO_WDS(bytes);
- ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words);
+ ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words, alignment, SIZEOF_StgArrBytes);
if (p == NULL) {
jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure);
}
TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0);
- /* Now we need to move p forward so that the payload is aligned
- to <alignment> bytes. Note that we are assuming that
- <alignment> is a power of 2, which is technically not guaranteed */
- p = p + ((-p - SIZEOF_StgArrBytes) & (alignment - 1));
-
/* No write barrier needed since this is a new allocation. */
SET_HDR(p, stg_ARR_WORDS_info, CCCS);
StgArrBytes_bytes(p) = n;