diff options
-rw-r--r-- | includes/rts/storage/GC.h | 9 | ||||
-rw-r--r-- | rts/PrimOps.cmm | 21 | ||||
-rw-r--r-- | rts/sm/Storage.c | 55 |
3 files changed, 59 insertions, 26 deletions
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index 7931433019..9f4a0dde07 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -170,10 +170,13 @@ extern generation * oldest_gen; Allocates memory from the nursery in the current Capability. - StgPtr allocatePinned(Capability *cap, W_ n) + StgPtr allocatePinned(Capability *cap, W_ n, W_ alignment, W_ align_off) Allocates a chunk of contiguous store n words long, which is at a fixed - address (won't be moved by GC). + address (won't be moved by GC). The + word at the byte offset 'align_off' + will be aligned to 'alignment', which + must be a power of two. Returns a pointer to the first word. Always succeeds. @@ -191,7 +194,7 @@ extern generation * oldest_gen; StgPtr allocate ( Capability *cap, W_ n ); StgPtr allocateMightFail ( Capability *cap, W_ n ); -StgPtr allocatePinned ( Capability *cap, W_ n ); +StgPtr allocatePinned ( Capability *cap, W_ n, W_ alignment, W_ align_off); /* memory allocator for executable memory */ typedef void* AdjustorWritable; diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index e1f6fab93a..57d7b9cea9 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -89,22 +89,15 @@ stg_newPinnedByteArrayzh ( W_ n ) /* When we actually allocate memory, we need to allow space for the header: */ bytes = bytes + SIZEOF_StgArrBytes; - /* And we want to align to BA_ALIGN bytes, so we need to allow space - to shift up to BA_ALIGN - 1 bytes: */ - bytes = bytes + BA_ALIGN - 1; /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words); + ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words, BA_ALIGN, SIZEOF_StgArrBytes); if (p == NULL) { jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure); } TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0); - /* Now we need to move p forward so that the payload is aligned - to BA_ALIGN bytes: */ - p = p + ((-p - SIZEOF_StgArrBytes) & BA_MASK); - /* No write barrier needed since this is a new allocation. */ SET_HDR(p, stg_ARR_WORDS_info, CCCS); StgArrBytes_bytes(p) = n; @@ -121,7 +114,7 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment ) /* we always supply at least word-aligned memory, so there's no need to allow extra space for alignment if the requirement is less than a word. This also prevents mischief with alignment == 0. */ - if (alignment <= SIZEOF_W) { alignment = 1; } + if (alignment <= SIZEOF_W) { alignment = SIZEOF_W; } bytes = n; @@ -131,23 +124,15 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment ) /* When we actually allocate memory, we need to allow space for the header: */ bytes = bytes + SIZEOF_StgArrBytes; - /* And we want to align to <alignment> bytes, so we need to allow space - to shift up to <alignment - 1> bytes: */ - bytes = bytes + alignment - 1; /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words); + ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words, alignment, SIZEOF_StgArrBytes); if (p == NULL) { jump stg_raisezh(base_GHCziIOziException_heapOverflow_closure); } TICK_ALLOC_PRIM(SIZEOF_StgArrBytes,WDS(payload_words),0); - /* Now we need to move p forward so that the payload is aligned - to <alignment> bytes. Note that we are assuming that - <alignment> is a power of 2, which is technically not guaranteed */ - p = p + ((-p - SIZEOF_StgArrBytes) & (alignment - 1)); - /* No write barrier needed since this is a new allocation. */ SET_HDR(p, stg_ARR_WORDS_info, CCCS); StgArrBytes_bytes(p) = n; diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 6bcd11df5a..2a86c19fdb 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -1059,6 +1059,24 @@ allocateMightFail (Capability *cap, W_ n) return p; } +/** + * Calculate the number of words we need to add to 'p' so it satisfies the + * alignment constraint '(p + off) & (align-1) == 0'. + */ +#define ALIGN_WITH_OFF_W(p, align, off) \ + (((-((uintptr_t)p) - off) & (align-1)) / sizeof(W_)) + +/** + * When profiling we zero the space used for alignment. This allows us to + * traverse pinned blocks in the heap profiler. + */ +#if defined(PROFILING) +#define MEMSET_IF_PROFILING_W(p, val, len) memset(p, val, (len) * sizeof(W_)) +#else +#define MEMSET_IF_PROFILING_W(p, val, len) \ + do { (void)(p); (void)(val); (void)(len); } while(0) +#endif + /* --------------------------------------------------------------------------- Allocate a fixed/pinned object. @@ -1084,29 +1102,48 @@ allocateMightFail (Capability *cap, W_ n) ------------------------------------------------------------------------- */ StgPtr -allocatePinned (Capability *cap, W_ n) +allocatePinned (Capability *cap, W_ n, W_ alignment, W_ align_off) { StgPtr p; bdescr *bd; + // Alignment and offset have to be a power of two + ASSERT(alignment && !(alignment & (alignment - 1))); + ASSERT(alignment >= sizeof(W_)); + + ASSERT(align_off && !(align_off & (align_off - 1))); + ASSERT(align_off >= sizeof(W_)); + // If the request is for a large object, then allocate() // will give us a pinned object anyway. if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - p = allocateMightFail(cap, n); + // For large objects we don't bother optimizing the number of words + // allocated for alignment reasons. Here we just allocate the maximum + // number of extra words we could possibly need to satisfy the alignment + // constraint. + p = allocateMightFail(cap, n + ROUNDUP_BYTES_TO_WDS(alignment)-1); if (p == NULL) { return NULL; } else { Bdescr(p)->flags |= BF_PINNED; + W_ off = ALIGN_WITH_OFF_W(p, alignment, align_off); + MEMSET_IF_PROFILING_W(p, 0, off); + p += off; + MEMSET_IF_PROFILING_W(p + n, 0, alignment - off - 1); return p; } } - accountAllocation(cap, n); bd = cap->pinned_object_block; + W_ off = 0; + + if(bd) + off = ALIGN_WITH_OFF_W(bd->free, alignment, align_off); + // If we don't have a block of pinned objects yet, or the current // one isn't large enough to hold the new object, get a new one. - if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) { + if (bd == NULL || (bd->free + off + n) > (bd->start + BLOCK_SIZE_W)) { // stash the old block on cap->pinned_object_blocks. On the // next GC cycle these objects will be moved to @@ -1158,11 +1195,19 @@ allocatePinned (Capability *cap, W_ n) // the next GC the BF_EVACUATED flag will be cleared, and the // block will be promoted as usual (if anything in it is // live). + + off = ALIGN_WITH_OFF_W(bd->free, alignment, align_off); } p = bd->free; + + MEMSET_IF_PROFILING_W(p, 0, off); + + n += off; + accountAllocation(cap, n); bd->free += n; - return p; + + return p + off; } /* ----------------------------------------------------------------------------- |