diff options
author | Ömer Sinan Ağacan <omer@well-typed.com> | 2018-03-05 15:57:47 +0300 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2019-10-18 15:27:56 -0400 |
commit | 921e4e360a1244ee63241efc62da28642343fece (patch) | |
tree | 456292c188343645c56c882d393965fe201b273a | |
parent | c4c9904b324736dc5d190a91418e8d8f564d4104 (diff) | |
download | haskell-921e4e360a1244ee63241efc62da28642343fece.tar.gz |
rts/BlockAlloc: Allow aligned allocation requestswip/gc/aligned-block-allocation
This implements support for block group allocations which are aligned to
an integral number of blocks.
This will be used by the nonmoving garbage collector, which uses the
block allocator to allocate the segments which back its heap. These
segments are a fixed number of blocks in size, with each segment being
aligned to the segment size boundary. This allows us to easily find the
segment metadata stored at the beginning of the segment.
-rw-r--r-- | includes/rts/storage/Block.h | 7 | ||||
-rw-r--r-- | rts/sm/BlockAlloc.c | 144 | ||||
-rw-r--r-- | testsuite/tests/rts/testblockalloc.c | 121 |
3 files changed, 234 insertions, 38 deletions
diff --git a/includes/rts/storage/Block.h b/includes/rts/storage/Block.h index ecd6bf5dd8..792a72d717 100644 --- a/includes/rts/storage/Block.h +++ b/includes/rts/storage/Block.h @@ -290,6 +290,13 @@ EXTERN_INLINE bdescr* allocBlock(void) bdescr *allocGroupOnNode(uint32_t node, W_ n); +// Allocate n blocks, aligned at n-block boundary. The returned bdescr will +// have this invariant +// +// bdescr->start % BLOCK_SIZE*n == 0 +// +bdescr *allocAlignedGroupOnNode(uint32_t node, W_ n); + EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node); EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node) { diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c index f9e3d11407..b3e1e2ce75 100644 --- a/rts/sm/BlockAlloc.c +++ b/rts/sm/BlockAlloc.c @@ -310,7 +310,7 @@ setup_tail (bdescr *bd) // Take a free block group bd, and split off a group of size n from // it. Adjust the free list as necessary, and return the new group. static bdescr * -split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln) +split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln /* log_2_ceil(n) */) { bdescr *fg; // free group @@ -325,6 +325,46 @@ split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln) return fg; } +// Take N blocks off the end, free the rest. +static bdescr * +split_block_high (bdescr *bd, W_ n) +{ + ASSERT(bd->blocks > n); + + bdescr* ret = bd + bd->blocks - n; // take n blocks off the end + ret->blocks = n; + ret->start = ret->free = bd->start + (bd->blocks - n)*BLOCK_SIZE_W; + ret->link = NULL; + + bd->blocks -= n; + + setup_tail(ret); + setup_tail(bd); + freeGroup(bd); + + return ret; +} + +// Like `split_block_high`, but takes n blocks off the beginning rather +// than the end. +static bdescr * +split_block_low (bdescr *bd, W_ n) +{ + ASSERT(bd->blocks > n); + + bdescr* bd_ = bd + n; + bd_->blocks = bd->blocks - n; + bd_->start = bd_->free = bd->start + n*BLOCK_SIZE_W; + + bd->blocks = n; + + setup_tail(bd_); + setup_tail(bd); + freeGroup(bd_); + + return bd; +} + /* Only initializes the start pointers on the first megablock and the * blocks field of the first bdescr; callers are responsible for calling * initGroup afterwards. @@ -461,6 +501,108 @@ finish: return bd; } +// Allocate `n` blocks aligned to `n` blocks, e.g. when n = 8, the blocks will +// be aligned at `8 * BLOCK_SIZE`. For a group with `n` blocks this can be used +// for easily accessing the beginning of the group from a location p in the +// group with +// +// p % (BLOCK_SIZE*n) +// +// Used by the non-moving collector for allocating segments. +// +// Because the storage manager does not support aligned allocations, we have to +// allocate `2*n - 1` blocks here to make sure we'll be able to find an aligned +// region in the allocated blocks. After finding the aligned area we want to +// free slop on the low and high sides, and block allocator doesn't support +// freeing only some portion of a megablock (we can only free whole megablocks). +// So we disallow allocating megablocks here, and allow allocating at most +// `BLOCKS_PER_MBLOCK / 2` blocks. +bdescr * +allocAlignedGroupOnNode (uint32_t node, W_ n) +{ + // allocate enough blocks to have enough space aligned at n-block boundary + // free any slops on the low and high side of this space + + // number of blocks to allocate to make sure we have enough aligned space + W_ num_blocks = 2*n - 1; + + if (num_blocks >= BLOCKS_PER_MBLOCK) { + barf("allocAlignedGroupOnNode: allocating megablocks is not supported\n" + " requested blocks: %" FMT_Word "\n" + " required for alignment: %" FMT_Word "\n" + " megablock size (in blocks): %" FMT_Word, + n, num_blocks, (W_) BLOCKS_PER_MBLOCK); + } + + W_ group_size = n * BLOCK_SIZE; + + // To reduce splitting and fragmentation we use allocLargeChunkOnNode here. + // Tweak the max allocation to avoid allocating megablocks. Splitting slop + // below doesn't work with megablocks (freeGroup can't free only a portion + // of a megablock so we can't allocate megablocks and free some parts of + // them). + W_ max_blocks = stg_min(num_blocks * 3, BLOCKS_PER_MBLOCK - 1); + bdescr *bd = allocLargeChunkOnNode(node, num_blocks, max_blocks); + // We may allocate more than num_blocks, so update it + num_blocks = bd->blocks; + + // slop on the low side + W_ slop_low = 0; + if ((uintptr_t)bd->start % group_size != 0) { + slop_low = group_size - ((uintptr_t)bd->start % group_size); + } + + W_ slop_high = (num_blocks * BLOCK_SIZE) - group_size - slop_low; + + ASSERT((slop_low % BLOCK_SIZE) == 0); + ASSERT((slop_high % BLOCK_SIZE) == 0); + + W_ slop_low_blocks = slop_low / BLOCK_SIZE; + W_ slop_high_blocks = slop_high / BLOCK_SIZE; + + ASSERT(slop_low_blocks + slop_high_blocks + n == num_blocks); + +#if defined(DEBUG) + checkFreeListSanity(); + W_ free_before = countFreeList(); +#endif + + if (slop_low_blocks != 0) { + bd = split_block_high(bd, num_blocks - slop_low_blocks); + ASSERT(countBlocks(bd) == num_blocks - slop_low_blocks); + } + +#if defined(DEBUG) + ASSERT(countFreeList() == free_before + slop_low_blocks); + checkFreeListSanity(); +#endif + + // At this point the bd should be aligned, but we may have slop on the high side + ASSERT((uintptr_t)bd->start % group_size == 0); + +#if defined(DEBUG) + free_before = countFreeList(); +#endif + + if (slop_high_blocks != 0) { + bd = split_block_low(bd, n); + ASSERT(bd->blocks == n); + } + +#if defined(DEBUG) + ASSERT(countFreeList() == free_before + slop_high_blocks); + checkFreeListSanity(); +#endif + + // Should still be aligned + ASSERT((uintptr_t)bd->start % group_size == 0); + + // Just to make sure I get this right + ASSERT(Bdescr(bd->start) == bd); + + return bd; +} + STATIC_INLINE uint32_t nodeWithLeastBlocks (void) { diff --git a/testsuite/tests/rts/testblockalloc.c b/testsuite/tests/rts/testblockalloc.c index 577245f45e..53eed24015 100644 --- a/testsuite/tests/rts/testblockalloc.c +++ b/testsuite/tests/rts/testblockalloc.c @@ -3,6 +3,7 @@ #include <stdio.h> extern bdescr *allocGroup_lock_lock(uint32_t n); +extern bdescr *allocAlignedGroupOnNode (uint32_t node, W_ n); extern void freeGroup_lock(bdescr *p); const int ARRSIZE = 256; @@ -13,64 +14,110 @@ const int SEED = 0xf00f00; extern StgWord mblocks_allocated; -int main (int argc, char *argv[]) +static void test_random_alloc(void) { - int i, j, b; - bdescr *a[ARRSIZE]; - srand(SEED); + // repeatedly sweep though the array, allocating new random-sized + // objects and deallocating the old ones. + for (int i=0; i < LOOPS; i++) + { + for (int j=0; j < ARRSIZE; j++) + { + if (i > 0) + { + IF_DEBUG(block_alloc, debugBelch("A%d: freeing %p, %d blocks @ %p\n", j, a[j], a[j]->blocks, a[j]->start)); + freeGroup_lock(a[j]); + DEBUG_ONLY(checkFreeListSanity()); + } + + int b = (rand() % MAXALLOC) + 1; + a[j] = allocGroup_lock(b); + IF_DEBUG(block_alloc, debugBelch("A%d: allocated %p, %d blocks @ %p\n", j, a[j], b, a[j]->start)); + // allocating zero blocks isn't allowed + DEBUG_ONLY(checkFreeListSanity()); + } + } + for (int j=0; j < ARRSIZE; j++) { - RtsConfig conf = defaultRtsConfig; - conf.rts_opts_enabled = RtsOptsAll; - hs_init_ghc(&argc, &argv, conf); + freeGroup_lock(a[j]); } +} + +static void test_sequential_alloc(void) +{ + bdescr *a[ARRSIZE]; - // repeatedly sweep though the array, allocating new random-sized - // objects and deallocating the old ones. - for (i=0; i < LOOPS; i++) - { - for (j=0; j < ARRSIZE; j++) - { - if (i > 0) - { - IF_DEBUG(block_alloc, debugBelch("A%d: freeing %p, %d blocks @ %p\n", j, a[j], a[j]->blocks, a[j]->start)); - freeGroup_lock(a[j]); - DEBUG_ONLY(checkFreeListSanity()); - } - b = (rand() % MAXALLOC) + 1; - a[j] = allocGroup_lock(b); - IF_DEBUG(block_alloc, debugBelch("A%d: allocated %p, %d blocks @ %p\n", j, a[j], b, a[j]->start)); - // allocating zero blocks isn't allowed - DEBUG_ONLY(checkFreeListSanity()); - } - } - - for (j=0; j < ARRSIZE; j++) - { - freeGroup_lock(a[j]); - } - // this time, sweep forwards allocating new blocks, and then // backwards deallocating them. - for (i=0; i < LOOPS; i++) + for (int i=0; i < LOOPS; i++) { - for (j=0; j < ARRSIZE; j++) + for (int j=0; j < ARRSIZE; j++) { - b = (rand() % MAXALLOC) + 1; + int b = (rand() % MAXALLOC) + 1; a[j] = allocGroup_lock(b); IF_DEBUG(block_alloc, debugBelch("B%d,%d: allocated %p, %d blocks @ %p\n", i, j, a[j], b, a[j]->start)); DEBUG_ONLY(checkFreeListSanity()); } - for (j=ARRSIZE-1; j >= 0; j--) + for (int j=ARRSIZE-1; j >= 0; j--) { IF_DEBUG(block_alloc, debugBelch("B%d,%d: freeing %p, %d blocks @ %p\n", i, j, a[j], a[j]->blocks, a[j]->start)); freeGroup_lock(a[j]); DEBUG_ONLY(checkFreeListSanity()); } } - +} + +static void test_aligned_alloc(void) +{ + bdescr *a[ARRSIZE]; + + // this time, sweep forwards allocating new blocks, and then + // backwards deallocating them. + for (int i=0; i < LOOPS; i++) + { + for (int j=0; j < ARRSIZE; j++) + { + // allocAlignedGroupOnNode does not support allocating more than + // BLOCKS_PER_MBLOCK/2 blocks. + int b = rand() % (BLOCKS_PER_MBLOCK / 2); + if (b == 0) { b = 1; } + a[j] = allocAlignedGroupOnNode(0, b); + if ((((W_)(a[j]->start)) % (b*BLOCK_SIZE)) != 0) + { + barf("%p is not aligned to allocation size %d", a[j], b); + } + IF_DEBUG(block_alloc, debugBelch("B%d,%d: allocated %p, %d blocks @ %p\n", i, j, a[j], b, a[j]->start)); + DEBUG_ONLY(checkFreeListSanity()); + } + for (int j=ARRSIZE-1; j >= 0; j--) + { + IF_DEBUG(block_alloc, debugBelch("B%d,%d: freeing %p, %d blocks @ %p\n", i, j, a[j], a[j]->blocks, a[j]->start)); + freeGroup_lock(a[j]); + DEBUG_ONLY(checkFreeListSanity()); + } + } +} + +int main (int argc, char *argv[]) +{ + int i, j, b; + + bdescr *a[ARRSIZE]; + + srand(SEED); + + { + RtsConfig conf = defaultRtsConfig; + conf.rts_opts_enabled = RtsOptsAll; + hs_init_ghc(&argc, &argv, conf); + } + + test_random_alloc(); + test_sequential_alloc(); + test_aligned_alloc(); + DEBUG_ONLY(checkFreeListSanity()); hs_exit(); // will do a memory leak test |