summaryrefslogtreecommitdiff
path: root/rts/sm/MBlock.h
diff options
context:
space:
mode:
Diffstat (limited to 'rts/sm/MBlock.h')
-rw-r--r--rts/sm/MBlock.h208
1 files changed, 0 insertions, 208 deletions
diff --git a/rts/sm/MBlock.h b/rts/sm/MBlock.h
deleted file mode 100644
index f9dddc3138..0000000000
--- a/rts/sm/MBlock.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/* -----------------------------------------------------------------------------
- *
- * (c) The GHC Team, 1998-2008
- *
- * MegaBlock Allocator interface.
- *
- * See wiki commentary at
- * http://hackage.haskell.org/trac/ghc/wiki/Commentary/HeapAlloced
- *
- * ---------------------------------------------------------------------------*/
-
-#ifndef MBLOCK_H
-#define MBLOCK_H
-
-#include "GC.h"
-
-extern lnat RTS_VAR(mblocks_allocated);
-
-extern void initMBlocks(void);
-extern void * getMBlock(void);
-extern void * getMBlocks(nat n);
-extern void freeAllMBlocks(void);
-
-#ifdef DEBUG
-extern void *getFirstMBlock(void);
-extern void *getNextMBlock(void *mblock);
-#endif
-
-/* -----------------------------------------------------------------------------
- The HEAP_ALLOCED() test.
-
- HEAP_ALLOCED is called FOR EVERY SINGLE CLOSURE during GC.
- It needs to be FAST.
-
- See wiki commentary at
- http://hackage.haskell.org/trac/ghc/wiki/Commentary/HeapAlloced
-
- Implementation of HEAP_ALLOCED
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- Since heap is allocated in chunks of megablocks (MBLOCK_SIZE), we
- can just use a table to record which megablocks in the address
- space belong to the heap. On a 32-bit machine, with 1Mb
- megablocks, using 8 bits for each entry in the table, the table
- requires 4k. Lookups during GC will be fast, because the table
- will be quickly cached (indeed, performance measurements showed no
- measurable difference between doing the table lookup and using a
- constant comparison).
-
- On 64-bit machines, we cache one 12-bit block map that describes
- 4096 megablocks or 4GB of memory. If HEAP_ALLOCED is called for
- an address that is not in the cache, it calls slowIsHeapAlloced
- (see MBlock.c) which will find the block map for the 4GB block in
- question.
- -------------------------------------------------------------------------- */
-
-#if SIZEOF_VOID_P == 4
-extern StgWord8 mblock_map[];
-
-/* On a 32-bit machine a 4KB table is always sufficient */
-# define MBLOCK_MAP_SIZE 4096
-# define MBLOCK_MAP_ENTRY(p) ((StgWord)(p) >> MBLOCK_SHIFT)
-# define HEAP_ALLOCED(p) mblock_map[MBLOCK_MAP_ENTRY(p)]
-# define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p)
-
-/* -----------------------------------------------------------------------------
- HEAP_ALLOCED for 64-bit machines.
-
- Here are some cache layout options:
-
- [1]
- 16KB cache of 16-bit entries, 1MB lines (capacity 8GB)
- mblock size = 20 bits
- entries = 8192 13 bits
- line size = 0 bits (1 bit of value)
- tag size = 15 bits
- = 48 bits
-
- [2]
- 32KB cache of 16-bit entries, 4MB lines (capacity 32GB)
- mblock size = 20 bits
- entries = 16384 14 bits
- line size = 2 bits (4 bits of value)
- tag size = 12 bits
- = 48 bits
-
- [3]
- 16KB cache of 16-bit entries, 2MB lines (capacity 16GB)
- mblock size = 20 bits
- entries = 8192 13 bits
- line size = 1 bits (2 bits of value)
- tag size = 14 bits
- = 48 bits
-
- [4]
- 4KB cache of 32-bit entries, 16MB lines (capacity 16GB)
- mblock size = 20 bits
- entries = 1024 10 bits
- line size = 4 bits (16 bits of value)
- tag size = 14 bits
- = 48 bits
-
- [5]
- 4KB cache of 64-bit entries, 32MB lines (capacity 16GB)
- mblock size = 20 bits
- entries = 512 9 bits
- line size = 5 bits (32 bits of value)
- tag size = 14 bits
- = 48 bits
-
- We actually use none of the above. After much experimentation it was
- found that optimising the lookup is the most important factor,
- followed by reducing the number of misses. To that end, we use a
- variant of [1] in which each cache entry is ((mblock << 1) + value)
- where value is 0 for non-heap and 1 for heap. The cache entries can
- be 32 bits, since the mblock number is 48-20 = 28 bits, and we need
- 1 bit for the value. The cache can be as big as we like, but
- currently we use 8k entries, giving us 8GB capacity.
-
- ---------------------------------------------------------------------------- */
-
-#elif SIZEOF_VOID_P == 8
-
-#define MBC_LINE_BITS 0
-#define MBC_TAG_BITS 15
-typedef StgWord32 MbcCacheLine; // could use 16, but 32 was faster
-typedef StgWord8 MBlockMapLine;
-
-#define MBLOCK_MAP_LINE(p) (((StgWord)p & 0xffffffff) >> (MBLOCK_SHIFT + MBC_LINE_BITS))
-
-#define MBC_LINE_SIZE (1<<MBC_LINE_BITS)
-#define MBC_SHIFT (48 - MBLOCK_SHIFT - MBC_LINE_BITS - MBC_TAG_BITS)
-#define MBC_ENTRIES (1<<MBC_SHIFT)
-
-extern MbcCacheLine mblock_cache[];
-
-#define MBC_LINE(p) ((StgWord)p >> (MBLOCK_SHIFT + MBC_LINE_BITS))
-
-#define MBLOCK_MAP_ENTRIES (1 << (32 - MBLOCK_SHIFT - MBC_LINE_BITS))
-
-typedef struct {
- StgWord32 addrHigh32;
- MBlockMapLine lines[MBLOCK_MAP_ENTRIES];
-} MBlockMap;
-
-extern lnat mpc_misses;
-
-StgBool HEAP_ALLOCED_miss(StgWord mblock, void *p);
-
-INLINE_HEADER
-StgBool HEAP_ALLOCED(void *p)
-{
- StgWord mblock;
- nat entry_no;
- MbcCacheLine entry, value;
-
- mblock = (StgWord)p >> MBLOCK_SHIFT;
- entry_no = mblock & (MBC_ENTRIES-1);
- entry = mblock_cache[entry_no];
- value = entry ^ (mblock << 1);
- // this formulation coaxes gcc into prioritising the value==1
- // case, which we expect to be the most common.
- // __builtin_expect() didn't have any useful effect (gcc-4.3.0).
- if (value == 1) {
- return 1;
- } else if (value == 0) {
- return 0;
- } else {
- // putting the rest out of line turned out to be a slight
- // performance improvement:
- return HEAP_ALLOCED_miss(mblock,p);
- }
-}
-
-// In the parallel GC, the cache itself is safe to *read*, and can be
-// updated atomically, but we need to place a lock around operations
-// that touch the MBlock map.
-INLINE_HEADER
-StgBool HEAP_ALLOCED_GC(void *p)
-{
- StgWord mblock;
- nat entry_no;
- MbcCacheLine entry, value;
- StgBool b;
-
- mblock = (StgWord)p >> MBLOCK_SHIFT;
- entry_no = mblock & (MBC_ENTRIES-1);
- entry = mblock_cache[entry_no];
- value = entry ^ (mblock << 1);
- if (value == 1) {
- return 1;
- } else if (value == 0) {
- return 0;
- } else {
- // putting the rest out of line turned out to be a slight
- // performance improvement:
- ACQUIRE_SPIN_LOCK(&gc_alloc_block_sync);
- b = HEAP_ALLOCED_miss(mblock,p);
- RELEASE_SPIN_LOCK(&gc_alloc_block_sync);
- return b;
- }
-}
-
-#else
-# error HEAP_ALLOCED not defined
-#endif
-
-#endif /* MBLOCK_H */