diff options
author | Simon Marlow <marlowsd@gmail.com> | 2016-04-23 21:14:49 +0100 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2016-06-10 21:25:54 +0100 |
commit | 9e5ea67e268be2659cd30ebaed7044d298198ab0 (patch) | |
tree | c395e74ee772ae0d59c852b3cbde743784b08d09 /rts/sm/BlockAlloc.c | |
parent | b9fa72a24ba2cc3120912e6afedc9280d28d2077 (diff) | |
download | haskell-9e5ea67e268be2659cd30ebaed7044d298198ab0.tar.gz |
NUMA support
Summary:
The aim here is to reduce the number of remote memory accesses on
systems with a NUMA memory architecture, typically multi-socket servers.
Linux provides a NUMA API for doing two things:
* Allocating memory local to a particular node
* Binding a thread to a particular node
When given the +RTS --numa flag, the runtime will
* Determine the number of NUMA nodes (N) by querying the OS
* Assign capabilities to nodes, so cap C is on node C%N
* Bind worker threads on a capability to the correct node
* Keep a separate free list in the block layer for each node
* Allocate the nursery for a capability from node-local memory
* Allocate blocks in the GC from node-local memory
For example, using nofib/parallel/queens on a 24-core 2-socket machine:
```
$ ./Main 15 +RTS -N24 -s -A64m
Total time 173.960s ( 7.467s elapsed)
$ ./Main 15 +RTS -N24 -s -A64m --numa
Total time 150.836s ( 6.423s elapsed)
```
The biggest win here is expected to be allocating from node-local
memory, so that means programs using a large -A value (as here).
According to perf, on this program the number of remote memory accesses
was reduced by more than 50% by using `--numa`.
Test Plan:
* validate
* There's a new flag --debug-numa=<n> that pretends to do NUMA without
actually making the OS calls, which is useful for testing the code
on non-NUMA systems.
* TODO: I need to add some unit tests
Reviewers: erikd, austin, rwbarton, ezyang, bgamari, hvr, niteria
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D2199
Diffstat (limited to 'rts/sm/BlockAlloc.c')
-rw-r--r-- | rts/sm/BlockAlloc.c | 350 |
1 files changed, 216 insertions, 134 deletions
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c index ff1a6460a4..c2859b0c15 100644 --- a/rts/sm/BlockAlloc.c +++ b/rts/sm/BlockAlloc.c @@ -7,11 +7,11 @@ * This is the architecture independent part of the block allocator. * It requires only the following support from the operating system: * - * void *getMBlock(uint32_t n); + * void *getMBlocks(uint32_t n); * * returns the address of an n*MBLOCK_SIZE region of memory, aligned on * an MBLOCK_SIZE boundary. There are no other restrictions on the - * addresses of memory returned by getMBlock(). + * addresses of memory returned by getMBlocks(). * * ---------------------------------------------------------------------------*/ @@ -25,7 +25,7 @@ #include <string.h> -static void initMBlock(void *mblock); +static void initMBlock(void *mblock, uint32_t node); /* ----------------------------------------------------------------------------- @@ -158,28 +158,55 @@ static void initMBlock(void *mblock); // In THREADED_RTS mode, the free list is protected by sm_mutex. 
-static bdescr *free_list[NUM_FREE_LISTS]; -static bdescr *free_mblock_list; +static bdescr *free_list[MAX_NUMA_NODES][NUM_FREE_LISTS]; +static bdescr *free_mblock_list[MAX_NUMA_NODES]; W_ n_alloc_blocks; // currently allocated blocks W_ hw_alloc_blocks; // high-water allocated blocks +W_ n_alloc_blocks_by_node[MAX_NUMA_NODES]; + /* ----------------------------------------------------------------------------- Initialisation -------------------------------------------------------------------------- */ void initBlockAllocator(void) { - uint32_t i; - for (i=0; i < NUM_FREE_LISTS; i++) { - free_list[i] = NULL; + uint32_t i, node; + for (node = 0; node < MAX_NUMA_NODES; node++) { + for (i=0; i < NUM_FREE_LISTS; i++) { + free_list[node][i] = NULL; + } + free_mblock_list[node] = NULL; + n_alloc_blocks_by_node[node] = 0; } - free_mblock_list = NULL; n_alloc_blocks = 0; hw_alloc_blocks = 0; } /* ----------------------------------------------------------------------------- + Accounting + -------------------------------------------------------------------------- */ + +STATIC_INLINE +void recordAllocatedBlocks(uint32_t node, uint32_t n) +{ + n_alloc_blocks += n; + n_alloc_blocks_by_node[node] += n; + if (n > 0 && n_alloc_blocks > hw_alloc_blocks) { + hw_alloc_blocks = n_alloc_blocks; + } +} + +STATIC_INLINE +void recordFreedBlocks(uint32_t node, uint32_t n) +{ + ASSERT(n_alloc_blocks >= n); + n_alloc_blocks -= n; + n_alloc_blocks_by_node[node] -= n; +} + +/* ----------------------------------------------------------------------------- Allocation -------------------------------------------------------------------------- */ @@ -248,14 +275,14 @@ log_2_ceil(W_ n) } STATIC_INLINE void -free_list_insert (bdescr *bd) +free_list_insert (uint32_t node, bdescr *bd) { uint32_t ln; ASSERT(bd->blocks < BLOCKS_PER_MBLOCK); ln = log_2(bd->blocks); - dbl_link_onto(bd, &free_list[ln]); + dbl_link_onto(bd, &free_list[node][ln]); } @@ -284,18 +311,18 @@ setup_tail (bdescr *bd) // Take a free 
block group bd, and split off a group of size n from // it. Adjust the free list as necessary, and return the new group. static bdescr * -split_free_block (bdescr *bd, W_ n, uint32_t ln) +split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln) { bdescr *fg; // free group ASSERT(bd->blocks > n); - dbl_link_remove(bd, &free_list[ln]); + dbl_link_remove(bd, &free_list[node][ln]); fg = bd + bd->blocks - n; // take n blocks off the end fg->blocks = n; bd->blocks -= n; setup_tail(bd); ln = log_2(bd->blocks); - dbl_link_onto(bd, &free_list[ln]); + dbl_link_onto(bd, &free_list[node][ln]); return fg; } @@ -304,7 +331,7 @@ split_free_block (bdescr *bd, W_ n, uint32_t ln) * initGroup afterwards. */ static bdescr * -alloc_mega_group (StgWord mblocks) +alloc_mega_group (uint32_t node, StgWord mblocks) { bdescr *best, *bd, *prev; StgWord n; @@ -313,14 +340,14 @@ alloc_mega_group (StgWord mblocks) best = NULL; prev = NULL; - for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link) + for (bd = free_mblock_list[node]; bd != NULL; prev = bd, bd = bd->link) { if (bd->blocks == n) { if (prev) { prev->link = bd->link; } else { - free_mblock_list = bd->link; + free_mblock_list[node] = bd->link; } return bd; } @@ -341,12 +368,17 @@ alloc_mega_group (StgWord mblocks) (best_mblocks-mblocks)*MBLOCK_SIZE); best->blocks = MBLOCK_GROUP_BLOCKS(best_mblocks - mblocks); - initMBlock(MBLOCK_ROUND_DOWN(bd)); + initMBlock(MBLOCK_ROUND_DOWN(bd), node); } else { - void *mblock = getMBlocks(mblocks); - initMBlock(mblock); // only need to init the 1st one + void *mblock; + if (RtsFlags.GcFlags.numa) { + mblock = getMBlocksOnNode(node, mblocks); + } else { + mblock = getMBlocks(mblocks); + } + initMBlock(mblock, node); // only need to init the 1st one bd = FIRST_BDESCR(mblock); } bd->blocks = MBLOCK_GROUP_BLOCKS(mblocks); @@ -354,7 +386,7 @@ alloc_mega_group (StgWord mblocks) } bdescr * -allocGroup (W_ n) +allocGroupOnNode (uint32_t node, W_ n) { bdescr *bd, *rem; StgWord ln; @@ -369,21 
+401,19 @@ allocGroup (W_ n) // n_alloc_blocks doesn't count the extra blocks we get in a // megablock group. - n_alloc_blocks += mblocks * BLOCKS_PER_MBLOCK; - if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks; + recordAllocatedBlocks(node, mblocks * BLOCKS_PER_MBLOCK); - bd = alloc_mega_group(mblocks); + bd = alloc_mega_group(node, mblocks); // only the bdescrs of the first MB are required to be initialised initGroup(bd); goto finish; } - n_alloc_blocks += n; - if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks; + recordAllocatedBlocks(node, n); ln = log_2_ceil(n); - while (ln < NUM_FREE_LISTS && free_list[ln] == NULL) { + while (ln < NUM_FREE_LISTS && free_list[node][ln] == NULL) { ln++; } @@ -397,27 +427,27 @@ allocGroup (W_ n) } #endif - bd = alloc_mega_group(1); + bd = alloc_mega_group(node,1); bd->blocks = n; initGroup(bd); // we know the group will fit rem = bd + n; rem->blocks = BLOCKS_PER_MBLOCK-n; - initGroup(rem); // init the slop - n_alloc_blocks += rem->blocks; + initGroup(rem); // init the slop + recordAllocatedBlocks(node,rem->blocks); freeGroup(rem); // add the slop on to the free list goto finish; } - bd = free_list[ln]; + bd = free_list[node][ln]; if (bd->blocks == n) // exactly the right size! { - dbl_link_remove(bd, &free_list[ln]); + dbl_link_remove(bd, &free_list[node][ln]); initGroup(bd); } else if (bd->blocks > n) // block too big... 
{ - bd = split_free_block(bd, n, ln); + bd = split_free_block(bd, node, n, ln); ASSERT(bd->blocks == n); initGroup(bd); } @@ -432,6 +462,26 @@ finish: return bd; } +STATIC_INLINE +uint32_t nodeWithLeastBlocks (void) +{ + uint32_t node = 0, i; + uint32_t min_blocks = n_alloc_blocks_by_node[0]; + for (i = 1; i < RtsFlags.GcFlags.nNumaNodes; i++) { + if (n_alloc_blocks_by_node[i] < min_blocks) { + min_blocks = n_alloc_blocks_by_node[i]; + node = i; + } + } + return node; +} + +bdescr* allocGroup (W_ n) +{ + return allocGroupOnNode(nodeWithLeastBlocks(),n); +} + + // // Allocate a chunk of blocks that is at least min and at most max // blocks in size. This API is used by the nursery allocator that @@ -448,8 +498,7 @@ finish: // fragmentation, but we make sure that we allocate large blocks // preferably if there are any. // -bdescr * -allocLargeChunk (W_ min, W_ max) +bdescr* allocLargeChunkOnNode (uint32_t node, W_ min, W_ max) { bdescr *bd; StgWord ln, lnmax; @@ -461,34 +510,38 @@ allocLargeChunk (W_ min, W_ max) ln = log_2_ceil(min); lnmax = log_2_ceil(max); - while (ln < NUM_FREE_LISTS && ln < lnmax && free_list[ln] == NULL) { + while (ln < NUM_FREE_LISTS && ln < lnmax && free_list[node][ln] == NULL) { ln++; } if (ln == NUM_FREE_LISTS || ln == lnmax) { - return allocGroup(max); + return allocGroupOnNode(node,max); } - bd = free_list[ln]; + bd = free_list[node][ln]; if (bd->blocks <= max) // exactly the right size! { - dbl_link_remove(bd, &free_list[ln]); + dbl_link_remove(bd, &free_list[node][ln]); initGroup(bd); } else // block too big... 
{ - bd = split_free_block(bd, max, ln); + bd = split_free_block(bd, node, max, ln); ASSERT(bd->blocks == max); initGroup(bd); } - n_alloc_blocks += bd->blocks; - if (n_alloc_blocks > hw_alloc_blocks) hw_alloc_blocks = n_alloc_blocks; + recordAllocatedBlocks(node, bd->blocks); IF_DEBUG(sanity, memset(bd->start, 0xaa, bd->blocks * BLOCK_SIZE)); IF_DEBUG(sanity, checkFreeListSanity()); return bd; } +bdescr* allocLargeChunk (W_ min, W_ max) +{ + return allocLargeChunkOnNode(nodeWithLeastBlocks(), min, max); +} + bdescr * allocGroup_lock(W_ n) { @@ -500,17 +553,31 @@ allocGroup_lock(W_ n) } bdescr * -allocBlock(void) +allocBlock_lock(void) { - return allocGroup(1); + bdescr *bd; + ACQUIRE_SM_LOCK; + bd = allocBlock(); + RELEASE_SM_LOCK; + return bd; } bdescr * -allocBlock_lock(void) +allocGroupOnNode_lock(uint32_t node, W_ n) { bdescr *bd; ACQUIRE_SM_LOCK; - bd = allocBlock(); + bd = allocGroupOnNode(node,n); + RELEASE_SM_LOCK; + return bd; +} + +bdescr * +allocBlockOnNode_lock(uint32_t node) +{ + bdescr *bd; + ACQUIRE_SM_LOCK; + bd = allocBlockOnNode(node); RELEASE_SM_LOCK; return bd; } @@ -542,11 +609,13 @@ static void free_mega_group (bdescr *mg) { bdescr *bd, *prev; + uint32_t node; // Find the right place in the free list. free_mblock_list is // sorted by *address*, not by size as the free_list is. 
prev = NULL; - bd = free_mblock_list; + node = mg->node; + bd = free_mblock_list[node]; while (bd && bd->start < mg->start) { prev = bd; bd = bd->link; @@ -561,8 +630,8 @@ free_mega_group (bdescr *mg) } else { - mg->link = free_mblock_list; - free_mblock_list = mg; + mg->link = free_mblock_list[node]; + free_mblock_list[node] = mg; } // coalesce forwards coalesce_mblocks(mg); @@ -575,12 +644,15 @@ void freeGroup(bdescr *p) { StgWord ln; + uint32_t node; - // Todo: not true in multithreaded GC + // not true in multithreaded GC: // ASSERT_SM_LOCK(); ASSERT(p->free != (P_)-1); + node = p->node; + p->free = (void *)-1; /* indicates that this block is free */ p->gen = NULL; p->gen_no = 0; @@ -597,14 +669,13 @@ freeGroup(bdescr *p) // If this is an mgroup, make sure it has the right number of blocks ASSERT(p->blocks == MBLOCK_GROUP_BLOCKS(mblocks)); - n_alloc_blocks -= mblocks * BLOCKS_PER_MBLOCK; + recordFreedBlocks(node, mblocks * BLOCKS_PER_MBLOCK); free_mega_group(p); return; } - ASSERT(n_alloc_blocks >= p->blocks); - n_alloc_blocks -= p->blocks; + recordFreedBlocks(node, p->blocks); // coalesce forwards { @@ -614,7 +685,7 @@ freeGroup(bdescr *p) { p->blocks += next->blocks; ln = log_2(next->blocks); - dbl_link_remove(next, &free_list[ln]); + dbl_link_remove(next, &free_list[node][ln]); if (p->blocks == BLOCKS_PER_MBLOCK) { free_mega_group(p); @@ -634,7 +705,7 @@ freeGroup(bdescr *p) if (prev->free == (P_)-1) { ln = log_2(prev->blocks); - dbl_link_remove(prev, &free_list[ln]); + dbl_link_remove(prev, &free_list[node][ln]); prev->blocks += p->blocks; if (prev->blocks >= BLOCKS_PER_MBLOCK) { @@ -646,7 +717,7 @@ freeGroup(bdescr *p) } setup_tail(p); - free_list_insert(p); + free_list_insert(node,p); IF_DEBUG(sanity, checkFreeListSanity()); } @@ -679,7 +750,7 @@ freeChain_lock(bdescr *bd) } static void -initMBlock(void *mblock) +initMBlock(void *mblock, uint32_t node) { bdescr *bd; StgWord8 *block; @@ -695,6 +766,7 @@ initMBlock(void *mblock) for (; block <= 
(StgWord8*)LAST_BLOCK(mblock); bd += 1, block += BLOCK_SIZE) { bd->start = (void*)block; + bd->node = node; } } @@ -734,28 +806,32 @@ countAllocdBlocks(bdescr *bd) void returnMemoryToOS(uint32_t n /* megablocks */) { - static bdescr *bd; + bdescr *bd; + uint32_t node; StgWord size; - bd = free_mblock_list; - while ((n > 0) && (bd != NULL)) { - size = BLOCKS_TO_MBLOCKS(bd->blocks); - if (size > n) { - StgWord newSize = size - n; - char *freeAddr = MBLOCK_ROUND_DOWN(bd->start); - freeAddr += newSize * MBLOCK_SIZE; - bd->blocks = MBLOCK_GROUP_BLOCKS(newSize); - freeMBlocks(freeAddr, n); - n = 0; - } - else { - char *freeAddr = MBLOCK_ROUND_DOWN(bd->start); - n -= size; - bd = bd->link; - freeMBlocks(freeAddr, size); + // ToDo: not fair, we free all the memory starting with node 0. + for (node = 0; n > 0 && node < RtsFlags.GcFlags.nNumaNodes; node++) { + bd = free_mblock_list[node]; + while ((n > 0) && (bd != NULL)) { + size = BLOCKS_TO_MBLOCKS(bd->blocks); + if (size > n) { + StgWord newSize = size - n; + char *freeAddr = MBLOCK_ROUND_DOWN(bd->start); + freeAddr += newSize * MBLOCK_SIZE; + bd->blocks = MBLOCK_GROUP_BLOCKS(newSize); + freeMBlocks(freeAddr, n); + n = 0; + } + else { + char *freeAddr = MBLOCK_ROUND_DOWN(bd->start); + n -= size; + bd = bd->link; + freeMBlocks(freeAddr, size); + } } + free_mblock_list[node] = bd; } - free_mblock_list = bd; // Ask the OS to release any address space portion // that was associated with the just released MBlocks @@ -797,68 +873,71 @@ checkFreeListSanity(void) { bdescr *bd, *prev; StgWord ln, min; + uint32_t node; - - min = 1; - for (ln = 0; ln < NUM_FREE_LISTS; ln++) { - IF_DEBUG(block_alloc, - debugBelch("free block list [%" FMT_Word "]:\n", ln)); - - prev = NULL; - for (bd = free_list[ln]; bd != NULL; prev = bd, bd = bd->link) - { + for (node = 0; node < RtsFlags.GcFlags.nNumaNodes; node++) { + min = 1; + for (ln = 0; ln < NUM_FREE_LISTS; ln++) { IF_DEBUG(block_alloc, - debugBelch("group at %p, length %ld blocks\n", - 
bd->start, (long)bd->blocks)); - ASSERT(bd->free == (P_)-1); - ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK); - ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1)); - ASSERT(bd->link != bd); // catch easy loops - - check_tail(bd); - - if (prev) - ASSERT(bd->u.back == prev); - else - ASSERT(bd->u.back == NULL); + debugBelch("free block list [%" FMT_Word "]:\n", ln)); + prev = NULL; + for (bd = free_list[node][ln]; bd != NULL; prev = bd, bd = bd->link) { - bdescr *next; - next = bd + bd->blocks; - if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd))) + IF_DEBUG(block_alloc, + debugBelch("group at %p, length %ld blocks\n", + bd->start, (long)bd->blocks)); + ASSERT(bd->free == (P_)-1); + ASSERT(bd->blocks > 0 && bd->blocks < BLOCKS_PER_MBLOCK); + ASSERT(bd->blocks >= min && bd->blocks <= (min*2 - 1)); + ASSERT(bd->link != bd); // catch easy loops + ASSERT(bd->node == node); + + check_tail(bd); + + if (prev) + ASSERT(bd->u.back == prev); + else + ASSERT(bd->u.back == NULL); + { - ASSERT(next->free != (P_)-1); + bdescr *next; + next = bd + bd->blocks; + if (next <= LAST_BDESCR(MBLOCK_ROUND_DOWN(bd))) + { + ASSERT(next->free != (P_)-1); + } } } + min = min << 1; } - min = min << 1; - } - prev = NULL; - for (bd = free_mblock_list; bd != NULL; prev = bd, bd = bd->link) - { - IF_DEBUG(block_alloc, - debugBelch("mega group at %p, length %ld blocks\n", - bd->start, (long)bd->blocks)); + prev = NULL; + for (bd = free_mblock_list[node]; bd != NULL; prev = bd, bd = bd->link) + { + IF_DEBUG(block_alloc, + debugBelch("mega group at %p, length %ld blocks\n", + bd->start, (long)bd->blocks)); - ASSERT(bd->link != bd); // catch easy loops + ASSERT(bd->link != bd); // catch easy loops - if (bd->link != NULL) - { - // make sure the list is sorted - ASSERT(bd->start < bd->link->start); - } + if (bd->link != NULL) + { + // make sure the list is sorted + ASSERT(bd->start < bd->link->start); + } - ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK); - 
ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks)) - == bd->blocks); + ASSERT(bd->blocks >= BLOCKS_PER_MBLOCK); + ASSERT(MBLOCK_GROUP_BLOCKS(BLOCKS_TO_MBLOCKS(bd->blocks)) + == bd->blocks); - // make sure we're fully coalesced - if (bd->link != NULL) - { - ASSERT(MBLOCK_ROUND_DOWN(bd->link) != - (StgWord8*)MBLOCK_ROUND_DOWN(bd) + - BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE); + // make sure we're fully coalesced + if (bd->link != NULL) + { + ASSERT(MBLOCK_ROUND_DOWN(bd->link) != + (StgWord8*)MBLOCK_ROUND_DOWN(bd) + + BLOCKS_TO_MBLOCKS(bd->blocks) * MBLOCK_SIZE); + } } } } @@ -869,18 +948,21 @@ countFreeList(void) bdescr *bd; W_ total_blocks = 0; StgWord ln; + uint32_t node; - for (ln=0; ln < NUM_FREE_LISTS; ln++) { - for (bd = free_list[ln]; bd != NULL; bd = bd->link) { - total_blocks += bd->blocks; + for (node = 0; node < RtsFlags.GcFlags.nNumaNodes; node++) { + for (ln=0; ln < NUM_FREE_LISTS; ln++) { + for (bd = free_list[node][ln]; bd != NULL; bd = bd->link) { + total_blocks += bd->blocks; + } + } + for (bd = free_mblock_list[node]; bd != NULL; bd = bd->link) { + total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks); + // The caller of this function, memInventory(), expects to match + // the total number of blocks in the system against mblocks * + // BLOCKS_PER_MBLOCK, so we must subtract the space for the + // block descriptors from *every* mblock. } - } - for (bd = free_mblock_list; bd != NULL; bd = bd->link) { - total_blocks += BLOCKS_PER_MBLOCK * BLOCKS_TO_MBLOCKS(bd->blocks); - // The caller of this function, memInventory(), expects to match - // the total number of blocks in the system against mblocks * - // BLOCKS_PER_MBLOCK, so we must subtract the space for the - // block descriptors from *every* mblock. } return total_blocks; } |