diff options
Diffstat (limited to 'rts')
-rw-r--r-- | rts/Capability.c | 38 | ||||
-rw-r--r-- | rts/Capability.h | 17 | ||||
-rw-r--r-- | rts/RtsFlags.c | 36 | ||||
-rw-r--r-- | rts/Task.c | 4 | ||||
-rw-r--r-- | rts/posix/OSThreads.c | 1 | ||||
-rw-r--r-- | rts/sm/BlockAlloc.c | 10 | ||||
-rw-r--r-- | rts/sm/MBlock.c | 2 | ||||
-rw-r--r-- | rts/sm/Storage.c | 18 |
8 files changed, 68 insertions, 58 deletions
diff --git a/rts/Capability.c b/rts/Capability.c index 411e64dc7a..7ca220fbd9 100644 --- a/rts/Capability.c +++ b/rts/Capability.c @@ -26,6 +26,7 @@ #include "sm/GC.h" // for gcWorkerThread() #include "STM.h" #include "RtsUtils.h" +#include "sm/OSMem.h" #if !defined(mingw32_HOST_OS) #include "rts/IOManager.h" // for setIOManagerControlFd() @@ -59,6 +60,12 @@ static Capability *last_free_capability[MAX_NUMA_NODES]; */ PendingSync * volatile pending_sync = 0; +// Number of logical NUMA nodes +uint32_t n_numa_nodes; + +// Map logical NUMA node to OS node numbers +uint32_t numa_map[MAX_NUMA_NODES]; + /* Let foreign code get the current Capability -- assuming there is one! * This is useful for unsafe foreign calls because they are called with * the current Capability held, but they are not passed it. For example, @@ -326,6 +333,31 @@ void initCapabilities (void) traceCapsetCreate(CAPSET_OSPROCESS_DEFAULT, CapsetTypeOsProcess); traceCapsetCreate(CAPSET_CLOCKDOMAIN_DEFAULT, CapsetTypeClockdomain); + // Initialise NUMA + if (!RtsFlags.GcFlags.numa) { + n_numa_nodes = 1; + for (i = 0; i < MAX_NUMA_NODES; i++) { + numa_map[i] = 0; + } + } else { + uint32_t nNodes = osNumaNodes(); + if (nNodes > MAX_NUMA_NODES) { + barf("Too many NUMA nodes (max %d)", MAX_NUMA_NODES); + } + StgWord mask = RtsFlags.GcFlags.numaMask & osNumaMask(); + uint32_t logical = 0, physical = 0; + for (; physical < MAX_NUMA_NODES; physical++) { + if (mask & 1) { + numa_map[logical++] = physical; + } + mask = mask >> 1; + } + n_numa_nodes = logical; + if (logical == 0) { + barf("%s: available NUMA node set is empty"); + } + } + #if defined(THREADED_RTS) #ifndef REG_Base @@ -355,7 +387,7 @@ void initCapabilities (void) // There are no free capabilities to begin with. We will start // a worker Task to each Capability, which will quickly put the // Capability on the free list when it finds nothing to do. - for (i = 0; i < RtsFlags.GcFlags.nNumaNodes; i++) { + for (i = 0; i < n_numa_nodes; i++) { last_free_capability[i] = capabilities[0]; } } @@ -730,9 +762,9 @@ void waitForCapability (Capability **pCap, Task *task) // Otherwise, search for a free capability on this node. cap = NULL; for (i = task->node; i < enabled_capabilities; - i += RtsFlags.GcFlags.nNumaNodes) { + i += n_numa_nodes) { // visits all the capabilities on this node, because - // cap[i]->node == i % RtsFlags.GcFlags.nNumaNodes + // cap[i]->node == i % n_numa_nodes if (!capabilities[i]->running_task) { cap = capabilities[i]; break; diff --git a/rts/Capability.h b/rts/Capability.h index 6874379c5f..67b43280eb 100644 --- a/rts/Capability.h +++ b/rts/Capability.h @@ -39,7 +39,7 @@ struct Capability_ { // The NUMA node on which this capability resides. This is used to allocate // node-local memory in allocate(). // - // Note: this is always equal to cap->no % RtsFlags.ParFlags.nNumaNodes. + // Note: this is always equal to cap->no % n_numa_nodes. // The reason we slice it this way is that if we add or remove capabilities // via setNumCapabilities(), then we keep the number of capabilities on each // NUMA node balanced. @@ -159,9 +159,6 @@ struct Capability_ { #endif ; - -#define capNoToNumaNode(n) ((n) % RtsFlags.GcFlags.nNumaNodes) - #if defined(THREADED_RTS) #define ASSERT_TASK_ID(task) ASSERT(task->id == osThreadId()) #else @@ -350,6 +347,18 @@ void markCapabilities (evac_fn evac, void *user); void traverseSparkQueues (evac_fn evac, void *user); /* ----------------------------------------------------------------------------- + NUMA + -------------------------------------------------------------------------- */ + +/* Number of logical NUMA nodes */ +extern uint32_t n_numa_nodes; + +/* Map logical NUMA node to OS node numbers */ +extern uint32_t numa_map[MAX_NUMA_NODES]; + +#define capNoToNumaNode(n) ((n) % n_numa_nodes) + +/* ----------------------------------------------------------------------------- Messages -------------------------------------------------------------------------- */ diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index 25345bf57b..e23f760f43 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -123,7 +123,6 @@ static void errorRtsOptsDisabled (const char *s); void initRtsFlagsDefaults(void) { - uint32_t i; StgWord64 maxStkSize = 8 * getPhysicalMemorySize() / 10; // if getPhysicalMemorySize fails just move along with an 8MB limit if (maxStkSize == 0) @@ -160,10 +159,7 @@ void initRtsFlagsDefaults(void) RtsFlags.GcFlags.heapBase = 0; /* means don't care */ RtsFlags.GcFlags.allocLimitGrace = (100*1024) / BLOCK_SIZE; RtsFlags.GcFlags.numa = rtsFalse; - RtsFlags.GcFlags.nNumaNodes = 1; - for (i = 0; i < MAX_NUMA_NODES; i++) { - RtsFlags.GcFlags.numaMap[i] = 0; - } + RtsFlags.GcFlags.numaMask = 1; RtsFlags.DebugFlags.scheduler = rtsFalse; RtsFlags.DebugFlags.interpreter = rtsFalse; @@ -776,28 +772,8 @@ error = rtsTrue; break; } - uint32_t nNodes = osNumaNodes(); - if (nNodes > MAX_NUMA_NODES) { - errorBelch("%s: Too many NUMA nodes (max %d)", - rts_argv[arg], MAX_NUMA_NODES); - error = rtsTrue; - } else { - RtsFlags.GcFlags.numa = rtsTrue; - mask = mask & osNumaMask(); - uint32_t logical = 0, physical = 0; - for (; physical < MAX_NUMA_NODES; physical++) { - if (mask & 1) { - RtsFlags.GcFlags.numaMap[logical++] = physical; - } - mask = mask >> 1; - } - RtsFlags.GcFlags.nNumaNodes = logical; - if (logical == 0) { - errorBelch("%s: available node set is empty", - rts_argv[arg]); - error = rtsTrue; - } - } + RtsFlags.GcFlags.numa = rtsTrue; + RtsFlags.GcFlags.numaMask = mask; } #endif #if defined(DEBUG) && defined(THREADED_RTS) @@ -821,11 +797,7 @@ error = rtsTrue; } else { RtsFlags.GcFlags.numa = rtsTrue; RtsFlags.DebugFlags.numa = rtsTrue; - RtsFlags.GcFlags.nNumaNodes = nNodes; - uint32_t physical = 0; - for (; physical < MAX_NUMA_NODES; physical++) { - RtsFlags.GcFlags.numaMap[physical] = physical; - } + RtsFlags.GcFlags.numaMask = (1<<nNodes) - 1; } } #endif diff --git a/rts/Task.c b/rts/Task.c index 9a827745ba..9a658e019c 100644 --- a/rts/Task.c +++ b/rts/Task.c @@ -429,7 +429,7 @@ workerStart(Task *task) setThreadAffinity(cap->no, n_capabilities); } if (RtsFlags.GcFlags.numa && !RtsFlags.DebugFlags.numa) { - setThreadNode(RtsFlags.GcFlags.numaMap[task->node]); + setThreadNode(numa_map[task->node]); } // set the thread-local pointer to the Task: @@ -510,7 +510,7 @@ void rts_setInCallCapability ( if (RtsFlags.GcFlags.numa) { task->node = capNoToNumaNode(preferred_capability); if (!DEBUG_IS_ON || !RtsFlags.DebugFlags.numa) { // faking NUMA - setThreadNode(RtsFlags.GcFlags.numaMap[task->node]); + setThreadNode(numa_map[task->node]); } } } diff --git a/rts/posix/OSThreads.c b/rts/posix/OSThreads.c index 35ea2bde21..112a311f79 100644 --- a/rts/posix/OSThreads.c +++ b/rts/posix/OSThreads.c @@ -321,7 +321,6 @@ setThreadAffinity (uint32_t n STG_UNUSED, #if HAVE_LIBNUMA void setThreadNode (uint32_t node) { - ASSERT(node < RtsFlags.GcFlags.nNumaNodes); if (numa_run_on_node(node) == -1) { sysErrorBelch("numa_run_on_node"); stg_exit(1); diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c index c2859b0c15..6c2e96414e 100644 --- a/rts/sm/BlockAlloc.c +++ b/rts/sm/BlockAlloc.c @@ -467,7 +467,7 @@ uint32_t nodeWithLeastBlocks (void) { uint32_t node = 0, i; uint32_t min_blocks = n_alloc_blocks_by_node[0]; - for (i = 1; i < RtsFlags.GcFlags.nNumaNodes; i++) { + for (i = 1; i < n_numa_nodes; i++) { if (n_alloc_blocks_by_node[i] < min_blocks) { min_blocks = n_alloc_blocks_by_node[i]; node = i; @@ -504,7 +504,7 @@ bdescr* allocLargeChunkOnNode (uint32_t node, W_ min, W_ max) StgWord ln, lnmax; if (min >= BLOCKS_PER_MBLOCK) { - return allocGroup(max); + return allocGroupOnNode(node,max); } ln = log_2_ceil(min); @@ -811,7 +811,7 @@ void returnMemoryToOS(uint32_t n /* megablocks */) StgWord size; // ToDo: not fair, we free all the memory starting with node 0. - for (node = 0; n > 0 && node < RtsFlags.GcFlags.nNumaNodes; node++) { + for (node = 0; n > 0 && node < n_numa_nodes; node++) { bd = free_mblock_list[node]; while ((n > 0) && (bd != NULL)) { size = BLOCKS_TO_MBLOCKS(bd->blocks); @@ -875,7 +875,7 @@ checkFreeListSanity(void) StgWord ln, min; uint32_t node; - for (node = 0; node < RtsFlags.GcFlags.nNumaNodes; node++) { + for (node = 0; node < n_numa_nodes; node++) { min = 1; for (ln = 0; ln < NUM_FREE_LISTS; ln++) { IF_DEBUG(block_alloc, @@ -950,7 +950,7 @@ countFreeList(void) StgWord ln; uint32_t node; - for (node = 0; node < RtsFlags.GcFlags.nNumaNodes; node++) { + for (node = 0; node < n_numa_nodes; node++) { for (ln=0; ln < NUM_FREE_LISTS; ln++) { for (bd = free_list[node][ln]; bd != NULL; bd = bd->link) { total_blocks += bd->blocks; diff --git a/rts/sm/MBlock.c b/rts/sm/MBlock.c index 53999d2c4b..4be7fd4356 100644 --- a/rts/sm/MBlock.c +++ b/rts/sm/MBlock.c @@ -594,7 +594,7 @@ getMBlocksOnNode(uint32_t node, uint32_t n) #ifdef DEBUG if (RtsFlags.DebugFlags.numa) return addr; // faking NUMA #endif - osBindMBlocksToNode(addr, n * MBLOCK_SIZE, RtsFlags.GcFlags.numaMap[node]); + osBindMBlocksToNode(addr, n * MBLOCK_SIZE, numa_map[node]); return addr; } diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index a9a7857d43..7c41f8c64b 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -57,7 +57,7 @@ generation *oldest_gen = NULL; /* oldest generation, for convenience */ /* * Array of nurseries, size == n_capabilities * - * nursery[i] belongs to NUMA node (i % RtsFlags.GcFlags.nNumaNodes) + * nursery[i] belongs to NUMA node (i % n_numa_nodes) * This is chosen to be the same convention as capabilities[i], so * that when not using nursery chunks (+RTS -n), we just map * capabilities to nurseries 1:1. @@ -209,7 +209,7 @@ initStorage (void) N = 0; - for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) { + for (n = 0; n < n_numa_nodes; n++) { next_nursery[n] = n; } storageAddCapabilities(0, n_capabilities); @@ -615,7 +615,7 @@ assignNurseriesToCapabilities (uint32_t from, uint32_t to) for (i = from; i < to; i++) { node = capabilities[i]->node; assignNurseryToCapability(capabilities[i], next_nursery[node]); - next_nursery[node] += RtsFlags.GcFlags.nNumaNodes; + next_nursery[node] += n_numa_nodes; } } @@ -642,7 +642,7 @@ resetNurseries (void) { uint32_t n; - for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) { + for (n = 0; n < n_numa_nodes; n++) { next_nursery[n] = n; } assignNurseriesToCapabilities(0, n_capabilities); @@ -758,22 +758,20 @@ getNewNursery (Capability *cap) for(;;) { i = next_nursery[node]; if (i < n_nurseries) { - if (cas(&next_nursery[node], i, - i+RtsFlags.GcFlags.nNumaNodes) == i) { + if (cas(&next_nursery[node], i, i+n_numa_nodes) == i) { assignNurseryToCapability(cap, i); return rtsTrue; } - } else if (RtsFlags.GcFlags.nNumaNodes > 1) { + } else if (n_numa_nodes > 1) { // Try to find an unused nursery chunk on other nodes. We'll get // remote memory, but the rationale is that avoiding GC is better // than avoiding remote memory access. rtsBool lost = rtsFalse; - for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) { + for (n = 0; n < n_numa_nodes; n++) { if (n == node) continue; i = next_nursery[n]; if (i < n_nurseries) { - if (cas(&next_nursery[n], i, - i+RtsFlags.GcFlags.nNumaNodes) == i) { + if (cas(&next_nursery[n], i, i+n_numa_nodes) == i) { assignNurseryToCapability(cap, i); return rtsTrue; } else { |