Diffstat (limited to 'rts')
-rw-r--r--  rts/Capability.c        38
-rw-r--r--  rts/Capability.h        17
-rw-r--r--  rts/RtsFlags.c          36
-rw-r--r--  rts/Task.c               4
-rw-r--r--  rts/posix/OSThreads.c    1
-rw-r--r--  rts/sm/BlockAlloc.c     10
-rw-r--r--  rts/sm/MBlock.c          2
-rw-r--r--  rts/sm/Storage.c        18
8 files changed, 68 insertions, 58 deletions
diff --git a/rts/Capability.c b/rts/Capability.c
index 411e64dc7a..7ca220fbd9 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -26,6 +26,7 @@
#include "sm/GC.h" // for gcWorkerThread()
#include "STM.h"
#include "RtsUtils.h"
+#include "sm/OSMem.h"
#if !defined(mingw32_HOST_OS)
#include "rts/IOManager.h" // for setIOManagerControlFd()
@@ -59,6 +60,12 @@ static Capability *last_free_capability[MAX_NUMA_NODES];
*/
PendingSync * volatile pending_sync = 0;
+// Number of logical NUMA nodes
+uint32_t n_numa_nodes;
+
+// Map logical NUMA node to OS node numbers
+uint32_t numa_map[MAX_NUMA_NODES];
+
/* Let foreign code get the current Capability -- assuming there is one!
* This is useful for unsafe foreign calls because they are called with
* the current Capability held, but they are not passed it. For example,
@@ -326,6 +333,31 @@ void initCapabilities (void)
traceCapsetCreate(CAPSET_OSPROCESS_DEFAULT, CapsetTypeOsProcess);
traceCapsetCreate(CAPSET_CLOCKDOMAIN_DEFAULT, CapsetTypeClockdomain);
+ // Initialise NUMA
+ if (!RtsFlags.GcFlags.numa) {
+ n_numa_nodes = 1;
+ for (i = 0; i < MAX_NUMA_NODES; i++) {
+ numa_map[i] = 0;
+ }
+ } else {
+ uint32_t nNodes = osNumaNodes();
+ if (nNodes > MAX_NUMA_NODES) {
+ barf("Too many NUMA nodes (max %d)", MAX_NUMA_NODES);
+ }
+ StgWord mask = RtsFlags.GcFlags.numaMask & osNumaMask();
+ uint32_t logical = 0, physical = 0;
+ for (; physical < MAX_NUMA_NODES; physical++) {
+ if (mask & 1) {
+ numa_map[logical++] = physical;
+ }
+ mask = mask >> 1;
+ }
+ n_numa_nodes = logical;
+ if (logical == 0) {
+ barf("%s: available NUMA node set is empty");
+ }
+ }
+
#if defined(THREADED_RTS)
#ifndef REG_Base
@@ -355,7 +387,7 @@ void initCapabilities (void)
// There are no free capabilities to begin with. We will start
// a worker Task to each Capability, which will quickly put the
// Capability on the free list when it finds nothing to do.
- for (i = 0; i < RtsFlags.GcFlags.nNumaNodes; i++) {
+ for (i = 0; i < n_numa_nodes; i++) {
last_free_capability[i] = capabilities[0];
}
}
@@ -730,9 +762,9 @@ void waitForCapability (Capability **pCap, Task *task)
// Otherwise, search for a free capability on this node.
cap = NULL;
for (i = task->node; i < enabled_capabilities;
- i += RtsFlags.GcFlags.nNumaNodes) {
+ i += n_numa_nodes) {
// visits all the capabilities on this node, because
- // cap[i]->node == i % RtsFlags.GcFlags.nNumaNodes
+ // cap[i]->node == i % n_numa_nodes
if (!capabilities[i]->running_task) {
cap = capabilities[i];
break;
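The heart of the change is this mask-to-map translation, now done once in initCapabilities: the user-supplied node mask is intersected with the OS's mask, and each remaining set bit becomes one logical node. A standalone sketch of that translation outside the RTS (the MAX_NUMA_NODES value and the example mask are assumptions for illustration):

    #include <stdio.h>
    #include <stdint.h>

    #define MAX_NUMA_NODES 16   /* illustrative only; the RTS defines its own limit */

    /* Turn a bitmask of usable OS nodes into a dense logical->OS map, the same
       way the new initCapabilities code does. Returns the logical node count. */
    static uint32_t buildNumaMap(uint64_t mask, uint32_t numa_map[MAX_NUMA_NODES])
    {
        uint32_t logical = 0, physical = 0;
        for (; physical < MAX_NUMA_NODES; physical++) {
            if (mask & 1) {
                numa_map[logical++] = physical;
            }
            mask = mask >> 1;
        }
        return logical;
    }

    int main(void)
    {
        uint32_t numa_map[MAX_NUMA_NODES];
        uint32_t n = buildNumaMap(0x5, numa_map);   /* OS nodes 0 and 2 usable */
        for (uint32_t i = 0; i < n; i++) {
            printf("logical node %u -> OS node %u\n", i, numa_map[i]);
        }
        return 0;
    }

With mask 0x5 this yields two logical nodes, mapped to OS nodes 0 and 2; the RTS then works purely in logical node numbers and only consults numa_map at the OS boundary (setThreadNode, osBindMBlocksToNode).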
diff --git a/rts/Capability.h b/rts/Capability.h
index 6874379c5f..67b43280eb 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -39,7 +39,7 @@ struct Capability_ {
// The NUMA node on which this capability resides. This is used to allocate
// node-local memory in allocate().
//
- // Note: this is always equal to cap->no % RtsFlags.ParFlags.nNumaNodes.
+ // Note: this is always equal to cap->no % n_numa_nodes.
// The reason we slice it this way is that if we add or remove capabilities
// via setNumCapabilities(), then we keep the number of capabilities on each
// NUMA node balanced.
@@ -159,9 +159,6 @@ struct Capability_ {
#endif
;
-
-#define capNoToNumaNode(n) ((n) % RtsFlags.GcFlags.nNumaNodes)
-
#if defined(THREADED_RTS)
#define ASSERT_TASK_ID(task) ASSERT(task->id == osThreadId())
#else
@@ -350,6 +347,18 @@ void markCapabilities (evac_fn evac, void *user);
void traverseSparkQueues (evac_fn evac, void *user);
/* -----------------------------------------------------------------------------
+ NUMA
+ -------------------------------------------------------------------------- */
+
+/* Number of logical NUMA nodes */
+extern uint32_t n_numa_nodes;
+
+/* Map logical NUMA node to OS node numbers */
+extern uint32_t numa_map[MAX_NUMA_NODES];
+
+#define capNoToNumaNode(n) ((n) % n_numa_nodes)
+
+/* -----------------------------------------------------------------------------
Messages
-------------------------------------------------------------------------- */
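The note above is the invariant behind capNoToNumaNode: capabilities are striped across nodes (cap->no modulo n_numa_nodes) rather than split into contiguous ranges, so changing the capability count via setNumCapabilities() keeps the per-node counts within one of each other. A small illustration of the striping (the node and capability counts are assumed, not taken from the diff):

    #include <stdio.h>
    #include <stdint.h>

    /* Same slicing as capNoToNumaNode(n): capability n lives on node n % n_numa_nodes. */
    static uint32_t capNoToNode(uint32_t n, uint32_t n_numa_nodes)
    {
        return n % n_numa_nodes;
    }

    int main(void)
    {
        const uint32_t n_numa_nodes   = 2;   /* assumed */
        const uint32_t n_capabilities = 6;   /* assumed */
        for (uint32_t cap = 0; cap < n_capabilities; cap++) {
            printf("capability %u -> node %u\n", cap, capNoToNode(cap, n_numa_nodes));
        }
        /* Capabilities 0,2,4 land on node 0 and 1,3,5 on node 1; adding a seventh
           capability puts it on node 0, so the nodes stay balanced. */
        return 0;
    }

This is also why waitForCapability above can walk a node's capabilities with a stride of n_numa_nodes starting from task->node.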
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 25345bf57b..e23f760f43 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -123,7 +123,6 @@ static void errorRtsOptsDisabled (const char *s);
void initRtsFlagsDefaults(void)
{
- uint32_t i;
StgWord64 maxStkSize = 8 * getPhysicalMemorySize() / 10;
// if getPhysicalMemorySize fails just move along with an 8MB limit
if (maxStkSize == 0)
@@ -160,10 +159,7 @@ void initRtsFlagsDefaults(void)
RtsFlags.GcFlags.heapBase = 0; /* means don't care */
RtsFlags.GcFlags.allocLimitGrace = (100*1024) / BLOCK_SIZE;
RtsFlags.GcFlags.numa = rtsFalse;
- RtsFlags.GcFlags.nNumaNodes = 1;
- for (i = 0; i < MAX_NUMA_NODES; i++) {
- RtsFlags.GcFlags.numaMap[i] = 0;
- }
+ RtsFlags.GcFlags.numaMask = 1;
RtsFlags.DebugFlags.scheduler = rtsFalse;
RtsFlags.DebugFlags.interpreter = rtsFalse;
@@ -776,28 +772,8 @@ error = rtsTrue;
break;
}
- uint32_t nNodes = osNumaNodes();
- if (nNodes > MAX_NUMA_NODES) {
- errorBelch("%s: Too many NUMA nodes (max %d)",
- rts_argv[arg], MAX_NUMA_NODES);
- error = rtsTrue;
- } else {
- RtsFlags.GcFlags.numa = rtsTrue;
- mask = mask & osNumaMask();
- uint32_t logical = 0, physical = 0;
- for (; physical < MAX_NUMA_NODES; physical++) {
- if (mask & 1) {
- RtsFlags.GcFlags.numaMap[logical++] = physical;
- }
- mask = mask >> 1;
- }
- RtsFlags.GcFlags.nNumaNodes = logical;
- if (logical == 0) {
- errorBelch("%s: available node set is empty",
- rts_argv[arg]);
- error = rtsTrue;
- }
- }
+ RtsFlags.GcFlags.numa = rtsTrue;
+ RtsFlags.GcFlags.numaMask = mask;
}
#endif
#if defined(DEBUG) && defined(THREADED_RTS)
@@ -821,11 +797,7 @@ error = rtsTrue;
} else {
RtsFlags.GcFlags.numa = rtsTrue;
RtsFlags.DebugFlags.numa = rtsTrue;
- RtsFlags.GcFlags.nNumaNodes = nNodes;
- uint32_t physical = 0;
- for (; physical < MAX_NUMA_NODES; physical++) {
- RtsFlags.GcFlags.numaMap[physical] = physical;
- }
+ RtsFlags.GcFlags.numaMask = (1<<nNodes) - 1;
}
}
#endif
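Flag parsing now only records the raw mask in RtsFlags.GcFlags.numaMask and leaves the decoding to initCapabilities; the DEBUG fake-NUMA path just sets the low nNodes bits. The two representations are equivalent: decoding (1<<nNodes)-1 with the loop added to Capability.c above reproduces the identity map that the old code built here by hand. A quick check of that equivalence (the nNodes value is assumed):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t nNodes = 4;                      /* assumed fake node count */
        uint64_t mask = (1ULL << nNodes) - 1;     /* what the new DEBUG path stores */

        /* Decode the mask the way initCapabilities now does. */
        uint32_t numa_map[64];
        uint32_t logical = 0;
        for (uint32_t physical = 0; physical < 64; physical++) {
            if ((mask >> physical) & 1) {
                numa_map[logical++] = physical;
            }
        }

        /* The old code here set nNumaNodes = nNodes and numaMap[i] = i directly. */
        printf("n_numa_nodes = %u\n", logical);
        for (uint32_t i = 0; i < logical; i++) {
            printf("numa_map[%u] = %u\n", i, numa_map[i]);
        }
        return 0;
    }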
diff --git a/rts/Task.c b/rts/Task.c
index 9a827745ba..9a658e019c 100644
--- a/rts/Task.c
+++ b/rts/Task.c
@@ -429,7 +429,7 @@ workerStart(Task *task)
setThreadAffinity(cap->no, n_capabilities);
}
if (RtsFlags.GcFlags.numa && !RtsFlags.DebugFlags.numa) {
- setThreadNode(RtsFlags.GcFlags.numaMap[task->node]);
+ setThreadNode(numa_map[task->node]);
}
// set the thread-local pointer to the Task:
@@ -510,7 +510,7 @@ void rts_setInCallCapability (
if (RtsFlags.GcFlags.numa) {
task->node = capNoToNumaNode(preferred_capability);
if (!DEBUG_IS_ON || !RtsFlags.DebugFlags.numa) { // faking NUMA
- setThreadNode(RtsFlags.GcFlags.numaMap[task->node]);
+ setThreadNode(numa_map[task->node]);
}
}
}
diff --git a/rts/posix/OSThreads.c b/rts/posix/OSThreads.c
index 35ea2bde21..112a311f79 100644
--- a/rts/posix/OSThreads.c
+++ b/rts/posix/OSThreads.c
@@ -321,7 +321,6 @@ setThreadAffinity (uint32_t n STG_UNUSED,
#if HAVE_LIBNUMA
void setThreadNode (uint32_t node)
{
- ASSERT(node < RtsFlags.GcFlags.nNumaNodes);
if (numa_run_on_node(node) == -1) {
sysErrorBelch("numa_run_on_node");
stg_exit(1);
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index c2859b0c15..6c2e96414e 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -467,7 +467,7 @@ uint32_t nodeWithLeastBlocks (void)
{
uint32_t node = 0, i;
uint32_t min_blocks = n_alloc_blocks_by_node[0];
- for (i = 1; i < RtsFlags.GcFlags.nNumaNodes; i++) {
+ for (i = 1; i < n_numa_nodes; i++) {
if (n_alloc_blocks_by_node[i] < min_blocks) {
min_blocks = n_alloc_blocks_by_node[i];
node = i;
@@ -504,7 +504,7 @@ bdescr* allocLargeChunkOnNode (uint32_t node, W_ min, W_ max)
StgWord ln, lnmax;
if (min >= BLOCKS_PER_MBLOCK) {
- return allocGroup(max);
+ return allocGroupOnNode(node,max);
}
ln = log_2_ceil(min);
@@ -811,7 +811,7 @@ void returnMemoryToOS(uint32_t n /* megablocks */)
StgWord size;
// ToDo: not fair, we free all the memory starting with node 0.
- for (node = 0; n > 0 && node < RtsFlags.GcFlags.nNumaNodes; node++) {
+ for (node = 0; n > 0 && node < n_numa_nodes; node++) {
bd = free_mblock_list[node];
while ((n > 0) && (bd != NULL)) {
size = BLOCKS_TO_MBLOCKS(bd->blocks);
@@ -875,7 +875,7 @@ checkFreeListSanity(void)
StgWord ln, min;
uint32_t node;
- for (node = 0; node < RtsFlags.GcFlags.nNumaNodes; node++) {
+ for (node = 0; node < n_numa_nodes; node++) {
min = 1;
for (ln = 0; ln < NUM_FREE_LISTS; ln++) {
IF_DEBUG(block_alloc,
@@ -950,7 +950,7 @@ countFreeList(void)
StgWord ln;
uint32_t node;
- for (node = 0; node < RtsFlags.GcFlags.nNumaNodes; node++) {
+ for (node = 0; node < n_numa_nodes; node++) {
for (ln=0; ln < NUM_FREE_LISTS; ln++) {
for (bd = free_list[node][ln]; bd != NULL; bd = bd->link) {
total_blocks += bd->blocks;
diff --git a/rts/sm/MBlock.c b/rts/sm/MBlock.c
index 53999d2c4b..4be7fd4356 100644
--- a/rts/sm/MBlock.c
+++ b/rts/sm/MBlock.c
@@ -594,7 +594,7 @@ getMBlocksOnNode(uint32_t node, uint32_t n)
#ifdef DEBUG
if (RtsFlags.DebugFlags.numa) return addr; // faking NUMA
#endif
- osBindMBlocksToNode(addr, n * MBLOCK_SIZE, RtsFlags.GcFlags.numaMap[node]);
+ osBindMBlocksToNode(addr, n * MBLOCK_SIZE, numa_map[node]);
return addr;
}
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index a9a7857d43..7c41f8c64b 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -57,7 +57,7 @@ generation *oldest_gen = NULL; /* oldest generation, for convenience */
/*
* Array of nurseries, size == n_capabilities
*
- * nursery[i] belongs to NUMA node (i % RtsFlags.GcFlags.nNumaNodes)
+ * nursery[i] belongs to NUMA node (i % n_numa_nodes)
* This is chosen to be the same convention as capabilities[i], so
* that when not using nursery chunks (+RTS -n), we just map
* capabilities to nurseries 1:1.
@@ -209,7 +209,7 @@ initStorage (void)
N = 0;
- for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) {
+ for (n = 0; n < n_numa_nodes; n++) {
next_nursery[n] = n;
}
storageAddCapabilities(0, n_capabilities);
@@ -615,7 +615,7 @@ assignNurseriesToCapabilities (uint32_t from, uint32_t to)
for (i = from; i < to; i++) {
node = capabilities[i]->node;
assignNurseryToCapability(capabilities[i], next_nursery[node]);
- next_nursery[node] += RtsFlags.GcFlags.nNumaNodes;
+ next_nursery[node] += n_numa_nodes;
}
}
@@ -642,7 +642,7 @@ resetNurseries (void)
{
uint32_t n;
- for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) {
+ for (n = 0; n < n_numa_nodes; n++) {
next_nursery[n] = n;
}
assignNurseriesToCapabilities(0, n_capabilities);
@@ -758,22 +758,20 @@ getNewNursery (Capability *cap)
for(;;) {
i = next_nursery[node];
if (i < n_nurseries) {
- if (cas(&next_nursery[node], i,
- i+RtsFlags.GcFlags.nNumaNodes) == i) {
+ if (cas(&next_nursery[node], i, i+n_numa_nodes) == i) {
assignNurseryToCapability(cap, i);
return rtsTrue;
}
- } else if (RtsFlags.GcFlags.nNumaNodes > 1) {
+ } else if (n_numa_nodes > 1) {
// Try to find an unused nursery chunk on other nodes. We'll get
// remote memory, but the rationale is that avoiding GC is better
// than avoiding remote memory access.
rtsBool lost = rtsFalse;
- for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) {
+ for (n = 0; n < n_numa_nodes; n++) {
if (n == node) continue;
i = next_nursery[n];
if (i < n_nurseries) {
- if (cas(&next_nursery[n], i,
- i+RtsFlags.GcFlags.nNumaNodes) == i) {
+ if (cas(&next_nursery[n], i, i+n_numa_nodes) == i) {
assignNurseryToCapability(cap, i);
return rtsTrue;
} else {
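The nursery changes follow the same striping convention as the capabilities: nursery i belongs to node i % n_numa_nodes, next_nursery[n] starts at n, and each successful claim advances it by n_numa_nodes, so a node only ever claims nurseries from its own stripe, falling back to other nodes' stripes only when its own is exhausted, as in the code above. A single-threaded toy version of the claim loop, with a plain store standing in for the cas() (the counts are assumed):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const uint32_t n_numa_nodes = 2;                /* assumed */
        const uint32_t n_nurseries  = 8;                /* assumed */
        uint32_t next_nursery[2]    = { 0, 1 };         /* initStorage: next_nursery[n] = n */

        /* Alternate claim requests between the two nodes until the stripes run out. */
        for (uint32_t req = 0; req < n_nurseries; req++) {
            uint32_t node = req % n_numa_nodes;
            uint32_t i = next_nursery[node];
            if (i < n_nurseries) {
                next_nursery[node] = i + n_numa_nodes;  /* getNewNursery does this with cas() */
                printf("node %u claims nursery %u\n", node, i);
            }
        }
        /* Node 0 claims nurseries 0,2,4,6 and node 1 claims 1,3,5,7. */
        return 0;
    }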