summaryrefslogtreecommitdiff
path: root/rts/win32
diff options
context:
space:
mode:
authorTamar Christina <tamar@zhox.com>2016-09-25 20:00:31 +0100
committerTamar Christina <tamar@zhox.com>2016-10-01 23:20:04 +0100
commitc93813d96b1da53a2ebd9c9ac5af6cc3e3443c43 (patch)
treea28cbbec64a5f1c70515a28b6ae3f2f4eaac87c9 /rts/win32
parent1e795a008da8ab2ae88cca04aca01c50967b4397 (diff)
downloadhaskell-c93813d96b1da53a2ebd9c9ac5af6cc3e3443c43.tar.gz
Add NUMA support for Windows
Summary: NOTE: I have been able to do simple testing on emulated NUMA nodes. Real hardware would be needed for a proper test. D2199 Added NUMA support for Linux, I have just filled in the missing pieces following the description of the Linux APIs. Test Plan: Use `bcdedit.exe /set groupsize 2` to modify the kernel again (Similar to D2533). This generates some NUMA nodes: ``` Logical Processor to NUMA Node Map: NUMA Node 0: ** -- NUMA Node 1: -- ** Approximate Cross-NUMA Node Access Cost (relative to fastest): 00 01 00: 1.1 1.1 01: 1.0 1.0 ``` run ` ../test-numa.exe +RTS --numa -RTS` and check PerfMon for NUMA allocations. Reviewers: simonmar, erikd, bgamari, austin Reviewed By: simonmar Subscribers: thomie, #ghc_windows_task_force Differential Revision: https://phabricator.haskell.org/D2534 GHC Trac Issues: #12602
Diffstat (limited to 'rts/win32')
-rw-r--r--rts/win32/OSMem.c81
-rw-r--r--rts/win32/OSThreads.c45
2 files changed, 116 insertions, 10 deletions
diff --git a/rts/win32/OSMem.c b/rts/win32/OSMem.c
index 3d9a304fda..b43636c198 100644
--- a/rts/win32/OSMem.c
+++ b/rts/win32/OSMem.c
@@ -11,9 +11,7 @@
#include "sm/HeapAlloc.h"
#include "RtsUtils.h"
-#if HAVE_WINDOWS_H
#include <windows.h>
-#endif
typedef struct alloc_rec_ {
char* base; // non-aligned base address, directly from VirtualAlloc
@@ -39,11 +37,28 @@ static alloc_rec* allocs = NULL;
/* free_blocks are kept in ascending order, and adjacent blocks are merged */
static block_rec* free_blocks = NULL;
+/* Mingw-w64 does not currently declare VirtualAllocExNuma in its headers, so the function-pointer type is spelled out here and the function is looked up at run time (see osMemInit). */
+typedef LPVOID(WINAPI *VirtualAllocExNumaProc)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD, DWORD);
+
+/* Cached pointer to kernel32's VirtualAllocExNuma. Assigned by osMemInit only when NUMA is available and requested; used by osBindMBlocksToNode. */
+VirtualAllocExNumaProc VirtualAllocExNuma;
+
void
osMemInit(void)
{
allocs = NULL;
free_blocks = NULL;
+
+ /* Resolve and cache VirtualAllocExNuma. */
+ if (osNumaAvailable() && RtsFlags.GcFlags.numa)
+ {
+ VirtualAllocExNuma = (VirtualAllocExNumaProc)GetProcAddress(GetModuleHandleW(L"kernel32"), "VirtualAllocExNuma");
+ if (!VirtualAllocExNuma)
+ {
+ sysErrorBelch(
+ "osBindMBlocksToNode: VirtualAllocExNuma does not exist. How did you get this far?");
+ }
+ }
}
static
@@ -486,22 +501,72 @@ void osReleaseHeapMemory (void)
rtsBool osNumaAvailable(void)
{
- return rtsFalse;
+ return osNumaNodes() > 1; /* NUMA is reported as available only when osNumaNodes() yields more than one node. */
}
uint32_t osNumaNodes(void)
{
- return 1;
+ /* Cache the amount of NUMA values. */
+ static ULONG numNumaNodes = 0;
+
+ /* Cache the amount of NUMA nodes. */
+ if (!numNumaNodes && !GetNumaHighestNodeNumber(&numNumaNodes))
+ {
+ numNumaNodes = 1;
+ }
+
+ return numNumaNodes;
}
StgWord osNumaMask(void)
{
- return 1;
+ StgWord numaMask;
+ if (!GetNumaNodeProcessorMask(0, &numaMask))
+ {
+ return 1;
+ }
+ return numaMask;
}
void osBindMBlocksToNode(
- void *addr STG_UNUSED,
- StgWord size STG_UNUSED,
- uint32_t node STG_UNUSED)
+ void *addr,
+ StgWord size,
+ uint32_t node)
{
+ if (osNumaAvailable())
+ {
+ void* temp;
+ if (RtsFlags.GcFlags.numa) {
+ /* Note [base memory]
+ I would like to use addr here to specify the base
+ memory of allocation. The problem is that the address
+ we are requesting is too high. I can't figure out if it's
+ because of my NUMA-emulation or a bug in the code.
+
+ On windows also -xb is broken, it does nothing so that can't
+ be used to tweak it (see #12577). So for now, just let the OS decide.
+ */
+ temp = VirtualAllocExNuma(
+ GetCurrentProcess(),
+ NULL, // addr? See base memory
+ size,
+ MEM_RESERVE | MEM_COMMIT,
+ PAGE_READWRITE,
+ node
+ );
+
+ if (!temp) {
+ if (GetLastError() == ERROR_NOT_ENOUGH_MEMORY) {
+ errorBelch("out of memory");
+ }
+ else {
+ sysErrorBelch(
+ "osBindMBlocksToNode: VirtualAllocExNuma MEM_RESERVE %llu bytes "
+ "at address %p bytes failed",
+ size, addr);
+ }
+ stg_exit(EXIT_FAILURE);
+ }
+ }
+ }
}
diff --git a/rts/win32/OSThreads.c b/rts/win32/OSThreads.c
index c9b594a208..b36c3e53da 100644
--- a/rts/win32/OSThreads.c
+++ b/rts/win32/OSThreads.c
@@ -9,6 +9,7 @@
#include "Rts.h"
#include <windows.h>
+#include "sm/OSMem.h"
#if defined(THREADED_RTS)
#include "RtsUtils.h"
@@ -572,8 +573,48 @@ interruptOSThread (OSThreadId id)
CloseHandle(hdl);
}
-void setThreadNode (uint32_t node STG_UNUSED) { /* nothing */ }
-void releaseThreadNode (void) { /* nothing */ }
+/*
+ * Restrict the calling thread to the processors that belong to NUMA node
+ * `node`. Does nothing when NUMA is not available. Exits the RTS if the
+ * node's processors cannot be queried or the affinity cannot be set.
+ *
+ * SetThreadAffinityMask takes a bitmask of *processors*, so the mask is
+ * obtained from GetNumaNodeProcessorMask rather than built from the node
+ * number. That API takes the node as a UCHAR, hence the cast.
+ */
+void setThreadNode (uint32_t node)
+{
+    if (osNumaAvailable())
+    {
+        ULONGLONG nodeProcessors = 0;
+        if (!GetNumaNodeProcessorMask((UCHAR)node, &nodeProcessors))
+        {
+            sysErrorBelch(
+                "setThreadNode: Error querying processors of NUMA node `%u': %lu.",
+                node, GetLastError());
+            stg_exit(EXIT_FAILURE);
+        }
+
+        if (!SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)nodeProcessors))
+        {
+            sysErrorBelch(
+                "setThreadNode: Error setting affinity of thread to NUMA node `%u': %lu.",
+                node, GetLastError());
+            stg_exit(EXIT_FAILURE);
+        }
+    }
+}
+
+/*
+ * Undo setThreadNode: set the calling thread's affinity back to the
+ * affinity mask of the whole process. Does nothing when NUMA is not
+ * available; exits the RTS if either Win32 call fails.
+ */
+void releaseThreadNode (void)
+{
+    StgWord procAffinity;
+    StgWord sysAffinity;
+
+    if (!osNumaAvailable())
+    {
+        return;
+    }
+
+    /* Only the process mask is used; the system mask is required by the API. */
+    if (!GetProcessAffinityMask(GetCurrentProcess(), &procAffinity, &sysAffinity))
+    {
+        sysErrorBelch(
+            "releaseThreadNode: Error resetting affinity of thread: %lu",
+            GetLastError());
+        stg_exit(EXIT_FAILURE);
+    }
+
+    if (!SetThreadAffinityMask(GetCurrentThread(), procAffinity))
+    {
+        sysErrorBelch(
+            "releaseThreadNode: Error reseting NUMA affinity mask of thread: %lu.",
+            GetLastError());
+        stg_exit(EXIT_FAILURE);
+    }
+}
#else /* !defined(THREADED_RTS) */