Diffstat (limited to 'rts/sm')
-rw-r--r--  rts/sm/HeapAlloc.h | 28
-rw-r--r--  rts/sm/MBlock.c    | 26
-rw-r--r--  rts/sm/OSMem.h     | 10
3 files changed, 37 insertions(+), 27 deletions(-)
diff --git a/rts/sm/HeapAlloc.h b/rts/sm/HeapAlloc.h
index f2760bdaf7..99a62f9023 100644
--- a/rts/sm/HeapAlloc.h
+++ b/rts/sm/HeapAlloc.h
@@ -34,12 +34,12 @@
On 64-bit machines, we have two possibilities. One is to request
a single chunk of address space that we deem "large enough"
- (currently 1TB, could easily be extended to, say 16TB or more).
- Memory from that chunk is GC memory, everything else is not. This
- case is tricky in that it requires support from the OS to allocate
- address space without allocating memory (in practice, all modern
- OSes do this). It's also tricky in that it is the only case where
- a successful HEAP_ALLOCED(p) check can trigger a segfault when
+ (currently 1TB or the ulimit size, whichever is smaller, although this could
+ easily be extended to, say, 16TB or more). Memory from that chunk is GC
+ memory, everything else is not. This case is tricky in that it requires
+ support from the OS to allocate address space without allocating memory (in
+ practice, all modern OSes do this). It's also tricky in that it is the only
+ case where a successful HEAP_ALLOCED(p) check can trigger a segfault when
accessing p (and for debugging purposes, it will).
Alternatively, the older implementation caches one 12-bit block map
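
The "allocate address space without allocating memory" trick described in this
comment maps directly onto POSIX primitives: a PROT_NONE, MAP_NORESERVE mapping
claims address space with no backing memory, and pages are committed later by
re-protecting them. A minimal sketch under that assumption; reserveRange and
commitRange are hypothetical illustrations, not RTS functions:

    #include <stddef.h>
    #include <sys/mman.h>

    /* Reserve 'len' bytes of address space with no backing memory.
       Touching these pages before committing them segfaults, which is
       exactly the HEAP_ALLOCED caveat described in the comment above. */
    static void *reserveRange(size_t len)
    {
        void *p = mmap(NULL, len, PROT_NONE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
        return p == MAP_FAILED ? NULL : p;
    }

    /* Commit a sub-range of the reservation so it can be accessed. */
    static int commitRange(void *addr, size_t len)
    {
        return mprotect(addr, len, PROT_READ | PROT_WRITE);
    }
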
@@ -51,16 +51,14 @@
#ifdef USE_LARGE_ADDRESS_SPACE
-extern W_ mblock_address_space_begin;
-#if aarch64_HOST_ARCH
-# define MBLOCK_SPACE_SIZE ((StgWord)1 << 38) /* 1/4 TB */
-#else
-# define MBLOCK_SPACE_SIZE ((StgWord)1 << 40) /* 1 TB */
-#endif
+struct mblock_address_range {
+ W_ begin, end;
+ W_ padding[6]; // ensure nothing else inhabits this cache line
+} ATTRIBUTE_ALIGNED(64);
+extern struct mblock_address_range mblock_address_space;
-# define HEAP_ALLOCED(p) ((W_)(p) >= mblock_address_space_begin && \
- (W_)(p) < (mblock_address_space_begin + \
- MBLOCK_SPACE_SIZE))
+# define HEAP_ALLOCED(p) ((W_)(p) >= mblock_address_space.begin && \
+ (W_)(p) < (mblock_address_space.end))
# define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p)
#elif SIZEOF_VOID_P == 4
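
The padded struct introduced above keeps begin and end on one cache line of
their own, so the HEAP_ALLOCED fast path reads a single line and no unrelated
write can invalidate it. The layout can be sanity-checked as below; this is a
sketch with uintptr_t standing in for W_ and a GCC/Clang alignment attribute
standing in for ATTRIBUTE_ALIGNED(64):

    #include <stdint.h>

    typedef uintptr_t W_;   /* stand-in for the RTS word type */

    struct mblock_address_range {
        W_ begin, end;
        W_ padding[6];      /* 2 + 6 words = 64 bytes on a 64-bit target */
    } __attribute__((aligned(64)));

    /* The struct fills exactly one 64-byte cache line, so nothing else
       can share (and falsely contend for) the line HEAP_ALLOCED reads. */
    _Static_assert(sizeof(struct mblock_address_range) == 64,
                   "mblock_address_range should occupy one cache line");
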
diff --git a/rts/sm/MBlock.c b/rts/sm/MBlock.c
index 35a11bf589..e1daa71e2f 100644
--- a/rts/sm/MBlock.c
+++ b/rts/sm/MBlock.c
@@ -96,7 +96,12 @@ typedef struct free_list {
static free_list *free_list_head;
static W_ mblock_high_watermark;
-W_ mblock_address_space_begin = 0;
+/*
+ * It is quite important that these are in the same cache line, as they
+ * are both needed by HEAP_ALLOCED. Moreover, we need to ensure that they
+ * don't share a cache line with anything else to prevent false sharing.
+ */
+struct mblock_address_range mblock_address_space = { 0, 0, {} };
static void *getAllocatedMBlock(free_list **start_iter, W_ startingAt)
{
@@ -131,7 +136,7 @@ void * getFirstMBlock(void **state STG_UNUSED)
casted_state = &fake_state;
*casted_state = free_list_head;
- return getAllocatedMBlock(casted_state, mblock_address_space_begin);
+ return getAllocatedMBlock(casted_state, mblock_address_space.begin);
}
void * getNextMBlock(void **state STG_UNUSED, void *mblock)
@@ -190,8 +195,7 @@ static void *getFreshMBlocks(nat n)
W_ size = MBLOCK_SIZE * (W_)n;
void *addr = (void*)mblock_high_watermark;
- if (mblock_high_watermark + size >
- mblock_address_space_begin + MBLOCK_SPACE_SIZE)
+ if (mblock_high_watermark + size > mblock_address_space.end)
{
// whoa, 1 TB of heap?
errorBelch("out of memory");
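
With begin and end packaged together, the fresh-mblock path is a plain bump
allocator over the reserved range: check the watermark against end, commit the
pages, advance. A sketch of that pattern using the diff's globals;
bumpFreshMBlocks is a hypothetical name, and osCommitMemory's (addr, size)
shape is assumed from the OSMem.h comments further down:

    /* Bump-allocate 'size' bytes from the reserved address range. */
    static void *bumpFreshMBlocks(W_ size)
    {
        if (mblock_high_watermark + size > mblock_address_space.end)
            return NULL;                    /* reserved space exhausted */
        void *addr = (void *)mblock_high_watermark;
        osCommitMemory(addr, size);         /* back the pages with memory */
        mblock_high_watermark += size;      /* advance the watermark */
        return addr;
    }
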
@@ -611,7 +615,8 @@ freeAllMBlocks(void)
osReleaseHeapMemory();
- mblock_address_space_begin = (W_)-1;
+ mblock_address_space.begin = (W_)-1;
+ mblock_address_space.end = (W_)-1;
mblock_high_watermark = (W_)-1;
#else
osFreeAllMBlocks();
@@ -634,9 +639,16 @@ initMBlocks(void)
#ifdef USE_LARGE_ADDRESS_SPACE
{
- void *addr = osReserveHeapMemory();
+ W_ size;
+#if aarch64_HOST_ARCH
+ size = (W_)1 << 38; // 1/4 TByte
+#else
+ size = (W_)1 << 40; // 1 TByte
+#endif
+ void *addr = osReserveHeapMemory(size);
- mblock_address_space_begin = (W_)addr;
+ mblock_address_space.begin = (W_)addr;
+ mblock_address_space.end = (W_)addr + size;
mblock_high_watermark = (W_)addr;
}
#elif SIZEOF_VOID_P == 8
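
The per-architecture sizes are straight powers of two, matching the comments:
2^38 bytes is 256 GiB (the "1/4 TByte" above) and 2^40 bytes is 1 TiB. A pair
of hypothetical compile-time checks of that arithmetic:

    _Static_assert((1ULL << 38) == 256ULL * 1024 * 1024 * 1024,
                   "2^38 bytes = 256 GiB = 1/4 TiB");
    _Static_assert((1ULL << 40) == 1024ULL * 1024 * 1024 * 1024,
                   "2^40 bytes = 1 TiB");
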
diff --git a/rts/sm/OSMem.h b/rts/sm/OSMem.h
index 9a6ccdd7ec..6bcaf65b10 100644
--- a/rts/sm/OSMem.h
+++ b/rts/sm/OSMem.h
@@ -29,13 +29,13 @@ void setExecutable (void *p, W_ len, rtsBool exec);
we will ever need, which keeps everything nice and consecutive.
*/
-// Reserve the large address space blob, and return the address that
-// the OS has chosen for it. It is not safe to access the memory
-// pointed to by the return value, until that memory is committed
-// using osCommitMemory().
+// Reserve the large address space blob of the given size, and return the
+// address that the OS has chosen for it. It is not safe to access the memory
+// pointed to by the return value, until that memory is committed using
+// osCommitMemory().
//
// This function is called once when the block allocator is initialized.
-void *osReserveHeapMemory(void);
+void *osReserveHeapMemory(W_ len);
// Commit (allocate memory for) a piece of address space, which must
// be within the previously reserved space. After this call, it is safe
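
The comment in HeapAlloc.h above says the reservation is clamped to the ulimit
when that is smaller. One plausible way a caller could compute the length to
pass to osReserveHeapMemory is to consult the address-space rlimit; this
helper is purely illustrative (clampToUlimit is not an RTS function, and
RLIMIT_AS is the POSIX address-space limit behind "ulimit -v"):

    #include <stdint.h>
    #include <sys/resource.h>

    typedef uintptr_t W_;   /* stand-in for the RTS word type */

    /* Clamp the desired reservation to the process address-space limit. */
    static W_ clampToUlimit(W_ desired)
    {
        struct rlimit lim;
        if (getrlimit(RLIMIT_AS, &lim) == 0
            && lim.rlim_cur != RLIM_INFINITY
            && (W_)lim.rlim_cur < desired)
            return (W_)lim.rlim_cur;
        return desired;
    }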