author     Ben Gamari <ben@well-typed.com>             2022-01-20 15:17:10 -0500
committer  Marge Bot <ben+marge-bot@smart-cactus.org>  2022-02-06 01:43:56 -0500
commit     2e9248b7f7f645851ceb49931d10b9c5e58d2bbb (patch)
tree       84c704f5a9655ee15d3c923bf56713bd8c1be41c
parent     549292eb3725eca61722ddd2cfb4d964ccba3fc7 (diff)
rts/m32: Accept any address within 4GB of program text
Previously m32 assumed that the program image was located near the start of the address space and therefore insisted on pages in the bottom 4GB of the address space. Instead, we now check whether candidate pages lie within 4GB of wherever the program is actually loaded. This is necessary on Windows, which now tends to place the image in high memory. The eventual goal is to use m32 to allocate memory for linker sections on Windows.
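
In essence, the acceptability test changes from an absolute bound ("below 4GB") to a displacement bound relative to a symbol baked into the program image. A minimal sketch of the two criteria, assuming a hypothetical image_anchor symbol standing in for the LINKER_LOAD_BASE anchor the patch introduces (an illustration, not the RTS code itself):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Old criterion: the mapping must sit in the bottom 4GB of the address
 * space, regardless of where the program image was loaded. */
static bool old_ok(void *p)
{
    return (uintptr_t) p <= 0xffffffffULL;
}

/* New criterion: the mapping must be reachable from the program image via a
 * signed 32-bit displacement, i.e. lie within roughly +-2GB of an anchor
 * symbol. image_anchor is a hypothetical stand-in; the patch uses
 * &stg_upd_frame_info via LINKER_LOAD_BASE. */
static char image_anchor[1];

static bool new_ok(void *p)
{
    ptrdiff_t d = (char *) p - image_anchor;
    return d > -0x7fffffff && d < 0x7fffffff;
}

int main(void)
{
    /* An address right next to the image is always acceptable under the new
     * rule; under the old rule it is acceptable only if the image happened
     * to be loaded below 4GB, which high-entropy ASLR on Windows defeats. */
    void *nearby = (void *) ((uintptr_t) image_anchor + 4096);
    printf("old_ok=%d new_ok=%d\n", old_ok(nearby), new_ok(nearby));
    return 0;
}
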
-rw-r--r--  rts/Linker.c            57
-rw-r--r--  rts/LinkerInternals.h   60
-rw-r--r--  rts/linker/M32Alloc.c   27
3 files changed, 78 insertions(+), 66 deletions(-)
diff --git a/rts/Linker.c b/rts/Linker.c
index 6c13213092..c79f184f23 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -171,62 +171,7 @@ Mutex linker_mutex;
/* Generic wrapper function to try and Resolve and RunInit oc files */
int ocTryLoad( ObjectCode* oc );
-/* Link objects into the lower 2Gb on x86_64 and AArch64. GHC assumes the
- * small memory model on this architecture (see gcc docs,
- * -mcmodel=small).
- *
- * MAP_32BIT not available on OpenBSD/amd64
- */
-#if defined(MAP_32BIT) && (defined(x86_64_HOST_ARCH) || (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH)))
-#define MAP_LOW_MEM
-#define TRY_MAP_32BIT MAP_32BIT
-#else
-#define TRY_MAP_32BIT 0
-#endif
-
-#if defined(aarch64_HOST_ARCH)
-// On AArch64 MAP_32BIT is not available but we are still bound by the small
-// memory model. Consequently we still try using the MAP_LOW_MEM allocation
-// strategy.
-#define MAP_LOW_MEM
-#endif
-
-/*
- * Note [MAP_LOW_MEM]
- * ~~~~~~~~~~~~~~~~~~
- * Due to the small memory model (see above), on x86_64 and AArch64 we have to
- * map all our non-PIC object files into the low 2Gb of the address space (why
- * 2Gb and not 4Gb? Because all addresses must be reachable using a 32-bit
- * signed PC-relative offset). On x86_64 Linux we can do this using the
- * MAP_32BIT flag to mmap(), however on other OSs (e.g. *BSD, see #2063, and
- * also on Linux inside Xen, see #2512), we can't do this. So on these
- * systems, we have to pick a base address in the low 2Gb of the address space
- * and try to allocate memory from there.
- *
- * The same holds for aarch64, where the default, even with PIC, model
- * is 4GB. The linker is free to emit AARCH64_ADR_PREL_PG_HI21
- * relocations.
- *
- * We pick a default address based on the OS, but also make this
- * configurable via an RTS flag (+RTS -xm)
- */
-
-#if (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH))
-// Try to use stg_upd_frame_info as the base. We need to be within +-4GB of that
-// address, otherwise we violate the aarch64 memory model. Any object we load
-// can potentially reference any of the ones we bake into the binary (and list)
-// in RtsSymbols. Thus we'll need to be within +-4GB of those,
-// stg_upd_frame_info is a good candidate as it's referenced often.
-#define MMAP_32BIT_BASE_DEFAULT (void*)&stg_upd_frame_info;
-#elif defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC
-// Try to use MAP_32BIT
-#define MMAP_32BIT_BASE_DEFAULT 0
-#else
-// A guess: 1Gb.
-#define MMAP_32BIT_BASE_DEFAULT 0x40000000
-#endif
-
-static void *mmap_32bit_base = (void *)MMAP_32BIT_BASE_DEFAULT;
+static void *mmap_32bit_base = LINKER_LOAD_BASE;
static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key,
ObjectCode *owner)
diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h
index f3d918e355..d2836310cb 100644
--- a/rts/LinkerInternals.h
+++ b/rts/LinkerInternals.h
@@ -434,6 +434,66 @@ resolveSymbolAddr (pathchar* buffer, int size,
#define USE_CONTIGUOUS_MMAP 0
#endif
+/* Link objects into the lower 2Gb on x86_64 and AArch64. GHC assumes the
+ * small memory model on this architecture (see gcc docs,
+ * -mcmodel=small).
+ *
+ * MAP_32BIT not available on OpenBSD/amd64
+ */
+#if defined(MAP_32BIT) && (defined(x86_64_HOST_ARCH) || (defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH)))
+#define MAP_LOW_MEM
+#define TRY_MAP_32BIT MAP_32BIT
+#else
+#define TRY_MAP_32BIT 0
+#endif
+
+#if defined(aarch64_HOST_ARCH)
+// On AArch64 MAP_32BIT is not available but we are still bound by the small
+// memory model. Consequently we still try using the MAP_LOW_MEM allocation
+// strategy.
+#define MAP_LOW_MEM
+#endif
+
+/*
+ * Note [MAP_LOW_MEM]
+ * ~~~~~~~~~~~~~~~~~~
+ * Due to the small memory model (see above), on x86_64 and AArch64 we have to
+ * map all our non-PIC object files into the low 2Gb of the address space (why
+ * 2Gb and not 4Gb? Because all addresses must be reachable using a 32-bit
+ * signed PC-relative offset). On x86_64 Linux we can do this using the
+ * MAP_32BIT flag to mmap(), however on other OSs (e.g. *BSD, see #2063, and
+ * also on Linux inside Xen, see #2512), we can't do this. So on these
+ * systems, we have to pick a base address in the low 2Gb of the address space
+ * and try to allocate memory from there.
+ *
+ * The same holds for aarch64, where the default, even with PIC, model
+ * is 4GB. The linker is free to emit AARCH64_ADR_PREL_PG_HI21
+ * relocations.
+ *
+ * We pick a default address based on the OS, but also make this
+ * configurable via an RTS flag (+RTS -xm)
+ */
+
+#if defined(aarch64_TARGET_ARCH) || defined(aarch64_HOST_ARCH)
+// Try to use stg_upd_frame_info as the base. We need to be within +-4GB of that
+// address, otherwise we violate the aarch64 memory model. Any object we load
+// can potentially reference any of the ones we bake into the binary (and list)
+// in RtsSymbols. Thus we'll need to be within +-4GB of those,
+// stg_upd_frame_info is a good candidate as it's referenced often.
+#define LINKER_LOAD_BASE ((void *) &stg_upd_frame_info)
+#elif defined(x86_64_HOST_ARCH) && defined(mingw32_HOST_OS)
+// On Windows (which now uses high-entropy ASLR by default) we need to ensure
+// that we map code near the executable image. We use stg_upd_frame_info as a
+// proxy for the image location.
+#define LINKER_LOAD_BASE ((void *) &stg_upd_frame_info)
+#elif defined(MAP_32BIT) || DEFAULT_LINKER_ALWAYS_PIC
+// Try to use MAP_32BIT
+#define LINKER_LOAD_BASE ((void *) 0x0)
+#else
+// A guess: 1 GB.
+#define LINKER_LOAD_BASE ((void *) 0x40000000)
+#endif
+
HsInt isAlreadyLoaded( pathchar *path );
OStatus getObjectLoadStatus_ (pathchar *path);
HsInt loadOc( ObjectCode* oc );
diff --git a/rts/linker/M32Alloc.c b/rts/linker/M32Alloc.c
index 69613d8d7c..6d42ea3599 100644
--- a/rts/linker/M32Alloc.c
+++ b/rts/linker/M32Alloc.c
@@ -145,6 +145,14 @@ The allocator is *not* thread-safe.
/* Upper bound on the number of pages to keep in the free page pool */
#define M32_MAX_FREE_PAGE_POOL_SIZE 64
+/* A utility to verify that a given address is "acceptable" for use by m32. */
+static bool
+is_okay_address(void *p) {
+ int8_t *here = LINKER_LOAD_BASE;
+ ssize_t displacement = (int8_t *) p - here;
+ return (displacement > -0x7fffffff) && (displacement < 0x7fffffff);
+}
+
/**
* Page header
*
@@ -157,8 +165,7 @@ struct m32_page_t {
// unprotected_list or protected_list are linked together with this field.
struct {
uint32_t size;
- uint32_t next; // this is a m32_page_t*, truncated to 32-bits. This is safe
- // as we are only allocating in the bottom 32-bits
+ struct m32_page_t *next;
} filled_page;
// Pages in the small-allocation nursery encode their current allocation
@@ -175,10 +182,10 @@ struct m32_page_t {
static void
m32_filled_page_set_next(struct m32_page_t *page, struct m32_page_t *next)
{
- if (next > (struct m32_page_t *) 0xffffffff) {
- barf("m32_filled_page_set_next: Page not in lower 32-bits");
+ if (! is_okay_address(next)) {
+ barf("m32_filled_page_set_next: Page not within 4GB of program text");
}
- page->filled_page.next = (uint32_t) (uintptr_t) next;
+ page->filled_page.next = next;
}
static struct m32_page_t *
@@ -242,8 +249,8 @@ m32_alloc_page(void)
const size_t pgsz = getPageSize();
const size_t map_sz = pgsz * M32_MAP_PAGES;
uint8_t *chunk = mmapAnonForLinker(map_sz);
- if (chunk + map_sz > (uint8_t *) 0xffffffff) {
- barf("m32_alloc_page: failed to get allocation in lower 32-bits");
+ if (! is_okay_address(chunk + map_sz)) {
+ barf("m32_alloc_page: failed to allocate pages within 4GB of program text (got %p)", chunk);
}
#define GET_PAGE(i) ((struct m32_page_t *) (chunk + (i) * pgsz))
@@ -389,9 +396,9 @@ m32_alloc(struct m32_allocator_t *alloc, size_t size, size_t alignment)
if (page == NULL) {
sysErrorBelch("m32_alloc: Failed to map pages for %zd bytes", size);
return NULL;
- } else if (page > (struct m32_page_t *) 0xffffffff) {
- debugBelch("m32_alloc: warning: Allocation of %zd bytes resulted in pages above 4GB (%p)",
- size, page);
+ } else if (! is_okay_address(page)) {
+ barf("m32_alloc: warning: Allocation of %zd bytes resulted in pages above 4GB (%p)",
+ size, page);
}
page->filled_page.size = alsize + size;
m32_allocator_push_filled_list(&alloc->unprotected_list, (struct m32_page_t *) page);
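
As background for the 4GB window used by is_okay_address and Note [MAP_LOW_MEM]: a 32-bit signed PC-relative relocation can only express displacements of roughly +-2GB, so a reference site and its target must share a 4GB window. A small standalone illustration of that encode/decode round-trip (hypothetical helper names, not RTS code):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Encode a reference to 'target' as a signed 32-bit displacement from
 * 'site', as a small-code-model relocation would. Bails out if the two
 * addresses are more than ~2GB apart. */
static int32_t encode_rel32(const char *site, const char *target)
{
    ptrdiff_t d = target - site;
    if (d < INT32_MIN || d > INT32_MAX) {
        fprintf(stderr, "target not reachable with a 32-bit offset\n");
        exit(EXIT_FAILURE);
    }
    return (int32_t) d;
}

/* Decoding sign-extends the displacement back to a full-width address. */
static const char *decode_rel32(const char *site, int32_t rel)
{
    return site + rel;
}

int main(void)
{
    static char blob[64];                 /* both addresses in one object */
    const char *site   = &blob[0];
    const char *target = &blob[48];
    int32_t rel = encode_rel32(site, target);
    printf("round-trip ok: %d\n", decode_rel32(site, rel) == target);
    return 0;
}

This is why the RTS only needs its mappings to fall within +-2GB of a symbol in the program text rather than literally below 4GB: as long as every loaded object sits in that window around stg_upd_frame_info (or the image on Windows), every 32-bit relative reference between them can be resolved.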