summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCheng Shao <terrorjack@type.dance>2023-02-11 17:24:02 +0000
committerMarge Bot <ben+marge-bot@smart-cactus.org>2023-02-15 00:17:53 -0500
commit9ca51f9e84abc41ba590203d8bc8df8d6af86db2 (patch)
tree15920fff710ab7b5651f5728ea65dbcacd363016
parent79d8fd6581af62e72727337001029533bf55e64f (diff)
downloadhaskell-9ca51f9e84abc41ba590203d8bc8df8d6af86db2.tar.gz
rts: add the rts_clearMemory function
This patch adds the rts_clearMemory function that does its best to zero out unused RTS memory for a wasm backend use case. See the comment above rts_clearMemory() prototype declaration for more detailed explanation. Closes #22920.
-rw-r--r--rts/RtsSymbols.c1
-rw-r--r--rts/include/RtsAPI.h45
-rw-r--r--rts/sm/BlockAlloc.c14
-rw-r--r--rts/sm/BlockAlloc.h2
-rw-r--r--rts/sm/NonMoving.h4
-rw-r--r--rts/sm/NonMovingSweep.c8
-rw-r--r--rts/sm/Storage.c43
-rw-r--r--rts/sm/Storage.h4
-rw-r--r--testsuite/tests/ffi/should_run/ffi023_c.c1
9 files changed, 118 insertions, 4 deletions
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index a95a1b1231..974f2dbd40 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -925,6 +925,7 @@ extern char **environ;
SymI_HasProto(newArena) \
SymI_HasProto(arenaAlloc) \
SymI_HasProto(arenaFree) \
+ SymI_HasProto(rts_clearMemory) \
RTS_USER_SIGNALS_SYMBOLS \
RTS_INTCHAR_SYMBOLS
diff --git a/rts/include/RtsAPI.h b/rts/include/RtsAPI.h
index 3bede10069..63a0f99fee 100644
--- a/rts/include/RtsAPI.h
+++ b/rts/include/RtsAPI.h
@@ -599,6 +599,51 @@ extern StgWord base_GHCziTopHandler_runNonIO_closure[];
/* ------------------------------------------------------------------------ */
+// This is a public RTS API function that does its best to zero out
+// unused RTS memory. rts_clearMemory() takes the storage manager
+// lock. It's only safe to call rts_clearMemory() when all mutators
+// have stopped and either minor/major garbage collection has just
+// been run.
+//
+// rts_clearMemory() works for all RTS ways on all platforms, though
+// the main intended use case is the pre-initialization of a
+// wasm32-wasi reactor module (#22920). A reactor module is like
+// shared library on other platforms, with foreign exported Haskell
+// functions as entrypoints. At run-time, the user calls hs_init_ghc()
+// to initialize the RTS, after that they can invoke Haskell
+// computation by calling the exported Haskell functions, persisting
+// the memory state across these invocations.
+//
+// Besides hs_init_ghc(), the user may want to invoke some Haskell
+// function to initialize some global state in the user code, this
+// global state is used by subsequent invocations. Now, it's possible
+// to run hs_init_ghc() & custom init logic in Haskell, then snapshot
+// the entire memory into a new wasm module! And the user can call the
+// new wasm module's exports directly, thus eliminating the
+// initialization overhead at run-time entirely.
+//
+// There's one problem though. After the custom init logic runs, the
+// RTS memory contains a lot of garbage data in various places. These
+// garbage data will be snapshotted into the new wasm module, causing
+// a significant size bloat. Therefore, we need an RTS API function
+// that zeros out unused RTS memory.
+//
+// At the end of the day, the custom init function will be a small C
+// function that first calls hs_init_ghc(), then calls a foreign
+// exported Haskell function to initialize whatever global state the
+// other Haskell functions need, followed by a hs_perform_gc() call to
+// do a major GC, and finally an rts_clearMemory() call to zero out
+// the unused RTS memory.
+//
+// Why add rts_clearMemory(), where there's the -DZ RTS flag that
+// zeros freed memory on GC? The -DZ flag actually fills freed memory
+// with a garbage byte like 0xAA, and the flag only works in debug
+// RTS. Why not add a new RTS flag that zeros freed memory on the go?
+// Because it only makes sense to do the zeroing once before
+// snapshotting the memory, but there's no point to pay for the
+// zeroing overhead at the new module's run-time.
+void rts_clearMemory(void);
+
#if defined(__cplusplus)
}
#endif
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index 257dc253ed..158698b08f 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -1395,3 +1395,17 @@ reportUnmarkedBlocks (void)
}
#endif
+
+void clear_free_list(void) {
+ for (uint32_t node = 0; node < n_numa_nodes; ++node) {
+ for (bdescr *bd = free_mblock_list[node]; bd != NULL; bd = bd->link) {
+ clear_blocks(bd);
+ }
+
+ for (int ln = 0; ln < NUM_FREE_LISTS; ++ln) {
+ for (bdescr *bd = free_list[node][ln]; bd != NULL; bd = bd->link) {
+ clear_blocks(bd);
+ }
+ }
+ }
+}
diff --git a/rts/sm/BlockAlloc.h b/rts/sm/BlockAlloc.h
index addee6cb83..5adde6c2c9 100644
--- a/rts/sm/BlockAlloc.h
+++ b/rts/sm/BlockAlloc.h
@@ -32,4 +32,6 @@ void reportUnmarkedBlocks (void);
extern W_ n_alloc_blocks; // currently allocated blocks
extern W_ hw_alloc_blocks; // high-water allocated blocks
+RTS_PRIVATE void clear_free_list(void);
+
#include "EndPrivate.h"
diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h
index d88a203dcb..37d3581882 100644
--- a/rts/sm/NonMoving.h
+++ b/rts/sm/NonMoving.h
@@ -356,6 +356,10 @@ void print_thread_list(StgTSO* tso);
#endif
+RTS_PRIVATE void clear_segment(struct NonmovingSegment*);
+
+RTS_PRIVATE void clear_segment_free_blocks(struct NonmovingSegment*);
+
#include "EndPrivate.h"
#endif // CMINUSMINUS
diff --git a/rts/sm/NonMovingSweep.c b/rts/sm/NonMovingSweep.c
index ad2b422307..2b0cd44a53 100644
--- a/rts/sm/NonMovingSweep.c
+++ b/rts/sm/NonMovingSweep.c
@@ -106,14 +106,16 @@ void nonmovingGcCafs()
debug_caf_list_snapshot = (StgIndStatic*)END_OF_CAF_LIST;
}
-static void
+#endif
+
+void
clear_segment(struct NonmovingSegment* seg)
{
size_t end = ((size_t)seg) + NONMOVING_SEGMENT_SIZE;
memset(&seg->bitmap, 0, end - (size_t)&seg->bitmap);
}
-static void
+void
clear_segment_free_blocks(struct NonmovingSegment* seg)
{
unsigned int block_size = nonmovingSegmentBlockSize(seg);
@@ -125,8 +127,6 @@ clear_segment_free_blocks(struct NonmovingSegment* seg)
}
}
-#endif
-
GNUC_ATTR_HOT void nonmovingSweep(void)
{
while (nonmovingHeap.sweep_list) {
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 40d8a45806..986f6dd15a 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -1924,3 +1924,46 @@ The compacting collector does nothing to improve megablock
level fragmentation. The role of the compacting GC is to remove object level
fragmentation and to use less memory when collecting. - see #19248
*/
+
+void rts_clearMemory(void) {
+ ACQUIRE_SM_LOCK;
+
+ clear_free_list();
+
+ for (uint32_t i = 0; i < n_nurseries; ++i) {
+ for (bdescr *bd = nurseries[i].blocks; bd; bd = bd->link) {
+ clear_blocks(bd);
+ }
+ }
+
+ for (unsigned int i = 0; i < getNumCapabilities(); ++i) {
+ for (bdescr *bd = getCapability(i)->pinned_object_empty; bd; bd = bd->link) {
+ clear_blocks(bd);
+ }
+
+ for (bdescr *bd = gc_threads[i]->free_blocks; bd; bd = bd->link) {
+ clear_blocks(bd);
+ }
+ }
+
+ if (RtsFlags.GcFlags.useNonmoving)
+ {
+ for (struct NonmovingSegment *seg = nonmovingHeap.free; seg; seg = seg->link) {
+ clear_segment(seg);
+ }
+
+ for (int i = 0; i < NONMOVING_ALLOCA_CNT; ++i) {
+ struct NonmovingAllocator *alloc = nonmovingHeap.allocators[i];
+
+ for (struct NonmovingSegment *seg = alloc->active; seg; seg = seg->link) {
+ clear_segment_free_blocks(seg);
+ }
+
+ for (unsigned int j = 0; j < getNumCapabilities(); ++j) {
+ clear_segment_free_blocks(alloc->current[j]);
+ }
+ }
+ }
+
+ RELEASE_SM_LOCK;
+}
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index 0fecc50208..faec383c8f 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -206,4 +206,8 @@ extern StgIndStatic * dyn_caf_list;
extern StgIndStatic * debug_caf_list;
extern StgIndStatic * revertible_caf_list;
+STATIC_INLINE void clear_blocks(bdescr *bd) {
+ memset(bd->start, 0, BLOCK_SIZE * bd->blocks);
+}
+
#include "EndPrivate.h"
diff --git a/testsuite/tests/ffi/should_run/ffi023_c.c b/testsuite/tests/ffi/should_run/ffi023_c.c
index a8a5a15447..979c378b7d 100644
--- a/testsuite/tests/ffi/should_run/ffi023_c.c
+++ b/testsuite/tests/ffi/should_run/ffi023_c.c
@@ -5,5 +5,6 @@
HsInt out (HsInt x)
{
performMajorGC();
+ rts_clearMemory();
return incall(x);
}