| author | Ben Gamari <ben@smart-cactus.org> | 2020-10-29 16:14:29 -0400 |
|---|---|---|
| committer | Ben Gamari <ben@smart-cactus.org> | 2020-10-30 14:02:19 -0400 |
| commit | 105d43db7bb7f399be3e5474d207e330d1b2da5a (patch) | |
| tree | 7503c1b1f78e841d6439e2aec745f3b39c54bf11 | |
| parent | a80cc8577774a744d684cd1f513e373405cd2f2b (diff) | |
rts/SpinLock: Separate out slow path
Not only is this in general a good idea, but it turns out that GCC
unrolls the retry loop, resulting in massive code bloat in critical
parts of the RTS (e.g. `evacuate`).
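In effect, the acquire is split so that only a single compare-and-swap is
inlined at each call site, while the retry loop moves behind a function call
that the compiler is told is rarely taken. A minimal standalone sketch of the
pattern, using GCC builtins in place of the RTS's `cas()` and
`busy_wait_nop()` primitives (all names here are illustrative, not GHC's):

```c
#include <stdint.h>
#include <sched.h>

#define SPIN_COUNT_SKETCH 1000

typedef struct {
    volatile uint32_t lock;   /* 0 = free, 1 = held (GHC's convention is inverted) */
} SketchSpinLock;

/* Out of line: if the compiler unrolls this retry loop, the bloat is
 * confined to this one copy rather than repeated at every call site. */
__attribute__((noinline))
static void spin_lock_slow_path(SketchSpinLock *p)
{
    for (;;) {
        for (uint32_t i = 0; i < SPIN_COUNT_SKETCH; i++) {
            if (__sync_lock_test_and_set(&p->lock, 1) == 0)
                return;            /* acquired */
        }
        sched_yield();             /* spun long enough; give up the CPU */
    }
}

/* Inlined everywhere: one atomic exchange plus a call that
 * __builtin_expect marks as rarely taken. */
static inline void spin_lock(SketchSpinLock *p)
{
    if (__builtin_expect(__sync_lock_test_and_set(&p->lock, 1) != 0, 0))
        spin_lock_slow_path(p);
}

static inline void spin_unlock(SketchSpinLock *p)
{
    __sync_lock_release(&p->lock);   /* store 0 with release semantics */
}
```

With the loop out of line, any unrolling GCC performs is paid for once, in
the single copy of the slow path, rather than at every lock acquisition in
hot code.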
| includes/rts/SpinLock.h | 15 |
|---|---|
| rts/SpinLock.c (new) | 41 |
| rts/rts.cabal.in | 1 |

3 files changed, 47 insertions(+), 10 deletions(-)
```diff
diff --git a/includes/rts/SpinLock.h b/includes/rts/SpinLock.h
index 0ac51455dd..c1fe6c866c 100644
--- a/includes/rts/SpinLock.h
+++ b/includes/rts/SpinLock.h
@@ -39,19 +39,14 @@ typedef struct SpinLock_
 #define IF_PROF_SPIN(x)
 #endif
 
+void acquire_spin_lock_slow_path(SpinLock * p);
+
 // acquire spin lock
 INLINE_HEADER void ACQUIRE_SPIN_LOCK(SpinLock * p)
 {
-    do {
-        for (uint32_t i = 0; i < SPIN_COUNT; i++) {
-            StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0);
-            if (r != 0) return;
-            IF_PROF_SPIN(__atomic_fetch_add(&p->spin, 1, __ATOMIC_RELAXED));
-            busy_wait_nop();
-        }
-        IF_PROF_SPIN(__atomic_fetch_add(&p->yield, 1, __ATOMIC_RELAXED));
-        yieldThread();
-    } while (1);
+    StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0);
+    if (RTS_UNLIKELY(r == 0))
+        acquire_spin_lock_slow_path(p);
 }
 
 // release spin lock
diff --git a/rts/SpinLock.c b/rts/SpinLock.c
new file mode 100644
index 0000000000..314918257f
--- /dev/null
+++ b/rts/SpinLock.c
@@ -0,0 +1,41 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2006-2009
+ *
+ * Spin locks
+ *
+ * These are simple spin-only locks as opposed to Mutexes which
+ * probably spin for a while before blocking in the kernel.  We use
+ * these when we are sure that all our threads are actively running on
+ * a CPU, eg. in the GC.
+ *
+ * TODO: measure whether we really need these, or whether Mutexes
+ * would do (and be a bit safer if a CPU becomes loaded).
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#include "PosixSource.h"
+#include "Rts.h"
+
+#if defined(THREADED_RTS)
+
+void acquire_spin_lock_slow_path(SpinLock * p)
+{
+    do {
+        for (uint32_t i = 0; i < SPIN_COUNT; i++) {
+            StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0);
+            if (r != 0) return;
+            IF_PROF_SPIN(__atomic_fetch_add(&p->spin, 1, __ATOMIC_RELAXED));
+            busy_wait_nop();
+        }
+        IF_PROF_SPIN(__atomic_fetch_add(&p->yield, 1, __ATOMIC_RELAXED));
+        yieldThread();
+    } while (1);
+}
+
+#endif
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index 08ebd3d7bf..12a4d68e4a 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -462,6 +462,7 @@ library
       STM.c
       Schedule.c
       Sparks.c
+      SpinLock.c
       StableName.c
       StablePtr.c
       StaticPtrTable.c
```
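For context, a hypothetical use site (not part of this commit) might look like
the following; it assumes the `initSpinLock`/`ACQUIRE_SPIN_LOCK`/`RELEASE_SPIN_LOCK`
API from `includes/rts/SpinLock.h`, and the counter names are invented:

```c
#include "Rts.h"   /* pulls in includes/rts/SpinLock.h in a THREADED_RTS build */

#if defined(THREADED_RTS)
static SpinLock counter_lock;    /* initialised elsewhere with initSpinLock(&counter_lock) */
static StgWord shared_counter;

void bump_shared_counter(void)
{
    ACQUIRE_SPIN_LOCK(&counter_lock);
    shared_counter++;                 /* keep the held region tiny */
    RELEASE_SPIN_LOCK(&counter_lock);
}
#endif
```

After this commit, only the fast-path CAS is generated inline at each such
call site; the unrolled retry loop lives in the single out-of-line copy in
rts/SpinLock.c.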