diff options
author | Ben Gamari <ben@smart-cactus.org> | 2020-10-29 16:14:29 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2020-10-30 14:02:19 -0400 |
commit | 105d43db7bb7f399be3e5474d207e330d1b2da5a (patch) | |
tree | 7503c1b1f78e841d6439e2aec745f3b39c54bf11 /includes | |
parent | a80cc8577774a744d684cd1f513e373405cd2f2b (diff) | |
download | haskell-105d43db7bb7f399be3e5474d207e330d1b2da5a.tar.gz |
rts/SpinLock: Separate out slow path
Not only is this in general a good idea, but it turns out that GCC
unrolls the retry loop, resulting in massive code bloat in critical
parts of the RTS (e.g. `evacuate`).
Diffstat (limited to 'includes')
-rw-r--r-- | includes/rts/SpinLock.h | 15 |
1 file changed, 5 insertions, 10 deletions
diff --git a/includes/rts/SpinLock.h b/includes/rts/SpinLock.h index 0ac51455dd..c1fe6c866c 100644 --- a/includes/rts/SpinLock.h +++ b/includes/rts/SpinLock.h @@ -39,19 +39,14 @@ typedef struct SpinLock_ #define IF_PROF_SPIN(x) #endif +void acquire_spin_lock_slow_path(SpinLock * p); + // acquire spin lock INLINE_HEADER void ACQUIRE_SPIN_LOCK(SpinLock * p) { - do { - for (uint32_t i = 0; i < SPIN_COUNT; i++) { - StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0); - if (r != 0) return; - IF_PROF_SPIN(__atomic_fetch_add(&p->spin, 1, __ATOMIC_RELAXED)); - busy_wait_nop(); - } - IF_PROF_SPIN(__atomic_fetch_add(&p->yield, 1, __ATOMIC_RELAXED)); - yieldThread(); - } while (1); + StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0); + if (RTS_UNLIKELY(r == 0)) + acquire_spin_lock_slow_path(p); } // release spin lock |