summaryrefslogtreecommitdiff
path: root/rts/SpinLock.c
diff options
context:
space:
mode:
authorDylan Yudaken <dylany@fb.com>2021-01-14 12:02:52 +0000
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-02-22 18:27:02 -0500
commita1c85db1ae5b85b7bd16f75642662c698b90b7c1 (patch)
treec90caf870abba5aaa9f1cff2896049a05f8a5d00 /rts/SpinLock.c
parentfd0945b7bfa1e36ca79d74f8e6e0918a66d62608 (diff)
downloadhaskell-a1c85db1ae5b85b7bd16f75642662c698b90b7c1.tar.gz
Do not cas on slowpath of SpinLock unnecessarily
This is a well-known technique for reducing inter-CPU bus traffic while waiting for the lock: by reducing the number of writes, fewer cache-line invalidations are triggered.
Diffstat (limited to 'rts/SpinLock.c')
-rw-r--r--rts/SpinLock.c38
1 file changed, 35 insertions, 3 deletions
diff --git a/rts/SpinLock.c b/rts/SpinLock.c
index 5289694aa7..b362d89838 100644
--- a/rts/SpinLock.c
+++ b/rts/SpinLock.c
@@ -24,13 +24,45 @@
#if defined(THREADED_RTS)
+#if defined(PROF_SPIN)
+
+ATTR_ALWAYS_INLINE static inline bool try_acquire_spin_slow_path(SpinLock * p)
+{
+ StgWord r;
+ r = cas((StgVolatilePtr)&(p->lock), 1, 0);
+ if (r == 0) RELAXED_ADD(&p->spin, 1);
+ return r != 0;
+}
+
+#else /* !PROF_SPIN */
+
+ATTR_ALWAYS_INLINE static inline bool try_acquire_spin_slow_path(SpinLock * p)
+{
+ StgWord r;
+ // Note
+ //
+ // Here we first check if we can obtain the lock without trying to cas.
+ // The cas instruction will add extra inter-CPU traffic on most CPU
+ // architectures as it has to invalidate cache lines. Rather than adding
+ // this traffic in the spin loop, we rather restrict it to times when the
+ // lock might be available.
+ //
+ // We do not need to do this when PROF_SPIN is enabled, since we write to
+ // the lock in both cases (acquired/not acquired).
+ r = RELAXED_LOAD(&p->lock);
+ if (r != 0) {
+ r = cas((StgVolatilePtr)&(p->lock), 1, 0);
+ }
+ return r != 0;
+}
+
+#endif
+
void acquire_spin_lock_slow_path(SpinLock * p)
{
do {
for (uint32_t i = 0; i < SPIN_COUNT; i++) {
- StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0);
- if (r != 0) return;
- IF_PROF_SPIN(RELAXED_ADD(&p->spin, 1));
+ if (try_acquire_spin_slow_path(p)) return;
busy_wait_nop();
}
IF_PROF_SPIN(RELAXED_ADD(&p->yield, 1));