Thread.yield fairness with multiple busy yielding threads (#2112)

Thread.yield invoked a trivial blocking section, which basically woke up a competitor and then raced with them to get the ocaml lock back, invoking nanosleep() to help guarantee that the yielder would lose the race. However, until the yielder woke up again and attempted to take the ocaml lock, it wouldn't be marked as a waiter. As a result, if two threads A and B yielded to each other in a tight loop, A's first yield would work well, but then B would execute 10000+ iterations of the loop before A could mark itself as a waiter and be yielded to. This works even worse if A and B are pinned to the same CPU, in which case A can't be marked as a waiter until the kernel preempts B, which can take tens or hundreds of milliseconds! So we reimplement yield; instead of dropping the lock and taking it again (with a wait in the middle), atomically wake a competitor and mark the yielding thread as a waiter. (We essentially inline a failed masterlock_acquire into masterlock_release, specialized for the case where we know another waiter exists and we want them to run instead.) Now, threads yielding to each other very consistently succeed--in that same tight loop, we see a change of control on every iteration (with some very rare exceptions, most likely from other uncommon blocking region invocations.) This also means we don't have to worry about the vagaries of kernel scheduling and whether or not a yielding or a yielded-to thread gets to run first; we consistently let a competing thread run whenever we yield, which is what the API claims to do.
author: ahh <andrewhhunter@gmail.com> 2019-02-08 11:18:27 -0500
committer: Damien Doligez <damien.doligez@gmail.com> 2019-02-08 17:18:27 +0100
commit: 7a27cd56b1b3e4e0b1653f5870d1c397d03f5d30 (patch)
tree: 50bb9fb2d11d7e497f6827c862fe2ab9b1539374
parent: 2b2670e72aaa0ad8fd0b2e881e01370694aba37c (diff)
download: ocaml-7a27cd56b1b3e4e0b1653f5870d1c397d03f5d30.tar.gz
6 files changed, 118 insertions, 26 deletions
diff --git a/Changes b/Changes
index 45ff489076..4ecd8a0b21 100644
--- a/Changes
+++ b/Changes
@@ -687,6 +687,11 @@ OCaml 4.08.0
 * GPR#2173: removed TypedtreeMap
   (Thomas Refis, review by Gabriel Scherer)
 
+- GPR#2112: Fix Thread.yield unfairness with busy threads yielding to each
+  other.
+  (Andrew Hunter, review by Jacques-Henri Jourdan, Spiros Eliopoulos, Stephen
+  Weeks, & Mark Shinwell)
+
 - MPR#7867: Fix #mod_use raising an exception for filenames with no
   extension.
   (Geoff Gole)
diff --git a/otherlibs/systhreads/st_posix.h b/otherlibs/systhreads/st_posix.h
index 1bee21b80a..61ce30bacf 100644
--- a/otherlibs/systhreads/st_posix.h
+++ b/otherlibs/systhreads/st_posix.h
@@ -15,6 +15,7 @@
 
 /* POSIX thread implementation of the "st" interface */
 
+#include <assert.h>
 #include <errno.h>
 #include <string.h>
 #include <stdio.h>
@@ -91,22 +92,6 @@ static void st_thread_join(st_thread_id thr)
   /* best effort: ignore errors */
 }
 
-/* Scheduling hints */
-
-static INLINE void st_thread_yield(void)
-{
-#ifdef __linux__
-  /* sched_yield() doesn't do what we want in Linux 2.6 and up (PR#2663) */
-  /* but not doing anything here would actually disable preemption (PR#7669) */
-  struct timespec t;
-  t.tv_sec = 0;
-  t.tv_nsec = 1;
-  nanosleep(&t, NULL);
-#else
-  sched_yield();
-#endif
-}
-
 /* Thread-specific state */
 
 typedef pthread_key_t st_tlskey;
@@ -171,6 +156,44 @@ static INLINE int st_masterlock_waiters(st_masterlock * m)
   return m->waiters;
 }
 
+/* Scheduling hints */
+
+/* This is mostly equivalent to release(); acquire(), but better. In particular,
+   release(); acquire(); leaves both us and the waiter we signal() racing to
+   acquire the lock. Calling yield or sleep helps there but does not solve the
+   problem. Sleeping ourselves is much more reliable--and since we're handing
+   off the lock to a waiter we know exists, it's safe, as they'll certainly
+   re-wake us later.
+*/
+static INLINE void st_thread_yield(st_masterlock * m)
+{
+  pthread_mutex_lock(&m->lock);
+  /* We must hold the lock to call this. */
+  assert(m->busy);
+
+  /* We already checked this without the lock, but we might have raced--if
+     there's no waiter, there's nothing to do and no one to wake us if we did
+     wait, so just keep going. */
+  if (m->waiters == 0) {
+    pthread_mutex_unlock(&m->lock);
+    return;
+  }
+
+  m->busy = 0;
+  pthread_cond_signal(&m->is_free);
+  m->waiters++;
+  do {
+    /* Note: the POSIX spec prevents the above signal from pairing with this
+       wait, which is good: we'll reliably continue waiting until the next
+       yield() or enter_blocking_section() call (or we see a spurious condvar
+       wakeup, which are rare at best.) */
+       pthread_cond_wait(&m->is_free, &m->lock);
+  } while (m->busy);
+  m->busy = 1;
+  m->waiters--;
+  pthread_mutex_unlock(&m->lock);
+}
+
 /* Mutexes */
 
 typedef pthread_mutex_t * st_mutex;
diff --git a/otherlibs/systhreads/st_stubs.c b/otherlibs/systhreads/st_stubs.c
index c751ffbcf1..db38b0168d 100644
--- a/otherlibs/systhreads/st_stubs.c
+++ b/otherlibs/systhreads/st_stubs.c
@@ -737,9 +737,19 @@ CAMLprim value caml_thread_exit(value unit)   /* ML */
 CAMLprim value caml_thread_yield(value unit)        /* ML */
 {
   if (st_masterlock_waiters(&caml_master_lock) == 0) return Val_unit;
-  caml_enter_blocking_section();
-  st_thread_yield();
-  caml_leave_blocking_section();
+
+  /* Do all the parts of a blocking section enter/leave except lock
+     manipulation, which we'll do more efficiently in st_thread_yield. (Since
+     our blocking section doesn't contain anything interesting, don't bother
+     with saving errno.)
+  */
+  caml_process_pending_signals();
+  caml_thread_save_runtime_state();
+  st_thread_yield(&caml_master_lock);
+  curr_thread = st_tls_get(thread_descriptor_key);
+  caml_thread_restore_runtime_state();
+  caml_process_pending_signals();
+
   return Val_unit;
 }
 
diff --git a/otherlibs/systhreads/st_win32.h b/otherlibs/systhreads/st_win32.h
index 2f2ea66593..fcc25290d9 100644
--- a/otherlibs/systhreads/st_win32.h
+++ b/otherlibs/systhreads/st_win32.h
@@ -99,13 +99,6 @@ static void st_thread_join(st_thread_id thr)
   WaitForSingleObject(thr, INFINITE);
 }
 
-/* Scheduling hints */
-
-static INLINE void st_thread_yield(void)
-{
-  Sleep(0);
-}
-
 /* Thread-specific state */
 
 typedef DWORD st_tlskey;
@@ -158,6 +151,15 @@ static INLINE int st_masterlock_waiters(st_masterlock * m)
   return 1;                     /* info not maintained */
 }
 
+/* Scheduling hints */
+
+static INLINE void st_thread_yield(st_masterlock * m)
+{
+  LeaveCriticalSection(m);
+  Sleep(0);
+  EnterCriticalSection(m);
+}
+
 /* Mutexes */
 
 typedef CRITICAL_SECTION * st_mutex;
diff --git a/testsuite/tests/lib-systhreads/ocamltests b/testsuite/tests/lib-systhreads/ocamltests
index e818e5485c..2899733619 100644
--- a/testsuite/tests/lib-systhreads/ocamltests
+++ b/testsuite/tests/lib-systhreads/ocamltests
@@ -1,2 +1,3 @@
 testfork.ml
 testpreempt.ml
+testyield.ml
diff --git a/testsuite/tests/lib-systhreads/testyield.ml b/testsuite/tests/lib-systhreads/testyield.ml
new file mode 100644
index 0000000000..30e70ce946
--- /dev/null
+++ b/testsuite/tests/lib-systhreads/testyield.ml
@@ -0,0 +1,51 @@
+(* TEST
+   (* Test that yielding between busy threads reliably triggers a thread
+      switch. *)
+   include systhreads
+   * not-windows
+   ** bytecode
+   ** native
+*)
+
+let threads = 4
+
+let are_ready = ref 0
+
+let yields = ref 0
+
+let iters = 50000
+
+let last = ref (-1)
+
+let report thread run_length =
+  (* The below loop tests how many times in a row a loop that calls yield runs
+     without changing threads. Ideally the answer would *always* be one, but
+     it's not clear we can reliably guarantee that unless nothing else ever
+     drops the Ocaml lock, so instead just rely on it being small. *)
+  if run_length > 3
+  then Printf.printf "Thread %d ran %d consecutive iters\n" thread run_length
+
+
+let threads =
+  List.init threads (Thread.create (fun i ->
+    incr are_ready;
+    (* Don't make any progress until all threads are spawned and properly
+       contending for the Ocaml lock. *)
+    while !are_ready < threads do
+      Thread.yield ()
+    done;
+    let consecutive = ref 0 in
+    while !yields < iters do
+      incr yields;
+      last := i;
+      Thread.yield ();
+      incr consecutive;
+      if not (!last = i)
+      then (
+        report i !consecutive;
+        consecutive := 0)
+    done;
+    if !consecutive > 0 then report i !consecutive;
+  ));;
+
+List.iter Thread.join threads
author	ahh <andrewhhunter@gmail.com>	2019-02-08 11:18:27 -0500
committer	Damien Doligez <damien.doligez@gmail.com>	2019-02-08 17:18:27 +0100
commit	7a27cd56b1b3e4e0b1653f5870d1c397d03f5d30 (patch)
tree	50bb9fb2d11d7e497f6827c862fe2ab9b1539374
parent	2b2670e72aaa0ad8fd0b2e881e01370694aba37c (diff)
download	ocaml-7a27cd56b1b3e4e0b1653f5870d1c397d03f5d30.tar.gz