libitm: Fixed conversion to C++11 atomics.

libitm/ * beginend.cc (GTM::gtm_thread::begin_transaction): Add comment. (GTM::gtm_thread::try_commit): Changed memory order. * config/linux/alpha/futex_bits.h (sys_futex0): Take atomic int as parameter. * config/linux/x86/futex_bits.h (sys_futex0): Same. * config/linux/sh/futex_bits.h (sys_futex0): Same. * config/linux/futex_bits.h (sys_futex0): Same. * config/linux/futex.cc (futex_wait, futex_wake): Same. * config/linux/futex.h (futex_wait, futex_wake): Same. * config/linux/rwlock.h (gtm_rwlock::writers, gtm_rwlock::writer_readers, gtm_rwlock::readers): Change to atomic ints. * config/linux/rwlock.cc (gtm_rwlock::read_lock, gtm_rwlock::write_lock_generic, gtm_rwlock::read_unlock, gtm_rwlock::write_unlock): Fix memory orders and fences. * config/posix/rwlock.cc (gtm_rwlock::read_lock, gtm_rwlock::write_lock_generic, gtm_rwlock::read_unlock, gtm_rwlock::write_unlock): Same. * config/linux/rwlock.h (gtm_rwlock::summary): Change to atomic int. * method-gl.cc (gl_mg::init, gl_wt_dispatch::memtransfer_static, gl_wt_dispatch::memset_static, gl_wt_dispatch::begin_or_restart): Add comments. (gl_wt_dispatch::pre_write, gl_wt_dispatch::validate, gl_wt_dispatch::load, gl_wt_dispatch::store, gl_wt_dispatch::try_commit, gl_wt_dispatch::rollback): Fix memory orders and fences. Add comments. From-SVN: r182674
author: Torvald Riegel <triegel@redhat.com> 2011-12-24 01:42:20 +0000
committer: Torvald Riegel <torvald@gcc.gnu.org> 2011-12-24 01:42:20 +0000
commit: 799142bf743cb04f3a86df85c69481a3c44c1ad9 (patch)
tree: 76234a08c818b5da2d7cbe2a47fa07501ef97cb2 /libitm/config
parent: 8b3bff4ce9864ae2444d0c1e18dc57b32e4d0e3a (diff)
download: gcc-799142bf743cb04f3a86df85c69481a3c44c1ad9.tar.gz
10 files changed, 98 insertions, 71 deletions
diff --git a/libitm/config/linux/alpha/futex_bits.h b/libitm/config/linux/alpha/futex_bits.h
index 997bf0b3cff..ef5c10a5c1a 100644
--- a/libitm/config/linux/alpha/futex_bits.h
+++ b/libitm/config/linux/alpha/futex_bits.h
@@ -29,7 +29,7 @@
 #endif
 
 static inline long
-sys_futex0 (int *addr, long op, long val)
+sys_futex0 (std::atomic<int> *addr, long op, long val)
 {
   register long sc_0 __asm__("$0");
   register long sc_16 __asm__("$16");
diff --git a/libitm/config/linux/futex.cc b/libitm/config/linux/futex.cc
index 0889ee5dfec..5707b40c262 100644
--- a/libitm/config/linux/futex.cc
+++ b/libitm/config/linux/futex.cc
@@ -41,7 +41,7 @@ static long int gtm_futex_wake = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
 
 
 void
-futex_wait (int *addr, int val)
+futex_wait (std::atomic<int> *addr, int val)
 {
   long res;
 
@@ -65,7 +65,7 @@ futex_wait (int *addr, int val)
 
 
 long
-futex_wake (int *addr, int count)
+futex_wake (std::atomic<int> *addr, int count)
 {
   long res = sys_futex0 (addr, gtm_futex_wake, count);
   if (__builtin_expect (res == -ENOSYS, 0))
diff --git a/libitm/config/linux/futex.h b/libitm/config/linux/futex.h
index 00161b47431..02bf5d0e323 100644
--- a/libitm/config/linux/futex.h
+++ b/libitm/config/linux/futex.h
@@ -27,10 +27,12 @@
 #ifndef GTM_FUTEX_H
 #define GTM_FUTEX_H 1
 
+#include "local_atomic"
+
 namespace GTM HIDDEN {
 
-extern void futex_wait (int *addr, int val);
-extern long futex_wake (int *addr, int count);
+extern void futex_wait (std::atomic<int> *addr, int val);
+extern long futex_wake (std::atomic<int> *addr, int count);
 
 }
 
diff --git a/libitm/config/linux/futex_bits.h b/libitm/config/linux/futex_bits.h
index bbc336604e9..9f6be72e3c4 100644
--- a/libitm/config/linux/futex_bits.h
+++ b/libitm/config/linux/futex_bits.h
@@ -33,7 +33,7 @@
 #include <sys/syscall.h>
 
 static inline long
-sys_futex0 (int *addr, long op, long val)
+sys_futex0 (std::atomic<int> *addr, long op, long val)
 {
-  return syscall (SYS_futex, addr, op, val, 0);
+  return syscall (SYS_futex, (int*) addr, op, val, 0);
 }
diff --git a/libitm/config/linux/rwlock.cc b/libitm/config/linux/rwlock.cc
index 3471049083e..f87be2e880a 100644
--- a/libitm/config/linux/rwlock.cc
+++ b/libitm/config/linux/rwlock.cc
@@ -36,10 +36,11 @@ gtm_rwlock::read_lock (gtm_thread *tx)
   for (;;)
     {
       // Fast path: first announce our intent to read, then check for
-      // conflicting intents to write.  Note that direct assignment to
-      // an atomic object is memory_order_seq_cst.
-      tx->shared_state = 0;
-      if (likely(writers == 0))
+      // conflicting intents to write.  The fence ensures that this happens
+      // in exactly this order.
+      tx->shared_state.store (0, memory_order_relaxed);
+      atomic_thread_fence (memory_order_seq_cst);
+      if (likely (writers.load (memory_order_relaxed) == 0))
 	return;
 
       // There seems to be an active, waiting, or confirmed writer, so enter
@@ -50,10 +51,11 @@ gtm_rwlock::read_lock (gtm_thread *tx)
       // We need the barrier here for the same reason that we need it in
       // read_unlock().
       // TODO Potentially too many wake-ups. See comments in read_unlock().
-      tx->shared_state = -1;
-      if (writer_readers > 0)
+      tx->shared_state.store (-1, memory_order_relaxed);
+      atomic_thread_fence (memory_order_seq_cst);
+      if (writer_readers.load (memory_order_relaxed) > 0)
 	{
-	  writer_readers = 0;
+	  writer_readers.store (0, memory_order_relaxed);
 	  futex_wake(&writer_readers, 1);
 	}
 
@@ -61,16 +63,16 @@ gtm_rwlock::read_lock (gtm_thread *tx)
       // writer anymore.
       // TODO Spin here on writers for a while. Consider whether we woke
       // any writers before?
-      while (writers)
+      while (writers.load (memory_order_relaxed))
 	{
 	  // An active writer. Wait until it has finished. To avoid lost
 	  // wake-ups, we need to use Dekker-like synchronization.
 	  // Note that we cannot reset readers to zero when we see that there
 	  // are no writers anymore after the barrier because this pending
 	  // store could then lead to lost wake-ups at other readers.
-	  readers = 1;
-	  atomic_thread_fence(memory_order_acq_rel);
-	  if (writers)
+	  readers.store (1, memory_order_relaxed);
+	  atomic_thread_fence (memory_order_seq_cst);
+	  if (writers.load (memory_order_relaxed))
 	    futex_wait(&readers, 1);
 	}
 
@@ -95,8 +97,8 @@ bool
 gtm_rwlock::write_lock_generic (gtm_thread *tx)
 {
   // Try to acquire the write lock.
-  unsigned int w;
-  if (unlikely((w = __sync_val_compare_and_swap(&writers, 0, 1)) != 0))
+  int w = 0;
+  if (unlikely (!writers.compare_exchange_strong (w, 1)))
     {
       // If this is an upgrade, we must not wait for other writers or
       // upgrades.
@@ -104,19 +106,14 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
 	return false;
 
       // There is already a writer. If there are no other waiting writers,
-      // switch to contended mode.
-      // Note that this is actually an atomic exchange, not a TAS. Also,
-      // it's only guaranteed to have acquire semantics, whereas we need a
-      // full barrier to make the Dekker-style synchronization work. However,
-      // we rely on the xchg being a full barrier on the architectures that we
-      // consider here.
-      // ??? Use C++0x atomics as soon as they are available.
+      // switch to contended mode.  We need seq_cst memory order to make the
+      // Dekker-style synchronization work.
       if (w != 2)
-	w = __sync_lock_test_and_set(&writers, 2);
+	w = writers.exchange (2);
       while (w != 0)
 	{
 	  futex_wait(&writers, 2);
-	  w = __sync_lock_test_and_set(&writers, 2);
+	  w = writers.exchange (2);
 	}
     }
 
@@ -130,13 +127,14 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
   // TODO In the worst case, this requires one wait/wake pair for each
   // active reader. Reduce this!
   if (tx != 0)
-    tx->shared_state = ~(typeof tx->shared_state)0;
+    tx->shared_state.store (-1, memory_order_relaxed);
 
   for (gtm_thread *it = gtm_thread::list_of_threads; it != 0;
       it = it->next_thread)
     {
       // Use a loop here to check reader flags again after waiting.
-      while (it->shared_state != ~(typeof it->shared_state)0)
+      while (it->shared_state.load (memory_order_relaxed)
+          != ~(typeof it->shared_state)0)
 	{
 	  // An active reader. Wait until it has finished. To avoid lost
 	  // wake-ups, we need to use Dekker-like synchronization.
@@ -144,12 +142,13 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
 	  // the barrier that the reader has finished in the meantime;
 	  // however, this is only possible because we are the only writer.
 	  // TODO Spin for a while on this reader flag.
-	  writer_readers = 1;
-	  __sync_synchronize();
-	  if (it->shared_state != ~(typeof it->shared_state)0)
+	  writer_readers.store (1, memory_order_relaxed);
+	  atomic_thread_fence (memory_order_seq_cst);
+	  if (it->shared_state.load (memory_order_relaxed)
+	      != ~(typeof it->shared_state)0)
 	    futex_wait(&writer_readers, 1);
 	  else
-	    writer_readers = 0;
+	    writer_readers.store (0, memory_order_relaxed);
 	}
     }
 
@@ -181,19 +180,28 @@ gtm_rwlock::write_upgrade (gtm_thread *tx)
 void
 gtm_rwlock::read_unlock (gtm_thread *tx)
 {
-  tx->shared_state = ~(typeof tx->shared_state)0;
-
-  // If there is a writer waiting for readers, wake it up. We need the barrier
-  // to avoid lost wake-ups.
+  // We only need release memory order here because of privatization safety
+  // (this ensures that marking the transaction as inactive happens after
+  // any prior data accesses by this transaction, and that neither the
+  // compiler nor the hardware order this store earlier).
+  // ??? We might be able to avoid this release here if the compiler can't
+  // merge the release fence with the subsequent seq_cst fence.
+  tx->shared_state.store (-1, memory_order_release);
+
+  // If there is a writer waiting for readers, wake it up.  We need the fence
+  // to avoid lost wake-ups.  Furthermore, the privatization safety
+  // implementation in gtm_thread::try_commit() relies on the existence of
+  // this seq_cst fence.
   // ??? We might not be the last active reader, so the wake-up might happen
   // too early. How do we avoid this without slowing down readers too much?
   // Each reader could scan the list of txns for other active readers but
   // this can result in many cache misses. Use combining instead?
   // TODO Sends out one wake-up for each reader in the worst case.
-  __sync_synchronize();
-  if (unlikely(writer_readers > 0))
+  atomic_thread_fence (memory_order_seq_cst);
+  if (unlikely (writer_readers.load (memory_order_relaxed) > 0))
     {
-      writer_readers = 0;
+      // No additional barrier needed here (see write_unlock()).
+      writer_readers.store (0, memory_order_relaxed);
       futex_wake(&writer_readers, 1);
     }
 }
@@ -204,11 +212,11 @@ gtm_rwlock::read_unlock (gtm_thread *tx)
 void
 gtm_rwlock::write_unlock ()
 {
-  // This is supposed to be a full barrier.
-  if (__sync_fetch_and_sub(&writers, 1) == 2)
+  // This needs to have seq_cst memory order.
+  if (writers.fetch_sub (1) == 2)
     {
       // There might be waiting writers, so wake them.
-      writers = 0;
+      writers.store (0, memory_order_relaxed);
       if (futex_wake(&writers, 1) == 0)
 	{
 	  // If we did not wake any waiting writers, we might indeed be the
@@ -223,9 +231,13 @@ gtm_rwlock::write_unlock ()
   // No waiting writers, so wake up all waiting readers.
   // Because the fetch_and_sub is a full barrier already, we don't need
   // another barrier here (as in read_unlock()).
-  if (readers > 0)
+  if (readers.load (memory_order_relaxed) > 0)
     {
-      readers = 0;
+      // No additional barrier needed here.  The previous load must be in
+      // modification order because of the coherency constraints.  Late stores
+      // by a reader are not a problem because readers do Dekker-style
+      // synchronization on writers.
+      readers.store (0, memory_order_relaxed);
       futex_wake(&readers, INT_MAX);
     }
 }
diff --git a/libitm/config/linux/rwlock.h b/libitm/config/linux/rwlock.h
index 7e6229b73f2..e5a53c054f6 100644
--- a/libitm/config/linux/rwlock.h
+++ b/libitm/config/linux/rwlock.h
@@ -25,6 +25,7 @@
 #ifndef GTM_RWLOCK_H
 #define GTM_RWLOCK_H
 
+#include "local_atomic"
 #include "common.h"
 
 namespace GTM HIDDEN {
@@ -42,9 +43,9 @@ struct gtm_thread;
 class gtm_rwlock
 {
   // TODO Put futexes on different cachelines?
-  int writers;          // Writers' futex.
-  int writer_readers;   // A confirmed writer waits here for readers.
-  int readers;          // Readers wait here for writers (iff true).
+  std::atomic<int> writers;       // Writers' futex.
+  std::atomic<int> writer_readers;// A confirmed writer waits here for readers.
+  std::atomic<int> readers;       // Readers wait here for writers (iff true).
 
  public:
   gtm_rwlock() : writers(0), writer_readers(0), readers(0) {};
diff --git a/libitm/config/linux/sh/futex_bits.h b/libitm/config/linux/sh/futex_bits.h
index f9381ed6cda..cc85367f72b 100644
--- a/libitm/config/linux/sh/futex_bits.h
+++ b/libitm/config/linux/sh/futex_bits.h
@@ -32,7 +32,7 @@
        trapa #0x14; or r0,r0; or r0,r0; or r0,r0; or r0,r0; or r0,r0"
 
 static inline long
-sys_futex0 (int *addr, long op, long val)
+sys_futex0 (std::atomic<int> *addr, long op, long val)
 {
   int __status;
   register long __r3 asm ("r3") = SYS_futex;
diff --git a/libitm/config/linux/x86/futex_bits.h b/libitm/config/linux/x86/futex_bits.h
index 9a6b102cd7a..732c4ea96b5 100644
--- a/libitm/config/linux/x86/futex_bits.h
+++ b/libitm/config/linux/x86/futex_bits.h
@@ -28,7 +28,7 @@
 # endif
 
 static inline long
-sys_futex0 (int *addr, long op, long val)
+sys_futex0 (std::atomic<int> *addr, long op, long val)
 {
   register long r10 __asm__("%r10") = 0;
   long res;
@@ -49,7 +49,7 @@ sys_futex0 (int *addr, long op, long val)
 # ifdef __PIC__
 
 static inline long
-sys_futex0 (int *addr, int op, int val)
+sys_futex0 (std::atomic<int> *addr, int op, int val)
 {
   long res;
 
@@ -66,7 +66,7 @@ sys_futex0 (int *addr, int op, int val)
 # else
 
 static inline long
-sys_futex0 (int *addr, int op, int val)
+sys_futex0 (std::atomic<int> *addr, int op, int val)
 {
   long res;
 
diff --git a/libitm/config/posix/rwlock.cc b/libitm/config/posix/rwlock.cc
index 2464f041c5a..7ef39982ccf 100644
--- a/libitm/config/posix/rwlock.cc
+++ b/libitm/config/posix/rwlock.cc
@@ -53,10 +53,11 @@ void
 gtm_rwlock::read_lock (gtm_thread *tx)
 {
   // Fast path: first announce our intent to read, then check for conflicting
-  // intents to write.  Note that direct assignment to an atomic object
-  // is memory_order_seq_cst.
-  tx->shared_state = 0;
-  unsigned int sum = this->summary;
+  // intents to write.  The fence ensure that this happens in exactly this
+  // order.
+  tx->shared_state.store (0, memory_order_relaxed);
+  atomic_thread_fence (memory_order_seq_cst);
+  unsigned int sum = this->summary.load (memory_order_relaxed);
   if (likely(!(sum & (a_writer | w_writer))))
     return;
 
@@ -74,7 +75,7 @@ gtm_rwlock::read_lock (gtm_thread *tx)
 
   // Read summary again after acquiring the mutex because it might have
   // changed during waiting for the mutex to become free.
-  sum = this->summary;
+  sum = this->summary.load (memory_order_relaxed);
 
   // If there is a writer waiting for readers, wake it up. Only do that if we
   // might be the last reader that could do the wake-up, otherwise skip the
@@ -91,10 +92,10 @@ gtm_rwlock::read_lock (gtm_thread *tx)
   // If there is an active or waiting writer, we must wait.
   while (sum & (a_writer | w_writer))
     {
-      this->summary = sum | w_reader;
+      this->summary.store (sum | w_reader, memory_order_relaxed);
       this->w_readers++;
       pthread_cond_wait (&this->c_readers, &this->mutex);
-      sum = this->summary;
+      sum = this->summary.load (memory_order_relaxed);
       if (--this->w_readers == 0)
 	sum &= ~w_reader;
     }
@@ -123,7 +124,7 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
 {
   pthread_mutex_lock (&this->mutex);
 
-  unsigned int sum = this->summary;
+  unsigned int sum = this->summary.load (memory_order_relaxed);
 
   // If there is an active writer, wait.
   while (sum & a_writer)
@@ -136,23 +137,23 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
 	  return false;
 	}
 
-      this->summary = sum | w_writer;
+      this->summary.store (sum | w_writer, memory_order_relaxed);
       this->w_writers++;
       pthread_cond_wait (&this->c_writers, &this->mutex);
-      sum = this->summary;
+      sum = this->summary.load (memory_order_relaxed);
       if (--this->w_writers == 0)
 	sum &= ~w_writer;
     }
 
   // Otherwise we can acquire the lock for write. As a writer, we have
   // priority, so we don't need to take this back.
-  this->summary = sum | a_writer;
+  this->summary.store (sum | a_writer, memory_order_relaxed);
 
   // We still need to wait for active readers to finish. The barrier makes
   // sure that we first set our write intent and check for active readers
   // after that, in strictly this order (similar to the barrier in the fast
   // path of read_lock()).
-  atomic_thread_fence(memory_order_acq_rel);
+  atomic_thread_fence(memory_order_seq_cst);
 
   // If this is an upgrade, we are not a reader anymore.
   if (tx != 0)
@@ -235,8 +236,18 @@ gtm_rwlock::write_upgrade (gtm_thread *tx)
 void
 gtm_rwlock::read_unlock (gtm_thread *tx)
 {
-  tx->shared_state = -1;
-  unsigned int sum = this->summary;
+  // We only need release memory order here because of privatization safety
+  // (this ensures that marking the transaction as inactive happens after
+  // any prior data accesses by this transaction, and that neither the
+  // compiler nor the hardware order this store earlier).
+  // ??? We might be able to avoid this release here if the compiler can't
+  // merge the release fence with the subsequent seq_cst fence.
+  tx->shared_state.store (-1, memory_order_release);
+  // We need this seq_cst fence here to avoid lost wake-ups.  Furthermore,
+  // the privatization safety implementation in gtm_thread::try_commit()
+  // relies on the existence of this seq_cst fence.
+  atomic_thread_fence (memory_order_seq_cst);
+  unsigned int sum = this->summary.load (memory_order_relaxed);
   if (likely(!(sum & (a_writer | w_writer))))
     return;
 
@@ -269,8 +280,8 @@ gtm_rwlock::write_unlock ()
 {
   pthread_mutex_lock (&this->mutex);
 
-  unsigned int sum = this->summary;
-  this->summary = sum & ~a_writer;
+  unsigned int sum = this->summary.load (memory_order_relaxed);
+  this->summary.store (sum & ~a_writer, memory_order_relaxed);
 
   // If there is a waiting writer, wake it.
   if (unlikely (sum & w_writer))
diff --git a/libitm/config/posix/rwlock.h b/libitm/config/posix/rwlock.h
index f538bd059a2..359991ab45b 100644
--- a/libitm/config/posix/rwlock.h
+++ b/libitm/config/posix/rwlock.h
@@ -26,6 +26,7 @@
 #define GTM_RWLOCK_H
 
 #include <pthread.h>
+#include "local_atomic"
 
 namespace GTM HIDDEN {
 
@@ -55,7 +56,7 @@ class gtm_rwlock
   static const unsigned w_writer  = 2;	// The w_writers field != 0
   static const unsigned w_reader  = 4;  // The w_readers field != 0
 
-  unsigned int summary;		// Bitmask of the above.
+  std::atomic<unsigned int> summary;	// Bitmask of the above.
   unsigned int a_readers;	// Nr active readers as observed by a writer
   unsigned int w_readers;	// Nr waiting readers
   unsigned int w_writers;	// Nr waiting writers
author	Torvald Riegel <triegel@redhat.com>	2011-12-24 01:42:20 +0000
committer	Torvald Riegel <torvald@gcc.gnu.org>	2011-12-24 01:42:20 +0000
commit	799142bf743cb04f3a86df85c69481a3c44c1ad9 (patch)
tree	76234a08c818b5da2d7cbe2a47fa07501ef97cb2 /libitm/config
parent	8b3bff4ce9864ae2444d0c1e18dc57b32e4d0e3a (diff)
download	gcc-799142bf743cb04f3a86df85c69481a3c44c1ad9.tar.gz