Diffstat (limited to 'libitm/config/linux/rwlock.cc')
-rw-r--r--  libitm/config/linux/rwlock.cc | 92
1 file changed, 52 insertions(+), 40 deletions(-)
diff --git a/libitm/config/linux/rwlock.cc b/libitm/config/linux/rwlock.cc
index 3471049083e..f87be2e880a 100644
--- a/libitm/config/linux/rwlock.cc
+++ b/libitm/config/linux/rwlock.cc
@@ -36,10 +36,11 @@ gtm_rwlock::read_lock (gtm_thread *tx)
for (;;)
{
// Fast path: first announce our intent to read, then check for
- // conflicting intents to write. Note that direct assignment to
- // an atomic object is memory_order_seq_cst.
- tx->shared_state = 0;
- if (likely(writers == 0))
+ // conflicting intents to write. The fence ensures that this happens
+ // in exactly this order.
+ tx->shared_state.store (0, memory_order_relaxed);
+ atomic_thread_fence (memory_order_seq_cst);
+ if (likely (writers.load (memory_order_relaxed) == 0))
return;
// There seems to be an active, waiting, or confirmed writer, so enter
@@ -50,10 +51,11 @@ gtm_rwlock::read_lock (gtm_thread *tx)
// We need the barrier here for the same reason that we need it in
// read_unlock().
// TODO Potentially too many wake-ups. See comments in read_unlock().
- tx->shared_state = -1;
- if (writer_readers > 0)
+ tx->shared_state.store (-1, memory_order_relaxed);
+ atomic_thread_fence (memory_order_seq_cst);
+ if (writer_readers.load (memory_order_relaxed) > 0)
{
- writer_readers = 0;
+ writer_readers.store (0, memory_order_relaxed);
futex_wake(&writer_readers, 1);
}
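
The fast path above is one half of a Dekker-style handshake: announce your intent with a relaxed store, separate the announcement from the check with a seq_cst fence, and only then inspect the other side with a relaxed load. The following stand-alone store-buffering litmus test is a minimal sketch of why this works; reader_flag and writer_flag are illustrative names, not libitm's:

#include <atomic>
#include <cassert>
#include <functional>
#include <thread>

std::atomic<int> reader_flag (0), writer_flag (0);

void reader_side (int &saw_writer)
{
  reader_flag.store (1, std::memory_order_relaxed);
  std::atomic_thread_fence (std::memory_order_seq_cst);
  saw_writer = writer_flag.load (std::memory_order_relaxed);
}

void writer_side (int &saw_reader)
{
  writer_flag.store (1, std::memory_order_relaxed);
  std::atomic_thread_fence (std::memory_order_seq_cst);
  saw_reader = reader_flag.load (std::memory_order_relaxed);
}

int main ()
{
  int saw_writer = -1, saw_reader = -1;
  std::thread t1 (reader_side, std::ref (saw_writer));
  std::thread t2 (writer_side, std::ref (saw_reader));
  t1.join ();
  t2.join ();
  // Whichever fence is earlier in the total order over seq_cst fences,
  // the load after the later fence must observe the store before the
  // earlier fence, so the two threads cannot both miss each other.
  assert (saw_writer == 1 || saw_reader == 1);
  return 0;
}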
@@ -61,16 +63,16 @@ gtm_rwlock::read_lock (gtm_thread *tx)
// writer anymore.
// TODO Spin here on writers for a while. Consider whether we woke
// any writers before?
- while (writers)
+ while (writers.load (memory_order_relaxed))
{
// An active writer. Wait until it has finished. To avoid lost
// wake-ups, we need to use Dekker-like synchronization.
// Note that we cannot reset readers to zero when we see that there
// are no writers anymore after the barrier because this pending
// store could then lead to lost wake-ups at other readers.
- readers = 1;
- atomic_thread_fence(memory_order_acq_rel);
- if (writers)
+ readers.store (1, memory_order_relaxed);
+ atomic_thread_fence (memory_order_seq_cst);
+ if (writers.load (memory_order_relaxed))
futex_wait(&readers, 1);
}
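
Setting the flag, fencing, and then re-checking before blocking is what makes the futex sleep race-free: the waker in write_unlock() performs the mirrored store/fence/load sequence, so at least one side must observe the other and the wake-up cannot be lost. Below is a hedged, self-contained sketch of that handshake, with busy-waiting standing in for libitm's futex_wait/futex_wake wrappers; writers and readers here are illustrative stand-ins for the gtm_rwlock members:

#include <atomic>
#include <thread>

std::atomic<int> writers (1);  // 1 = a writer is active (illustrative)
std::atomic<int> readers (0);  // sleep flag checked by the waking writer

void wait_side ()
{
  while (writers.load (std::memory_order_relaxed) != 0)
    {
      readers.store (1, std::memory_order_relaxed);
      std::atomic_thread_fence (std::memory_order_seq_cst);
      // Sleep only if a writer is still present after the fence;
      // otherwise the waker may already have checked readers and
      // decided not to wake anyone.
      if (writers.load (std::memory_order_relaxed) != 0)
        while (readers.load (std::memory_order_relaxed) == 1)
          ;  // futex_wait (&readers, 1) in the real code
    }
}

void wake_side ()
{
  writers.store (0, std::memory_order_relaxed);
  std::atomic_thread_fence (std::memory_order_seq_cst);
  if (readers.load (std::memory_order_relaxed) > 0)
    readers.store (0, std::memory_order_relaxed);  // plus futex_wake (&readers, 1)
}

int main ()
{
  std::thread t (wait_side);
  wake_side ();
  t.join ();
  return 0;
}

The writer_readers loop in write_lock_generic() further down uses the same pattern with the roles reversed.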
@@ -95,8 +97,8 @@ bool
gtm_rwlock::write_lock_generic (gtm_thread *tx)
{
// Try to acquire the write lock.
- unsigned int w;
- if (unlikely((w = __sync_val_compare_and_swap(&writers, 0, 1)) != 0))
+ int w = 0;
+ if (unlikely (!writers.compare_exchange_strong (w, 1)))
{
// If this is an upgrade, we must not wait for other writers or
// upgrades.
@@ -104,19 +106,14 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
return false;
// There is already a writer. If there are no other waiting writers,
- // switch to contended mode.
- // Note that this is actually an atomic exchange, not a TAS. Also,
- // it's only guaranteed to have acquire semantics, whereas we need a
- // full barrier to make the Dekker-style synchronization work. However,
- // we rely on the xchg being a full barrier on the architectures that we
- // consider here.
- // ??? Use C++0x atomics as soon as they are available.
+ // switch to contended mode. We need seq_cst memory order to make the
+ // Dekker-style synchronization work.
if (w != 2)
- w = __sync_lock_test_and_set(&writers, 2);
+ w = writers.exchange (2);
while (w != 0)
{
futex_wait(&writers, 2);
- w = __sync_lock_test_and_set(&writers, 2);
+ w = writers.exchange (2);
}
}
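
The 0/1/2 values of writers implement the classic three-state futex lock (0 = free, 1 = acquired, 2 = acquired and possibly contended) described in Ulrich Drepper's "Futexes Are Tricky". A sketch of the acquire path under that assumption, with a spin loop in place of futex_wait and lock_word as an illustrative stand-in for writers:

#include <atomic>

std::atomic<int> lock_word (0);  // 0 = free, 1 = acquired, 2 = contended

void lock ()
{
  int w = 0;
  // Uncontended fast path: 0 -> 1. compare_exchange_strong defaults to
  // memory_order_seq_cst.
  if (lock_word.compare_exchange_strong (w, 1))
    return;
  // Contended: claim the lock and mark it contended in one exchange.
  // exchange also defaults to seq_cst, so it is the full barrier that
  // the Dekker-style synchronization needs.
  while (lock_word.exchange (2) != 0)
    while (lock_word.load (std::memory_order_relaxed) == 2)
      ;  // futex_wait (&lock_word, 2) in the real code
  // The exchange returned 0: we own the lock, in contended mode.
}

int main ()
{
  lock ();  // single-threaded: takes the 0 -> 1 fast path
  return 0;
}

Because the C++11 read-modify-write operations are seq_cst by default, the old comment about relying on xchg being a full barrier on the supported architectures can simply be dropped.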
@@ -130,13 +127,14 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
// TODO In the worst case, this requires one wait/wake pair for each
// active reader. Reduce this!
if (tx != 0)
- tx->shared_state = ~(typeof tx->shared_state)0;
+ tx->shared_state.store (-1, memory_order_relaxed);
for (gtm_thread *it = gtm_thread::list_of_threads; it != 0;
it = it->next_thread)
{
// Use a loop here to check reader flags again after waiting.
- while (it->shared_state != ~(typeof it->shared_state)0)
+ while (it->shared_state.load (memory_order_relaxed)
+ != ~(typeof it->shared_state)0)
{
// An active reader. Wait until it has finished. To avoid lost
// wake-ups, we need to use Dekker-like synchronization.
@@ -144,12 +142,13 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
// the barrier that the reader has finished in the meantime;
// however, this is only possible because we are the only writer.
// TODO Spin for a while on this reader flag.
- writer_readers = 1;
- __sync_synchronize();
- if (it->shared_state != ~(typeof it->shared_state)0)
+ writer_readers.store (1, memory_order_relaxed);
+ atomic_thread_fence (memory_order_seq_cst);
+ if (it->shared_state.load (memory_order_relaxed)
+ != ~(typeof it->shared_state)0)
futex_wait(&writer_readers, 1);
else
- writer_readers = 0;
+ writer_readers.store (0, memory_order_relaxed);
}
}
@@ -181,19 +180,28 @@ gtm_rwlock::write_upgrade (gtm_thread *tx)
void
gtm_rwlock::read_unlock (gtm_thread *tx)
{
- tx->shared_state = ~(typeof tx->shared_state)0;
-
- // If there is a writer waiting for readers, wake it up. We need the barrier
- // to avoid lost wake-ups.
+ // We only need release memory order here because of privatization safety
+ // (this ensures that marking the transaction as inactive happens after
+ // any prior data accesses by this transaction, and that neither the
+ // compiler nor the hardware order this store earlier).
+ // ??? We might be able to avoid this release here if the compiler can't
+ // merge the release fence with the subsequent seq_cst fence.
+ tx->shared_state.store (-1, memory_order_release);
+
+ // If there is a writer waiting for readers, wake it up. We need the fence
+ // to avoid lost wake-ups. Furthermore, the privatization safety
+ // implementation in gtm_thread::try_commit() relies on the existence of
+ // this seq_cst fence.
// ??? We might not be the last active reader, so the wake-up might happen
// too early. How do we avoid this without slowing down readers too much?
// Each reader could scan the list of txns for other active readers but
// this can result in many cache misses. Use combining instead?
// TODO Sends out one wake-up for each reader in the worst case.
- __sync_synchronize();
- if (unlikely(writer_readers > 0))
+ atomic_thread_fence (memory_order_seq_cst);
+ if (unlikely (writer_readers.load (memory_order_relaxed) > 0))
{
- writer_readers = 0;
+ // No additional barrier needed here (see write_unlock()).
+ writer_readers.store (0, memory_order_relaxed);
futex_wake(&writer_readers, 1);
}
}
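
The release store is a publication: every memory access the transaction performed is ordered before the moment it is marked inactive, so a thread that privatizes data after observing shared_state == -1 cannot see those accesses float past that point. A simplified sketch of the property, using an acquire load on the observer's side (the actual privatization-safety protocol in gtm_thread::try_commit() relies on the seq_cst fence instead; data and the -1 sentinel below are illustrative):

#include <atomic>
#include <cassert>
#include <thread>

int data;                            // memory accessed by the transaction
std::atomic<long> shared_state (0);  // -1 = transaction inactive

void transaction ()
{
  data = 42;  // transactional write
  // Release ordering keeps the write to data from being reordered after
  // this store, by either the compiler or the hardware.
  shared_state.store (-1, std::memory_order_release);
}

void privatizer ()
{
  // Acquire pairs with the release store above, making the write to
  // data visible before the non-transactional access below.
  while (shared_state.load (std::memory_order_acquire) != -1)
    ;
  assert (data == 42);
}

int main ()
{
  std::thread t (transaction);
  privatizer ();
  t.join ();
  return 0;
}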
@@ -204,11 +212,11 @@ gtm_rwlock::read_unlock (gtm_thread *tx)
void
gtm_rwlock::write_unlock ()
{
- // This is supposed to be a full barrier.
- if (__sync_fetch_and_sub(&writers, 1) == 2)
+ // This needs to have seq_cst memory order.
+ if (writers.fetch_sub (1) == 2)
{
// There might be waiting writers, so wake them.
- writers = 0;
+ writers.store (0, memory_order_relaxed);
if (futex_wake(&writers, 1) == 0)
{
// If we did not wake any waiting writers, we might indeed be the
@@ -223,9 +231,13 @@ gtm_rwlock::write_unlock ()
// No waiting writers, so wake up all waiting readers.
// Because the fetch_and_sub is a full barrier already, we don't need
// another barrier here (as in read_unlock()).
- if (readers > 0)
+ if (readers.load (memory_order_relaxed) > 0)
{
- readers = 0;
+ // No additional barrier needed here: read-write coherency guarantees
+ // that the store of 0 below is ordered after the value observed by the
+ // previous load in the modification order of readers. Late stores by a
+ // reader are not a problem because readers do Dekker-style
+ // synchronization on writers.
+ readers.store (0, memory_order_relaxed);
futex_wake(&readers, INT_MAX);
}
}
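
Taken together, the two write_unlock() hunks are the release half of the three-state lock sketched after write_lock_generic() above: fetch_sub returning 2 means the lock was contended, so the word must be reset to 0 before waking a waiter. A matching sketch of that release path, again with the futex call left as a comment and lock_word standing in for writers:

#include <atomic>

std::atomic<int> lock_word (2);  // acquired in contended mode, as after exchange (2)

void unlock ()
{
  // fetch_sub defaults to memory_order_seq_cst, so it is a full barrier.
  // Old value 2: there may be sleeping waiters; old value 1: there are none.
  if (lock_word.fetch_sub (1) == 2)
    {
      // Reset to free before waking; a woken thread re-acquires with
      // exchange (2), so a relaxed store suffices after the seq_cst
      // read-modify-write above.
      lock_word.store (0, std::memory_order_relaxed);
      // futex_wake (&lock_word, 1) in the real code.
    }
}

int main ()
{
  unlock ();  // 2 -> 1, then reset to 0; a waiter would be woken here
  return 0;
}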