Diffstat (limited to 'libitm/config/linux/rwlock.cc')
-rw-r--r-- | libitm/config/linux/rwlock.cc | 92
1 file changed, 52 insertions, 40 deletions
diff --git a/libitm/config/linux/rwlock.cc b/libitm/config/linux/rwlock.cc
index 3471049083e..f87be2e880a 100644
--- a/libitm/config/linux/rwlock.cc
+++ b/libitm/config/linux/rwlock.cc
@@ -36,10 +36,11 @@ gtm_rwlock::read_lock (gtm_thread *tx)
   for (;;)
     {
       // Fast path: first announce our intent to read, then check for
-      // conflicting intents to write.  Note that direct assignment to
-      // an atomic object is memory_order_seq_cst.
-      tx->shared_state = 0;
-      if (likely(writers == 0))
+      // conflicting intents to write.  The fence ensures that this happens
+      // in exactly this order.
+      tx->shared_state.store (0, memory_order_relaxed);
+      atomic_thread_fence (memory_order_seq_cst);
+      if (likely (writers.load (memory_order_relaxed) == 0))
        return;

       // There seems to be an active, waiting, or confirmed writer, so enter
@@ -50,10 +51,11 @@ gtm_rwlock::read_lock (gtm_thread *tx)
       // We need the barrier here for the same reason that we need it in
       // read_unlock().
       // TODO Potentially too many wake-ups.  See comments in read_unlock().
-      tx->shared_state = -1;
-      if (writer_readers > 0)
+      tx->shared_state.store (-1, memory_order_relaxed);
+      atomic_thread_fence (memory_order_seq_cst);
+      if (writer_readers.load (memory_order_relaxed) > 0)
        {
-         writer_readers = 0;
+         writer_readers.store (0, memory_order_relaxed);
          futex_wake(&writer_readers, 1);
        }

@@ -61,16 +63,16 @@ gtm_rwlock::read_lock (gtm_thread *tx)
       // writer anymore.
       // TODO Spin here on writers for a while. Consider whether we woke
       // any writers before?
-      while (writers)
+      while (writers.load (memory_order_relaxed))
        {
          // An active writer.  Wait until it has finished.  To avoid lost
          // wake-ups, we need to use Dekker-like synchronization.
          // Note that we cannot reset readers to zero when we see that there
          // are no writers anymore after the barrier because this pending
          // store could then lead to lost wake-ups at other readers.
-         readers = 1;
-         atomic_thread_fence(memory_order_acq_rel);
-         if (writers)
+         readers.store (1, memory_order_relaxed);
+         atomic_thread_fence (memory_order_seq_cst);
+         if (writers.load (memory_order_relaxed))
            futex_wait(&readers, 1);
        }

@@ -95,8 +97,8 @@ bool
 gtm_rwlock::write_lock_generic (gtm_thread *tx)
 {
   // Try to acquire the write lock.
-  unsigned int w;
-  if (unlikely((w = __sync_val_compare_and_swap(&writers, 0, 1)) != 0))
+  int w = 0;
+  if (unlikely (!writers.compare_exchange_strong (w, 1)))
     {
       // If this is an upgrade, we must not wait for other writers or
       // upgrades.
@@ -104,19 +106,14 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
        return false;

       // There is already a writer.  If there are no other waiting writers,
-      // switch to contended mode.
-      // Note that this is actually an atomic exchange, not a TAS.  Also,
-      // it's only guaranteed to have acquire semantics, whereas we need a
-      // full barrier to make the Dekker-style synchronization work.  However,
-      // we rely on the xchg being a full barrier on the architectures that we
-      // consider here.
-      // ??? Use C++0x atomics as soon as they are available.
+      // switch to contended mode.  We need seq_cst memory order to make the
+      // Dekker-style synchronization work.
       if (w != 2)
-       w = __sync_lock_test_and_set(&writers, 2);
+       w = writers.exchange (2);
       while (w != 0)
        {
          futex_wait(&writers, 2);
-         w = __sync_lock_test_and_set(&writers, 2);
+         w = writers.exchange (2);
        }
     }

@@ -130,13 +127,14 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
   // TODO In the worst case, this requires one wait/wake pair for each
   // active reader.  Reduce this!
   if (tx != 0)
-    tx->shared_state = ~(typeof tx->shared_state)0;
+    tx->shared_state.store (-1, memory_order_relaxed);

   for (gtm_thread *it = gtm_thread::list_of_threads; it != 0;
       it = it->next_thread)
     {
       // Use a loop here to check reader flags again after waiting.
-      while (it->shared_state != ~(typeof it->shared_state)0)
+      while (it->shared_state.load (memory_order_relaxed)
+            != ~(typeof it->shared_state)0)
        {
          // An active reader.  Wait until it has finished.  To avoid lost
          // wake-ups, we need to use Dekker-like synchronization.
@@ -144,12 +142,13 @@ gtm_rwlock::write_lock_generic (gtm_thread *tx)
          // the barrier that the reader has finished in the meantime;
          // however, this is only possible because we are the only writer.
          // TODO Spin for a while on this reader flag.
-         writer_readers = 1;
-         __sync_synchronize();
-         if (it->shared_state != ~(typeof it->shared_state)0)
+         writer_readers.store (1, memory_order_relaxed);
+         atomic_thread_fence (memory_order_seq_cst);
+         if (it->shared_state.load (memory_order_relaxed)
+             != ~(typeof it->shared_state)0)
            futex_wait(&writer_readers, 1);
          else
-           writer_readers = 0;
+           writer_readers.store (0, memory_order_relaxed);
        }
     }

@@ -181,19 +180,28 @@ gtm_rwlock::write_upgrade (gtm_thread *tx)
 void
 gtm_rwlock::read_unlock (gtm_thread *tx)
 {
-  tx->shared_state = ~(typeof tx->shared_state)0;
-
-  // If there is a writer waiting for readers, wake it up. We need the barrier
-  // to avoid lost wake-ups.
+  // We only need release memory order here because of privatization safety
+  // (this ensures that marking the transaction as inactive happens after
+  // any prior data accesses by this transaction, and that neither the
+  // compiler nor the hardware order this store earlier).
+  // ??? We might be able to avoid this release here if the compiler can't
+  // merge the release fence with the subsequent seq_cst fence.
+  tx->shared_state.store (-1, memory_order_release);
+
+  // If there is a writer waiting for readers, wake it up.  We need the fence
+  // to avoid lost wake-ups.  Furthermore, the privatization safety
+  // implementation in gtm_thread::try_commit() relies on the existence of
+  // this seq_cst fence.
   // ??? We might not be the last active reader, so the wake-up might happen
   // too early.  How do we avoid this without slowing down readers too much?
   // Each reader could scan the list of txns for other active readers but
   // this can result in many cache misses.  Use combining instead?
   // TODO Sends out one wake-up for each reader in the worst case.
-  __sync_synchronize();
-  if (unlikely(writer_readers > 0))
+  atomic_thread_fence (memory_order_seq_cst);
+  if (unlikely (writer_readers.load (memory_order_relaxed) > 0))
     {
-      writer_readers = 0;
+      // No additional barrier needed here (see write_unlock()).
+      writer_readers.store (0, memory_order_relaxed);
       futex_wake(&writer_readers, 1);
     }
 }
@@ -204,11 +212,11 @@ gtm_rwlock::read_unlock (gtm_thread *tx)
 void
 gtm_rwlock::write_unlock ()
 {
-  // This is supposed to be a full barrier.
-  if (__sync_fetch_and_sub(&writers, 1) == 2)
+  // This needs to have seq_cst memory order.
+  if (writers.fetch_sub (1) == 2)
     {
       // There might be waiting writers, so wake them.
-      writers = 0;
+      writers.store (0, memory_order_relaxed);
       if (futex_wake(&writers, 1) == 0)
        {
          // If we did not wake any waiting writers, we might indeed be the
@@ -223,9 +231,13 @@ gtm_rwlock::write_unlock ()
       // No waiting writers, so wake up all waiting readers.
       // Because the fetch_and_sub is a full barrier already, we don't need
       // another barrier here (as in read_unlock()).
-      if (readers > 0)
+      if (readers.load (memory_order_relaxed) > 0)
        {
-         readers = 0;
+         // No additional barrier needed here.  The previous load must be in
+         // modification order because of the coherency constraints.  Late stores
+         // by a reader are not a problem because readers do Dekker-style
+         // synchronization on writers.
+         readers.store (0, memory_order_relaxed);
          futex_wake(&readers, INT_MAX);
        }
     }
 }
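
The conversion above repeatedly replaces plain assignments and __sync_synchronize() with a relaxed store, a memory_order_seq_cst fence, and a relaxed load. The following is a minimal, self-contained sketch of why that Dekker-style pattern works; it is not libitm code, and the flag and counter names are invented for illustration. The two seq_cst fences are totally ordered, so at least one side must observe the other's announcement, and a reader and a writer can never both take their fast paths.

// Sketch only: assumed names, not libitm's rwlock implementation.
#include <atomic>
#include <cassert>
#include <thread>

std::atomic<int> reader_active{0};   // stands in for tx->shared_state
std::atomic<int> writer_active{0};   // stands in for the writers count
std::atomic<int> fast_paths_taken{0};

void reader ()
{
  // Announce intent with a relaxed store, then fence, then check the writer.
  reader_active.store (1, std::memory_order_relaxed);
  std::atomic_thread_fence (std::memory_order_seq_cst);
  if (writer_active.load (std::memory_order_relaxed) == 0)
    // Fast path: no writer had announced itself before our fence.
    fast_paths_taken.fetch_add (1);
}

void writer ()
{
  // Symmetric Dekker-style announcement on the writer side.
  writer_active.store (1, std::memory_order_relaxed);
  std::atomic_thread_fence (std::memory_order_seq_cst);
  if (reader_active.load (std::memory_order_relaxed) == 0)
    // Fast path: no reader had announced itself before our fence.
    fast_paths_taken.fetch_add (1);
}

int main ()
{
  std::thread r (reader), w (writer);
  r.join ();
  w.join ();
  // With the fences, at most one side can take its fast path; with plain
  // relaxed accesses and no fences, both could.
  assert (fast_paths_taken.load () <= 1);
}

The writer side of the lock uses the common three-state futex protocol (0 = free, 1 = acquired and uncontended, 2 = acquired and contended). Below is a simplified stand-alone sketch under stated assumptions: the futex_wait/futex_wake wrappers here are hypothetical stand-ins for libitm's futex helpers, and the real write_unlock() additionally wakes waiting readers when no writer was woken.

// Sketch only: writer half of the protocol, Linux-specific.
#include <atomic>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

static std::atomic<int> writers{0};   // 0 = free, 1 = uncontended, 2 = contended

static void futex_wait (std::atomic<int> *addr, int expected)
{
  // Sleeps only if *addr still equals expected; spurious wake-ups are fine.
  syscall (SYS_futex, addr, FUTEX_WAIT, expected, nullptr, nullptr, 0);
}

static void futex_wake (std::atomic<int> *addr, int count)
{
  syscall (SYS_futex, addr, FUTEX_WAKE, count, nullptr, nullptr, 0);
}

void write_lock ()
{
  int w = 0;
  // Uncontended fast path: 0 -> 1 (seq_cst by default, as in the patch).
  if (writers.compare_exchange_strong (w, 1))
    return;
  // Contended: advertise a waiter by forcing the state to 2, then sleep
  // until the previous owner hands the lock back as 0.
  if (w != 2)
    w = writers.exchange (2);
  while (w != 0)
    {
      futex_wait (&writers, 2);
      w = writers.exchange (2);
    }
}

void write_unlock ()
{
  // fetch_sub is seq_cst; an old value of 2 means someone may be waiting.
  if (writers.fetch_sub (1) == 2)
    {
      writers.store (0, std::memory_order_relaxed);
      futex_wake (&writers, 1);
    }
}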