diff options
-rw-r--r-- | libitm/ChangeLog | 4 | ||||
-rw-r--r-- | libitm/method-gl.cc | 17 |
2 files changed, 18 insertions, 3 deletions
diff --git a/libitm/ChangeLog b/libitm/ChangeLog index 75410f52ed1..0a06bfec7f4 100644 --- a/libitm/ChangeLog +++ b/libitm/ChangeLog @@ -1,5 +1,9 @@ 2012-02-20 Torvald Riegel <triegel@redhat.com> + * method-gl.cc (gl_wt_dispatch::pre_write): Optimize memory orders. + +2012-02-20 Torvald Riegel <triegel@redhat.com> + * method-gl.cc (gl_wt_dispatch::memtransfer_static): Add missing undo for RfW src. Optimize number of calls to gtm_thr. diff --git a/libitm/method-gl.cc b/libitm/method-gl.cc index 4a18b62d740..503c19e5921 100644 --- a/libitm/method-gl.cc +++ b/libitm/method-gl.cc @@ -104,16 +104,27 @@ protected: tx->restart(RESTART_VALIDATE_WRITE); // CAS global orec from our snapshot time to the locked state. - // We need acq_rel memory order here to synchronize with other loads - // and modifications of orec. + // We need acquire memory order here to synchronize with other + // (ownership) releases of the orec. We do not need acq_rel order + // because whenever another thread reads from this CAS' + // modification, then it will abort anyway and does not rely on + // any further happens-before relation to be established. + // Also note that unlike in ml_wt's increase of the global time + // base (remember that the global orec is used as time base), we do + // not need require memory order here because we do not need to make + // prior orec acquisitions visible to other threads that try to + // extend their snapshot time. if (!o_gl_mg.orec.compare_exchange_strong (now, gl_mg::set_locked(now), - memory_order_acq_rel)) + memory_order_acquire)) tx->restart(RESTART_LOCKED_WRITE); // We use an explicit fence here to avoid having to use release // memory order for all subsequent data stores. This fence will // synchronize with loads of the data with acquire memory order. See // validate() for why this is necessary. + // Adding require memory order to the prior CAS is not sufficient, + // at least according to the Batty et al. formalization of the + // memory model. atomic_thread_fence(memory_order_release); // Set shared_state to new value. |