Diffstat (limited to 'mfbt/Atomics.h')
-rw-r--r--  mfbt/Atomics.h  960
1 files changed, 960 insertions, 0 deletions
diff --git a/mfbt/Atomics.h b/mfbt/Atomics.h
new file mode 100644
index 0000000..b9eaae2
--- /dev/null
+++ b/mfbt/Atomics.h
@@ -0,0 +1,960 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Implements (almost always) lock-free atomic operations. The operations here
+ * are a subset of those found in C++11's <atomic> header, with a
+ * different API to enforce consistent memory ordering constraints.
+ *
+ * Anyone caught using |volatile| for inter-thread memory safety needs to be
+ * sent a copy of this header and the C++11 standard.
+ */
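+
+/*
+ * Typical usage (a sketch; |sCounter| is a hypothetical variable):
+ *
+ *   static mozilla::Atomic<int32_t> sCounter(0);
+ *   sCounter++;   // atomic increment, SequentiallyConsistent by default
+ */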
+
+#ifndef mozilla_Atomics_h_
+#define mozilla_Atomics_h_
+
+#include "mozilla/Assertions.h"
+#include "mozilla/TypeTraits.h"
+
+#include <stdint.h>
+
+/*
+ * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK
+ * does not have <atomic>. So be sure to check for <atomic> support
+ * along with C++0x support.
+ */
+#if defined(__clang__)
+ /*
+ * clang doesn't like libstdc++'s version of <atomic> before GCC 4.7,
+ * due to GCC's loose typing of the __sync_* family of functions. We
+ * do not have a particularly good way to detect this sort of
+ * case at this point, so just assume that if we're on a Linux system,
+ * we can't use the system's <atomic>.
+ *
+ * OpenBSD uses an old libstdc++ 4.2.1 and thus doesn't have <atomic>.
+ */
+# if !defined(__linux__) && !defined(__OpenBSD__) && \
+ (__cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__)) && \
+ __has_include(<atomic>)
+# define MOZ_HAVE_CXX11_ATOMICS
+# endif
+/*
+ * Android uses a different C++ standard library that does not provide
+ * support for <atomic>.
+ */
+#elif defined(__GNUC__) && !defined(__ANDROID__)
+# include "mozilla/Compiler.h"
+# if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && \
+ MOZ_GCC_VERSION_AT_LEAST(4, 5, 2)
+# define MOZ_HAVE_CXX11_ATOMICS
+# endif
+#elif defined(_MSC_VER) && _MSC_VER >= 1700
+# define MOZ_HAVE_CXX11_ATOMICS
+#endif
+
+namespace mozilla {
+
+/**
+ * An enum of memory ordering possibilities for atomics.
+ *
+ * Memory ordering is the observable state of distinct values in memory.
+ * (It's a separate concept from atomicity, which concerns whether an
+ * operation can ever be observed in an intermediate state. Don't
+ * conflate the two!) Given a sequence of operations in source code on
+ * memory, it is *not* always the case that, at all times and on all
+ * cores, those operations will appear to have occurred in that exact
+ * sequence. First, the compiler might reorder that sequence, if it
+ * thinks another ordering will be more efficient. Second, the CPU may
+ * not expose so consistent a view of memory. CPUs will often perform
+ * their own instruction reordering, above and beyond that performed by
+ * the compiler. And each core has its own memory caches, and accesses
+ * (reads and writes both) to "memory" may only resolve to out-of-date
+ * cache entries -- not to the "most recently" performed operation in
+ * some global sense. Any access to a value that may be used by
+ * multiple threads, potentially across multiple cores, must therefore
+ * have a memory ordering imposed on it, for all code on all
+ * threads/cores to have a sufficiently coherent worldview.
+ *
+ * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
+ * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
+ * detail on all this, including examples of how each mode works.
+ *
+ * Note that for simplicity and practicality, not all of the modes in
+ * C++11 are supported. The missing C++11 modes are either subsumed by
+ * the modes we provide below, or not relevant for the CPUs we support
+ * in Gecko. These three modes are confusing enough as it is!
+ */
+enum MemoryOrdering {
+ /*
+ * Relaxed ordering is the simplest memory ordering: none at all.
+ * When the result of a write is observed, nothing may be inferred
+ * about other memory. Writes ostensibly performed "before" on the
+ * writing thread may not yet be visible. Writes performed "after" on
+ * the writing thread may already be visible, if the compiler or CPU
+ * reordered them. (The latter can happen if reads and/or writes get
+ * held up in per-processor caches.) Relaxed ordering means
+ * operations can always use cached values (as long as the actual
+ * updates to atomic values actually occur, correctly, eventually), so
+ * it's usually the fastest sort of atomic access. For this reason,
+ * *it's also the most dangerous kind of access*.
+ *
+ * Relaxed ordering is good for things like process-wide statistics
+ * counters that don't need to be consistent with anything else, so
+ * long as updates themselves are atomic. (And so long as any
+ * observations of that value can tolerate being out-of-date -- if you
+ * need some sort of up-to-date value, you need some sort of other
+ * synchronizing operation.) It's *not* good for locks, mutexes,
+ * reference counts, etc. that mediate access to other memory, or must
+ * be observably consistent with other memory.
+ *
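+ * For example, a statistics counter (a sketch; |sPageLoads| is a
+ * hypothetical variable):
+ *
+ *   static mozilla::Atomic<uint32_t, mozilla::Relaxed> sPageLoads;
+ *   void notePageLoad() { sPageLoads++; }  // atomic, but orders nothing else
+ *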
+ * x86 architectures don't take advantage of the optimization
+ * opportunities that relaxed ordering permits. Thus it's possible
+ * that using relaxed ordering will "work" on x86 but fail elsewhere
+ * (ARM, say, which *does* implement non-sequentially-consistent
+ * relaxed ordering semantics). Be extra-careful using relaxed
+ * ordering if you can't easily test non-x86 architectures!
+ */
+ Relaxed,
+ /*
+ * When an atomic value is updated with ReleaseAcquire ordering, and
+ * that new value is observed with ReleaseAcquire ordering, prior
+ * writes (atomic or not) are also observable. What ReleaseAcquire
+ * *doesn't* give you is any observable ordering guarantees for
+ * ReleaseAcquire-ordered operations on different objects. For
+ * example, if there are two cores that each perform ReleaseAcquire
+ * operations on separate objects, each core may or may not observe
+ * the operations made by the other core. The only way the cores can
+ * be synchronized with ReleaseAcquire is if they both
+ * ReleaseAcquire-access the same object. This implies that you can't
+ * necessarily describe some global total ordering of ReleaseAcquire
+ * operations.
+ *
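+ * For example (a sketch; |sData| and |sReady| are hypothetical):
+ *
+ *   static int32_t sData = 0;
+ *   static mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> sReady(0);
+ *
+ *   // Writer thread:           // Reader thread:
+ *   sData = 42;                 if (sReady)
+ *   sReady = 1;                   MOZ_ASSERT(sData == 42);
+ *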
+ * ReleaseAcquire ordering is good for (as the name implies) atomic
+ * operations on values controlling ownership of things: reference
+ * counts, mutexes, and the like. However, if you are thinking about
+ * using these to implement your own locks or mutexes, you should take
+ * a good, hard look at actual lock or mutex primitives first.
+ */
+ ReleaseAcquire,
+ /*
+ * When an atomic value is updated with SequentiallyConsistent
+ * ordering, all prior writes are observable when the update is
+ * observed, just as with ReleaseAcquire ordering. Furthermore, a
+ * global total
+ * ordering of SequentiallyConsistent operations *can* be described.
+ * For example, if two cores perform SequentiallyConsistent operations
+ * on separate objects, one core will observably perform its update
+ * (and all previous operations will have completed), then the other
+ * core will observably perform its update (and all previous
+ * operations will have completed). (Although those previous
+ * operations aren't themselves ordered -- they could be intermixed,
+ * or ordered if they occur on atomic values with ordering
+ * requirements.) SequentiallyConsistent is the *simplest and safest*
+ * ordering of atomic operations -- it's always as if one operation
+ * happens, then another, then another, in some order -- and every
+ * core observes updates to happen in that single order. Because it
+ * has the most synchronization requirements, operations ordered this
+ * way also tend to be slowest.
+ *
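+ * For example, the classic store/load test (a sketch; |sX| and |sY| are
+ * hypothetical variables):
+ *
+ *   static mozilla::Atomic<uint32_t> sX(0), sY(0);
+ *
+ *   // Thread 1:                // Thread 2:
+ *   sX = 1;                     sY = 1;
+ *   uint32_t y = sY;            uint32_t x = sX;
+ *
+ * With SequentiallyConsistent ordering, |x| and |y| cannot both be 0
+ * afterward; with the weaker orderings above, they could be.
+ *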
+ * SequentiallyConsistent ordering can be desirable when multiple
+ * threads observe objects, and they all have to agree on the
+ * observable order of changes to them. People expect
+ * SequentiallyConsistent ordering, even if they shouldn't, when
+ * writing code, atomic or otherwise. SequentiallyConsistent is also
+ * the ordering of choice when designing lockless data structures. If
+ * you don't know what order to use, use this one.
+ */
+ SequentiallyConsistent,
+};
+
+} // namespace mozilla
+
+// Build up the underlying intrinsics.
+#ifdef MOZ_HAVE_CXX11_ATOMICS
+
+# include <atomic>
+
+namespace mozilla {
+namespace detail {
+
+template<MemoryOrdering Order> struct AtomicOrderConstraints;
+
+template<>
+struct AtomicOrderConstraints<Relaxed>
+{
+ static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
+ static const std::memory_order LoadOrder = std::memory_order_relaxed;
+ static const std::memory_order StoreOrder = std::memory_order_relaxed;
+};
+
+template<>
+struct AtomicOrderConstraints<ReleaseAcquire>
+{
+ static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
+ static const std::memory_order LoadOrder = std::memory_order_acquire;
+ static const std::memory_order StoreOrder = std::memory_order_release;
+};
+
+template<>
+struct AtomicOrderConstraints<SequentiallyConsistent>
+{
+ static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
+ static const std::memory_order LoadOrder = std::memory_order_seq_cst;
+ static const std::memory_order StoreOrder = std::memory_order_seq_cst;
+};
+
+template<typename T, MemoryOrdering Order>
+struct IntrinsicBase
+{
+ typedef std::atomic<T> ValueType;
+ typedef AtomicOrderConstraints<Order> OrderedOp;
+};
+
+template<typename T, MemoryOrdering Order>
+struct IntrinsicMemoryOps : public IntrinsicBase<T, Order>
+{
+ typedef IntrinsicBase<T, Order> Base;
+ static T load(const typename Base::ValueType& ptr) {
+ return ptr.load(Base::OrderedOp::LoadOrder);
+ }
+ static void store(typename Base::ValueType& ptr, T val) {
+ ptr.store(val, Base::OrderedOp::StoreOrder);
+ }
+ static T exchange(typename Base::ValueType& ptr, T val) {
+ return ptr.exchange(val, Base::OrderedOp::AtomicRMWOrder);
+ }
+ static bool compareExchange(typename Base::ValueType& ptr, T oldVal, T newVal) {
+ return ptr.compare_exchange_strong(oldVal, newVal, Base::OrderedOp::AtomicRMWOrder);
+ }
+};
+
+template<typename T, MemoryOrdering Order>
+struct IntrinsicAddSub : public IntrinsicBase<T, Order>
+{
+ typedef IntrinsicBase<T, Order> Base;
+ static T add(typename Base::ValueType& ptr, T val) {
+ return ptr.fetch_add(val, Base::OrderedOp::AtomicRMWOrder);
+ }
+ static T sub(typename Base::ValueType& ptr, T val) {
+ return ptr.fetch_sub(val, Base::OrderedOp::AtomicRMWOrder);
+ }
+};
+
+template<typename T, MemoryOrdering Order>
+struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order>
+{
+ typedef IntrinsicBase<T*, Order> Base;
+ static T* add(typename Base::ValueType& ptr, ptrdiff_t val) {
+ return ptr.fetch_add(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
+ }
+ static T* sub(typename Base::ValueType& ptr, ptrdiff_t val) {
+ return ptr.fetch_sub(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
+ }
+ private:
+ /*
+ * GCC 4.6's <atomic> header has a bug where adding X to an
+ * atomic<T*> is not the same as adding X to a T*. Hence the need
+ * for this function to provide the correct addend.
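+ * (Without the fixup, on affected GCC versions something like
+ * |Atomic<int*> p; p += 1;| would advance the pointer by one byte
+ * rather than by sizeof(int) bytes.)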
+ */
+ static ptrdiff_t fixupAddend(ptrdiff_t val) {
+#if defined(__clang__) || defined(_MSC_VER)
+ return val;
+#elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
+ !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
+ return val * sizeof(T);
+#else
+ return val;
+#endif
+ }
+};
+
+template<typename T, MemoryOrdering Order>
+struct IntrinsicIncDec : public IntrinsicAddSub<T, Order>
+{
+ typedef IntrinsicBase<T, Order> Base;
+ static T inc(typename Base::ValueType& ptr) {
+ return IntrinsicAddSub<T, Order>::add(ptr, 1);
+ }
+ static T dec(typename Base::ValueType& ptr) {
+ return IntrinsicAddSub<T, Order>::sub(ptr, 1);
+ }
+};
+
+template<typename T, MemoryOrdering Order>
+struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
+ public IntrinsicIncDec<T, Order>
+{
+ typedef IntrinsicBase<T, Order> Base;
+ static T or_(typename Base::ValueType& ptr, T val) {
+ return ptr.fetch_or(val, Base::OrderedOp::AtomicRMWOrder);
+ }
+ static T xor_(typename Base::ValueType& ptr, T val) {
+ return ptr.fetch_xor(val, Base::OrderedOp::AtomicRMWOrder);
+ }
+ static T and_(typename Base::ValueType& ptr, T val) {
+ return ptr.fetch_and(val, Base::OrderedOp::AtomicRMWOrder);
+ }
+};
+
+template<typename T, MemoryOrdering Order>
+struct AtomicIntrinsics<T*, Order>
+ : public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order>
+{
+};
+
+} // namespace detail
+} // namespace mozilla
+
+#elif defined(__GNUC__)
+
+namespace mozilla {
+namespace detail {
+
+/*
+ * The __sync_* family of intrinsics is documented here:
+ *
+ * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
+ *
+ * While these intrinsics are deprecated in favor of the newer __atomic_*
+ * family of intrinsics:
+ *
+ * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
+ *
+ * any GCC version that supports the __atomic_* intrinsics will also support
+ * the <atomic> header and so will be handled above. We provide a version of
+ * atomics using the __sync_* intrinsics to support older versions of GCC.
+ *
+ * All __sync_* intrinsics that we use below act as full memory barriers, for
+ * both compiler and hardware reordering, except for __sync_lock_test_and_set,
+ * which is only an acquire barrier. When we call __sync_lock_test_and_set,
+ * we add a barrier above it as appropriate.
+ */
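+
+/*
+ * For example, with SequentiallyConsistent ordering, exchange() below
+ * expands to roughly (a sketch):
+ *
+ *   __sync_synchronize();                          // full barrier
+ *   oldVal = __sync_lock_test_and_set(&ptr, val);  // swap; acquire barrier
+ */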
+
+template<MemoryOrdering Order> struct Barrier;
+
+/*
+ * Some processors (in particular, x86) don't require quite so many calls to
+ * __sync_synchronize as our specializations of Barrier produce. If
+ * performance turns out to be an issue, defining these specializations
+ * on a per-processor basis would be a good first tuning step.
+ */
+
+template<>
+struct Barrier<Relaxed>
+{
+ static void beforeLoad() {}
+ static void afterLoad() {}
+ static void beforeStore() {}
+ static void afterStore() {}
+};
+
+template<>
+struct Barrier<ReleaseAcquire>
+{
+ static void beforeLoad() {}
+ static void afterLoad() { __sync_synchronize(); }
+ static void beforeStore() { __sync_synchronize(); }
+ static void afterStore() {}
+};
+
+template<>
+struct Barrier<SequentiallyConsistent>
+{
+ static void beforeLoad() { __sync_synchronize(); }
+ static void afterLoad() { __sync_synchronize(); }
+ static void beforeStore() { __sync_synchronize(); }
+ static void afterStore() { __sync_synchronize(); }
+};
+
+template<typename T, MemoryOrdering Order>
+struct IntrinsicMemoryOps
+{
+ static T load(const T& ptr) {
+ Barrier<Order>::beforeLoad();
+ T val = ptr;
+ Barrier<Order>::afterLoad();
+ return val;
+ }
+ static void store(T& ptr, T val) {
+ Barrier<Order>::beforeStore();
+ ptr = val;
+ Barrier<Order>::afterStore();
+ }
+ static T exchange(T& ptr, T val) {
+ // __sync_lock_test_and_set is only an acquire barrier; loads and stores
+ // can't be moved up from after to before it, but they can be moved down
+ // from before to after it. We may want a stricter ordering, so we need
+ // an explicit barrier.
+
+ Barrier<Order>::beforeStore();
+ return __sync_lock_test_and_set(&ptr, val);
+ }
+ static bool compareExchange(T& ptr, T oldVal, T newVal) {
+ return __sync_bool_compare_and_swap(&ptr, oldVal, newVal);
+ }
+};
+
+template<typename T>
+struct IntrinsicAddSub
+{
+ typedef T ValueType;
+ static T add(T& ptr, T val) {
+ return __sync_fetch_and_add(&ptr, val);
+ }
+ static T sub(T& ptr, T val) {
+ return __sync_fetch_and_sub(&ptr, val);
+ }
+};
+
+template<typename T>
+struct IntrinsicAddSub<T*>
+{
+ typedef T* ValueType;
+ /*
+ * The reinterpret_casts are needed so that
+ * __sync_fetch_and_{add,sub} will properly type-check.
+ *
+ * Also, these functions do not provide standard semantics for
+ * pointer types, so we need to adjust the addend.
+ */
+ static ValueType add(ValueType& ptr, ptrdiff_t val) {
+ ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T));
+ return __sync_fetch_and_add(&ptr, amount);
+ }
+ static ValueType sub(ValueType& ptr, ptrdiff_t val) {
+ ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T));
+ return __sync_fetch_and_sub(&ptr, amount);
+ }
+};
+
+template<typename T>
+struct IntrinsicIncDec : public IntrinsicAddSub<T>
+{
+ static T inc(T& ptr) { return IntrinsicAddSub<T>::add(ptr, 1); }
+ static T dec(T& ptr) { return IntrinsicAddSub<T>::sub(ptr, 1); }
+};
+
+template<typename T, MemoryOrdering Order>
+struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
+ public IntrinsicIncDec<T>
+{
+ static T or_(T& ptr, T val) {
+ return __sync_fetch_and_or(&ptr, val);
+ }
+ static T xor_(T& ptr, T val) {
+ return __sync_fetch_and_xor(&ptr, val);
+ }
+ static T and_(T& ptr, T val) {
+ return __sync_fetch_and_and(&ptr, val);
+ }
+};
+
+template<typename T, MemoryOrdering Order>
+struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
+ public IntrinsicIncDec<T*>
+{
+};
+
+} // namespace detail
+} // namespace mozilla
+
+#elif defined(_MSC_VER)
+
+/*
+ * Windows comes with a full complement of atomic operations.
+ * Unfortunately, most of those aren't available for Windows XP (even if
+ * the compiler supports intrinsics for them), which is the oldest
+ * version of Windows we support. Therefore, we only provide operations
+ * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
+ * versions, we support 64-bit datatypes as well.
+ *
+ * To avoid namespace pollution issues, we declare whatever functions we
+ * need ourselves.
+ */
+
+extern "C" {
+long __cdecl _InterlockedExchangeAdd(long volatile* dst, long value);
+long __cdecl _InterlockedOr(long volatile* dst, long value);
+long __cdecl _InterlockedXor(long volatile* dst, long value);
+long __cdecl _InterlockedAnd(long volatile* dst, long value);
+long __cdecl _InterlockedExchange(long volatile* dst, long value);
+long __cdecl _InterlockedCompareExchange(long volatile* dst, long newVal, long oldVal);
+}
+
+# pragma intrinsic(_InterlockedExchangeAdd)
+# pragma intrinsic(_InterlockedOr)
+# pragma intrinsic(_InterlockedXor)
+# pragma intrinsic(_InterlockedAnd)
+# pragma intrinsic(_InterlockedExchange)
+# pragma intrinsic(_InterlockedCompareExchange)
+
+namespace mozilla {
+namespace detail {
+
+# if !defined(_M_IX86) && !defined(_M_X64)
+ /*
+ * The implementations below are optimized for x86ish systems. You
+ * will have to modify them if you are porting to Windows on a
+ * different architecture.
+ */
+# error "Unknown CPU type"
+# endif
+
+/*
+ * The PrimitiveIntrinsics template should define |Type|, the datatype of size
+ * DataSize upon which we operate, and the following eight functions.
+ *
+ * static Type add(Type* ptr, Type val);
+ * static Type sub(Type* ptr, Type val);
+ * static Type or_(Type* ptr, Type val);
+ * static Type xor_(Type* ptr, Type val);
+ * static Type and_(Type* ptr, Type val);
+ *
+ * These functions perform the obvious operation on the value contained in
+ * |*ptr| combined with |val| and return the value previously stored in
+ * |*ptr|.
+ *
+ * static void store(Type* ptr, Type val);
+ *
+ * This function atomically stores |val| into |*ptr| and must provide a full
+ * memory fence after the store to prevent compiler and hardware instruction
+ * reordering. It should also act as a compiler barrier to prevent reads and
+ * writes from moving to after the store.
+ *
+ * static Type exchange(Type* ptr, Type val);
+ *
+ * This function atomically stores |val| into |*ptr| and returns the previous
+ * contents of |*ptr|.
+ *
+ * static bool compareExchange(Type* ptr, Type oldVal, Type newVal);
+ *
+ * This function atomically performs the following operation:
+ *
+ * if (*ptr == oldVal) {
+ * *ptr = newVal;
+ * return true;
+ * } else {
+ * return false;
+ * }
+ *
+ */
+template<size_t DataSize> struct PrimitiveIntrinsics;
+
+template<>
+struct PrimitiveIntrinsics<4>
+{
+ typedef long Type;
+
+ static Type add(Type* ptr, Type val) {
+ return _InterlockedExchangeAdd(ptr, val);
+ }
+ static Type sub(Type* ptr, Type val) {
+ /*
+ * _InterlockedExchangeSubtract isn't available before Windows 7,
+ * and we must support Windows XP.
+ */
+ return _InterlockedExchangeAdd(ptr, -val);
+ }
+ static Type or_(Type* ptr, Type val) {
+ return _InterlockedOr(ptr, val);
+ }
+ static Type xor_(Type* ptr, Type val) {
+ return _InterlockedXor(ptr, val);
+ }
+ static Type and_(Type* ptr, Type val) {
+ return _InterlockedAnd(ptr, val);
+ }
+ static void store(Type* ptr, Type val) {
+ _InterlockedExchange(ptr, val);
+ }
+ static Type exchange(Type* ptr, Type val) {
+ return _InterlockedExchange(ptr, val);
+ }
+ static bool compareExchange(Type* ptr, Type oldVal, Type newVal) {
+ return _InterlockedCompareExchange(ptr, newVal, oldVal) == oldVal;
+ }
+};
+
+# if defined(_M_X64)
+
+extern "C" {
+long long __cdecl _InterlockedExchangeAdd64(long long volatile* dst,
+ long long value);
+long long __cdecl _InterlockedOr64(long long volatile* dst,
+ long long value);
+long long __cdecl _InterlockedXor64(long long volatile* dst,
+ long long value);
+long long __cdecl _InterlockedAnd64(long long volatile* dst,
+ long long value);
+long long __cdecl _InterlockedExchange64(long long volatile* dst,
+ long long value);
+long long __cdecl _InterlockedCompareExchange64(long long volatile* dst,
+ long long newVal,
+ long long oldVal);
+}
+
+# pragma intrinsic(_InterlockedExchangeAdd64)
+# pragma intrinsic(_InterlockedOr64)
+# pragma intrinsic(_InterlockedXor64)
+# pragma intrinsic(_InterlockedAnd64)
+# pragma intrinsic(_InterlockedExchange64)
+# pragma intrinsic(_InterlockedCompareExchange64)
+
+template <>
+struct PrimitiveIntrinsics<8>
+{
+ typedef __int64 Type;
+
+ static Type add(Type* ptr, Type val) {
+ return _InterlockedExchangeAdd64(ptr, val);
+ }
+ static Type sub(Type* ptr, Type val) {
+ /*
+ * There is no _InterlockedExchangeSubtract64.
+ */
+ return _InterlockedExchangeAdd64(ptr, -val);
+ }
+ static Type or_(Type* ptr, Type val) {
+ return _InterlockedOr64(ptr, val);
+ }
+ static Type xor_(Type* ptr, Type val) {
+ return _InterlockedXor64(ptr, val);
+ }
+ static Type and_(Type* ptr, Type val) {
+ return _InterlockedAnd64(ptr, val);
+ }
+ static void store(Type* ptr, Type val) {
+ _InterlockedExchange64(ptr, val);
+ }
+ static Type exchange(Type* ptr, Type val) {
+ return _InterlockedExchange64(ptr, val);
+ }
+ static bool compareExchange(Type* ptr, Type oldVal, Type newVal) {
+ return _InterlockedCompareExchange64(ptr, newVal, oldVal) == oldVal;
+ }
+};
+
+# endif
+
+extern "C" { void _ReadWriteBarrier(); }
+
+# pragma intrinsic(_ReadWriteBarrier)
+
+template<MemoryOrdering Order> struct Barrier;
+
+/*
+ * We do not provide an afterStore method in Barrier, as Relaxed and
+ * ReleaseAcquire orderings do not require one, and the required barrier
+ * for SequentiallyConsistent is handled by PrimitiveIntrinsics.
+ */
+
+template<>
+struct Barrier<Relaxed>
+{
+ static void beforeLoad() {}
+ static void afterLoad() {}
+ static void beforeStore() {}
+};
+
+template<>
+struct Barrier<ReleaseAcquire>
+{
+ static void beforeLoad() {}
+ static void afterLoad() { _ReadWriteBarrier(); }
+ static void beforeStore() { _ReadWriteBarrier(); }
+};
+
+template<>
+struct Barrier<SequentiallyConsistent>
+{
+ static void beforeLoad() { _ReadWriteBarrier(); }
+ static void afterLoad() { _ReadWriteBarrier(); }
+ static void beforeStore() { _ReadWriteBarrier(); }
+};
+
+template<typename PrimType, typename T>
+struct CastHelper
+{
+ static PrimType toPrimType(T val) { return static_cast<PrimType>(val); }
+ static T fromPrimType(PrimType val) { return static_cast<T>(val); }
+};
+
+template<typename PrimType, typename T>
+struct CastHelper<PrimType, T*>
+{
+ static PrimType toPrimType(T* val) { return reinterpret_cast<PrimType>(val); }
+ static T* fromPrimType(PrimType val) { return reinterpret_cast<T*>(val); }
+};
+
+template<typename T>
+struct IntrinsicBase
+{
+ typedef T ValueType;
+ typedef PrimitiveIntrinsics<sizeof(T)> Primitives;
+ typedef typename Primitives::Type PrimType;
+ MOZ_STATIC_ASSERT(sizeof(PrimType) == sizeof(T),
+ "Selection of PrimitiveIntrinsics was wrong");
+ typedef CastHelper<PrimType, T> Cast;
+};
+
+template<typename T, MemoryOrdering Order>
+struct IntrinsicMemoryOps : public IntrinsicBase<T>
+{
+ static ValueType load(const ValueType& ptr) {
+ Barrier<Order>::beforeLoad();
+ ValueType val = ptr;
+ Barrier<Order>::afterLoad();
+ return val;
+ }
+ static void store(ValueType& ptr, ValueType val) {
+ // For SequentiallyConsistent, Primitives::store() will generate the
+ // proper memory fence. Everything else just needs a barrier before
+ // the store.
+ if (Order == SequentiallyConsistent) {
+ Primitives::store(reinterpret_cast<PrimType*>(&ptr),
+ Cast::toPrimType(val));
+ } else {
+ Barrier<Order>::beforeStore();
+ ptr = val;
+ }
+ }
+ static ValueType exchange(ValueType& ptr, ValueType val) {
+ PrimType oldval =
+ Primitives::exchange(reinterpret_cast<PrimType*>(&ptr),
+ Cast::toPrimType(val));
+ return Cast::fromPrimType(oldval);
+ }
+ static bool compareExchange(ValueType& ptr, ValueType oldVal, ValueType newVal) {
+ return Primitives::compareExchange(reinterpret_cast<PrimType*>(&ptr),
+ Cast::toPrimType(oldVal),
+ Cast::toPrimType(newVal));
+ }
+};
+
+template<typename T>
+struct IntrinsicApplyHelper : public IntrinsicBase<T>
+{
+ typedef PrimType (*BinaryOp)(PrimType*, PrimType);
+ typedef PrimType (*UnaryOp)(PrimType*);
+
+ static ValueType applyBinaryFunction(BinaryOp op, ValueType& ptr,
+ ValueType val) {
+ PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
+ PrimType primTypeVal = Cast::toPrimType(val);
+ return Cast::fromPrimType(op(primTypePtr, primTypeVal));
+ }
+
+ static ValueType applyUnaryFunction(UnaryOp op, ValueType& ptr) {
+ PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
+ return Cast::fromPrimType(op(primTypePtr));
+ }
+};
+
+template<typename T>
+struct IntrinsicAddSub : public IntrinsicApplyHelper<T>
+{
+ static ValueType add(ValueType& ptr, ValueType val) {
+ return applyBinaryFunction(&Primitives::add, ptr, val);
+ }
+ static ValueType sub(ValueType& ptr, ValueType val) {
+ return applyBinaryFunction(&Primitives::sub, ptr, val);
+ }
+};
+
+template<typename T>
+struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
+{
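+ /*
+ * The addend must be scaled by the size of the pointed-to type, as in
+ * ordinary pointer arithmetic; Primitives operates on raw pointer-sized
+ * values rather than on typed pointers.
+ */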
+ static ValueType add(ValueType& ptr, ptrdiff_t amount) {
+ return applyBinaryFunction(&Primitives::add, ptr,
+ (ValueType)(amount * sizeof(T)));
+ }
+ static ValueType sub(ValueType& ptr, ptrdiff_t amount) {
+ return applyBinaryFunction(&Primitives::sub, ptr,
+ (ValueType)(amount * sizeof(T)));
+ }
+};
+
+template<typename T>
+struct IntrinsicIncDec : public IntrinsicAddSub<T>
+{
+ static ValueType inc(ValueType& ptr) { return add(ptr, 1); }
+ static ValueType dec(ValueType& ptr) { return sub(ptr, 1); }
+};
+
+template<typename T, MemoryOrdering Order>
+struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
+ public IntrinsicIncDec<T>
+{
+ static ValueType or_(ValueType& ptr, T val) {
+ return applyBinaryFunction(&Primitives::or_, ptr, val);
+ }
+ static ValueType xor_(ValueType& ptr, T val) {
+ return applyBinaryFunction(&Primitives::xor_, ptr, val);
+ }
+ static ValueType and_(ValueType& ptr, T val) {
+ return applyBinaryFunction(&Primitives::and_, ptr, val);
+ }
+};
+
+template<typename T, MemoryOrdering Order>
+struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
+ public IntrinsicIncDec<T*>
+{
+};
+
+} // namespace detail
+} // namespace mozilla
+
+#else
+# error "Atomic compiler intrinsics are not supported on your platform"
+#endif
+
+namespace mozilla {
+
+namespace detail {
+
+template<typename T, MemoryOrdering Order>
+class AtomicBase
+{
+ protected:
+ typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
+ typename Intrinsics::ValueType mValue;
+
+ public:
+ AtomicBase() : mValue() {}
+ AtomicBase(T aInit) { Intrinsics::store(mValue, aInit); }
+
+ T operator++(int) { return Intrinsics::inc(mValue); }
+ T operator--(int) { return Intrinsics::dec(mValue); }
+ T operator++() { return Intrinsics::inc(mValue) + 1; }
+ T operator--() { return Intrinsics::dec(mValue) - 1; }
+
+ operator T() const { return Intrinsics::load(mValue); }
+
+ /**
+ * Performs an atomic swap operation. aValue is stored and the previous
+ * value of this variable is returned.
+ */
+ T exchange(T aValue) {
+ return Intrinsics::exchange(mValue, aValue);
+ }
+ /**
+ * Performs an atomic compare-and-swap operation and returns true if it
+ * succeeded. This is equivalent to atomically doing
+ *
+ * if (mValue == aOldValue) {
+ * mValue = aNewValue;
+ * return true;
+ * } else {
+ * return false;
+ * }
+ */
+ bool compareExchange(T aOldValue, T aNewValue) {
+ return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
+ }
+
+ private:
+ template<MemoryOrdering AnyOrder>
+ AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) MOZ_DELETE;
+};
+
+} // namespace detail
+
+/**
+ * A wrapper for a type that enforces that all memory accesses are atomic.
+ *
+ * In general, where a variable |T foo| exists, |Atomic<T> foo| can be
+ * used in its place. In addition to atomic store and load operations,
+ * compound assignment and increment/decrement operators are implemented
+ * which perform the corresponding read-modify-write operation
+ * atomically. Finally, an atomic swap method is provided.
+ *
+ * Atomic accesses are sequentially consistent by default. You should
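+ * For example (a sketch; |sFlags| is a hypothetical variable):
+ *
+ *   static mozilla::Atomic<uint32_t> sFlags(0);
+ *   sFlags |= 0x1;                         // atomic read-modify-write
+ *   uint32_t old = sFlags.exchange(0);     // atomic swap
+ *   if (sFlags.compareExchange(0, 0x2)) {  // atomic compare-and-swap
+ *     // this thread set the flag
+ *   }
+ *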
+ * use the default unless you are tall enough to ride the
+ * memory-ordering roller coaster (if you're not sure, you aren't) and
+ * you have a compelling reason to do otherwise.
+ *
+ * There is one exception: providing an initial value for the atomic
+ * variable is not guaranteed to be an atomic operation. This is a
+ * deliberate design choice that enables static atomic variables to be declared
+ * without introducing extra static constructors.
+ */
+template<typename T, MemoryOrdering Order = SequentiallyConsistent>
+class Atomic : public detail::AtomicBase<T, Order>
+{
+ // We only support 32-bit types on 32-bit Windows, which constrains our
+ // implementation elsewhere. But we support pointer-sized types everywhere.
+ MOZ_STATIC_ASSERT(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8),
+ "mozilla/Atomics.h only supports 32-bit and pointer-sized types");
+ // Regardless of the OS, we only support integral types here.
+ MOZ_STATIC_ASSERT(IsIntegral<T>::value, "can only have integral atomic variables");
+
+ typedef typename detail::AtomicBase<T, Order> Base;
+
+ public:
+ Atomic() : detail::AtomicBase<T, Order>() {}
+ Atomic(T aInit) : detail::AtomicBase<T, Order>(aInit) {}
+
+ T operator+=(T delta) { return Base::Intrinsics::add(Base::mValue, delta) + delta; }
+ T operator-=(T delta) { return Base::Intrinsics::sub(Base::mValue, delta) - delta; }
+ T operator|=(T val) { return Base::Intrinsics::or_(Base::mValue, val) | val; }
+ T operator^=(T val) { return Base::Intrinsics::xor_(Base::mValue, val) ^ val; }
+ T operator&=(T val) { return Base::Intrinsics::and_(Base::mValue, val) & val; }
+
+ T operator=(T aValue) {
+ Base::Intrinsics::store(Base::mValue, aValue);
+ return aValue;
+ }
+
+ private:
+ Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
+};
+
+/**
+ * A partial specialization of Atomic for pointer variables.
+ *
+ * Like Atomic<T>, Atomic<T*> is equivalent in most respects to a regular T*
+ * variable. An atomic compare-and-swap primitive for pointer variables is
+ * provided, as are atomic increment and decrement operators. Also provided
+ * are the compound assignment operators for addition and subtraction.
+ * Atomic swap (via exchange()) is included as well.
+ *
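+ * For example (a sketch; |sCursor| and |sBuffer| are hypothetical):
+ *
+ *   static char sBuffer[4096];
+ *   static mozilla::Atomic<char*> sCursor(sBuffer);
+ *
+ *   // Atomically claim the next 64 bytes; += returns the new value.
+ *   char* chunk = (sCursor += 64) - 64;
+ *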
+ * Atomic accesses are sequentially consistent by default. You should
+ * use the default unless you are tall enough to ride the
+ * memory-ordering roller coaster (if you're not sure, you aren't) and
+ * you have a compelling reason to do otherwise.
+ *
+ * There is one exception: providing an initial value for the atomic
+ * variable is not guaranteed to be an atomic operation. This is a
+ * deliberate design choice that enables static atomic variables to be declared
+ * without introducing extra static constructors.
+ */
+template<typename T, MemoryOrdering Order>
+class Atomic<T*, Order> : public detail::AtomicBase<T*, Order>
+{
+ typedef typename detail::AtomicBase<T*, Order> Base;
+
+ public:
+ Atomic() : detail::AtomicBase<T*, Order>() {}
+ Atomic(T* aInit) : detail::AtomicBase<T*, Order>(aInit) {}
+
+ T* operator +=(ptrdiff_t delta) {
+ return Base::Intrinsics::add(Base::mValue, delta) + delta;
+ }
+ T* operator -=(ptrdiff_t delta) {
+ return Base::Intrinsics::sub(Base::mValue, delta) - delta;
+ }
+
+ T* operator=(T* aValue) {
+ Base::Intrinsics::store(Base::mValue, aValue);
+ return aValue;
+ }
+
+ private:
+ Atomic(Atomic<T*, Order>& aOther) MOZ_DELETE;
+};
+
+} // namespace mozilla
+
+#endif /* mozilla_Atomics_h_ */