//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H #define LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H /* cxa_guard_impl.h - Implements the C++ runtime support for function local * static guards. * The layout of the guard object is the same across ARM and Itanium. * * The first "guard byte" (which is checked by the compiler) is set only upon * the completion of cxa release. * * The second "init byte" does the rest of the bookkeeping. It tracks if * initialization is complete or pending, and if there are waiting threads. * * If the guard variable is 64-bits and the platforms supplies a 32-bit thread * identifier, it is used to detect recursive initialization. The thread ID of * the thread currently performing initialization is stored in the second word. * * Guard Object Layout: * --------------------------------------------------------------------------- * | a+0: guard byte | a+1: init byte | a+2: unused ... | a+4: thread-id ... | * --------------------------------------------------------------------------- * * Note that we don't do what the ABI docs suggest (put a mutex in the guard * object which we acquire in cxa_guard_acquire and release in * cxa_guard_release). Instead we use the init byte to imitate that behaviour, * but without actually holding anything mutex related between aquire and * release/abort. * * Access Protocol: * For each implementation the guard byte is checked and set before accessing * the init byte. * * Overall Design: * The implementation was designed to allow each implementation to be tested * independent of the C++ runtime or platform support. * */ #include "__cxxabi_config.h" #include "include/atomic_support.h" // from libc++ #if defined(__has_include) # if __has_include() # include # endif # if __has_include() # include # endif #endif #include <__threading_support> #include #include #include #include #ifndef _LIBCXXABI_HAS_NO_THREADS # if defined(__ELF__) && defined(_LIBCXXABI_LINK_PTHREAD_LIB) # pragma comment(lib, "pthread") # endif #endif #if defined(__clang__) # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wtautological-pointer-compare" #elif defined(__GNUC__) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Waddress" #endif // To make testing possible, this header is included from both cxa_guard.cpp // and a number of tests. // // For this reason we place everything in an anonymous namespace -- even though // we're in a header. We want the actual implementation and the tests to have // unique definitions of the types in this header (since the tests may depend // on function local statics). // // To enforce this either `BUILDING_CXA_GUARD` or `TESTING_CXA_GUARD` must be // defined when including this file. Only `src/cxa_guard.cpp` should define // the former. #ifdef BUILDING_CXA_GUARD # include "abort_message.h" # define ABORT_WITH_MESSAGE(...) ::abort_message(__VA_ARGS__) #elif defined(TESTING_CXA_GUARD) # define ABORT_WITH_MESSAGE(...) ::abort() #else # error "Either BUILDING_CXA_GUARD or TESTING_CXA_GUARD must be defined" #endif #if __has_feature(thread_sanitizer) extern "C" void __tsan_acquire(void*); extern "C" void __tsan_release(void*); #else # define __tsan_acquire(addr) ((void)0) # define __tsan_release(addr) ((void)0) #endif namespace __cxxabiv1 { // Use an anonymous namespace to ensure that the tests and actual implementation // have unique definitions of these symbols. namespace { //===----------------------------------------------------------------------===// // Misc Utilities //===----------------------------------------------------------------------===// template struct LazyValue { LazyValue() : is_init(false) {} T& get() { if (!is_init) { value = Init(); is_init = true; } return value; } private: T value; bool is_init = false; }; template class AtomicInt { public: using MemoryOrder = std::__libcpp_atomic_order; explicit AtomicInt(IntType* b) : b_(b) {} AtomicInt(AtomicInt const&) = delete; AtomicInt& operator=(AtomicInt const&) = delete; IntType load(MemoryOrder ord) { return std::__libcpp_atomic_load(b_, ord); } void store(IntType val, MemoryOrder ord) { std::__libcpp_atomic_store(b_, val, ord); } IntType exchange(IntType new_val, MemoryOrder ord) { return std::__libcpp_atomic_exchange(b_, new_val, ord); } bool compare_exchange(IntType* expected, IntType desired, MemoryOrder ord_success, MemoryOrder ord_failure) { return std::__libcpp_atomic_compare_exchange(b_, expected, desired, ord_success, ord_failure); } private: IntType* b_; }; //===----------------------------------------------------------------------===// // PlatformGetThreadID //===----------------------------------------------------------------------===// #if defined(__APPLE__) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD) uint32_t PlatformThreadID() { static_assert(sizeof(mach_port_t) == sizeof(uint32_t), ""); return static_cast(pthread_mach_thread_np(std::__libcpp_thread_get_current_id())); } #elif defined(SYS_gettid) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD) uint32_t PlatformThreadID() { static_assert(sizeof(pid_t) == sizeof(uint32_t), ""); return static_cast(syscall(SYS_gettid)); } #else constexpr uint32_t (*PlatformThreadID)() = nullptr; #endif //===----------------------------------------------------------------------===// // GuardByte //===----------------------------------------------------------------------===// static constexpr uint8_t UNSET = 0; static constexpr uint8_t COMPLETE_BIT = (1 << 0); static constexpr uint8_t PENDING_BIT = (1 << 1); static constexpr uint8_t WAITING_BIT = (1 << 2); /// Manages reads and writes to the guard byte. struct GuardByte { GuardByte() = delete; GuardByte(GuardByte const&) = delete; GuardByte& operator=(GuardByte const&) = delete; explicit GuardByte(uint8_t* const guard_byte_address) : guard_byte(guard_byte_address) {} public: /// The guard byte portion of cxa_guard_acquire. Returns true if /// initialization has already been completed. bool acquire() { // if guard_byte is non-zero, we have already completed initialization // (i.e. release has been called) return guard_byte.load(std::_AO_Acquire) != UNSET; } /// The guard byte portion of cxa_guard_release. void release() { guard_byte.store(COMPLETE_BIT, std::_AO_Release); } /// The guard byte portion of cxa_guard_abort. void abort() {} // Nothing to do private: AtomicInt guard_byte; }; //===----------------------------------------------------------------------===// // InitByte Implementations //===----------------------------------------------------------------------===// // // Each initialization byte implementation supports the following methods: // // InitByte(uint8_t* _init_byte_address, uint32_t* _thread_id_address) // Construct the InitByte object, initializing our member variables // // bool acquire() // Called before we start the initialization. Check if someone else has already started, and if // not to signal our intent to start it ourselves. We determine the current status from the init // byte, which is one of 4 possible values: // COMPLETE: Initialization was finished by somebody else. Return true. // PENDING: Somebody has started the initialization already, set the WAITING bit, // then wait for the init byte to get updated with a new value. // (PENDING|WAITING): Somebody has started the initialization already, and we're not the // first one waiting. Wait for the init byte to get updated. // UNSET: Initialization hasn't successfully completed, and nobody is currently // performing the initialization. Set the PENDING bit to indicate our // intention to start the initialization, and return false. // The return value indicates whether initialization has already been completed. // // void release() // Called after successfully completing the initialization. Update the init byte to reflect // that, then if anybody else is waiting, wake them up. // // void abort() // Called after an error is thrown during the initialization. Reset the init byte to UNSET to // indicate that we're no longer performing the initialization, then if anybody is waiting, wake // them up so they can try performing the initialization. // //===----------------------------------------------------------------------===// // Single Threaded Implementation //===----------------------------------------------------------------------===// /// InitByteNoThreads - Doesn't use any inter-thread synchronization when /// managing reads and writes to the init byte. struct InitByteNoThreads { InitByteNoThreads() = delete; InitByteNoThreads(InitByteNoThreads const&) = delete; InitByteNoThreads& operator=(InitByteNoThreads const&) = delete; explicit InitByteNoThreads(uint8_t* _init_byte_address, uint32_t*) : init_byte_address(_init_byte_address) {} /// The init byte portion of cxa_guard_acquire. Returns true if /// initialization has already been completed. bool acquire() { if (*init_byte_address == COMPLETE_BIT) return true; if (*init_byte_address & PENDING_BIT) ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization: do you have a function-local static variable whose initialization depends on that function?"); *init_byte_address = PENDING_BIT; return false; } /// The init byte portion of cxa_guard_release. void release() { *init_byte_address = COMPLETE_BIT; } /// The init byte portion of cxa_guard_abort. void abort() { *init_byte_address = UNSET; } private: /// The address of the byte used during initialization. uint8_t* const init_byte_address; }; //===----------------------------------------------------------------------===// // Global Mutex Implementation //===----------------------------------------------------------------------===// struct LibcppMutex; struct LibcppCondVar; #ifndef _LIBCXXABI_HAS_NO_THREADS struct LibcppMutex { LibcppMutex() = default; LibcppMutex(LibcppMutex const&) = delete; LibcppMutex& operator=(LibcppMutex const&) = delete; bool lock() { return std::__libcpp_mutex_lock(&mutex); } bool unlock() { return std::__libcpp_mutex_unlock(&mutex); } private: friend struct LibcppCondVar; std::__libcpp_mutex_t mutex = _LIBCPP_MUTEX_INITIALIZER; }; struct LibcppCondVar { LibcppCondVar() = default; LibcppCondVar(LibcppCondVar const&) = delete; LibcppCondVar& operator=(LibcppCondVar const&) = delete; bool wait(LibcppMutex& mut) { return std::__libcpp_condvar_wait(&cond, &mut.mutex); } bool broadcast() { return std::__libcpp_condvar_broadcast(&cond); } private: std::__libcpp_condvar_t cond = _LIBCPP_CONDVAR_INITIALIZER; }; #else struct LibcppMutex {}; struct LibcppCondVar {}; #endif // !defined(_LIBCXXABI_HAS_NO_THREADS) /// InitByteGlobalMutex - Uses a global mutex and condition variable (common to /// all static local variables) to manage reads and writes to the init byte. template struct InitByteGlobalMutex { explicit InitByteGlobalMutex(uint8_t* _init_byte_address, uint32_t* _thread_id_address) : init_byte_address(_init_byte_address), thread_id_address(_thread_id_address), has_thread_id_support(_thread_id_address != nullptr && GetThreadID != nullptr) {} public: /// The init byte portion of cxa_guard_acquire. Returns true if /// initialization has already been completed. bool acquire() { LockGuard g("__cxa_guard_acquire"); // Check for possible recursive initialization. if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) { if (*thread_id_address == current_thread_id.get()) ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization: do you have a function-local static variable whose initialization depends on that function?"); } // Wait until the pending bit is not set. while (*init_byte_address & PENDING_BIT) { *init_byte_address |= WAITING_BIT; global_cond.wait(global_mutex); } if (*init_byte_address == COMPLETE_BIT) return true; if (has_thread_id_support) *thread_id_address = current_thread_id.get(); *init_byte_address = PENDING_BIT; return false; } /// The init byte portion of cxa_guard_release. void release() { bool has_waiting; { LockGuard g("__cxa_guard_release"); has_waiting = *init_byte_address & WAITING_BIT; *init_byte_address = COMPLETE_BIT; } if (has_waiting) { if (global_cond.broadcast()) { ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_release"); } } } /// The init byte portion of cxa_guard_abort. void abort() { bool has_waiting; { LockGuard g("__cxa_guard_abort"); if (has_thread_id_support) *thread_id_address = 0; has_waiting = *init_byte_address & WAITING_BIT; *init_byte_address = UNSET; } if (has_waiting) { if (global_cond.broadcast()) { ABORT_WITH_MESSAGE("%s failed to broadcast", "__cxa_guard_abort"); } } } private: /// The address of the byte used during initialization. uint8_t* const init_byte_address; /// An optional address storing an identifier for the thread performing initialization. /// It's used to detect recursive initialization. uint32_t* const thread_id_address; const bool has_thread_id_support; LazyValue current_thread_id; private: struct LockGuard { LockGuard() = delete; LockGuard(LockGuard const&) = delete; LockGuard& operator=(LockGuard const&) = delete; explicit LockGuard(const char* calling_func) : calling_func_(calling_func) { if (global_mutex.lock()) ABORT_WITH_MESSAGE("%s failed to acquire mutex", calling_func_); } ~LockGuard() { if (global_mutex.unlock()) ABORT_WITH_MESSAGE("%s failed to release mutex", calling_func_); } private: const char* const calling_func_; }; }; //===----------------------------------------------------------------------===// // Futex Implementation //===----------------------------------------------------------------------===// #if defined(SYS_futex) void PlatformFutexWait(int* addr, int expect) { constexpr int WAIT = 0; syscall(SYS_futex, addr, WAIT, expect, 0); __tsan_acquire(addr); } void PlatformFutexWake(int* addr) { constexpr int WAKE = 1; __tsan_release(addr); syscall(SYS_futex, addr, WAKE, INT_MAX); } #else constexpr void (*PlatformFutexWait)(int*, int) = nullptr; constexpr void (*PlatformFutexWake)(int*) = nullptr; #endif constexpr bool PlatformSupportsFutex() { return +PlatformFutexWait != nullptr; } /// InitByteFutex - Uses a futex to manage reads and writes to the init byte. template struct InitByteFutex { explicit InitByteFutex(uint8_t* _init_byte_address, uint32_t* _thread_id_address) : init_byte(_init_byte_address), has_thread_id_support(_thread_id_address != nullptr && GetThreadIDArg != nullptr), thread_id(_thread_id_address), base_address(reinterpret_cast(/*_init_byte_address & ~0x3*/ _init_byte_address - 1)) {} public: /// The init byte portion of cxa_guard_acquire. Returns true if /// initialization has already been completed. bool acquire() { while (true) { uint8_t last_val = UNSET; if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel, std::_AO_Acquire)) { if (has_thread_id_support) { thread_id.store(current_thread_id.get(), std::_AO_Relaxed); } return false; } if (last_val == COMPLETE_BIT) return true; if (last_val & PENDING_BIT) { // Check for recursive initialization if (has_thread_id_support && thread_id.load(std::_AO_Relaxed) == current_thread_id.get()) { ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization: do you have a function-local static variable whose initialization depends on that function?"); } if ((last_val & WAITING_BIT) == 0) { // This compare exchange can fail for several reasons // (1) another thread finished the whole thing before we got here // (2) another thread set the waiting bit we were trying to thread // (3) another thread had an exception and failed to finish if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT, std::_AO_Acq_Rel, std::_AO_Release)) { // (1) success, via someone else's work! if (last_val == COMPLETE_BIT) return true; // (3) someone else, bailed on doing the work, retry from the start! if (last_val == UNSET) continue; // (2) the waiting bit got set, so we are happy to keep waiting } } wait_on_initialization(); } } } /// The init byte portion of cxa_guard_release. void release() { uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel); if (old & WAITING_BIT) wake_all(); } /// The init byte portion of cxa_guard_abort. void abort() { if (has_thread_id_support) thread_id.store(0, std::_AO_Relaxed); uint8_t old = init_byte.exchange(UNSET, std::_AO_Acq_Rel); if (old & WAITING_BIT) wake_all(); } private: /// Use the futex to wait on the current guard variable. Futex expects a /// 32-bit 4-byte aligned address as the first argument, so we use the 4-byte /// aligned address that encompasses the init byte (i.e. the address of the /// raw guard object that was passed to __cxa_guard_acquire/release/abort). void wait_on_initialization() { Wait(base_address, expected_value_for_futex(PENDING_BIT | WAITING_BIT)); } void wake_all() { Wake(base_address); } private: AtomicInt init_byte; const bool has_thread_id_support; // Unsafe to use unless has_thread_id_support AtomicInt thread_id; LazyValue current_thread_id; /// the 4-byte-aligned address that encompasses the init byte (i.e. the /// address of the raw guard object). int* const base_address; /// Create the expected integer value for futex `wait(int* addr, int expected)`. /// We pass the base address as the first argument, So this function creates /// an zero-initialized integer with `b` copied at the correct offset. static int expected_value_for_futex(uint8_t b) { int dest_val = 0; std::memcpy(reinterpret_cast(&dest_val) + 1, &b, 1); return dest_val; } static_assert(Wait != nullptr && Wake != nullptr, ""); }; //===----------------------------------------------------------------------===// // GuardObject //===----------------------------------------------------------------------===// enum class AcquireResult { INIT_IS_DONE, INIT_IS_PENDING, }; constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE; constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING; /// Co-ordinates between GuardByte and InitByte. template struct GuardObject { GuardObject() = delete; GuardObject(GuardObject const&) = delete; GuardObject& operator=(GuardObject const&) = delete; private: GuardByte guard_byte; InitByteT init_byte; public: /// ARM Constructor explicit GuardObject(uint32_t* raw_guard_object) : guard_byte(reinterpret_cast(raw_guard_object)), init_byte(reinterpret_cast(raw_guard_object) + 1, nullptr) {} /// Itanium Constructor explicit GuardObject(uint64_t* raw_guard_object) : guard_byte(reinterpret_cast(raw_guard_object)), init_byte(reinterpret_cast(raw_guard_object) + 1, reinterpret_cast(raw_guard_object) + 1) { } /// Implements __cxa_guard_acquire. AcquireResult cxa_guard_acquire() { // Use short-circuit evaluation to avoid calling init_byte.acquire when // guard_byte.acquire returns true. (i.e. don't call it when we know from // the guard byte that initialization has already been completed) if (guard_byte.acquire() || init_byte.acquire()) return INIT_IS_DONE; return INIT_IS_PENDING; } /// Implements __cxa_guard_release. void cxa_guard_release() { // Update guard byte first, so if somebody is woken up by init_byte.release // and comes all the way back around to __cxa_guard_acquire again, they see // it as having completed initialization. guard_byte.release(); init_byte.release(); } /// Implements __cxa_guard_abort. void cxa_guard_abort() { guard_byte.abort(); init_byte.abort(); } }; //===----------------------------------------------------------------------===// // Convenience Classes //===----------------------------------------------------------------------===// /// NoThreadsGuard - Manages initialization without performing any inter-thread /// synchronization. using NoThreadsGuard = GuardObject; /// GlobalMutexGuard - Manages initialization using a global mutex and /// condition variable. template using GlobalMutexGuard = GuardObject>; /// FutexGuard - Manages initialization using atomics and the futex syscall for /// waiting and waking. template using FutexGuard = GuardObject>; //===----------------------------------------------------------------------===// // //===----------------------------------------------------------------------===// template struct GlobalStatic { static T instance; }; template _LIBCPP_CONSTINIT T GlobalStatic::instance = {}; enum class Implementation { NoThreads, GlobalMutex, Futex }; template struct SelectImplementation; template <> struct SelectImplementation { using type = NoThreadsGuard; }; template <> struct SelectImplementation { using type = GlobalMutexGuard::instance, GlobalStatic::instance, PlatformThreadID>; }; template <> struct SelectImplementation { using type = FutexGuard; }; // TODO(EricWF): We should prefer the futex implementation when available. But // it should be done in a separate step from adding the implementation. constexpr Implementation CurrentImplementation = #if defined(_LIBCXXABI_HAS_NO_THREADS) Implementation::NoThreads; #elif defined(_LIBCXXABI_USE_FUTEX) Implementation::Futex; #else Implementation::GlobalMutex; #endif static_assert(CurrentImplementation != Implementation::Futex || PlatformSupportsFutex(), "Futex selected but not supported"); using SelectedImplementation = SelectImplementation::type; } // end namespace } // end namespace __cxxabiv1 #if defined(__clang__) # pragma clang diagnostic pop #elif defined(__GNUC__) # pragma GCC diagnostic pop #endif #endif // LIBCXXABI_SRC_INCLUDE_CXA_GUARD_IMPL_H