diff options
Diffstat (limited to 'deps/jemalloc/src/tsd.c')
-rw-r--r-- | deps/jemalloc/src/tsd.c | 243 |
1 files changed, 213 insertions, 30 deletions
diff --git a/deps/jemalloc/src/tsd.c b/deps/jemalloc/src/tsd.c index c1430682d..a31f6b969 100644 --- a/deps/jemalloc/src/tsd.c +++ b/deps/jemalloc/src/tsd.c @@ -12,12 +12,16 @@ static unsigned ncleanups; static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; +/* TSD_INITIALIZER triggers "-Wmissing-field-initializer" */ +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS + #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; -__thread bool JEMALLOC_TLS_MODEL tsd_initialized = false; +JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; +JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false; bool tsd_booted = false; #elif (defined(JEMALLOC_TLS)) -__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; +JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; pthread_key_t tsd_tsd; bool tsd_booted = false; #elif (defined(_WIN32)) @@ -41,6 +45,7 @@ tsd_init_head_t tsd_init_head = { ql_head_initializer(blocks), MALLOC_MUTEX_INITIALIZER }; + tsd_wrapper_t tsd_boot_wrapper = { false, TSD_INITIALIZER @@ -48,17 +53,164 @@ tsd_wrapper_t tsd_boot_wrapper = { bool tsd_booted = false; #endif +JEMALLOC_DIAGNOSTIC_POP + +/******************************************************************************/ + +/* A list of all the tsds in the nominal state. */ +typedef ql_head(tsd_t) tsd_list_t; +static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds); +static malloc_mutex_t tsd_nominal_tsds_lock; + +/* How many slow-path-enabling features are turned on. */ +static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0); + +static bool +tsd_in_nominal_list(tsd_t *tsd) { + tsd_t *tsd_list; + bool found = false; + /* + * We don't know that tsd is nominal; it might not be safe to get data + * out of it here. 
+ */ + malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock); + ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { + if (tsd == tsd_list) { + found = true; + break; + } + } + malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock); + return found; +} + +static void +tsd_add_nominal(tsd_t *tsd) { + assert(!tsd_in_nominal_list(tsd)); + assert(tsd_state_get(tsd) <= tsd_state_nominal_max); + ql_elm_new(tsd, TSD_MANGLE(tcache).tsd_link); + malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); + ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link); + malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); +} + +static void +tsd_remove_nominal(tsd_t *tsd) { + assert(tsd_in_nominal_list(tsd)); + assert(tsd_state_get(tsd) <= tsd_state_nominal_max); + malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); + ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link); + malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); +} + +static void +tsd_force_recompute(tsdn_t *tsdn) { + /* + * The stores to tsd->state here need to synchronize with the exchange + * in tsd_slow_update. + */ + atomic_fence(ATOMIC_RELEASE); + malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock); + tsd_t *remote_tsd; + ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { + assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED) + <= tsd_state_nominal_max); + tsd_atomic_store(&remote_tsd->state, tsd_state_nominal_recompute, + ATOMIC_RELAXED); + } + malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock); +} + +void +tsd_global_slow_inc(tsdn_t *tsdn) { + atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED); + /* + * We unconditionally force a recompute, even if the global slow count + * was already positive. 
If we didn't, then it would be possible for us + * to return to the user, have the user synchronize externally with some + * other thread, and then have that other thread not have picked up the + * update yet (since the original incrementing thread might still be + * making its way through the tsd list). + */ + tsd_force_recompute(tsdn); +} + +void tsd_global_slow_dec(tsdn_t *tsdn) { + atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED); + /* See the note in ..._inc(). */ + tsd_force_recompute(tsdn); +} + +static bool +tsd_local_slow(tsd_t *tsd) { + return !tsd_tcache_enabled_get(tsd) + || tsd_reentrancy_level_get(tsd) > 0; +} + +bool +tsd_global_slow() { + return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0; +} /******************************************************************************/ +static uint8_t +tsd_state_compute(tsd_t *tsd) { + if (!tsd_nominal(tsd)) { + return tsd_state_get(tsd); + } + /* We're in *a* nominal state; but which one? */ + if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) { + return tsd_state_nominal_slow; + } else { + return tsd_state_nominal; + } +} + void tsd_slow_update(tsd_t *tsd) { - if (tsd_nominal(tsd)) { - if (malloc_slow || !tsd_tcache_enabled_get(tsd) || - tsd_reentrancy_level_get(tsd) > 0) { - tsd->state = tsd_state_nominal_slow; + uint8_t old_state; + do { + uint8_t new_state = tsd_state_compute(tsd); + old_state = tsd_atomic_exchange(&tsd->state, new_state, + ATOMIC_ACQUIRE); + } while (old_state == tsd_state_nominal_recompute); +} + +void +tsd_state_set(tsd_t *tsd, uint8_t new_state) { + /* Only the tsd module can change the state *to* recompute. */ + assert(new_state != tsd_state_nominal_recompute); + uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED); + if (old_state > tsd_state_nominal_max) { + /* + * Not currently in the nominal list, but it might need to be + * inserted there. 
+ */ + assert(!tsd_in_nominal_list(tsd)); + tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED); + if (new_state <= tsd_state_nominal_max) { + tsd_add_nominal(tsd); + } + } else { + /* + * We're currently nominal. If the new state is non-nominal, + * great; we take ourselves off the list and just enter the new + * state. + */ + assert(tsd_in_nominal_list(tsd)); + if (new_state > tsd_state_nominal_max) { + tsd_remove_nominal(tsd); + tsd_atomic_store(&tsd->state, new_state, + ATOMIC_RELAXED); } else { - tsd->state = tsd_state_nominal; + /* + * This is the tricky case. We're transitioning from + * one nominal state to another. The caller can't know + * about any races that are occurring at the same time, + * so we always have to recompute no matter what. + */ + tsd_slow_update(tsd); } } } @@ -87,6 +239,7 @@ tsd_data_init(tsd_t *tsd) { static void assert_tsd_data_cleanup_done(tsd_t *tsd) { assert(!tsd_nominal(tsd)); + assert(!tsd_in_nominal_list(tsd)); assert(*tsd_arenap_get_unsafe(tsd) == NULL); assert(*tsd_iarenap_get_unsafe(tsd) == NULL); assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true); @@ -97,8 +250,8 @@ assert_tsd_data_cleanup_done(tsd_t *tsd) { static bool tsd_data_init_nocleanup(tsd_t *tsd) { - assert(tsd->state == tsd_state_reincarnated || - tsd->state == tsd_state_minimal_initialized); + assert(tsd_state_get(tsd) == tsd_state_reincarnated || + tsd_state_get(tsd) == tsd_state_minimal_initialized); /* * During reincarnation, there is no guarantee that the cleanup function * will be called (deallocation may happen after all tsd destructors). @@ -117,27 +270,33 @@ tsd_t * tsd_fetch_slow(tsd_t *tsd, bool minimal) { assert(!tsd_fast(tsd)); - if (tsd->state == tsd_state_nominal_slow) { - /* On slow path but no work needed. 
*/ - assert(malloc_slow || !tsd_tcache_enabled_get(tsd) || - tsd_reentrancy_level_get(tsd) > 0 || - *tsd_arenas_tdata_bypassp_get(tsd)); - } else if (tsd->state == tsd_state_uninitialized) { + if (tsd_state_get(tsd) == tsd_state_nominal_slow) { + /* + * On slow path but no work needed. Note that we can't + * necessarily *assert* that we're slow, because we might be + * slow because of an asynchronous modification to global state, + * which might be asynchronously modified *back*. + */ + } else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) { + tsd_slow_update(tsd); + } else if (tsd_state_get(tsd) == tsd_state_uninitialized) { if (!minimal) { - tsd->state = tsd_state_nominal; - tsd_slow_update(tsd); - /* Trigger cleanup handler registration. */ - tsd_set(tsd); - tsd_data_init(tsd); + if (tsd_booted) { + tsd_state_set(tsd, tsd_state_nominal); + tsd_slow_update(tsd); + /* Trigger cleanup handler registration. */ + tsd_set(tsd); + tsd_data_init(tsd); + } } else { - tsd->state = tsd_state_minimal_initialized; + tsd_state_set(tsd, tsd_state_minimal_initialized); tsd_set(tsd); tsd_data_init_nocleanup(tsd); } - } else if (tsd->state == tsd_state_minimal_initialized) { + } else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) { if (!minimal) { /* Switch to fully initialized. 
*/ - tsd->state = tsd_state_nominal; + tsd_state_set(tsd, tsd_state_nominal); assert(*tsd_reentrancy_levelp_get(tsd) >= 1); (*tsd_reentrancy_levelp_get(tsd))--; tsd_slow_update(tsd); @@ -145,12 +304,12 @@ tsd_fetch_slow(tsd_t *tsd, bool minimal) { } else { assert_tsd_data_cleanup_done(tsd); } - } else if (tsd->state == tsd_state_purgatory) { - tsd->state = tsd_state_reincarnated; + } else if (tsd_state_get(tsd) == tsd_state_purgatory) { + tsd_state_set(tsd, tsd_state_reincarnated); tsd_set(tsd); tsd_data_init_nocleanup(tsd); } else { - assert(tsd->state == tsd_state_reincarnated); + assert(tsd_state_get(tsd) == tsd_state_reincarnated); } return tsd; @@ -214,7 +373,7 @@ void tsd_cleanup(void *arg) { tsd_t *tsd = (tsd_t *)arg; - switch (tsd->state) { + switch (tsd_state_get(tsd)) { case tsd_state_uninitialized: /* Do nothing. */ break; @@ -232,7 +391,7 @@ tsd_cleanup(void *arg) { case tsd_state_nominal: case tsd_state_nominal_slow: tsd_do_data_cleanup(tsd); - tsd->state = tsd_state_purgatory; + tsd_state_set(tsd, tsd_state_purgatory); tsd_set(tsd); break; case tsd_state_purgatory: @@ -260,6 +419,10 @@ malloc_tsd_boot0(void) { tsd_t *tsd; ncleanups = 0; + if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock", + WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) { + return NULL; + } if (tsd_boot0()) { return NULL; } @@ -310,7 +473,7 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { # pragma comment(linker, "/INCLUDE:_tls_callback") # else # pragma comment(linker, "/INCLUDE:_tls_used") -# pragma comment(linker, "/INCLUDE:tls_callback") +# pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) ) # endif # pragma section(".CRT$XLY",long,read) #endif @@ -349,3 +512,23 @@ tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { malloc_mutex_unlock(TSDN_NULL, &head->lock); } #endif + +void +tsd_prefork(tsd_t *tsd) { + malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); +} + +void +tsd_postfork_parent(tsd_t *tsd) { 
+ malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); +} + +void +tsd_postfork_child(tsd_t *tsd) { + malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); + ql_new(&tsd_nominal_tsds); + + if (tsd_state_get(tsd) <= tsd_state_nominal_max) { + tsd_add_nominal(tsd); + } +} |