Diffstat (limited to 'deps/jemalloc/src/arena.c')
-rw-r--r-- | deps/jemalloc/src/arena.c | 2063
1 file changed, 828 insertions, 1235 deletions
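Much of what this diff removes is the arena-local decay machinery (h_steps, arena_decay_backlog_*, arena_decay_to_limit), which moves into the shared decay/pac modules. As background for reading those removed hunks, here is a minimal, illustrative C sketch of what the removed arena_decay_backlog_npages_limit() computes: a smoothstep-weighted sum of per-epoch dirty-page counts that yields the number of pages allowed to stay unpurged. The names NSTEPS, smoothstep_weight, and backlog_npages_limit are stand-ins invented for this sketch, and a double-precision 3x^2 - 2x^3 polynomial replaces jemalloc's generated fixed-point h_steps[] table; none of this is jemalloc's actual implementation.

/*
 * Illustrative sketch only -- not jemalloc code. Each backlog slot holds the
 * pages that became unused during one past epoch (newest at index NSTEPS-1),
 * and the purge limit is their smoothstep-weighted sum.
 */
#include <stddef.h>
#include <stdio.h>

#define NSTEPS 200	/* stand-in for SMOOTHSTEP_NSTEPS */

/*
 * Weight for slot i: the newest slot (i == NSTEPS-1) is retained almost
 * fully, the oldest slot (i == 0) is almost entirely purgeable.
 */
static double
smoothstep_weight(size_t i) {
	double x = (double)(i + 1) / NSTEPS;
	return 3.0 * x * x - 2.0 * x * x * x;
}

/* Pages allowed to remain unpurged, given the per-epoch backlog. */
static size_t
backlog_npages_limit(const size_t backlog[NSTEPS]) {
	double sum = 0.0;
	for (size_t i = 0; i < NSTEPS; i++) {
		sum += (double)backlog[i] * smoothstep_weight(i);
	}
	return (size_t)sum;
}

int
main(void) {
	size_t backlog[NSTEPS] = {0};
	backlog[NSTEPS - 1] = 1000;	/* dirtied this epoch: mostly kept */
	backlog[NSTEPS / 2] = 1000;	/* half-aged: roughly half purged */
	backlog[0] = 1000;		/* fully aged: essentially all purged */
	printf("npages_limit = %zu\n", backlog_npages_limit(backlog));
	return 0;
}

The weighting is what makes decay gradual: recently dirtied pages count almost fully toward the retention limit, while pages from old epochs contribute nearly nothing and so become eligible for purging, avoiding bursty madvise activity.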
diff --git a/deps/jemalloc/src/arena.c b/deps/jemalloc/src/arena.c index ba50e4103..857b27c52 100644 --- a/deps/jemalloc/src/arena.c +++ b/deps/jemalloc/src/arena.c @@ -1,11 +1,12 @@ -#define JEMALLOC_ARENA_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/div.h" +#include "jemalloc/internal/decay.h" +#include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" @@ -35,34 +36,37 @@ ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; -const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { -#define STEP(step, h, x, y) \ - h, - SMOOTHSTEP -#undef STEP -}; +emap_t arena_emap_global; +pa_central_t arena_pa_central_global; -static div_info_t arena_binind_div_info[SC_NBINS]; +div_info_t arena_binind_div_info[SC_NBINS]; size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; + +uint32_t arena_bin_offsets[SC_NBINS]; +static unsigned nbins_total; + static unsigned huge_arena_ind; +const arena_config_t arena_config_default = { + /* .extent_hooks = */ (extent_hooks_t *)&ehooks_default_extent_hooks, + /* .metadata_use_hooks = */ true, +}; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to * definition. */ -static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit, - size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); -static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - bin_t *bin); -static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, +static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); +static void +arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + size_t npages_new); /******************************************************************************/ @@ -72,19 +76,17 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena_dss_prec_get(arena)]; - *dirty_decay_ms = arena_dirty_decay_ms_get(arena); - *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); - *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); - *ndirty += extents_npages_get(&arena->extents_dirty); - *nmuzzy += extents_npages_get(&arena->extents_muzzy); + *dirty_decay_ms = arena_decay_ms_get(arena, extent_state_dirty); + *muzzy_decay_ms = arena_decay_ms_get(arena, extent_state_muzzy); + pa_shard_basic_stats_merge(&arena->pa_shard, nactive, ndirty, nmuzzy); } void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - bin_stats_t *bstats, arena_stats_large_t *lstats, - arena_stats_extents_t *estats) { + bin_stats_data_t *bstats, arena_stats_large_t *lstats, + pac_estats_t 
*estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -93,122 +95,74 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, &base_mapped, &metadata_thp); + size_t pac_mapped_sz = pac_mapped(&arena->pa_shard.pac); + astats->mapped += base_mapped + pac_mapped_sz; + astats->resident += base_resident; - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - arena_stats_accum_zu(&astats->mapped, base_mapped - + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); - arena_stats_accum_zu(&astats->retained, - extents_npages_get(&arena->extents_retained) << LG_PAGE); - - atomic_store_zu(&astats->extent_avail, - atomic_load_zu(&arena->extent_avail_cnt, ATOMIC_RELAXED), - ATOMIC_RELAXED); - - arena_stats_accum_u64(&astats->decay_dirty.npurge, - arena_stats_read_u64(tsdn, &arena->stats, - &arena->stats.decay_dirty.npurge)); - arena_stats_accum_u64(&astats->decay_dirty.nmadvise, - arena_stats_read_u64(tsdn, &arena->stats, - &arena->stats.decay_dirty.nmadvise)); - arena_stats_accum_u64(&astats->decay_dirty.purged, - arena_stats_read_u64(tsdn, &arena->stats, - &arena->stats.decay_dirty.purged)); - - arena_stats_accum_u64(&astats->decay_muzzy.npurge, - arena_stats_read_u64(tsdn, &arena->stats, - &arena->stats.decay_muzzy.npurge)); - arena_stats_accum_u64(&astats->decay_muzzy.nmadvise, - arena_stats_read_u64(tsdn, &arena->stats, - &arena->stats.decay_muzzy.nmadvise)); - arena_stats_accum_u64(&astats->decay_muzzy.purged, - arena_stats_read_u64(tsdn, &arena->stats, - &arena->stats.decay_muzzy.purged)); - - arena_stats_accum_zu(&astats->base, base_allocated); - arena_stats_accum_zu(&astats->internal, arena_internal_get(arena)); - arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); - arena_stats_accum_zu(&astats->resident, base_resident + - (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + - extents_npages_get(&arena->extents_dirty) + - extents_npages_get(&arena->extents_muzzy)) << LG_PAGE))); - arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( - &arena->stats.abandoned_vm, ATOMIC_RELAXED)); + astats->base += base_allocated; + atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); + astats->metadata_thp += metadata_thp; for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) { - uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, + uint64_t nmalloc = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nmalloc); - arena_stats_accum_u64(&lstats[i].nmalloc, nmalloc); - arena_stats_accum_u64(&astats->nmalloc_large, nmalloc); + locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc); + astats->nmalloc_large += nmalloc; - uint64_t ndalloc = arena_stats_read_u64(tsdn, &arena->stats, + uint64_t ndalloc = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].ndalloc); - arena_stats_accum_u64(&lstats[i].ndalloc, ndalloc); - arena_stats_accum_u64(&astats->ndalloc_large, ndalloc); + locked_inc_u64_unsynchronized(&lstats[i].ndalloc, ndalloc); + astats->ndalloc_large += ndalloc; - uint64_t nrequests = arena_stats_read_u64(tsdn, &arena->stats, + uint64_t nrequests = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nrequests); - arena_stats_accum_u64(&lstats[i].nrequests, - nmalloc + nrequests); - 
arena_stats_accum_u64(&astats->nrequests_large, + locked_inc_u64_unsynchronized(&lstats[i].nrequests, nmalloc + nrequests); + astats->nrequests_large += nmalloc + nrequests; /* nfill == nmalloc for large currently. */ - arena_stats_accum_u64(&lstats[i].nfills, nmalloc); - arena_stats_accum_u64(&astats->nfills_large, nmalloc); + locked_inc_u64_unsynchronized(&lstats[i].nfills, nmalloc); + astats->nfills_large += nmalloc; - uint64_t nflush = arena_stats_read_u64(tsdn, &arena->stats, + uint64_t nflush = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nflushes); - arena_stats_accum_u64(&lstats[i].nflushes, nflush); - arena_stats_accum_u64(&astats->nflushes_large, nflush); + locked_inc_u64_unsynchronized(&lstats[i].nflushes, nflush); + astats->nflushes_large += nflush; assert(nmalloc >= ndalloc); assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; - arena_stats_accum_zu(&astats->allocated_large, - curlextents * sz_index2size(SC_NBINS + i)); - } - - for (pszind_t i = 0; i < SC_NPSIZES; i++) { - size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, - retained_bytes; - dirty = extents_nextents_get(&arena->extents_dirty, i); - muzzy = extents_nextents_get(&arena->extents_muzzy, i); - retained = extents_nextents_get(&arena->extents_retained, i); - dirty_bytes = extents_nbytes_get(&arena->extents_dirty, i); - muzzy_bytes = extents_nbytes_get(&arena->extents_muzzy, i); - retained_bytes = - extents_nbytes_get(&arena->extents_retained, i); - - atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); - atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); - atomic_store_zu(&estats[i].nretained, retained, ATOMIC_RELAXED); - atomic_store_zu(&estats[i].dirty_bytes, dirty_bytes, - ATOMIC_RELAXED); - atomic_store_zu(&estats[i].muzzy_bytes, muzzy_bytes, - ATOMIC_RELAXED); - atomic_store_zu(&estats[i].retained_bytes, retained_bytes, - ATOMIC_RELAXED); - } - - arena_stats_unlock(tsdn, &arena->stats); - - /* tcache_bytes counts currently cached bytes. */ - atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); + astats->allocated_large += + curlextents * sz_index2size(SC_NBINS + i); + } + + pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, + estats, hpastats, secstats, &astats->resident); + + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); + + /* Currently cached bytes and sanitizer-stashed bytes in tcache. */ + astats->tcache_bytes = 0; + astats->tcache_stashed_bytes = 0; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { - szind_t i = 0; - for (; i < SC_NBINS; i++) { - cache_bin_t *tbin = &descriptor->bins_small[i]; - arena_stats_accum_zu(&astats->tcache_bytes, - tbin->ncached * sz_index2size(i)); - } - for (; i < nhbins; i++) { - cache_bin_t *tbin = &descriptor->bins_large[i]; - arena_stats_accum_zu(&astats->tcache_bytes, - tbin->ncached * sz_index2size(i)); + for (szind_t i = 0; i < nhbins; i++) { + cache_bin_t *cache_bin = &descriptor->bins[i]; + cache_bin_sz_t ncached, nstashed; + cache_bin_nitems_get_remote(cache_bin, + &tcache_bin_info[i], &ncached, &nstashed); + + astats->tcache_bytes += ncached * sz_index2size(i); + astats->tcache_stashed_bytes += nstashed * + sz_index2size(i); } } malloc_mutex_prof_read(tsdn, @@ -224,21 +178,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* Gather per arena mutex profiling data. 
*/ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); - READ_ARENA_MUTEX_PROF_DATA(extent_avail_mtx, - arena_prof_mutex_extent_avail) - READ_ARENA_MUTEX_PROF_DATA(extents_dirty.mtx, - arena_prof_mutex_extents_dirty) - READ_ARENA_MUTEX_PROF_DATA(extents_muzzy.mtx, - arena_prof_mutex_extents_muzzy) - READ_ARENA_MUTEX_PROF_DATA(extents_retained.mtx, - arena_prof_mutex_extents_retained) - READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx, - arena_prof_mutex_decay_dirty) - READ_ARENA_MUTEX_PROF_DATA(decay_muzzy.mtx, - arena_prof_mutex_decay_muzzy) READ_ARENA_MUTEX_PROF_DATA(base->mtx, - arena_prof_mutex_base) + arena_prof_mutex_base); #undef READ_ARENA_MUTEX_PROF_DATA + pa_shard_mtx_stats_read(tsdn, &arena->pa_shard, + astats->mutex_prof_data); nstime_copy(&astats->uptime, &arena->create_time); nstime_update(&astats->uptime); @@ -247,55 +191,67 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { bin_stats_merge(tsdn, &bstats[i], - &arena->bins[i].bin_shards[j]); + arena_get_bin(arena, i, j)); } } } -void -arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) { +static void +arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, + bool is_background_thread) { + if (!background_thread_enabled() || is_background_thread) { + return; + } + background_thread_info_t *info = + arena_background_thread_info_get(arena); + if (background_thread_indefinite_sleep(info)) { + arena_maybe_do_deferred_work(tsdn, arena, + &arena->pa_shard.pac.decay_dirty, 0); + } +} + +/* + * React to deferred work generated by a PAI function. + */ +void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, r_extent_hooks, &arena->extents_dirty, - extent); - if (arena_dirty_decay_ms_get(arena) == 0) { + if (decay_immediately(&arena->pa_shard.pac.decay_dirty)) { arena_decay_dirty(tsdn, arena, false, true); - } else { - arena_background_thread_inactivity_check(tsdn, arena, false); } + arena_background_thread_inactivity_check(tsdn, arena, false); } static void * -arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) { +arena_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) { void *ret; - arena_slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = edata_slab_data_get(slab); size_t regind; - assert(extent_nfree_get(slab) > 0); + assert(edata_nfree_get(slab) > 0); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)extent_addr_get(slab) + + ret = (void *)((uintptr_t)edata_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); - extent_nfree_dec(slab); + edata_nfree_dec(slab); return ret; } static void -arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, +arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void** ptrs) { - arena_slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = edata_slab_data_get(slab); - assert(extent_nfree_get(slab) >= cnt); + assert(edata_nfree_get(slab) >= cnt); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); #if (! 
defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) for (unsigned i = 0; i < cnt; i++) { size_t regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - *(ptrs + i) = (void *)((uintptr_t)extent_addr_get(slab) + + *(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); } #else @@ -316,7 +272,7 @@ arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, * Load from memory locations only once, outside the * hot loop below. */ - uintptr_t base = (uintptr_t)extent_addr_get(slab); + uintptr_t base = (uintptr_t)edata_addr_get(slab); uintptr_t regsize = (uintptr_t)bin_info->reg_size; while (pop--) { size_t bit = cfs_lu(&g); @@ -328,56 +284,7 @@ arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, slab_data->bitmap[group] = g; } #endif - extent_nfree_sub(slab, cnt); -} - -#ifndef JEMALLOC_JET -static -#endif -size_t -arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { - size_t diff, regind; - - /* Freeing a pointer outside the slab can cause assertion failure. */ - assert((uintptr_t)ptr >= (uintptr_t)extent_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)extent_past_get(slab)); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) % - (uintptr_t)bin_infos[binind].reg_size == 0); - - diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)); - - /* Avoid doing division with a variable divisor. */ - regind = div_compute(&arena_binind_div_info[binind], diff); - - assert(regind < bin_infos[binind].nregs); - - return regind; -} - -static void -arena_slab_reg_dalloc(extent_t *slab, arena_slab_data_t *slab_data, void *ptr) { - szind_t binind = extent_szind_get(slab); - const bin_info_t *bin_info = &bin_infos[binind]; - size_t regind = arena_slab_regind(slab, binind, ptr); - - assert(extent_nfree_get(slab) < bin_info->nregs); - /* Freeing an unallocated pointer can cause assertion failure. */ - assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); - - bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); - extent_nfree_inc(slab); -} - -static void -arena_nactive_add(arena_t *arena, size_t add_pages) { - atomic_fetch_add_zu(&arena->nactive, add_pages, ATOMIC_RELAXED); -} - -static void -arena_nactive_sub(arena_t *arena, size_t sub_pages) { - assert(atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) >= sub_pages); - atomic_fetch_sub_zu(&arena->nactive, sub_pages, ATOMIC_RELAXED); + edata_nfree_sub(slab, cnt); } static void @@ -392,7 +299,7 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { index = sz_size2index(usize); hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0; - arena_stats_add_u64(tsdn, &arena->stats, + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].nmalloc, 1); } @@ -408,551 +315,118 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { index = sz_size2index(usize); hindex = (index >= SC_NBINS) ? 
index - SC_NBINS : 0; - arena_stats_add_u64(tsdn, &arena->stats, + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].ndalloc, 1); } static void arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, size_t usize) { - arena_large_dalloc_stats_update(tsdn, arena, oldusize); arena_large_malloc_stats_update(tsdn, arena, usize); + arena_large_dalloc_stats_update(tsdn, arena, oldusize); } -static bool -arena_may_have_muzzy(arena_t *arena) { - return (pages_can_purge_lazy && (arena_muzzy_decay_ms_get(arena) != 0)); -} - -extent_t * +edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool *zero) { - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + size_t alignment, bool zero) { + bool deferred_work_generated = false; + szind_t szind = sz_size2index(usize); + size_t esize = usize + sz_large_pad; - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + bool guarded = san_large_extent_decide_guard(tsdn, + arena_get_ehooks(arena), esize, alignment); + edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, + /* slab */ false, szind, zero, guarded, &deferred_work_generated); + assert(deferred_work_generated == false); - szind_t szind = sz_size2index(usize); - size_t mapped_add; - bool commit = true; - extent_t *extent = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_dirty, NULL, usize, sz_large_pad, alignment, false, - szind, zero, &commit); - if (extent == NULL && arena_may_have_muzzy(arena)) { - extent = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_muzzy, NULL, usize, sz_large_pad, alignment, - false, szind, zero, &commit); - } - size_t size = usize + sz_large_pad; - if (extent == NULL) { - extent = extent_alloc_wrapper(tsdn, arena, &extent_hooks, NULL, - usize, sz_large_pad, alignment, false, szind, zero, - &commit); + if (edata != NULL) { if (config_stats) { - /* - * extent may be NULL on OOM, but in that case - * mapped_add isn't used below, so there's no need to - * conditionlly set it to 0 here. 
- */ - mapped_add = size; + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); + arena_large_malloc_stats_update(tsdn, arena, usize); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - } else if (config_stats) { - mapped_add = 0; } - if (extent != NULL) { - if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); - arena_large_malloc_stats_update(tsdn, arena, usize); - if (mapped_add != 0) { - arena_stats_add_zu(tsdn, &arena->stats, - &arena->stats.mapped, mapped_add); - } - arena_stats_unlock(tsdn, &arena->stats); - } - arena_nactive_add(arena, size >> LG_PAGE); + if (edata != NULL && sz_large_pad != 0) { + arena_cache_oblivious_randomize(tsdn, arena, edata, alignment); } - return extent; + return edata; } void -arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { +arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_dalloc_stats_update(tsdn, arena, - extent_usize_get(extent)); - arena_stats_unlock(tsdn, &arena->stats); + edata_usize_get(edata)); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); } void -arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { - size_t usize = extent_usize_get(extent); - size_t udiff = oldusize - usize; + size_t usize = edata_usize_get(edata); if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); - arena_stats_unlock(tsdn, &arena->stats); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - arena_nactive_sub(arena, udiff >> LG_PAGE); } void -arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { - size_t usize = extent_usize_get(extent); - size_t udiff = usize - oldusize; + size_t usize = edata_usize_get(edata); if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); - arena_stats_unlock(tsdn, &arena->stats); - } - arena_nactive_add(arena, udiff >> LG_PAGE); -} - -static ssize_t -arena_decay_ms_read(arena_decay_t *decay) { - return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); -} - -static void -arena_decay_ms_write(arena_decay_t *decay, ssize_t decay_ms) { - atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); -} - -static void -arena_decay_deadline_init(arena_decay_t *decay) { - /* - * Generate a new deadline that is uniformly random within the next - * epoch after the current one. 
- */ - nstime_copy(&decay->deadline, &decay->epoch); - nstime_add(&decay->deadline, &decay->interval); - if (arena_decay_ms_read(decay) > 0) { - nstime_t jitter; - - nstime_init(&jitter, prng_range_u64(&decay->jitter_state, - nstime_ns(&decay->interval))); - nstime_add(&decay->deadline, &jitter); - } -} - -static bool -arena_decay_deadline_reached(const arena_decay_t *decay, const nstime_t *time) { - return (nstime_compare(&decay->deadline, time) <= 0); -} - -static size_t -arena_decay_backlog_npages_limit(const arena_decay_t *decay) { - uint64_t sum; - size_t npages_limit_backlog; - unsigned i; - - /* - * For each element of decay_backlog, multiply by the corresponding - * fixed-point smoothstep decay factor. Sum the products, then divide - * to round down to the nearest whole number of pages. - */ - sum = 0; - for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { - sum += decay->backlog[i] * h_steps[i]; - } - npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); - - return npages_limit_backlog; -} - -static void -arena_decay_backlog_update_last(arena_decay_t *decay, size_t current_npages) { - size_t npages_delta = (current_npages > decay->nunpurged) ? - current_npages - decay->nunpurged : 0; - decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta; - - if (config_debug) { - if (current_npages > decay->ceil_npages) { - decay->ceil_npages = current_npages; - } - size_t npages_limit = arena_decay_backlog_npages_limit(decay); - assert(decay->ceil_npages >= npages_limit); - if (decay->ceil_npages > npages_limit) { - decay->ceil_npages = npages_limit; - } + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } -static void -arena_decay_backlog_update(arena_decay_t *decay, uint64_t nadvance_u64, - size_t current_npages) { - if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) * - sizeof(size_t)); - } else { - size_t nadvance_z = (size_t)nadvance_u64; - - assert((uint64_t)nadvance_z == nadvance_u64); - - memmove(decay->backlog, &decay->backlog[nadvance_z], - (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); - if (nadvance_z > 1) { - memset(&decay->backlog[SMOOTHSTEP_NSTEPS - - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); - } - } - - arena_decay_backlog_update_last(decay, current_npages); -} - -static void -arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, size_t current_npages, size_t npages_limit, - bool is_background_thread) { - if (current_npages > npages_limit) { - arena_decay_to_limit(tsdn, arena, decay, extents, false, - npages_limit, current_npages - npages_limit, - is_background_thread); - } -} - -static void -arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, - size_t current_npages) { - assert(arena_decay_deadline_reached(decay, time)); - - nstime_t delta; - nstime_copy(&delta, time); - nstime_subtract(&delta, &decay->epoch); - - uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval); - assert(nadvance_u64 > 0); - - /* Add nadvance_u64 decay intervals to epoch. */ - nstime_copy(&delta, &decay->interval); - nstime_imultiply(&delta, nadvance_u64); - nstime_add(&decay->epoch, &delta); - - /* Set a new deadline. */ - arena_decay_deadline_init(decay); - - /* Update the backlog. 
*/ - arena_decay_backlog_update(decay, nadvance_u64, current_npages); -} - -static void -arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, const nstime_t *time, bool is_background_thread) { - size_t current_npages = extents_npages_get(extents); - arena_decay_epoch_advance_helper(decay, time, current_npages); - - size_t npages_limit = arena_decay_backlog_npages_limit(decay); - /* We may unlock decay->mtx when try_purge(). Finish logging first. */ - decay->nunpurged = (npages_limit > current_npages) ? npages_limit : - current_npages; - - if (!background_thread_enabled() || is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, extents, - current_npages, npages_limit, is_background_thread); - } -} - -static void -arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) { - arena_decay_ms_write(decay, decay_ms); - if (decay_ms > 0) { - nstime_init(&decay->interval, (uint64_t)decay_ms * - KQU(1000000)); - nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); - } - - nstime_init(&decay->epoch, 0); - nstime_update(&decay->epoch); - decay->jitter_state = (uint64_t)(uintptr_t)decay; - arena_decay_deadline_init(decay); - decay->nunpurged = 0; - memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); -} - -static bool -arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, - arena_stats_decay_t *stats) { - if (config_debug) { - for (size_t i = 0; i < sizeof(arena_decay_t); i++) { - assert(((char *)decay)[i] == 0); - } - decay->ceil_npages = 0; - } - if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY, - malloc_mutex_rank_exclusive)) { - return true; - } - decay->purging = false; - arena_decay_reinit(decay, decay_ms); - /* Memory is zeroed, so there is no need to clear stats. */ - if (config_stats) { - decay->stats = stats; - } - return false; -} - -static bool -arena_decay_ms_valid(ssize_t decay_ms) { - if (decay_ms < -1) { - return false; - } - if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX * - KQU(1000)) { - return true; - } - return false; -} - -static bool -arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool is_background_thread) { - malloc_mutex_assert_owner(tsdn, &decay->mtx); - - /* Purge all or nothing if the option is disabled. */ - ssize_t decay_ms = arena_decay_ms_read(decay); - if (decay_ms <= 0) { - if (decay_ms == 0) { - arena_decay_to_limit(tsdn, arena, decay, extents, false, - 0, extents_npages_get(extents), - is_background_thread); - } - return false; - } - - nstime_t time; - nstime_init(&time, 0); - nstime_update(&time); - if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, &time) - > 0)) { - /* - * Time went backwards. Move the epoch back in time and - * generate a new deadline, with the expectation that time - * typically flows forward for long enough periods of time that - * epochs complete. Unfortunately, this strategy is susceptible - * to clock jitter triggering premature epoch advances, but - * clock jitter estimation and compensation isn't feasible here - * because calls into this code are event-driven. - */ - nstime_copy(&decay->epoch, &time); - arena_decay_deadline_init(decay); +/* + * In situations where we're not forcing a decay (i.e. because the user + * specifically requested it), should we purge ourselves, or wait for the + * background thread to get to it. 
+ */ +static pac_purge_eagerness_t +arena_decide_unforced_purge_eagerness(bool is_background_thread) { + if (is_background_thread) { + return PAC_PURGE_ALWAYS; + } else if (!is_background_thread && background_thread_enabled()) { + return PAC_PURGE_NEVER; } else { - /* Verify that time does not go backwards. */ - assert(nstime_compare(&decay->epoch, &time) <= 0); + return PAC_PURGE_ON_EPOCH_ADVANCE; } - - /* - * If the deadline has been reached, advance to the current epoch and - * purge to the new limit if necessary. Note that dirty pages created - * during the current epoch are not subject to purge until a future - * epoch, so as a result purging only happens during epoch advances, or - * being triggered by background threads (scheduled event). - */ - bool advance_epoch = arena_decay_deadline_reached(decay, &time); - if (advance_epoch) { - arena_decay_epoch_advance(tsdn, arena, decay, extents, &time, - is_background_thread); - } else if (is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, extents, - extents_npages_get(extents), - arena_decay_backlog_npages_limit(decay), - is_background_thread); - } - - return advance_epoch; -} - -static ssize_t -arena_decay_ms_get(arena_decay_t *decay) { - return arena_decay_ms_read(decay); -} - -ssize_t -arena_dirty_decay_ms_get(arena_t *arena) { - return arena_decay_ms_get(&arena->decay_dirty); -} - -ssize_t -arena_muzzy_decay_ms_get(arena_t *arena) { - return arena_decay_ms_get(&arena->decay_muzzy); -} - -static bool -arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, ssize_t decay_ms) { - if (!arena_decay_ms_valid(decay_ms)) { - return true; - } - - malloc_mutex_lock(tsdn, &decay->mtx); - /* - * Restart decay backlog from scratch, which may cause many dirty pages - * to be immediately purged. It would conceptually be possible to map - * the old backlog onto the new backlog, but there is no justification - * for such complexity since decay_ms changes are intended to be - * infrequent, either between the {-1, 0, >0} states, or a one-time - * arbitrary change during initial arena configuration. - */ - arena_decay_reinit(decay, decay_ms); - arena_maybe_decay(tsdn, arena, decay, extents, false); - malloc_mutex_unlock(tsdn, &decay->mtx); - - return false; -} - -bool -arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, - ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->decay_dirty, - &arena->extents_dirty, decay_ms); } bool -arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, +arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->decay_muzzy, - &arena->extents_muzzy, decay_ms); + pac_purge_eagerness_t eagerness = arena_decide_unforced_purge_eagerness( + /* is_background_thread */ false); + return pa_decay_ms_set(tsdn, &arena->pa_shard, state, decay_ms, + eagerness); } -static size_t -arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_limit, - size_t npages_decay_max, extent_list_t *decay_extents) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - /* Stash extents according to npages_limit. 
*/ - size_t nstashed = 0; - extent_t *extent; - while (nstashed < npages_decay_max && - (extent = extents_evict(tsdn, arena, r_extent_hooks, extents, - npages_limit)) != NULL) { - extent_list_append(decay_extents, extent); - nstashed += extent_size_get(extent) >> LG_PAGE; - } - return nstashed; -} - -static size_t -arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, arena_decay_t *decay, extents_t *extents, - bool all, extent_list_t *decay_extents, bool is_background_thread) { - size_t nmadvise, nunmapped; - size_t npurged; - - if (config_stats) { - nmadvise = 0; - nunmapped = 0; - } - npurged = 0; - - ssize_t muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); - for (extent_t *extent = extent_list_first(decay_extents); extent != - NULL; extent = extent_list_first(decay_extents)) { - if (config_stats) { - nmadvise++; - } - size_t npages = extent_size_get(extent) >> LG_PAGE; - npurged += npages; - extent_list_remove(decay_extents, extent); - switch (extents_state_get(extents)) { - case extent_state_active: - not_reached(); - case extent_state_dirty: - if (!all && muzzy_decay_ms != 0 && - !extent_purge_lazy_wrapper(tsdn, arena, - r_extent_hooks, extent, 0, - extent_size_get(extent))) { - extents_dalloc(tsdn, arena, r_extent_hooks, - &arena->extents_muzzy, extent); - arena_background_thread_inactivity_check(tsdn, - arena, is_background_thread); - break; - } - /* Fall through. */ - case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, arena, r_extent_hooks, - extent); - if (config_stats) { - nunmapped += npages; - } - break; - case extent_state_retained: - default: - not_reached(); - } - } - - if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); - arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->npurge, - 1); - arena_stats_add_u64(tsdn, &arena->stats, - &decay->stats->nmadvise, nmadvise); - arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->purged, - npurged); - arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, - nunmapped << LG_PAGE); - arena_stats_unlock(tsdn, &arena->stats); - } - - return npurged; -} - -/* - * npages_limit: Decay at most npages_decay_max pages without violating the - * invariant: (extents_npages_get(extents) >= npages_limit). We need an upper - * bound on number of pages in order to prevent unbounded growth (namely in - * stashed), otherwise unbounded new pages could be added to extents during the - * current decay run, so that the purging thread never finishes. 
- */ -static void -arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool all, size_t npages_limit, size_t npages_decay_max, - bool is_background_thread) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 1); - malloc_mutex_assert_owner(tsdn, &decay->mtx); - - if (decay->purging) { - return; - } - decay->purging = true; - malloc_mutex_unlock(tsdn, &decay->mtx); - - extent_hooks_t *extent_hooks = extent_hooks_get(arena); - - extent_list_t decay_extents; - extent_list_init(&decay_extents); - - size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, extents, - npages_limit, npages_decay_max, &decay_extents); - if (npurge != 0) { - size_t npurged = arena_decay_stashed(tsdn, arena, - &extent_hooks, decay, extents, all, &decay_extents, - is_background_thread); - assert(npurged == npurge); - } - - malloc_mutex_lock(tsdn, &decay->mtx); - decay->purging = false; +ssize_t +arena_decay_ms_get(arena_t *arena, extent_state_t state) { + return pa_decay_ms_get(&arena->pa_shard, state); } static bool -arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool is_background_thread, bool all) { +arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, + bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); - arena_decay_to_limit(tsdn, arena, decay, extents, all, 0, - extents_npages_get(extents), is_background_thread); + pac_decay_all(tsdn, &arena->pa_shard.pac, decay, decay_stats, + ecache, /* fully_decay */ all); malloc_mutex_unlock(tsdn, &decay->mtx); - return false; } @@ -960,20 +434,20 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, /* No need to wait if another thread is in progress. */ return true; } - - bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, extents, - is_background_thread); + pac_purge_eagerness_t eagerness = + arena_decide_unforced_purge_eagerness(is_background_thread); + bool epoch_advanced = pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac, + decay, decay_stats, ecache, eagerness); size_t npages_new; if (epoch_advanced) { /* Backlog is updated on epoch advance. 
*/ - npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1]; + npages_new = decay_epoch_npages_delta(decay); } malloc_mutex_unlock(tsdn, &decay->mtx); if (have_background_thread && background_thread_enabled() && epoch_advanced && !is_background_thread) { - background_thread_interval_check(tsdn, arena, decay, - npages_new); + arena_maybe_do_deferred_work(tsdn, arena, decay, npages_new); } return false; @@ -982,53 +456,143 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { - return arena_decay_impl(tsdn, arena, &arena->decay_dirty, - &arena->extents_dirty, is_background_thread, all); + return arena_decay_impl(tsdn, arena, &arena->pa_shard.pac.decay_dirty, + &arena->pa_shard.pac.stats->decay_dirty, + &arena->pa_shard.pac.ecache_dirty, is_background_thread, all); } static bool arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { - return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, - &arena->extents_muzzy, is_background_thread, all); + if (pa_shard_dont_decay_muzzy(&arena->pa_shard)) { + return false; + } + return arena_decay_impl(tsdn, arena, &arena->pa_shard.pac.decay_muzzy, + &arena->pa_shard.pac.stats->decay_muzzy, + &arena->pa_shard.pac.ecache_muzzy, is_background_thread, all); } void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { + if (all) { + /* + * We should take a purge of "all" to mean "save as much memory + * as possible", including flushing any caches (for situations + * like thread death, or manual purge calls). + */ + sec_flush(tsdn, &arena->pa_shard.hpa_sec); + } if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) { return; } arena_decay_muzzy(tsdn, arena, is_background_thread, all); } +static bool +arena_should_decay_early(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + background_thread_info_t *info, nstime_t *remaining_sleep, + size_t npages_new) { + malloc_mutex_assert_owner(tsdn, &info->mtx); + + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + return false; + } + + if (!decay_gradually(decay)) { + malloc_mutex_unlock(tsdn, &decay->mtx); + return false; + } + + nstime_init(remaining_sleep, background_thread_wakeup_time_get(info)); + if (nstime_compare(remaining_sleep, &decay->epoch) <= 0) { + malloc_mutex_unlock(tsdn, &decay->mtx); + return false; + } + nstime_subtract(remaining_sleep, &decay->epoch); + if (npages_new > 0) { + uint64_t npurge_new = decay_npages_purge_in(decay, + remaining_sleep, npages_new); + info->npages_to_purge_new += npurge_new; + } + malloc_mutex_unlock(tsdn, &decay->mtx); + return info->npages_to_purge_new > + ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD; +} + +/* + * Check if deferred work needs to be done sooner than planned. + * For decay we might want to wake up earlier because of an influx of dirty + * pages. Rather than waiting for previously estimated time, we proactively + * purge those pages. + * If background thread sleeps indefinitely, always wake up because some + * deferred work has been generated. + */ static void -arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { - arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); +arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + size_t npages_new) { + background_thread_info_t *info = arena_background_thread_info_get( + arena); + if (malloc_mutex_trylock(tsdn, &info->mtx)) { + /* + * Background thread may hold the mutex for a long period of + * time. 
We'd like to avoid the variance on application + * threads. So keep this non-blocking, and leave the work to a + * future epoch. + */ + return; + } + if (!background_thread_is_started(info)) { + goto label_done; + } + + nstime_t remaining_sleep; + if (background_thread_indefinite_sleep(info)) { + background_thread_wakeup_early(info, NULL); + } else if (arena_should_decay_early(tsdn, arena, decay, info, + &remaining_sleep, npages_new)) { + info->npages_to_purge_new = 0; + background_thread_wakeup_early(info, &remaining_sleep); + } +label_done: + malloc_mutex_unlock(tsdn, &info->mtx); +} + +/* Called from background threads. */ +void +arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { + arena_decay(tsdn, arena, true, false); + pa_shard_do_deferred_work(tsdn, &arena->pa_shard); +} - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, slab); +void +arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { + bool deferred_work_generated = false; + pa_dalloc(tsdn, &arena->pa_shard, slab, &deferred_work_generated); + if (deferred_work_generated) { + arena_handle_deferred_work(tsdn, arena); + } } static void -arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) { - assert(extent_nfree_get(slab) > 0); - extent_heap_insert(&bin->slabs_nonfull, slab); +arena_bin_slabs_nonfull_insert(bin_t *bin, edata_t *slab) { + assert(edata_nfree_get(slab) > 0); + edata_heap_insert(&bin->slabs_nonfull, slab); if (config_stats) { bin->stats.nonfull_slabs++; } } static void -arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) { - extent_heap_remove(&bin->slabs_nonfull, slab); +arena_bin_slabs_nonfull_remove(bin_t *bin, edata_t *slab) { + edata_heap_remove(&bin->slabs_nonfull, slab); if (config_stats) { bin->stats.nonfull_slabs--; } } -static extent_t * +static edata_t * arena_bin_slabs_nonfull_tryget(bin_t *bin) { - extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); + edata_t *slab = edata_heap_remove_first(&bin->slabs_nonfull); if (slab == NULL) { return NULL; } @@ -1040,30 +604,30 @@ arena_bin_slabs_nonfull_tryget(bin_t *bin) { } static void -arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, extent_t *slab) { - assert(extent_nfree_get(slab) == 0); +arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, edata_t *slab) { + assert(edata_nfree_get(slab) == 0); /* * Tracking extents is required by arena_reset, which is not allowed - * for auto arenas. Bypass this step to avoid touching the extent + * for auto arenas. Bypass this step to avoid touching the edata * linkage (often results in cache misses) for auto arenas. 
*/ if (arena_is_auto(arena)) { return; } - extent_list_append(&bin->slabs_full, slab); + edata_list_active_append(&bin->slabs_full, slab); } static void -arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) { if (arena_is_auto(arena)) { return; } - extent_list_remove(&bin->slabs_full, slab); + edata_list_active_remove(&bin->slabs_full, slab); } static void arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { - extent_t *slab; + edata_t *slab; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (bin->slabcur != NULL) { @@ -1073,13 +637,13 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } - while ((slab = extent_heap_remove_first(&bin->slabs_nonfull)) != NULL) { + while ((slab = edata_heap_remove_first(&bin->slabs_nonfull)) != NULL) { malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } - for (slab = extent_list_first(&bin->slabs_full); slab != NULL; - slab = extent_list_first(&bin->slabs_full)) { + for (slab = edata_list_active_first(&bin->slabs_full); slab != NULL; + slab = edata_list_active_first(&bin->slabs_full)) { arena_bin_slabs_full_remove(arena, bin, slab); malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); @@ -1111,16 +675,15 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Large allocations. */ malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); - for (extent_t *extent = extent_list_first(&arena->large); extent != - NULL; extent = extent_list_first(&arena->large)) { - void *ptr = extent_base_get(extent); + for (edata_t *edata = edata_list_active_first(&arena->large); + edata != NULL; edata = edata_list_active_first(&arena->large)) { + void *ptr = edata_base_get(edata); size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); - alloc_ctx_t alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { @@ -1131,7 +694,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); } - large_dalloc(tsd_tsdn(tsd), extent); + large_dalloc(tsd_tsdn(tsd), edata); malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); } malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); @@ -1139,32 +702,95 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. 
*/ for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - arena_bin_reset(tsd, arena, - &arena->bins[i].bin_shards[j]); + arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j)); } } + pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); +} + +static void +arena_prepare_base_deletion_sync_finish(tsd_t *tsd, malloc_mutex_t **mutexes, + unsigned n_mtx) { + for (unsigned i = 0; i < n_mtx; i++) { + malloc_mutex_lock(tsd_tsdn(tsd), mutexes[i]); + malloc_mutex_unlock(tsd_tsdn(tsd), mutexes[i]); + } +} + +#define ARENA_DESTROY_MAX_DELAYED_MTX 32 +static void +arena_prepare_base_deletion_sync(tsd_t *tsd, malloc_mutex_t *mtx, + malloc_mutex_t **delayed_mtx, unsigned *n_delayed) { + if (!malloc_mutex_trylock(tsd_tsdn(tsd), mtx)) { + /* No contention. */ + malloc_mutex_unlock(tsd_tsdn(tsd), mtx); + return; + } + unsigned n = *n_delayed; + assert(n < ARENA_DESTROY_MAX_DELAYED_MTX); + /* Add another to the batch. */ + delayed_mtx[n++] = mtx; - atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED); + if (n == ARENA_DESTROY_MAX_DELAYED_MTX) { + arena_prepare_base_deletion_sync_finish(tsd, delayed_mtx, n); + n = 0; + } + *n_delayed = n; } static void -arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { +arena_prepare_base_deletion(tsd_t *tsd, base_t *base_to_destroy) { /* - * Iterate over the retained extents and destroy them. This gives the - * extent allocator underlying the extent hooks an opportunity to unmap - * all retained memory without having to keep its own metadata - * structures. In practice, virtual memory for dss-allocated extents is - * leaked here, so best practice is to avoid dss for arenas to be - * destroyed, or provide custom extent hooks that track retained - * dss-based extents for later reuse. + * In order to coalesce, emap_try_acquire_edata_neighbor will attempt to + * check neighbor edata's state to determine eligibility. This means + * under certain conditions, the metadata from an arena can be accessed + * w/o holding any locks from that arena. In order to guarantee safe + * memory access, the metadata and the underlying base allocator needs + * to be kept alive, until all pending accesses are done. + * + * 1) with opt_retain, the arena boundary implies the is_head state + * (tracked in the rtree leaf), and the coalesce flow will stop at the + * head state branch. Therefore no cross arena metadata access + * possible. + * + * 2) w/o opt_retain, the arena id needs to be read from the edata_t, + * meaning read only cross-arena metadata access is possible. The + * coalesce attempt will stop at the arena_id mismatch, and is always + * under one of the ecache locks. To allow safe passthrough of such + * metadata accesses, the loop below will iterate through all manual + * arenas' ecache locks. As all the metadata from this base allocator + * have been unlinked from the rtree, after going through all the + * relevant ecache locks, it's safe to say that a) pending accesses are + * all finished, and b) no new access will be generated. 
*/ - extent_hooks_t *extent_hooks = extent_hooks_get(arena); - extent_t *extent; - while ((extent = extents_evict(tsdn, arena, &extent_hooks, - &arena->extents_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, arena, &extent_hooks, extent); + if (opt_retain) { + return; } + unsigned destroy_ind = base_ind_get(base_to_destroy); + assert(destroy_ind >= manual_arena_base); + + tsdn_t *tsdn = tsd_tsdn(tsd); + malloc_mutex_t *delayed_mtx[ARENA_DESTROY_MAX_DELAYED_MTX]; + unsigned n_delayed = 0, total = narenas_total_get(); + for (unsigned i = 0; i < total; i++) { + if (i == destroy_ind) { + continue; + } + arena_t *arena = arena_get(tsdn, i, false); + if (arena == NULL) { + continue; + } + pac_t *pac = &arena->pa_shard.pac; + arena_prepare_base_deletion_sync(tsd, &pac->ecache_dirty.mtx, + delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync(tsd, &pac->ecache_muzzy.mtx, + delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync(tsd, &pac->ecache_retained.mtx, + delayed_mtx, &n_delayed); + } + arena_prepare_base_deletion_sync_finish(tsd, delayed_mtx, n_delayed); } +#undef ARENA_DESTROY_MAX_DELAYED_MTX void arena_destroy(tsd_t *tsd, arena_t *arena) { @@ -1175,13 +801,10 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { /* * No allocations have occurred since arena_reset() was called. * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached - * extents, so only retained extents may remain. + * extents, so only retained extents may remain and it's safe to call + * pa_shard_destroy_retained. */ - assert(extents_npages_get(&arena->extents_dirty) == 0); - assert(extents_npages_get(&arena->extents_muzzy) == 0); - - /* Deallocate retained memory. */ - arena_destroy_retained(tsd_tsdn(tsd), arena); + pa_shard_destroy(tsd_tsdn(tsd), &arena->pa_shard); /* * Remove the arena pointer from the arenas array. We rely on the fact @@ -1197,316 +820,370 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { /* * Destroy the base allocator, which manages all metadata ever mapped by - * this arena. + * this arena. The prepare function will make sure no pending access to + * the metadata in this base anymore. 
*/ + arena_prepare_base_deletion(tsd, arena->base); base_delete(tsd_tsdn(tsd), arena->base); } -static extent_t * -arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, const bin_info_t *bin_info, - szind_t szind) { - extent_t *slab; - bool zero, commit; - - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - zero = false; - commit = true; - slab = extent_alloc_wrapper(tsdn, arena, r_extent_hooks, NULL, - bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); - - if (config_stats && slab != NULL) { - arena_stats_mapped_add(tsdn, &arena->stats, - bin_info->slab_size); - } - - return slab; -} - -static extent_t * +static edata_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard, const bin_info_t *bin_info) { + bool deferred_work_generated = false; witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - szind_t szind = sz_size2index(bin_info->reg_size); - bool zero = false; - bool commit = true; - extent_t *slab = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_dirty, NULL, bin_info->slab_size, 0, PAGE, true, - binind, &zero, &commit); - if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_muzzy, NULL, bin_info->slab_size, 0, PAGE, - true, binind, &zero, &commit); + bool guarded = san_slab_extent_decide_guard(tsdn, + arena_get_ehooks(arena)); + edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, + /* alignment */ PAGE, /* slab */ true, /* szind */ binind, + /* zero */ false, guarded, &deferred_work_generated); + + if (deferred_work_generated) { + arena_handle_deferred_work(tsdn, arena); } + if (slab == NULL) { - slab = arena_slab_alloc_hard(tsdn, arena, &extent_hooks, - bin_info, szind); - if (slab == NULL) { - return NULL; - } + return NULL; } - assert(extent_slab_get(slab)); + assert(edata_slab_get(slab)); /* Initialize slab internals. */ - arena_slab_data_t *slab_data = extent_slab_data_get(slab); - extent_nfree_binshard_set(slab, bin_info->nregs, binshard); + slab_data_t *slab_data = edata_slab_data_get(slab); + edata_nfree_binshard_set(slab, bin_info->nregs, binshard); bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); - arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE); - return slab; } -static extent_t * -arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, unsigned binshard) { - extent_t *slab; - const bin_info_t *bin_info; - - /* Look for a usable slab. */ - slab = arena_bin_slabs_nonfull_tryget(bin); - if (slab != NULL) { - return slab; - } - /* No existing slabs have any space available. */ - - bin_info = &bin_infos[binind]; - - /* Allocate a new slab. */ - malloc_mutex_unlock(tsdn, &bin->lock); - /******************************/ - slab = arena_slab_alloc(tsdn, arena, binind, binshard, bin_info); - /********************************/ - malloc_mutex_lock(tsdn, &bin->lock); - if (slab != NULL) { - if (config_stats) { - bin->stats.nslabs++; - bin->stats.curslabs++; - } - return slab; +/* + * Before attempting the _with_fresh_slab approaches below, the _no_fresh_slab + * variants (i.e. through slabcur and nonfull) must be tried first. 
+ */ +static void +arena_bin_refill_slabcur_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, + bin_t *bin, szind_t binind, edata_t *fresh_slab) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + /* Only called after slabcur and nonfull both failed. */ + assert(bin->slabcur == NULL); + assert(edata_heap_first(&bin->slabs_nonfull) == NULL); + assert(fresh_slab != NULL); + + /* A new slab from arena_slab_alloc() */ + assert(edata_nfree_get(fresh_slab) == bin_infos[binind].nregs); + if (config_stats) { + bin->stats.nslabs++; + bin->stats.curslabs++; } + bin->slabcur = fresh_slab; +} - /* - * arena_slab_alloc() failed, but another thread may have made - * sufficient memory available while this one dropped bin->lock above, - * so search one more time. - */ - slab = arena_bin_slabs_nonfull_tryget(bin); - if (slab != NULL) { - return slab; - } +/* Refill slabcur and then alloc using the fresh slab */ +static void * +arena_bin_malloc_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, edata_t *fresh_slab) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + arena_bin_refill_slabcur_with_fresh_slab(tsdn, arena, bin, binind, + fresh_slab); - return NULL; + return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); } -/* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). */ -static void * -arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, unsigned binshard) { - const bin_info_t *bin_info; - extent_t *slab; +static bool +arena_bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, + bin_t *bin) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + /* Only called after arena_slab_reg_alloc[_batch] failed. */ + assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0); - bin_info = &bin_infos[binind]; - if (!arena_is_auto(arena) && bin->slabcur != NULL) { - arena_bin_slabs_full_insert(arena, bin, bin->slabcur); - bin->slabcur = NULL; - } - slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, binshard); if (bin->slabcur != NULL) { - /* - * Another thread updated slabcur while this one ran without the - * bin lock in arena_bin_nonfull_slab_get(). - */ - if (extent_nfree_get(bin->slabcur) > 0) { - void *ret = arena_slab_reg_alloc(bin->slabcur, - bin_info); - if (slab != NULL) { - /* - * arena_slab_alloc() may have allocated slab, - * or it may have been pulled from - * slabs_nonfull. Therefore it is unsafe to - * make any assumptions about how slab has - * previously been used, and - * arena_bin_lower_slab() must be called, as if - * a region were just deallocated from the slab. - */ - if (extent_nfree_get(slab) == bin_info->nregs) { - arena_dalloc_bin_slab(tsdn, arena, slab, - bin); - } else { - arena_bin_lower_slab(tsdn, arena, slab, - bin); - } - } - return ret; - } - arena_bin_slabs_full_insert(arena, bin, bin->slabcur); - bin->slabcur = NULL; - } - - if (slab == NULL) { - return NULL; } - bin->slabcur = slab; - assert(extent_nfree_get(bin->slabcur) > 0); + /* Look for a usable slab. */ + bin->slabcur = arena_bin_slabs_nonfull_tryget(bin); + assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) > 0); - return arena_slab_reg_alloc(slab, bin_info); + return (bin->slabcur == NULL); } -/* Choose a bin shard and return the locked bin. 
*/ bin_t * -arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, - unsigned *binshard) { - bin_t *bin; +arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned *binshard_p) { + unsigned binshard; if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { - *binshard = 0; + binshard = 0; } else { - *binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; + binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; } - assert(*binshard < bin_infos[binind].n_shards); - bin = &arena->bins[binind].bin_shards[*binshard]; - malloc_mutex_lock(tsdn, &bin->lock); - - return bin; + assert(binshard < bin_infos[binind].n_shards); + if (binshard_p != NULL) { + *binshard_p = binshard; + } + return arena_get_bin(arena, binind, binshard); } void -arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { - unsigned i, nfill, cnt; +arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, + cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, + const unsigned nfill) { + assert(cache_bin_ncached_get_local(cache_bin, cache_bin_info) == 0); + + const bin_info_t *bin_info = &bin_infos[binind]; + + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); + cache_bin_init_ptr_array_for_fill(cache_bin, cache_bin_info, &ptrs, + nfill); + /* + * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull + * slabs. After both are exhausted, new slabs will be allocated through + * arena_slab_alloc(). + * + * Bin lock is only taken / released right before / after the while(...) + * refill loop, with new slab allocation (which has its own locking) + * kept outside of the loop. This setup facilitates flat combining, at + * the cost of the nested loop (through goto label_refill). + * + * To optimize for cases with contention and limited resources + * (e.g. hugepage-backed or non-overcommit arenas), each fill-iteration + * gets one chance of slab_alloc, and a retry of bin local resources + * after the slab allocation (regardless if slab_alloc failed, because + * the bin lock is dropped during the slab allocation). + * + * In other words, new slab allocation is allowed, as long as there was + * progress since the previous slab_alloc. This is tracked with + * made_progress below, initialized to true to jump start the first + * iteration. + * + * In other words (again), the loop will only terminate early (i.e. stop + * with filled < nfill) after going through the three steps: a) bin + * local exhausted, b) unlock and slab_alloc returns null, c) re-lock + * and bin local fails again. + */ + bool made_progress = true; + edata_t *fresh_slab = NULL; + bool alloc_and_retry = false; + unsigned filled = 0; + unsigned binshard; + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + +label_refill: + malloc_mutex_lock(tsdn, &bin->lock); - assert(tbin->ncached == 0); + while (filled < nfill) { + /* Try batch-fill from slabcur first. */ + edata_t *slabcur = bin->slabcur; + if (slabcur != NULL && edata_nfree_get(slabcur) > 0) { + unsigned tofill = nfill - filled; + unsigned nfree = edata_nfree_get(slabcur); + unsigned cnt = tofill < nfree ? tofill : nfree; + + arena_slab_reg_alloc_batch(slabcur, bin_info, cnt, + &ptrs.ptr[filled]); + made_progress = true; + filled += cnt; + continue; + } + /* Next try refilling slabcur from nonfull slabs. 
*/ + if (!arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) { + assert(bin->slabcur != NULL); + continue; + } + + /* Then see if a new slab was reserved already. */ + if (fresh_slab != NULL) { + arena_bin_refill_slabcur_with_fresh_slab(tsdn, arena, + bin, binind, fresh_slab); + assert(bin->slabcur != NULL); + fresh_slab = NULL; + continue; + } + + /* Try slab_alloc if made progress (or never did slab_alloc). */ + if (made_progress) { + assert(bin->slabcur == NULL); + assert(fresh_slab == NULL); + alloc_and_retry = true; + /* Alloc a new slab then come back. */ + break; + } + + /* OOM. */ + + assert(fresh_slab == NULL); + assert(!alloc_and_retry); + break; + } /* while (filled < nfill) loop. */ - if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) { - prof_idump(tsdn); + if (config_stats && !alloc_and_retry) { + bin->stats.nmalloc += filled; + bin->stats.nrequests += cache_bin->tstats.nrequests; + bin->stats.curregs += filled; + bin->stats.nfills++; + cache_bin->tstats.nrequests = 0; + } + + malloc_mutex_unlock(tsdn, &bin->lock); + + if (alloc_and_retry) { + assert(fresh_slab == NULL); + assert(filled < nfill); + assert(made_progress); + + fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard, + bin_info); + /* fresh_slab NULL case handled in the for loop. */ + + alloc_and_retry = false; + made_progress = false; + goto label_refill; } + assert(filled == nfill || (fresh_slab == NULL && !made_progress)); + /* Release if allocated but not used. */ + if (fresh_slab != NULL) { + assert(edata_nfree_get(fresh_slab) == bin_info->nregs); + arena_slab_dalloc(tsdn, arena, fresh_slab); + fresh_slab = NULL; + } + + cache_bin_finish_fill(cache_bin, cache_bin_info, &ptrs, filled); + arena_decay_tick(tsdn, arena); +} + +size_t +arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, + void **ptrs, size_t nfill, bool zero) { + assert(binind < SC_NBINS); + const bin_info_t *bin_info = &bin_infos[binind]; + const size_t nregs = bin_info->nregs; + assert(nregs > 0); + const size_t usize = bin_info->reg_size; + + const bool manual_arena = !arena_is_auto(arena); unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); - - for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> - tcache->lg_fill_div[binind]); i < nfill; i += cnt) { - extent_t *slab; - if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > - 0) { - unsigned tofill = nfill - i; - cnt = tofill < extent_nfree_get(slab) ? - tofill : extent_nfree_get(slab); - arena_slab_reg_alloc_batch( - slab, &bin_infos[binind], cnt, - tbin->avail - nfill + i); - } else { - cnt = 1; - void *ptr = arena_bin_malloc_hard(tsdn, arena, bin, - binind, binshard); - /* - * OOM. tbin->avail isn't yet filled down to its first - * element, so the successful allocations (if any) must - * be moved just before tbin->avail before bailing out. - */ - if (ptr == NULL) { - if (i > 0) { - memmove(tbin->avail - i, - tbin->avail - nfill, - i * sizeof(void *)); - } - break; - } - /* Insert such that low regions get used first. 
*/ - *(tbin->avail - nfill + i) = ptr; + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + + size_t nslab = 0; + size_t filled = 0; + edata_t *slab = NULL; + edata_list_active_t fulls; + edata_list_active_init(&fulls); + + while (filled < nfill && (slab = arena_slab_alloc(tsdn, arena, binind, + binshard, bin_info)) != NULL) { + assert((size_t)edata_nfree_get(slab) == nregs); + ++nslab; + size_t batch = nfill - filled; + if (batch > nregs) { + batch = nregs; + } + assert(batch > 0); + arena_slab_reg_alloc_batch(slab, bin_info, (unsigned)batch, + &ptrs[filled]); + assert(edata_addr_get(slab) == ptrs[filled]); + if (zero) { + memset(ptrs[filled], 0, batch * usize); } - if (config_fill && unlikely(opt_junk_alloc)) { - for (unsigned j = 0; j < cnt; j++) { - void* ptr = *(tbin->avail - nfill + i + j); - arena_alloc_junk_small(ptr, &bin_infos[binind], - true); + filled += batch; + if (batch == nregs) { + if (manual_arena) { + edata_list_active_append(&fulls, slab); } + slab = NULL; } } + + malloc_mutex_lock(tsdn, &bin->lock); + /* + * Only the last slab can be non-empty, and the last slab is non-empty + * iff slab != NULL. + */ + if (slab != NULL) { + arena_bin_lower_slab(tsdn, arena, slab, bin); + } + if (manual_arena) { + edata_list_active_concat(&bin->slabs_full, &fulls); + } + assert(edata_list_active_empty(&fulls)); if (config_stats) { - bin->stats.nmalloc += i; - bin->stats.nrequests += tbin->tstats.nrequests; - bin->stats.curregs += i; - bin->stats.nfills++; - tbin->tstats.nrequests = 0; + bin->stats.nslabs += nslab; + bin->stats.curslabs += nslab; + bin->stats.nmalloc += filled; + bin->stats.nrequests += filled; + bin->stats.curregs += filled; } malloc_mutex_unlock(tsdn, &bin->lock); - tbin->ncached = i; + arena_decay_tick(tsdn, arena); + return filled; } -void -arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero) { - if (!zero) { - memset(ptr, JEMALLOC_ALLOC_JUNK, bin_info->reg_size); +/* + * Without allocating a new slab, try arena_slab_reg_alloc() and re-fill + * bin->slabcur if necessary. 
+ */ +static void * +arena_bin_malloc_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + if (bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0) { + if (arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) { + return NULL; + } } -} -static void -arena_dalloc_junk_small_impl(void *ptr, const bin_info_t *bin_info) { - memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); + assert(bin->slabcur != NULL && edata_nfree_get(bin->slabcur) > 0); + return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); } -arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = - arena_dalloc_junk_small_impl; static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { - void *ret; - bin_t *bin; - size_t usize; - extent_t *slab; - assert(binind < SC_NBINS); - usize = sz_index2size(binind); + const bin_info_t *bin_info = &bin_infos[binind]; + size_t usize = sz_index2size(binind); unsigned binshard; - bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); - - if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { - ret = arena_slab_reg_alloc(slab, &bin_infos[binind]); - } else { - ret = arena_bin_malloc_hard(tsdn, arena, bin, binind, binshard); - } + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + malloc_mutex_lock(tsdn, &bin->lock); + edata_t *fresh_slab = NULL; + void *ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); if (ret == NULL) { malloc_mutex_unlock(tsdn, &bin->lock); - return NULL; + /******************************/ + fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard, + bin_info); + /********************************/ + malloc_mutex_lock(tsdn, &bin->lock); + /* Retry since the lock was dropped. */ + ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); + if (ret == NULL) { + if (fresh_slab == NULL) { + /* OOM */ + malloc_mutex_unlock(tsdn, &bin->lock); + return NULL; + } + ret = arena_bin_malloc_with_fresh_slab(tsdn, arena, bin, + binind, fresh_slab); + fresh_slab = NULL; + } } - if (config_stats) { bin->stats.nmalloc++; bin->stats.nrequests++; bin->stats.curregs++; } malloc_mutex_unlock(tsdn, &bin->lock); - if (config_prof && arena_prof_accum(tsdn, arena, usize)) { - prof_idump(tsdn); - } - if (!zero) { - if (config_fill) { - if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, - &bin_infos[binind], false); - } else if (unlikely(opt_zero)) { - memset(ret, 0, usize); - } - } - } else { - if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &bin_infos[binind], - true); - } + if (fresh_slab != NULL) { + arena_slab_dalloc(tsdn, arena, fresh_slab); + } + if (zero) { memset(ret, 0, usize); } - arena_decay_tick(tsdn, arena); + return ret; } @@ -1533,10 +1210,17 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; - if (usize <= SC_SMALL_MAXCLASS - && (alignment < PAGE - || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { + if (usize <= SC_SMALL_MAXCLASS) { /* Small; alignment doesn't require special slab placement. */ + + /* usize should be a result of sz_sa2u() */ + assert((usize & (alignment - 1)) == 0); + + /* + * Small usize can't come from an alignment larger than a page. 
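A hedged sketch of the caller-side contract those assertions rely on; the caller shown is illustrative rather than an exact copy of the in-tree ones, and error handling is simplified:

	size_t usize = sz_sa2u(size, alignment);
	if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
		return NULL;	/* Overflow or oversized request. */
	}
	/*
	 * sz_sa2u() only yields a small usize when alignment <= PAGE, so the
	 * small branch above never needs special slab placement.
	 */
	void *p = arena_palloc(tsdn, arena, usize, alignment, zero, tcache);
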
+ */ + assert(alignment <= PAGE); + ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize), zero, tcache, true); } else { @@ -1560,33 +1244,22 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); } - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true); - arena_t *arena = extent_arena_get(extent); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); szind_t szind = sz_size2index(usize); - extent_szind_set(extent, szind); - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - szind, false); - - prof_accum_cancel(tsdn, &arena->prof_accum, usize); + edata_szind_set(edata, szind); + emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); assert(isalloc(tsdn, ptr) == usize); } static size_t -arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { +arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - extent_szind_set(extent, SC_NBINS); - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - SC_NBINS, false); + edata_szind_set(edata, SC_NBINS); + emap_remap(tsdn, &arena_emap_global, edata, SC_NBINS, /* slab */ false); assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); @@ -1599,9 +1272,9 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, cassert(config_prof); assert(opt_prof); - extent_t *extent = iealloc(tsdn, ptr); - size_t usize = extent_usize_get(extent); - size_t bumped_usize = arena_prof_demote(tsdn, extent, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + size_t usize = edata_usize_get(edata); + size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { /* * Currently, we only do redzoning for small sampled @@ -1614,17 +1287,17 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, sz_size2index(bumped_usize), slow_path); } else { - large_dalloc(tsdn, extent); + large_dalloc(tsdn, edata); } } static void -arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, bin_t *bin) { +arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { /* Dissociate slab from bin. 
*/ if (slab == bin->slabcur) { bin->slabcur = NULL; } else { - szind_t binind = extent_szind_get(slab); + szind_t binind = edata_szind_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; /* @@ -1641,24 +1314,9 @@ arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, bin_t *bin) { } static void -arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, +arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { - assert(slab != bin->slabcur); - - malloc_mutex_unlock(tsdn, &bin->lock); - /******************************/ - arena_slab_dalloc(tsdn, arena, slab); - /****************************/ - malloc_mutex_lock(tsdn, &bin->lock); - if (config_stats) { - bin->stats.curslabs--; - } -} - -static void -arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - bin_t *bin) { - assert(extent_nfree_get(slab) > 0); + assert(edata_nfree_get(slab) > 0); /* * Make sure that if bin->slabcur is non-NULL, it refers to the @@ -1666,9 +1324,9 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, * than proactively keeping it pointing at the oldest/lowest non-full * slab. */ - if (bin->slabcur != NULL && extent_snad_comp(bin->slabcur, slab) > 0) { + if (bin->slabcur != NULL && edata_snad_comp(bin->slabcur, slab) > 0) { /* Switch slabcur. */ - if (extent_nfree_get(bin->slabcur) > 0) { + if (edata_nfree_get(bin->slabcur) > 0) { arena_bin_slabs_nonfull_insert(bin, bin->slabcur); } else { arena_bin_slabs_full_insert(arena, bin, bin->slabcur); @@ -1683,56 +1341,54 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } static void -arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, extent_t *slab, void *ptr, bool junked) { - arena_slab_data_t *slab_data = extent_slab_data_get(slab); - const bin_info_t *bin_info = &bin_infos[binind]; - - if (!junked && config_fill && unlikely(opt_junk_free)) { - arena_dalloc_junk_small(ptr, bin_info); - } - - arena_slab_reg_dalloc(slab, slab_data, ptr); - unsigned nfree = extent_nfree_get(slab); - if (nfree == bin_info->nregs) { - arena_dissociate_bin_slab(arena, slab, bin); - arena_dalloc_bin_slab(tsdn, arena, slab, bin); - } else if (nfree == 1 && slab != bin->slabcur) { - arena_bin_slabs_full_remove(arena, bin, slab); - arena_bin_lower_slab(tsdn, arena, slab, bin); - } +arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + assert(slab != bin->slabcur); if (config_stats) { - bin->stats.ndalloc++; - bin->stats.curregs--; + bin->stats.curslabs--; } } void -arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, extent_t *extent, void *ptr) { - arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, extent, ptr, - true); +arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena, + edata_t *slab, bin_t *bin) { + arena_dissociate_bin_slab(arena, slab, bin); + arena_dalloc_bin_slab_prepare(tsdn, slab, bin); +} + +void +arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena, + edata_t *slab, bin_t *bin) { + arena_bin_slabs_full_remove(arena, bin, slab); + arena_bin_lower_slab(tsdn, arena, slab, bin); } static void -arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { - szind_t binind = extent_szind_get(extent); - unsigned binshard = extent_binshard_get(extent); - bin_t *bin = &arena->bins[binind].bin_shards[binshard]; +arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { + szind_t 
binind = edata_szind_get(edata); + unsigned binshard = edata_binshard_get(edata); + bin_t *bin = arena_get_bin(arena, binind, binshard); malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, extent, ptr, - false); + arena_dalloc_bin_locked_info_t info; + arena_dalloc_bin_locked_begin(&info, binind); + bool ret = arena_dalloc_bin_locked_step(tsdn, arena, bin, + &info, binind, edata, ptr); + arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info); malloc_mutex_unlock(tsdn, &bin->lock); + + if (ret) { + arena_slab_dalloc(tsdn, arena, edata); + } } void arena_dalloc_small(tsdn_t *tsdn, void *ptr) { - extent_t *extent = iealloc(tsdn, ptr); - arena_t *arena = extent_arena_get(extent); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + arena_t *arena = arena_get_from_edata(edata); - arena_dalloc_bin(tsdn, arena, extent, ptr); + arena_dalloc_bin(tsdn, arena, edata, ptr); arena_decay_tick(tsdn, arena); } @@ -1743,7 +1399,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, /* Calls with non-zero extra had to clamp extra. */ assert(extra == 0 || size + extra <= SC_LARGE_MAXCLASS); - extent_t *extent = iealloc(tsdn, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); if (unlikely(size > SC_LARGE_MAXCLASS)) { ret = true; goto done; @@ -1766,18 +1422,19 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, goto done; } - arena_decay_tick(tsdn, extent_arena_get(extent)); + arena_t *arena = arena_get_from_edata(edata); + arena_decay_tick(tsdn, arena); ret = false; } else if (oldsize >= SC_LARGE_MINCLASS && usize_max >= SC_LARGE_MINCLASS) { - ret = large_ralloc_no_move(tsdn, extent, usize_min, usize_max, + ret = large_ralloc_no_move(tsdn, edata, usize_min, usize_max, zero); } else { ret = true; } done: - assert(extent == iealloc(tsdn, ptr)); - *newsize = extent_usize_get(extent); + assert(edata == emap_edata_lookup(tsdn, &arena_emap_global, ptr)); + *newsize = edata_usize_get(edata); return ret; } @@ -1800,7 +1457,7 @@ void * arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { - size_t usize = sz_s2u(size); + size_t usize = alignment == 0 ? sz_s2u(size) : sz_sa2u(size, alignment); if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) { return NULL; } @@ -1850,6 +1507,29 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, return ret; } +ehooks_t * +arena_get_ehooks(arena_t *arena) { + return base_ehooks_get(arena->base); +} + +extent_hooks_t * +arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, + extent_hooks_t *extent_hooks) { + background_thread_info_t *info; + if (have_background_thread) { + info = arena_background_thread_info_get(arena); + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + } + /* No using the HPA now that we have the custom hooks. 
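For context, this function is the arena-side effect of a user installing custom hooks, e.g. via the long-standing arena.&lt;i&gt;.extent_hooks mallctl; a hedged sketch of that usage from application code (my_alloc, arena_ind, and the remaining hook members are placeholders to be filled in by the application):

	#include <stdio.h>
	#include <jemalloc/jemalloc.h>

	extent_hooks_t my_hooks = {
		/* .alloc = */ my_alloc,
		/* ... remaining hook members supplied by the application ... */
	};
	extent_hooks_t *new_hooks = &my_hooks, *old_hooks;
	size_t sz = sizeof(old_hooks);
	char cmd[64];
	snprintf(cmd, sizeof(cmd), "arena.%u.extent_hooks", arena_ind);
	mallctl(cmd, (void *)&old_hooks, &sz, (void *)&new_hooks,
	    sizeof(new_hooks));
	/* Subsequent extents for this arena come from my_hooks, not the HPA. */
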
*/ + pa_shard_disable_hpa(tsd_tsdn(tsd), &arena->pa_shard); + extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks); + if (have_background_thread) { + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + } + + return ret; +} + dss_prec_t arena_dss_prec_get(arena_t *arena) { return (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_ACQUIRE); @@ -1871,7 +1551,7 @@ arena_dirty_decay_ms_default_get(void) { bool arena_dirty_decay_ms_default_set(ssize_t decay_ms) { - if (!arena_decay_ms_valid(decay_ms)) { + if (!decay_ms_valid(decay_ms)) { return true; } atomic_store_zd(&dirty_decay_ms_default, decay_ms, ATOMIC_RELAXED); @@ -1885,7 +1565,7 @@ arena_muzzy_decay_ms_default_get(void) { bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms) { - if (!arena_decay_ms_valid(decay_ms)) { + if (!decay_ms_valid(decay_ms)) { return true; } atomic_store_zd(&muzzy_decay_ms_default, decay_ms, ATOMIC_RELAXED); @@ -1896,26 +1576,8 @@ bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, size_t *new_limit) { assert(opt_retain); - - pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); - if (new_limit != NULL) { - size_t limit = *new_limit; - /* Grow no more than the new limit. */ - if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) { - return true; - } - } - - malloc_mutex_lock(tsd_tsdn(tsd), &arena->extent_grow_mtx); - if (old_limit != NULL) { - *old_limit = sz_pind2sz(arena->retain_grow_limit); - } - if (new_limit != NULL) { - arena->retain_grow_limit = new_ind; - } - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->extent_grow_mtx); - - return false; + return pac_retain_grow_limit_get_set(tsd_tsdn(tsd), + &arena->pa_shard.pac, old_limit, new_limit); } unsigned @@ -1933,13 +1595,8 @@ arena_nthreads_dec(arena_t *arena, bool internal) { atomic_fetch_sub_u(&arena->nthreads[internal], 1, ATOMIC_RELAXED); } -size_t -arena_extent_sn_next(arena_t *arena) { - return atomic_fetch_add_zu(&arena->extent_sn_next, 1, ATOMIC_RELAXED); -} - arena_t * -arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { +arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; base_t *base; unsigned i; @@ -1947,16 +1604,13 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { if (ind == 0) { base = b0get(); } else { - base = base_new(tsdn, ind, extent_hooks); + base = base_new(tsdn, ind, config->extent_hooks, + config->metadata_use_hooks); if (base == NULL) { return NULL; } } - unsigned nbins_total = 0; - for (i = 0; i < SC_NBINS; i++) { - nbins_total += bin_infos[i].n_shards; - } size_t arena_size = sizeof(arena_t) + sizeof(bin_t) * nbins_total; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { @@ -1980,110 +1634,56 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } } - if (config_prof) { - if (prof_accum_init(tsdn, &arena->prof_accum)) { - goto label_error; - } - } - - if (config_cache_oblivious) { - /* - * A nondeterministic seed based on the address of arena reduces - * the likelihood of lockstep non-uniform cache index - * utilization among identical concurrent processes, but at the - * cost of test repeatability. For debug builds, instead use a - * deterministic seed. - */ - atomic_store_zu(&arena->offset_state, config_debug ? 
ind : - (size_t)(uintptr_t)arena, ATOMIC_RELAXED); - } - - atomic_store_zu(&arena->extent_sn_next, 0, ATOMIC_RELAXED); - atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), ATOMIC_RELAXED); - atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED); - - extent_list_init(&arena->large); + edata_list_active_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) { goto label_error; } - /* - * Delay coalescing for dirty extents despite the disruptive effect on - * memory layout for best-fit extent allocation, since cached extents - * are likely to be reused soon after deallocation, and the cost of - * merging/splitting extents is non-trivial. - */ - if (extents_init(tsdn, &arena->extents_dirty, extent_state_dirty, - true)) { - goto label_error; - } - /* - * Coalesce muzzy extents immediately, because operations on them are in - * the critical path much less often than for dirty extents. - */ - if (extents_init(tsdn, &arena->extents_muzzy, extent_state_muzzy, - false)) { - goto label_error; - } - /* - * Coalesce retained extents immediately, in part because they will - * never be evicted (and therefore there's no opportunity for delayed - * coalescing), but also because operations on retained extents are not - * in the critical path. - */ - if (extents_init(tsdn, &arena->extents_retained, extent_state_retained, - false)) { - goto label_error; - } - - if (arena_decay_init(&arena->decay_dirty, - arena_dirty_decay_ms_default_get(), &arena->stats.decay_dirty)) { - goto label_error; - } - if (arena_decay_init(&arena->decay_muzzy, - arena_muzzy_decay_ms_default_get(), &arena->stats.decay_muzzy)) { - goto label_error; - } - - arena->extent_grow_next = sz_psz2ind(HUGEPAGE); - arena->retain_grow_limit = sz_psz2ind(SC_LARGE_MAXCLASS); - if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow", - WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { - goto label_error; - } - - extent_avail_new(&arena->extent_avail); - if (malloc_mutex_init(&arena->extent_avail_mtx, "extent_avail", - WITNESS_RANK_EXTENT_AVAIL, malloc_mutex_rank_exclusive)) { + nstime_t cur_time; + nstime_init_update(&cur_time); + if (pa_shard_init(tsdn, &arena->pa_shard, &arena_pa_central_global, + &arena_emap_global, base, ind, &arena->stats.pa_shard_stats, + LOCKEDINT_MTX(arena->stats.mtx), &cur_time, oversize_threshold, + arena_dirty_decay_ms_default_get(), + arena_muzzy_decay_ms_default_get())) { goto label_error; } /* Initialize bins. */ - uintptr_t bin_addr = (uintptr_t)arena + sizeof(arena_t); atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); - for (i = 0; i < SC_NBINS; i++) { - unsigned nshards = bin_infos[i].n_shards; - arena->bins[i].bin_shards = (bin_t *)bin_addr; - bin_addr += nshards * sizeof(bin_t); - for (unsigned j = 0; j < nshards; j++) { - bool err = bin_init(&arena->bins[i].bin_shards[j]); - if (err) { - goto label_error; - } + for (i = 0; i < nbins_total; i++) { + bool err = bin_init(&arena->bins[i]); + if (err) { + goto label_error; } } - assert(bin_addr == (uintptr_t)arena + arena_size); arena->base = base; /* Set arena before creating background threads. */ arena_set(ind, arena); + arena->ind = ind; - nstime_init(&arena->create_time, 0); - nstime_update(&arena->create_time); + nstime_init_update(&arena->create_time); + + /* + * We turn on the HPA if set to. There are two exceptions: + * - Custom extent hooks (we should only return memory allocated from + * them in that case). + * - Arena 0 initialization. 
In this case, we're mid-bootstrapping, and + * so arena_hpa_global is not yet initialized. + */ + if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { + hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; + hpa_shard_opts.deferral_allowed = background_thread_enabled(); + if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, + &hpa_shard_opts, &opt_hpa_sec_opts)) { + goto label_error; + } + } /* We don't support reentrancy for arena 0 bootstrapping. */ if (ind != 0) { @@ -2129,10 +1729,12 @@ arena_choose_huge(tsd_t *tsd) { * expected for huge allocations. */ if (arena_dirty_decay_ms_default_get() > 0) { - arena_dirty_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0); + arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, + extent_state_dirty, 0); } if (arena_muzzy_decay_ms_default_get() > 0) { - arena_muzzy_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0); + arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, + extent_state_muzzy, 0); } } @@ -2167,8 +1769,8 @@ arena_is_huge(unsigned arena_ind) { return (arena_ind == huge_arena_ind); } -void -arena_boot(sc_data_t *sc_data) { +bool +arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); arena_muzzy_decay_ms_default_set(opt_muzzy_decay_ms); for (unsigned i = 0; i < SC_NBINS; i++) { @@ -2176,12 +1778,20 @@ arena_boot(sc_data_t *sc_data) { div_init(&arena_binind_div_info[i], (1U << sc->lg_base) + (sc->ndelta << sc->lg_delta)); } + + uint32_t cur_offset = (uint32_t)offsetof(arena_t, bins); + for (szind_t i = 0; i < SC_NBINS; i++) { + arena_bin_offsets[i] = cur_offset; + nbins_total += bin_infos[i].n_shards; + cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t)); + } + return pa_central_init(&arena_pa_central_global, base, hpa, + &hpa_hooks_default); } void arena_prefork0(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->decay_dirty.mtx); - malloc_mutex_prefork(tsdn, &arena->decay_muzzy.mtx); + pa_shard_prefork0(tsdn, &arena->pa_shard); } void @@ -2193,59 +1803,50 @@ arena_prefork1(tsdn_t *tsdn, arena_t *arena) { void arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->extent_grow_mtx); + pa_shard_prefork2(tsdn, &arena->pa_shard); } void arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - extents_prefork(tsdn, &arena->extents_dirty); - extents_prefork(tsdn, &arena->extents_muzzy); - extents_prefork(tsdn, &arena->extents_retained); + pa_shard_prefork3(tsdn, &arena->pa_shard); } void arena_prefork4(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->extent_avail_mtx); + pa_shard_prefork4(tsdn, &arena->pa_shard); } void arena_prefork5(tsdn_t *tsdn, arena_t *arena) { - base_prefork(tsdn, arena->base); + pa_shard_prefork5(tsdn, &arena->pa_shard); } void arena_prefork6(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->large_mtx); + base_prefork(tsdn, arena->base); } void arena_prefork7(tsdn_t *tsdn, arena_t *arena) { - for (unsigned i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_prefork(tsdn, &arena->bins[i].bin_shards[j]); - } + malloc_mutex_prefork(tsdn, &arena->large_mtx); +} + +void +arena_prefork8(tsdn_t *tsdn, arena_t *arena) { + for (unsigned i = 0; i < nbins_total; i++) { + bin_prefork(tsdn, &arena->bins[i]); } } void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { - unsigned i; - - for (i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_postfork_parent(tsdn, - &arena->bins[i].bin_shards[j]); - } + for (unsigned i = 0; i < 
nbins_total; i++) { + bin_postfork_parent(tsdn, &arena->bins[i]); } + malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); - malloc_mutex_postfork_parent(tsdn, &arena->extent_avail_mtx); - extents_postfork_parent(tsdn, &arena->extents_dirty); - extents_postfork_parent(tsdn, &arena->extents_muzzy); - extents_postfork_parent(tsdn, &arena->extents_retained); - malloc_mutex_postfork_parent(tsdn, &arena->extent_grow_mtx); - malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); - malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); + pa_shard_postfork_parent(tsdn, &arena->pa_shard); if (config_stats) { malloc_mutex_postfork_parent(tsdn, &arena->tcache_ql_mtx); } @@ -2253,8 +1854,6 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { void arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { - unsigned i; - atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED); if (tsd_arena_get(tsdn_tsd(tsdn)) == arena) { @@ -2266,32 +1865,26 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { if (config_stats) { ql_new(&arena->tcache_ql); ql_new(&arena->cache_bin_array_descriptor_ql); - tcache_t *tcache = tcache_get(tsdn_tsd(tsdn)); - if (tcache != NULL && tcache->arena == arena) { - ql_elm_new(tcache, link); - ql_tail_insert(&arena->tcache_ql, tcache, link); + tcache_slow_t *tcache_slow = tcache_slow_get(tsdn_tsd(tsdn)); + if (tcache_slow != NULL && tcache_slow->arena == arena) { + tcache_t *tcache = tcache_slow->tcache; + ql_elm_new(tcache_slow, link); + ql_tail_insert(&arena->tcache_ql, tcache_slow, link); cache_bin_array_descriptor_init( - &tcache->cache_bin_array_descriptor, - tcache->bins_small, tcache->bins_large); + &tcache_slow->cache_bin_array_descriptor, + tcache->bins); ql_tail_insert(&arena->cache_bin_array_descriptor_ql, - &tcache->cache_bin_array_descriptor, link); + &tcache_slow->cache_bin_array_descriptor, link); } } - for (i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_postfork_child(tsdn, &arena->bins[i].bin_shards[j]); - } + for (unsigned i = 0; i < nbins_total; i++) { + bin_postfork_child(tsdn, &arena->bins[i]); } + malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); - malloc_mutex_postfork_child(tsdn, &arena->extent_avail_mtx); - extents_postfork_child(tsdn, &arena->extents_dirty); - extents_postfork_child(tsdn, &arena->extents_muzzy); - extents_postfork_child(tsdn, &arena->extents_retained); - malloc_mutex_postfork_child(tsdn, &arena->extent_grow_mtx); - malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); - malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); + pa_shard_postfork_child(tsdn, &arena->pa_shard); if (config_stats) { malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx); } |
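
With this change the bin shards live in one flat array of nbins_total entries trailing arena_t, and arena_bin_offsets[] (filled in arena_boot() above) records each size class's byte offset into that array. The inline lookup helper lives in a header outside this diff; a hedged sketch of what it presumably reduces to:

	static inline bin_t *
	arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) {
		assert(binshard < bin_infos[binind].n_shards);
		bin_t *shard0 = (bin_t *)((uintptr_t)arena +
		    arena_bin_offsets[binind]);
		return shard0 + binshard;
	}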