Diffstat (limited to 'deps/jemalloc/src/arena.c')
-rw-r--r-- | deps/jemalloc/src/arena.c | 555
1 file changed, 405 insertions(+), 150 deletions(-)
diff --git a/deps/jemalloc/src/arena.c b/deps/jemalloc/src/arena.c
index 5d55bf1a0..ba50e4103 100644
--- a/deps/jemalloc/src/arena.c
+++ b/deps/jemalloc/src/arena.c
@@ -8,9 +8,11 @@
 #include "jemalloc/internal/extent_mmap.h"
 #include "jemalloc/internal/mutex.h"
 #include "jemalloc/internal/rtree.h"
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/safety_check.h"
 #include "jemalloc/internal/util.h"
 
+JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
+
 /******************************************************************************/
 /* Data. */
 
@@ -40,7 +42,11 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
 #undef STEP
 };
 
-static div_info_t arena_binind_div_info[NBINS];
+static div_info_t arena_binind_div_info[SC_NBINS];
+
+size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
+size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT;
+static unsigned huge_arena_ind;
 
 /******************************************************************************/
 /*
@@ -61,7 +67,7 @@ static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
 /******************************************************************************/
 
 void
-arena_basic_stats_merge(UNUSED tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
+arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
     const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
     size_t *nactive, size_t *ndirty, size_t *nmuzzy) {
 	*nthreads += arena_nthreads_get(arena, false);
@@ -77,7 +83,8 @@ void
 arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
     const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
     size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
-    bin_stats_t *bstats, arena_stats_large_t *lstats) {
+    bin_stats_t *bstats, arena_stats_large_t *lstats,
+    arena_stats_extents_t *estats) {
 	cassert(config_stats);
 
 	arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms,
@@ -94,6 +101,10 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	arena_stats_accum_zu(&astats->retained,
 	    extents_npages_get(&arena->extents_retained) << LG_PAGE);
 
+	atomic_store_zu(&astats->extent_avail,
+	    atomic_load_zu(&arena->extent_avail_cnt, ATOMIC_RELAXED),
+	    ATOMIC_RELAXED);
+
 	arena_stats_accum_u64(&astats->decay_dirty.npurge,
 	    arena_stats_read_u64(tsdn, &arena->stats,
 	    &arena->stats.decay_dirty.npurge));
@@ -121,8 +132,10 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	    (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) +
 	    extents_npages_get(&arena->extents_dirty) +
 	    extents_npages_get(&arena->extents_muzzy)) << LG_PAGE)));
+	arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu(
+	    &arena->stats.abandoned_vm, ATOMIC_RELAXED));
 
-	for (szind_t i = 0; i < NSIZES - NBINS; i++) {
+	for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) {
 		uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats,
 		    &arena->stats.lstats[i].nmalloc);
 		arena_stats_accum_u64(&lstats[i].nmalloc, nmalloc);
@@ -140,12 +153,43 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 		arena_stats_accum_u64(&astats->nrequests_large,
 		    nmalloc + nrequests);
 
+		/* nfill == nmalloc for large currently. */
+		arena_stats_accum_u64(&lstats[i].nfills, nmalloc);
+		arena_stats_accum_u64(&astats->nfills_large, nmalloc);
+
+		uint64_t nflush = arena_stats_read_u64(tsdn, &arena->stats,
+		    &arena->stats.lstats[i].nflushes);
+		arena_stats_accum_u64(&lstats[i].nflushes, nflush);
+		arena_stats_accum_u64(&astats->nflushes_large, nflush);
+
 		assert(nmalloc >= ndalloc);
 		assert(nmalloc - ndalloc <= SIZE_T_MAX);
 		size_t curlextents = (size_t)(nmalloc - ndalloc);
 		lstats[i].curlextents += curlextents;
 		arena_stats_accum_zu(&astats->allocated_large,
-		    curlextents * sz_index2size(NBINS + i));
+		    curlextents * sz_index2size(SC_NBINS + i));
+	}
+
+	for (pszind_t i = 0; i < SC_NPSIZES; i++) {
+		size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes,
+		    retained_bytes;
+		dirty = extents_nextents_get(&arena->extents_dirty, i);
+		muzzy = extents_nextents_get(&arena->extents_muzzy, i);
+		retained = extents_nextents_get(&arena->extents_retained, i);
+		dirty_bytes = extents_nbytes_get(&arena->extents_dirty, i);
+		muzzy_bytes = extents_nbytes_get(&arena->extents_muzzy, i);
+		retained_bytes =
+		    extents_nbytes_get(&arena->extents_retained, i);
+
+		atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED);
+		atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED);
+		atomic_store_zu(&estats[i].nretained, retained, ATOMIC_RELAXED);
+		atomic_store_zu(&estats[i].dirty_bytes, dirty_bytes,
+		    ATOMIC_RELAXED);
+		atomic_store_zu(&estats[i].muzzy_bytes, muzzy_bytes,
+		    ATOMIC_RELAXED);
+		atomic_store_zu(&estats[i].retained_bytes, retained_bytes,
+		    ATOMIC_RELAXED);
 	}
 
 	arena_stats_unlock(tsdn, &arena->stats);
@@ -156,7 +200,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	cache_bin_array_descriptor_t *descriptor;
 	ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) {
 		szind_t i = 0;
-		for (; i < NBINS; i++) {
+		for (; i < SC_NBINS; i++) {
 			cache_bin_t *tbin = &descriptor->bins_small[i];
 			arena_stats_accum_zu(&astats->tcache_bytes,
 			    tbin->ncached * sz_index2size(i));
@@ -200,8 +244,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	nstime_update(&astats->uptime);
 	nstime_subtract(&astats->uptime, &arena->create_time);
 
-	for (szind_t i = 0; i < NBINS; i++) {
-		bin_stats_merge(tsdn, &bstats[i], &arena->bins[i]);
+	for (szind_t i = 0; i < SC_NBINS; i++) {
+		for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
+			bin_stats_merge(tsdn, &bstats[i],
+			    &arena->bins[i].bin_shards[j]);
+		}
 	}
 }
 
@@ -236,6 +283,54 @@ arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) {
 	return ret;
 }
 
+static void
+arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info,
+			   unsigned cnt, void** ptrs) {
+	arena_slab_data_t *slab_data = extent_slab_data_get(slab);
+
+	assert(extent_nfree_get(slab) >= cnt);
+	assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info));
+
+#if (! defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE)
+	for (unsigned i = 0; i < cnt; i++) {
+		size_t regind = bitmap_sfu(slab_data->bitmap,
+		    &bin_info->bitmap_info);
+		*(ptrs + i) = (void *)((uintptr_t)extent_addr_get(slab) +
+		    (uintptr_t)(bin_info->reg_size * regind));
+	}
+#else
+	unsigned group = 0;
+	bitmap_t g = slab_data->bitmap[group];
+	unsigned i = 0;
+	while (i < cnt) {
+		while (g == 0) {
+			g = slab_data->bitmap[++group];
+		}
+		size_t shift = group << LG_BITMAP_GROUP_NBITS;
+		size_t pop = popcount_lu(g);
+		if (pop > (cnt - i)) {
+			pop = cnt - i;
+		}
+
+		/*
+		 * Load from memory locations only once, outside the
+		 * hot loop below.
+		 */
+		uintptr_t base = (uintptr_t)extent_addr_get(slab);
+		uintptr_t regsize = (uintptr_t)bin_info->reg_size;
+		while (pop--) {
+			size_t bit = cfs_lu(&g);
+			size_t regind = shift + bit;
+			*(ptrs + i) = (void *)(base + regsize * regind);
+
+			i++;
+		}
+		slab_data->bitmap[group] = g;
+	}
+#endif
+	extent_nfree_sub(slab, cnt);
+}
+
 #ifndef JEMALLOC_JET
 static
 #endif
@@ -291,11 +386,11 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) {
 
 	cassert(config_stats);
 
-	if (usize < LARGE_MINCLASS) {
-		usize = LARGE_MINCLASS;
+	if (usize < SC_LARGE_MINCLASS) {
+		usize = SC_LARGE_MINCLASS;
 	}
 	index = sz_size2index(usize);
-	hindex = (index >= NBINS) ? index - NBINS : 0;
+	hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0;
 
 	arena_stats_add_u64(tsdn, &arena->stats,
 	    &arena->stats.lstats[hindex].nmalloc, 1);
@@ -307,11 +402,11 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) {
 
 	cassert(config_stats);
 
-	if (usize < LARGE_MINCLASS) {
-		usize = LARGE_MINCLASS;
+	if (usize < SC_LARGE_MINCLASS) {
+		usize = SC_LARGE_MINCLASS;
 	}
 	index = sz_size2index(usize);
-	hindex = (index >= NBINS) ? index - NBINS : 0;
+	hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0;
 
 	arena_stats_add_u64(tsdn, &arena->stats,
 	    &arena->stats.lstats[hindex].ndalloc, 1);
@@ -324,6 +419,11 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize,
 	arena_large_malloc_stats_update(tsdn, arena, usize);
 }
 
+static bool
+arena_may_have_muzzy(arena_t *arena) {
+	return (pages_can_purge_lazy && (arena_muzzy_decay_ms_get(arena) != 0));
+}
+
 extent_t *
 arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize,
     size_t alignment, bool *zero) {
@@ -338,7 +438,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize,
 	extent_t *extent = extents_alloc(tsdn, arena, &extent_hooks,
 	    &arena->extents_dirty, NULL, usize, sz_large_pad, alignment, false,
 	    szind, zero, &commit);
-	if (extent == NULL) {
+	if (extent == NULL && arena_may_have_muzzy(arena)) {
 		extent = extents_alloc(tsdn, arena, &extent_hooks,
 		    &arena->extents_muzzy, NULL, usize, sz_large_pad, alignment,
 		    false, szind, zero, &commit);
@@ -743,7 +843,7 @@ static size_t
 arena_decay_stashed(tsdn_t *tsdn, arena_t *arena,
     extent_hooks_t **r_extent_hooks, arena_decay_t *decay, extents_t *extents,
     bool all, extent_list_t *decay_extents, bool is_background_thread) {
-	UNUSED size_t nmadvise, nunmapped;
+	size_t nmadvise, nunmapped;
 	size_t npurged;
 
 	if (config_stats) {
@@ -834,7 +934,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
 	size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, extents,
 	    npages_limit, npages_decay_max, &decay_extents);
 	if (npurge != 0) {
-		UNUSED size_t npurged = arena_decay_stashed(tsdn, arena,
+		size_t npurged = arena_decay_stashed(tsdn, arena,
 		    &extent_hooks, decay, extents, all, &decay_extents,
 		    is_background_thread);
 		assert(npurged == npurge);
@@ -863,7 +963,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
 	bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, extents,
 	    is_background_thread);
 
-	UNUSED size_t npages_new;
+	size_t npages_new;
 	if (epoch_advanced) {
 		/* Backlog is updated on epoch advance. */
 		npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1];
@@ -913,11 +1013,17 @@ static void
 arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) {
 	assert(extent_nfree_get(slab) > 0);
 	extent_heap_insert(&bin->slabs_nonfull, slab);
+	if (config_stats) {
+		bin->stats.nonfull_slabs++;
+	}
 }
 
 static void
 arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) {
 	extent_heap_remove(&bin->slabs_nonfull, slab);
+	if (config_stats) {
+		bin->stats.nonfull_slabs--;
+	}
 }
 
 static extent_t *
@@ -928,6 +1034,7 @@ arena_bin_slabs_nonfull_tryget(bin_t *bin) {
 	}
 	if (config_stats) {
 		bin->stats.reslabs++;
+		bin->stats.nonfull_slabs--;
 	}
 	return slab;
 }
@@ -954,6 +1061,37 @@ arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, extent_t *slab) {
 	extent_list_remove(&bin->slabs_full, slab);
 }
 
+static void
+arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) {
+	extent_t *slab;
+
+	malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+	if (bin->slabcur != NULL) {
+		slab = bin->slabcur;
+		bin->slabcur = NULL;
+		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+		arena_slab_dalloc(tsd_tsdn(tsd), arena, slab);
+		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+	}
+	while ((slab = extent_heap_remove_first(&bin->slabs_nonfull)) != NULL) {
+		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+		arena_slab_dalloc(tsd_tsdn(tsd), arena, slab);
+		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+	}
+	for (slab = extent_list_first(&bin->slabs_full); slab != NULL;
+	    slab = extent_list_first(&bin->slabs_full)) {
+		arena_bin_slabs_full_remove(arena, bin, slab);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+		arena_slab_dalloc(tsd_tsdn(tsd), arena, slab);
+		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+	}
+	if (config_stats) {
+		bin->stats.curregs = 0;
+		bin->stats.curslabs = 0;
+	}
+	malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+}
+
 void
 arena_reset(tsd_t *tsd, arena_t *arena) {
 	/*
@@ -983,7 +1121,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) {
 		rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
 		rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx,
 		    (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab);
-		assert(alloc_ctx.szind != NSIZES);
+		assert(alloc_ctx.szind != SC_NSIZES);
 
 		if (config_stats || (config_prof && opt_prof)) {
 			usize = sz_index2size(alloc_ctx.szind);
@@ -999,35 +1137,11 @@ arena_reset(tsd_t *tsd, arena_t *arena) {
 	malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx);
 
 	/* Bins. */
-	for (unsigned i = 0; i < NBINS; i++) {
-		extent_t *slab;
-		bin_t *bin = &arena->bins[i];
-		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
-		if (bin->slabcur != NULL) {
-			slab = bin->slabcur;
-			bin->slabcur = NULL;
-			malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
-			arena_slab_dalloc(tsd_tsdn(tsd), arena, slab);
-			malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+	for (unsigned i = 0; i < SC_NBINS; i++) {
+		for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
+			arena_bin_reset(tsd, arena,
+			    &arena->bins[i].bin_shards[j]);
 		}
-		while ((slab = extent_heap_remove_first(&bin->slabs_nonfull)) !=
-		    NULL) {
-			malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
-			arena_slab_dalloc(tsd_tsdn(tsd), arena, slab);
-			malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
-		}
-		for (slab = extent_list_first(&bin->slabs_full); slab != NULL;
-		    slab = extent_list_first(&bin->slabs_full)) {
-			arena_bin_slabs_full_remove(arena, bin, slab);
-			malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
-			arena_slab_dalloc(tsd_tsdn(tsd), arena, slab);
-			malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
-		}
-		if (config_stats) {
-			bin->stats.curregs = 0;
-			bin->stats.curslabs = 0;
-		}
-		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
 	}
 
 	atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED);
@@ -1112,7 +1226,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena,
 }
 
 static extent_t *
-arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind,
+arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard,
     const bin_info_t *bin_info) {
 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
 	    WITNESS_RANK_CORE, 0);
@@ -1124,7 +1238,7 @@
 	extent_t *slab = extents_alloc(tsdn, arena, &extent_hooks,
 	    &arena->extents_dirty, NULL, bin_info->slab_size, 0, PAGE, true,
 	    binind, &zero, &commit);
-	if (slab == NULL) {
+	if (slab == NULL && arena_may_have_muzzy(arena)) {
 		slab = extents_alloc(tsdn, arena, &extent_hooks,
 		    &arena->extents_muzzy, NULL, bin_info->slab_size, 0, PAGE,
 		    true, binind, &zero, &commit);
@@ -1140,7 +1254,7 @@
 
 	/* Initialize slab internals. */
 	arena_slab_data_t *slab_data = extent_slab_data_get(slab);
-	extent_nfree_set(slab, bin_info->nregs);
+	extent_nfree_binshard_set(slab, bin_info->nregs, binshard);
 	bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false);
 
 	arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE);
@@ -1150,7 +1264,7 @@
 
 static extent_t *
 arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
-    szind_t binind) {
+    szind_t binind, unsigned binshard) {
 	extent_t *slab;
 	const bin_info_t *bin_info;
 
@@ -1166,7 +1280,7 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
 	/* Allocate a new slab. */
 	malloc_mutex_unlock(tsdn, &bin->lock);
 	/******************************/
-	slab = arena_slab_alloc(tsdn, arena, binind, bin_info);
+	slab = arena_slab_alloc(tsdn, arena, binind, binshard, bin_info);
 	/********************************/
 	malloc_mutex_lock(tsdn, &bin->lock);
 	if (slab != NULL) {
@@ -1193,7 +1307,7 @@
 /* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). */
 static void *
 arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
-    szind_t binind) {
+    szind_t binind, unsigned binshard) {
 	const bin_info_t *bin_info;
 	extent_t *slab;
 
@@ -1202,7 +1316,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
 		arena_bin_slabs_full_insert(arena, bin, bin->slabcur);
 		bin->slabcur = NULL;
 	}
-	slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind);
+	slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, binshard);
 	if (bin->slabcur != NULL) {
 		/*
 		 * Another thread updated slabcur while this one ran without the
@@ -1246,46 +1360,75 @@
 	return arena_slab_reg_alloc(slab, bin_info);
 }
 
+/* Choose a bin shard and return the locked bin. */
+bin_t *
+arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind,
+    unsigned *binshard) {
+	bin_t *bin;
+	if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) {
+		*binshard = 0;
+	} else {
+		*binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind];
+	}
+	assert(*binshard < bin_infos[binind].n_shards);
+	bin = &arena->bins[binind].bin_shards[*binshard];
+	malloc_mutex_lock(tsdn, &bin->lock);
+
+	return bin;
+}
+
 void
 arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
     cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) {
-	unsigned i, nfill;
-	bin_t *bin;
+	unsigned i, nfill, cnt;
 
 	assert(tbin->ncached == 0);
 
 	if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) {
 		prof_idump(tsdn);
 	}
-	bin = &arena->bins[binind];
-	malloc_mutex_lock(tsdn, &bin->lock);
+
+	unsigned binshard;
+	bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard);
+
 	for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
-	    tcache->lg_fill_div[binind]); i < nfill; i++) {
+	    tcache->lg_fill_div[binind]); i < nfill; i += cnt) {
 		extent_t *slab;
-		void *ptr;
 		if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) >
 		    0) {
-			ptr = arena_slab_reg_alloc(slab, &bin_infos[binind]);
+			unsigned tofill = nfill - i;
+			cnt = tofill < extent_nfree_get(slab) ?
+				tofill : extent_nfree_get(slab);
+			arena_slab_reg_alloc_batch(
+			   slab, &bin_infos[binind], cnt,
+			   tbin->avail - nfill + i);
 		} else {
-			ptr = arena_bin_malloc_hard(tsdn, arena, bin, binind);
-		}
-		if (ptr == NULL) {
+			cnt = 1;
+			void *ptr = arena_bin_malloc_hard(tsdn, arena, bin,
+			    binind, binshard);
 			/*
 			 * OOM.  tbin->avail isn't yet filled down to its first
 			 * element, so the successful allocations (if any) must
 			 * be moved just before tbin->avail before bailing out.
 			 */
-			if (i > 0) {
-				memmove(tbin->avail - i, tbin->avail - nfill,
-				    i * sizeof(void *));
+			if (ptr == NULL) {
+				if (i > 0) {
+					memmove(tbin->avail - i,
+						tbin->avail - nfill,
+						i * sizeof(void *));
+				}
+				break;
 			}
-			break;
+			/* Insert such that low regions get used first. */
+			*(tbin->avail - nfill + i) = ptr;
 		}
 		if (config_fill && unlikely(opt_junk_alloc)) {
-			arena_alloc_junk_small(ptr, &bin_infos[binind], true);
+			for (unsigned j = 0; j < cnt; j++) {
+				void* ptr = *(tbin->avail - nfill + i + j);
+				arena_alloc_junk_small(ptr, &bin_infos[binind],
+							true);
+			}
 		}
-		/* Insert such that low regions get used first. */
-		*(tbin->avail - nfill + i) = ptr;
 	}
 	if (config_stats) {
 		bin->stats.nmalloc += i;
@@ -1320,15 +1463,15 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) {
 	size_t usize;
 	extent_t *slab;
 
-	assert(binind < NBINS);
-	bin = &arena->bins[binind];
+	assert(binind < SC_NBINS);
 	usize = sz_index2size(binind);
+	unsigned binshard;
+	bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard);
 
-	malloc_mutex_lock(tsdn, &bin->lock);
 	if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) {
 		ret = arena_slab_reg_alloc(slab, &bin_infos[binind]);
 	} else {
-		ret = arena_bin_malloc_hard(tsdn, arena, bin, binind);
+		ret = arena_bin_malloc_hard(tsdn, arena, bin, binind, binshard);
 	}
 
 	if (ret == NULL) {
@@ -1373,13 +1516,13 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
 	assert(!tsdn_null(tsdn) || arena != NULL);
 
 	if (likely(!tsdn_null(tsdn))) {
-		arena = arena_choose(tsdn_tsd(tsdn), arena);
+		arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, size);
 	}
 	if (unlikely(arena == NULL)) {
 		return NULL;
 	}
 
-	if (likely(size <= SMALL_MAXCLASS)) {
+	if (likely(size <= SC_SMALL_MAXCLASS)) {
 		return arena_malloc_small(tsdn, arena, ind, zero);
 	}
 	return large_malloc(tsdn, arena, sz_index2size(ind), zero);
@@ -1390,8 +1533,9 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
     bool zero, tcache_t *tcache) {
 	void *ret;
 
-	if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE
-	    && (usize & PAGE_MASK) == 0))) {
+	if (usize <= SC_SMALL_MAXCLASS
+	    && (alignment < PAGE
+	    || (alignment == PAGE && (usize & PAGE_MASK) == 0))) {
 		/* Small; alignment doesn't require special slab placement. */
 		ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize),
 		    zero, tcache, true);
@@ -1406,11 +1550,15 @@
 }
 
 void
-arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize) {
+arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) {
 	cassert(config_prof);
 	assert(ptr != NULL);
-	assert(isalloc(tsdn, ptr) == LARGE_MINCLASS);
-	assert(usize <= SMALL_MAXCLASS);
+	assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS);
+	assert(usize <= SC_SMALL_MAXCLASS);
+
+	if (config_opt_safety_checks) {
+		safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS);
+	}
 
 	rtree_ctx_t rtree_ctx_fallback;
 	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
@@ -1434,15 +1582,15 @@ arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) {
 	cassert(config_prof);
 	assert(ptr != NULL);
 
-	extent_szind_set(extent, NBINS);
+	extent_szind_set(extent, SC_NBINS);
 	rtree_ctx_t rtree_ctx_fallback;
 	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
 	rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr,
-	    NBINS, false);
+	    SC_NBINS, false);
 
-	assert(isalloc(tsdn, ptr) == LARGE_MINCLASS);
+	assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS);
 
-	return LARGE_MINCLASS;
+	return SC_LARGE_MINCLASS;
 }
 
 void
@@ -1452,10 +1600,19 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
 	assert(opt_prof);
 
 	extent_t *extent = iealloc(tsdn, ptr);
-	size_t usize = arena_prof_demote(tsdn, extent, ptr);
-	if (usize <= tcache_maxclass) {
+	size_t usize = extent_usize_get(extent);
+	size_t bumped_usize = arena_prof_demote(tsdn, extent, ptr);
+	if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) {
+		/*
+		 * Currently, we only do redzoning for small sampled
+		 * allocations.
+		 */
+		assert(bumped_usize == SC_LARGE_MINCLASS);
+		safety_check_verify_redzone(ptr, usize, bumped_usize);
+	}
+	if (bumped_usize <= tcache_maxclass && tcache != NULL) {
 		tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
-		    sz_size2index(usize), slow_path);
+		    sz_size2index(bumped_usize), slow_path);
 	} else {
 		large_dalloc(tsdn, extent);
 	}
@@ -1499,7 +1656,7 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
 }
 
 static void
-arena_bin_lower_slab(UNUSED tsdn_t *tsdn, arena_t *arena, extent_t *slab,
+arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
     bin_t *bin) {
 	assert(extent_nfree_get(slab) > 0);
 
@@ -1526,11 +1683,9 @@
 }
 
 static void
-arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
-    void *ptr, bool junked) {
+arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
+    szind_t binind, extent_t *slab, void *ptr, bool junked) {
 	arena_slab_data_t *slab_data = extent_slab_data_get(slab);
-	szind_t binind = extent_szind_get(slab);
-	bin_t *bin = &arena->bins[binind];
 	const bin_info_t *bin_info = &bin_infos[binind];
 
 	if (!junked && config_fill && unlikely(opt_junk_free)) {
@@ -1554,18 +1709,21 @@
 }
 
 void
-arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent,
-    void *ptr) {
-	arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, true);
+arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
+    szind_t binind, extent_t *extent, void *ptr) {
+	arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, extent, ptr,
+	    true);
 }
 
 static void
 arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) {
 	szind_t binind = extent_szind_get(extent);
-	bin_t *bin = &arena->bins[binind];
+	unsigned binshard = extent_binshard_get(extent);
+	bin_t *bin = &arena->bins[binind].bin_shards[binshard];
 
 	malloc_mutex_lock(tsdn, &bin->lock);
-	arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, false);
+	arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, extent, ptr,
+	    false);
 	malloc_mutex_unlock(tsdn, &bin->lock);
 }
 
@@ -1580,38 +1738,48 @@ arena_dalloc_small(tsdn_t *tsdn, void *ptr) {
 
 bool
 arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
-    size_t extra, bool zero) {
+    size_t extra, bool zero, size_t *newsize) {
+	bool ret;
 	/* Calls with non-zero extra had to clamp extra. */
-	assert(extra == 0 || size + extra <= LARGE_MAXCLASS);
+	assert(extra == 0 || size + extra <= SC_LARGE_MAXCLASS);
 
-	if (unlikely(size > LARGE_MAXCLASS)) {
-		return true;
+	extent_t *extent = iealloc(tsdn, ptr);
+	if (unlikely(size > SC_LARGE_MAXCLASS)) {
+		ret = true;
+		goto done;
 	}
 
-	extent_t *extent = iealloc(tsdn, ptr);
 	size_t usize_min = sz_s2u(size);
 	size_t usize_max = sz_s2u(size + extra);
-	if (likely(oldsize <= SMALL_MAXCLASS && usize_min <= SMALL_MAXCLASS)) {
+	if (likely(oldsize <= SC_SMALL_MAXCLASS && usize_min
+	    <= SC_SMALL_MAXCLASS)) {
 		/*
 		 * Avoid moving the allocation if the size class can be left the
 		 * same.
 		 */
 		assert(bin_infos[sz_size2index(oldsize)].reg_size == oldsize);
-		if ((usize_max > SMALL_MAXCLASS || sz_size2index(usize_max) !=
-		    sz_size2index(oldsize)) && (size > oldsize || usize_max <
-		    oldsize)) {
-			return true;
+		if ((usize_max > SC_SMALL_MAXCLASS
+		    || sz_size2index(usize_max) != sz_size2index(oldsize))
+		    && (size > oldsize || usize_max < oldsize)) {
+			ret = true;
+			goto done;
 		}
 
 		arena_decay_tick(tsdn, extent_arena_get(extent));
-		return false;
-	} else if (oldsize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS) {
-		return large_ralloc_no_move(tsdn, extent, usize_min, usize_max,
+		ret = false;
+	} else if (oldsize >= SC_LARGE_MINCLASS
+	    && usize_max >= SC_LARGE_MINCLASS) {
+		ret = large_ralloc_no_move(tsdn, extent, usize_min, usize_max,
 		    zero);
+	} else {
+		ret = true;
 	}
+done:
+	assert(extent == iealloc(tsdn, ptr));
+	*newsize = extent_usize_get(extent);
 
-	return true;
+	return ret;
 }
 
 static void *
@@ -1622,7 +1790,7 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize,
 		    zero, tcache, true);
 	}
 	usize = sz_sa2u(usize, alignment);
-	if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
+	if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
 		return NULL;
 	}
 	return ipalloct(tsdn, usize, alignment, zero, tcache, arena);
@@ -1630,22 +1798,30 @@ void *
 arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
-    size_t size, size_t alignment, bool zero, tcache_t *tcache) {
+    size_t size, size_t alignment, bool zero, tcache_t *tcache,
+    hook_ralloc_args_t *hook_args) {
 	size_t usize = sz_s2u(size);
-	if (unlikely(usize == 0 || size > LARGE_MAXCLASS)) {
+	if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) {
 		return NULL;
 	}
 
-	if (likely(usize <= SMALL_MAXCLASS)) {
+	if (likely(usize <= SC_SMALL_MAXCLASS)) {
 		/* Try to avoid moving the allocation. */
-		if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero)) {
+		UNUSED size_t newsize;
+		if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero,
+		    &newsize)) {
+			hook_invoke_expand(hook_args->is_realloc
+			    ? hook_expand_realloc : hook_expand_rallocx,
+			    ptr, oldsize, usize, (uintptr_t)ptr,
+			    hook_args->args);
 			return ptr;
 		}
 	}
 
-	if (oldsize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS) {
-		return large_ralloc(tsdn, arena, iealloc(tsdn, ptr), usize,
-		    alignment, zero, tcache);
+	if (oldsize >= SC_LARGE_MINCLASS
+	    && usize >= SC_LARGE_MINCLASS) {
+		return large_ralloc(tsdn, arena, ptr, usize,
+		    alignment, zero, tcache, hook_args);
 	}
 
 	/*
@@ -1658,11 +1834,16 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
 		return NULL;
 	}
 
+	hook_invoke_alloc(hook_args->is_realloc
+	    ? hook_alloc_realloc : hook_alloc_rallocx, ret, (uintptr_t)ret,
+	    hook_args->args);
+	hook_invoke_dalloc(hook_args->is_realloc
+	    ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args);
+
 	/*
 	 * Junk/zero-filling were already done by
 	 * ipalloc()/arena_malloc().
 	 */
-
 	size_t copysize = (usize < oldsize) ? usize : oldsize;
 	memcpy(ret, ptr, copysize);
 	isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
@@ -1720,8 +1901,7 @@ arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit,
 	if (new_limit != NULL) {
 		size_t limit = *new_limit;
 		/* Grow no more than the new limit. */
-		if ((new_ind = sz_psz2ind(limit + 1) - 1) >
-		    EXTENT_GROW_MAX_PIND) {
+		if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) {
 			return true;
 		}
 	}
@@ -1773,7 +1953,12 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
 		}
 	}
 
-	arena = (arena_t *)base_alloc(tsdn, base, sizeof(arena_t), CACHELINE);
+	unsigned nbins_total = 0;
+	for (i = 0; i < SC_NBINS; i++) {
+		nbins_total += bin_infos[i].n_shards;
+	}
+	size_t arena_size = sizeof(arena_t) + sizeof(bin_t) * nbins_total;
+	arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE);
 	if (arena == NULL) {
 		goto label_error;
 	}
@@ -1865,7 +2050,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
 	}
 
 	arena->extent_grow_next = sz_psz2ind(HUGEPAGE);
-	arena->retain_grow_limit = EXTENT_GROW_MAX_PIND;
+	arena->retain_grow_limit = sz_psz2ind(SC_LARGE_MAXCLASS);
 	if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow",
 	    WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) {
 		goto label_error;
@@ -1878,12 +2063,20 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
 	}
 
 	/* Initialize bins. */
-	for (i = 0; i < NBINS; i++) {
-		bool err = bin_init(&arena->bins[i]);
-		if (err) {
-			goto label_error;
+	uintptr_t bin_addr = (uintptr_t)arena + sizeof(arena_t);
+	atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE);
+	for (i = 0; i < SC_NBINS; i++) {
+		unsigned nshards = bin_infos[i].n_shards;
+		arena->bins[i].bin_shards = (bin_t *)bin_addr;
+		bin_addr += nshards * sizeof(bin_t);
+		for (unsigned j = 0; j < nshards; j++) {
+			bool err = bin_init(&arena->bins[i].bin_shards[j]);
+			if (err) {
+				goto label_error;
+			}
 		}
 	}
+	assert(bin_addr == (uintptr_t)arena + arena_size);
 
 	arena->base = base;
 	/* Set arena before creating background threads. */
@@ -1900,8 +2093,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
 		 */
 		assert(!tsdn_null(tsdn));
 		pre_reentrancy(tsdn_tsd(tsdn), arena);
-		if (hooks_arena_new_hook) {
-			hooks_arena_new_hook();
+		if (test_hooks_arena_new_hook) {
+			test_hooks_arena_new_hook();
 		}
 		post_reentrancy(tsdn_tsd(tsdn));
 	}
@@ -1914,20 +2107,75 @@ label_error:
 	return NULL;
}
 
+arena_t *
+arena_choose_huge(tsd_t *tsd) {
+	/* huge_arena_ind can be 0 during init (will use a0). */
+	if (huge_arena_ind == 0) {
+		assert(!malloc_initialized());
+	}
+
+	arena_t *huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, false);
+	if (huge_arena == NULL) {
+		/* Create the huge arena on demand. */
+		assert(huge_arena_ind != 0);
+		huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, true);
+		if (huge_arena == NULL) {
+			return NULL;
+		}
+		/*
+		 * Purge eagerly for huge allocations, because: 1) number of
+		 * huge allocations is usually small, which means ticker based
+		 * decay is not reliable; and 2) less immediate reuse is
+		 * expected for huge allocations.
+		 */
+		if (arena_dirty_decay_ms_default_get() > 0) {
+			arena_dirty_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0);
+		}
+		if (arena_muzzy_decay_ms_default_get() > 0) {
+			arena_muzzy_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0);
+		}
+	}
+
+	return huge_arena;
+}
+
+bool
+arena_init_huge(void) {
+	bool huge_enabled;
+
+	/* The threshold should be large size class. */
+	if (opt_oversize_threshold > SC_LARGE_MAXCLASS ||
+	    opt_oversize_threshold < SC_LARGE_MINCLASS) {
+		opt_oversize_threshold = 0;
+		oversize_threshold = SC_LARGE_MAXCLASS + PAGE;
+		huge_enabled = false;
+	} else {
+		/* Reserve the index for the huge arena. */
+		huge_arena_ind = narenas_total_get();
+		oversize_threshold = opt_oversize_threshold;
+		huge_enabled = true;
+	}
+
+	return huge_enabled;
+}
+
+bool
+arena_is_huge(unsigned arena_ind) {
+	if (huge_arena_ind == 0) {
+		return false;
+	}
+	return (arena_ind == huge_arena_ind);
+}
+
 void
-arena_boot(void) {
+arena_boot(sc_data_t *sc_data) {
 	arena_dirty_decay_ms_default_set(opt_dirty_decay_ms);
 	arena_muzzy_decay_ms_default_set(opt_muzzy_decay_ms);
-#define REGIND_bin_yes(index, reg_size) \
-	div_init(&arena_binind_div_info[(index)], (reg_size));
-#define REGIND_bin_no(index, reg_size)
-#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \
-    lg_delta_lookup) \
-	REGIND_bin_##bin(index, (1U<<lg_grp) + (ndelta << lg_delta))
-	SIZE_CLASSES
-#undef REGIND_bin_yes
-#undef REGIND_bin_no
-#undef SC
+	for (unsigned i = 0; i < SC_NBINS; i++) {
+		sc_t *sc = &sc_data->sc[i];
+		div_init(&arena_binind_div_info[i],
+		    (1U << sc->lg_base) + (sc->ndelta << sc->lg_delta));
+	}
 }
 
 void
@@ -1972,8 +2220,10 @@ arena_prefork6(tsdn_t *tsdn, arena_t *arena) {
 
 void
 arena_prefork7(tsdn_t *tsdn, arena_t *arena) {
-	for (unsigned i = 0; i < NBINS; i++) {
-		bin_prefork(tsdn, &arena->bins[i]);
+	for (unsigned i = 0; i < SC_NBINS; i++) {
+		for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
+			bin_prefork(tsdn, &arena->bins[i].bin_shards[j]);
+		}
 	}
 }
 
@@ -1981,8 +2231,11 @@ void
 arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
 	unsigned i;
 
-	for (i = 0; i < NBINS; i++) {
-		bin_postfork_parent(tsdn, &arena->bins[i]);
+	for (i = 0; i < SC_NBINS; i++) {
+		for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
+			bin_postfork_parent(tsdn,
+			    &arena->bins[i].bin_shards[j]);
+		}
 	}
 	malloc_mutex_postfork_parent(tsdn, &arena->large_mtx);
 	base_postfork_parent(tsdn, arena->base);
@@ -2025,8 +2278,10 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
 		}
 	}
 
-	for (i = 0; i < NBINS; i++) {
-		bin_postfork_child(tsdn, &arena->bins[i]);
+	for (i = 0; i < SC_NBINS; i++) {
+		for (unsigned j = 0; j < bin_infos[i].n_shards; j++) {
+			bin_postfork_child(tsdn, &arena->bins[i].bin_shards[j]);
+		}
 	}
 	malloc_mutex_postfork_child(tsdn, &arena->large_mtx);
 	base_postfork_child(tsdn, arena->base);