diff options
author | antirez <antirez@gmail.com> | 2012-11-28 18:39:35 +0100 |
---|---|---|
committer | antirez <antirez@gmail.com> | 2012-11-28 18:39:35 +0100 |
commit | 7383c3b12920c6ae20f7c64c5db92f59e2b02aa5 (patch) | |
tree | b7d32af471dcaa47435006277702c814adaaf7ac /deps/jemalloc/src | |
parent | dee0b939fcd6650edfd8705f0685cd8430750085 (diff) | |
download | redis-7383c3b12920c6ae20f7c64c5db92f59e2b02aa5.tar.gz |
Jemalloc updated to version 3.2.0.
Diffstat (limited to 'deps/jemalloc/src')
-rw-r--r-- | deps/jemalloc/src/arena.c | 699 | ||||
-rw-r--r-- | deps/jemalloc/src/base.c | 3 | ||||
-rw-r--r-- | deps/jemalloc/src/chunk.c | 165 | ||||
-rw-r--r-- | deps/jemalloc/src/chunk_dss.c | 37 | ||||
-rw-r--r-- | deps/jemalloc/src/chunk_mmap.c | 12 | ||||
-rw-r--r-- | deps/jemalloc/src/ctl.c | 361 | ||||
-rw-r--r-- | deps/jemalloc/src/huge.c | 7 | ||||
-rw-r--r-- | deps/jemalloc/src/jemalloc.c | 212 | ||||
-rw-r--r-- | deps/jemalloc/src/mutex.c | 2 | ||||
-rw-r--r-- | deps/jemalloc/src/prof.c | 42 | ||||
-rw-r--r-- | deps/jemalloc/src/rtree.c | 21 | ||||
-rw-r--r-- | deps/jemalloc/src/stats.c | 10 | ||||
-rw-r--r-- | deps/jemalloc/src/tcache.c | 4 | ||||
-rw-r--r-- | deps/jemalloc/src/util.c | 5 | ||||
-rw-r--r-- | deps/jemalloc/src/zone.c | 12 |
15 files changed, 1142 insertions, 450 deletions
diff --git a/deps/jemalloc/src/arena.c b/deps/jemalloc/src/arena.c index 2a6150f3e..0c53b071b 100644 --- a/deps/jemalloc/src/arena.c +++ b/deps/jemalloc/src/arena.c @@ -40,6 +40,12 @@ const uint8_t small_size2bin[] = { /******************************************************************************/ /* Function prototypes for non-inline static functions. */ +static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, + size_t pageind, size_t npages, bool maybe_adjac_pred, + bool maybe_adjac_succ); +static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, + size_t pageind, size_t npages, bool maybe_adjac_pred, + bool maybe_adjac_succ); static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, size_t binind, bool zero); static arena_chunk_t *arena_chunk_alloc(arena_t *arena); @@ -48,8 +54,11 @@ static arena_run_t *arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, bool zero); static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, bool zero); +static arena_chunk_t *chunks_dirty_iter_cb(arena_chunk_tree_t *tree, + arena_chunk_t *chunk, void *arg); static void arena_purge(arena_t *arena, bool all); -static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty); +static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, + bool cleaned); static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize); static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, @@ -101,9 +110,6 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) size_t a_size = a->bits & ~PAGE_MASK; size_t b_size = b->bits & ~PAGE_MASK; - assert((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY || (a->bits & - CHUNK_MAP_DIRTY) == (b->bits & CHUNK_MAP_DIRTY)); - ret = (a_size > b_size) - (a_size < b_size); if (ret == 0) { uintptr_t a_mapelm, b_mapelm; @@ -129,6 +135,182 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, u.rb_link, arena_avail_comp) +static inline int +arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b) +{ + + assert(a != NULL); + assert(b != NULL); + + /* + * Short-circuit for self comparison. The following comparison code + * would come to the same result, but at the cost of executing the slow + * path. + */ + if (a == b) + return (0); + + /* + * Order such that chunks with higher fragmentation are "less than" + * those with lower fragmentation -- purging order is from "least" to + * "greatest". Fragmentation is measured as: + * + * mean current avail run size + * -------------------------------- + * mean defragmented avail run size + * + * navail + * ----------- + * nruns_avail nruns_avail-nruns_adjac + * = ========================= = ----------------------- + * navail nruns_avail + * ----------------------- + * nruns_avail-nruns_adjac + * + * The following code multiplies away the denominator prior to + * comparison, in order to avoid division. + * + */ + { + size_t a_val = (a->nruns_avail - a->nruns_adjac) * + b->nruns_avail; + size_t b_val = (b->nruns_avail - b->nruns_adjac) * + a->nruns_avail; + + if (a_val < b_val) + return (1); + if (a_val > b_val) + return (-1); + } + /* + * Break ties by chunk address. For fragmented chunks, report lower + * addresses as "lower", so that fragmentation reduction happens first + * at lower addresses. However, use the opposite ordering for + * unfragmented chunks, in order to increase the chances of + * re-allocating dirty runs. + */ + { + uintptr_t a_chunk = (uintptr_t)a; + uintptr_t b_chunk = (uintptr_t)b; + int ret = ((a_chunk > b_chunk) - (a_chunk < b_chunk)); + if (a->nruns_adjac == 0) { + assert(b->nruns_adjac == 0); + ret = -ret; + } + return (ret); + } +} + +/* Generate red-black tree functions. */ +rb_gen(static UNUSED, arena_chunk_dirty_, arena_chunk_tree_t, arena_chunk_t, + dirty_link, arena_chunk_dirty_comp) + +static inline bool +arena_avail_adjac_pred(arena_chunk_t *chunk, size_t pageind) +{ + bool ret; + + if (pageind-1 < map_bias) + ret = false; + else { + ret = (arena_mapbits_allocated_get(chunk, pageind-1) == 0); + assert(ret == false || arena_mapbits_dirty_get(chunk, + pageind-1) != arena_mapbits_dirty_get(chunk, pageind)); + } + return (ret); +} + +static inline bool +arena_avail_adjac_succ(arena_chunk_t *chunk, size_t pageind, size_t npages) +{ + bool ret; + + if (pageind+npages == chunk_npages) + ret = false; + else { + assert(pageind+npages < chunk_npages); + ret = (arena_mapbits_allocated_get(chunk, pageind+npages) == 0); + assert(ret == false || arena_mapbits_dirty_get(chunk, pageind) + != arena_mapbits_dirty_get(chunk, pageind+npages)); + } + return (ret); +} + +static inline bool +arena_avail_adjac(arena_chunk_t *chunk, size_t pageind, size_t npages) +{ + + return (arena_avail_adjac_pred(chunk, pageind) || + arena_avail_adjac_succ(chunk, pageind, npages)); +} + +static void +arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) +{ + + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE)); + + /* + * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be + * removed and reinserted even if the run to be inserted is clean. + */ + if (chunk->ndirty != 0) + arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); + + if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) + chunk->nruns_adjac++; + if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) + chunk->nruns_adjac++; + chunk->nruns_avail++; + assert(chunk->nruns_avail > chunk->nruns_adjac); + + if (arena_mapbits_dirty_get(chunk, pageind) != 0) { + arena->ndirty += npages; + chunk->ndirty += npages; + } + if (chunk->ndirty != 0) + arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); + + arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk, + pageind)); +} + +static void +arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, + size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ) +{ + + assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> + LG_PAGE)); + + /* + * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be + * removed and reinserted even if the run to be removed is clean. + */ + if (chunk->ndirty != 0) + arena_chunk_dirty_remove(&arena->chunks_dirty, chunk); + + if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind)) + chunk->nruns_adjac--; + if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages)) + chunk->nruns_adjac--; + chunk->nruns_avail--; + assert(chunk->nruns_avail > chunk->nruns_adjac || (chunk->nruns_avail + == 0 && chunk->nruns_adjac == 0)); + + if (arena_mapbits_dirty_get(chunk, pageind) != 0) { + arena->ndirty -= npages; + chunk->ndirty -= npages; + } + if (chunk->ndirty != 0) + arena_chunk_dirty_insert(&arena->chunks_dirty, chunk); + + arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk, + pageind)); +} + static inline void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { @@ -193,7 +375,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_chunk_t *chunk; size_t run_ind, total_pages, need_pages, rem_pages, i; size_t flag_dirty; - arena_avail_tree_t *runs_avail; assert((large && binind == BININD_INVALID) || (large == false && binind != BININD_INVALID)); @@ -201,8 +382,6 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); - runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty : - &arena->runs_avail_clean; total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> LG_PAGE; assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == @@ -212,7 +391,7 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; - arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, run_ind)); + arena_avail_remove(arena, chunk, run_ind, total_pages, true, true); if (config_stats) { /* * Update stats_cactive if nactive is crossing a chunk @@ -244,14 +423,8 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, arena_mapbits_unzeroed_get(chunk, run_ind+total_pages-1)); } - arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, - run_ind+need_pages)); - } - - /* Update dirty page accounting. */ - if (flag_dirty != 0) { - chunk->ndirty -= need_pages; - arena->ndirty -= need_pages; + arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages, + false, true); } /* @@ -344,8 +517,6 @@ arena_chunk_alloc(arena_t *arena) size_t i; if (arena->spare != NULL) { - arena_avail_tree_t *runs_avail; - chunk = arena->spare; arena->spare = NULL; @@ -357,14 +528,6 @@ arena_chunk_alloc(arena_t *arena) chunk_npages-1) == arena_maxclass); assert(arena_mapbits_dirty_get(chunk, map_bias) == arena_mapbits_dirty_get(chunk, chunk_npages-1)); - - /* Insert the run into the appropriate runs_avail_* tree. */ - if (arena_mapbits_dirty_get(chunk, map_bias) == 0) - runs_avail = &arena->runs_avail_clean; - else - runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, - map_bias)); } else { bool zero; size_t unzeroed; @@ -372,7 +535,7 @@ arena_chunk_alloc(arena_t *arena) zero = false; malloc_mutex_unlock(&arena->lock); chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, - false, &zero); + false, &zero, arena->dss_prec); malloc_mutex_lock(&arena->lock); if (chunk == NULL) return (NULL); @@ -380,8 +543,6 @@ arena_chunk_alloc(arena_t *arena) arena->stats.mapped += chunksize; chunk->arena = arena; - ql_elm_new(chunk, link_dirty); - chunk->dirtied = false; /* * Claim that no pages are in use, since the header is merely @@ -389,6 +550,9 @@ arena_chunk_alloc(arena_t *arena) */ chunk->ndirty = 0; + chunk->nruns_avail = 0; + chunk->nruns_adjac = 0; + /* * Initialize the map to contain one maximal free untouched run. * Mark the pages as zeroed iff chunk_alloc() returned a zeroed @@ -412,20 +576,18 @@ arena_chunk_alloc(arena_t *arena) } arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass, unzeroed); - - /* Insert the run into the runs_avail_clean tree. */ - arena_avail_tree_insert(&arena->runs_avail_clean, - arena_mapp_get(chunk, map_bias)); } + /* Insert the run into the runs_avail tree. */ + arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias, + false, false); + return (chunk); } static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) { - arena_avail_tree_t *runs_avail; - assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == @@ -436,24 +598,16 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) arena_mapbits_dirty_get(chunk, chunk_npages-1)); /* - * Remove run from the appropriate runs_avail_* tree, so that the arena - * does not use it. + * Remove run from the runs_avail tree, so that the arena does not use + * it. */ - if (arena_mapbits_dirty_get(chunk, map_bias) == 0) - runs_avail = &arena->runs_avail_clean; - else - runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, map_bias)); + arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias, + false, false); if (arena->spare != NULL) { arena_chunk_t *spare = arena->spare; arena->spare = chunk; - if (spare->dirtied) { - ql_remove(&chunk->arena->chunks_dirty, spare, - link_dirty); - arena->ndirty -= spare->ndirty; - } malloc_mutex_unlock(&arena->lock); chunk_dealloc((void *)spare, chunksize, true); malloc_mutex_lock(&arena->lock); @@ -471,19 +625,7 @@ arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind, arena_chunk_map_t *mapelm, key; key.bits = size | CHUNK_MAP_KEY; - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - LG_PAGE)); - arena_run_split(arena, run, size, large, binind, zero); - return (run); - } - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); + mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key); if (mapelm != NULL) { arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); size_t pageind = (((uintptr_t)mapelm - @@ -537,41 +679,40 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind, static inline void arena_maybe_purge(arena_t *arena) { + size_t npurgeable, threshold; + + /* Don't purge if the option is disabled. */ + if (opt_lg_dirty_mult < 0) + return; + /* Don't purge if all dirty pages are already being purged. */ + if (arena->ndirty <= arena->npurgatory) + return; + npurgeable = arena->ndirty - arena->npurgatory; + threshold = (arena->nactive >> opt_lg_dirty_mult); + /* + * Don't purge unless the number of purgeable pages exceeds the + * threshold. + */ + if (npurgeable <= threshold) + return; - /* Enforce opt_lg_dirty_mult. */ - if (opt_lg_dirty_mult >= 0 && arena->ndirty > arena->npurgatory && - (arena->ndirty - arena->npurgatory) > chunk_npages && - (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - - arena->npurgatory)) - arena_purge(arena, false); + arena_purge(arena, false); } -static inline void -arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) +static inline size_t +arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all) { + size_t npurged; ql_head(arena_chunk_map_t) mapelms; arena_chunk_map_t *mapelm; - size_t pageind, flag_unzeroed; - size_t ndirty; + size_t pageind, npages; size_t nmadvise; ql_new(&mapelms); - flag_unzeroed = -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED - /* - * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous - * mappings, but not for file-backed mappings. - */ - 0 -#else - CHUNK_MAP_UNZEROED -#endif - ; - /* * If chunk is the spare, temporarily re-allocate it, 1) so that its - * run is reinserted into runs_avail_dirty, and 2) so that it cannot be + * run is reinserted into runs_avail, and 2) so that it cannot be * completely discarded by another thread while arena->lock is dropped * by this thread. Note that the arena_run_dalloc() call will * implicitly deallocate the chunk, so no explicit action is required @@ -591,68 +732,50 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) arena_chunk_alloc(arena); } - /* Temporarily allocate all free dirty runs within chunk. */ - for (pageind = map_bias; pageind < chunk_npages;) { + if (config_stats) + arena->stats.purged += chunk->ndirty; + + /* + * Operate on all dirty runs if there is no clean/dirty run + * fragmentation. + */ + if (chunk->nruns_adjac == 0) + all = true; + + /* + * Temporarily allocate free dirty runs within chunk. If all is false, + * only operate on dirty runs that are fragments; otherwise operate on + * all dirty runs. + */ + for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { mapelm = arena_mapp_get(chunk, pageind); if (arena_mapbits_allocated_get(chunk, pageind) == 0) { - size_t npages; + size_t run_size = + arena_mapbits_unallocated_size_get(chunk, pageind); - npages = arena_mapbits_unallocated_size_get(chunk, - pageind) >> LG_PAGE; + npages = run_size >> LG_PAGE; assert(pageind + npages <= chunk_npages); assert(arena_mapbits_dirty_get(chunk, pageind) == arena_mapbits_dirty_get(chunk, pageind+npages-1)); - if (arena_mapbits_dirty_get(chunk, pageind) != 0) { - size_t i; - - arena_avail_tree_remove( - &arena->runs_avail_dirty, mapelm); - arena_mapbits_unzeroed_set(chunk, pageind, - flag_unzeroed); - arena_mapbits_large_set(chunk, pageind, - (npages << LG_PAGE), 0); - /* - * Update internal elements in the page map, so - * that CHUNK_MAP_UNZEROED is properly set. - */ - for (i = 1; i < npages - 1; i++) { - arena_mapbits_unzeroed_set(chunk, - pageind+i, flag_unzeroed); - } - if (npages > 1) { - arena_mapbits_unzeroed_set(chunk, - pageind+npages-1, flag_unzeroed); - arena_mapbits_large_set(chunk, - pageind+npages-1, 0, 0); - } + if (arena_mapbits_dirty_get(chunk, pageind) != 0 && + (all || arena_avail_adjac(chunk, pageind, + npages))) { + arena_run_t *run = (arena_run_t *)((uintptr_t) + chunk + (uintptr_t)(pageind << LG_PAGE)); - if (config_stats) { - /* - * Update stats_cactive if nactive is - * crossing a chunk multiple. - */ - size_t cactive_diff = - CHUNK_CEILING((arena->nactive + - npages) << LG_PAGE) - - CHUNK_CEILING(arena->nactive << - LG_PAGE); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); - } - arena->nactive += npages; + arena_run_split(arena, run, run_size, true, + BININD_INVALID, false); /* Append to list for later processing. */ ql_elm_new(mapelm, u.ql_link); ql_tail_insert(&mapelms, mapelm, u.ql_link); } - - pageind += npages; } else { - /* Skip allocated run. */ - if (arena_mapbits_large_get(chunk, pageind)) - pageind += arena_mapbits_large_size_get(chunk, + /* Skip run. */ + if (arena_mapbits_large_get(chunk, pageind) != 0) { + npages = arena_mapbits_large_size_get(chunk, pageind) >> LG_PAGE; - else { + } else { size_t binind; arena_bin_info_t *bin_info; arena_run_t *run = (arena_run_t *)((uintptr_t) @@ -662,41 +785,48 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) pageind) == 0); binind = arena_bin_index(arena, run->bin); bin_info = &arena_bin_info[binind]; - pageind += bin_info->run_size >> LG_PAGE; + npages = bin_info->run_size >> LG_PAGE; } } } assert(pageind == chunk_npages); - - if (config_debug) - ndirty = chunk->ndirty; - if (config_stats) - arena->stats.purged += chunk->ndirty; - arena->ndirty -= chunk->ndirty; - chunk->ndirty = 0; - ql_remove(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = false; + assert(chunk->ndirty == 0 || all == false); + assert(chunk->nruns_adjac == 0); malloc_mutex_unlock(&arena->lock); if (config_stats) nmadvise = 0; + npurged = 0; ql_foreach(mapelm, &mapelms, u.ql_link) { - size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + bool unzeroed; + size_t flag_unzeroed, i; + + pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / sizeof(arena_chunk_map_t)) + map_bias; - size_t npages = arena_mapbits_large_size_get(chunk, pageind) >> + npages = arena_mapbits_large_size_get(chunk, pageind) >> LG_PAGE; - assert(pageind + npages <= chunk_npages); - assert(ndirty >= npages); - if (config_debug) - ndirty -= npages; - - pages_purge((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), - (npages << LG_PAGE)); + unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind << + LG_PAGE)), (npages << LG_PAGE)); + flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0; + /* + * Set the unzeroed flag for all pages, now that pages_purge() + * has returned whether the pages were zeroed as a side effect + * of purging. This chunk map modification is safe even though + * the arena mutex isn't currently owned by this thread, + * because the run is marked as allocated, thus protecting it + * from being modified by any other thread. As long as these + * writes don't perturb the first and last elements' + * CHUNK_MAP_ALLOCATED bits, behavior is well defined. + */ + for (i = 0; i < npages; i++) { + arena_mapbits_unzeroed_set(chunk, pageind+i, + flag_unzeroed); + } + npurged += npages; if (config_stats) nmadvise++; } - assert(ndirty == 0); malloc_mutex_lock(&arena->lock); if (config_stats) arena->stats.nmadvise += nmadvise; @@ -704,14 +834,27 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) /* Deallocate runs. */ for (mapelm = ql_first(&mapelms); mapelm != NULL; mapelm = ql_first(&mapelms)) { - size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)(pageind << LG_PAGE)); + arena_run_t *run; + pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind << + LG_PAGE)); ql_remove(&mapelms, mapelm, u.ql_link); - arena_run_dalloc(arena, run, false); + arena_run_dalloc(arena, run, false, true); } + + return (npurged); +} + +static arena_chunk_t * +chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg) +{ + size_t *ndirty = (size_t *)arg; + + assert(chunk->ndirty != 0); + *ndirty += chunk->ndirty; + return (NULL); } static void @@ -722,14 +865,11 @@ arena_purge(arena_t *arena, bool all) if (config_debug) { size_t ndirty = 0; - ql_foreach(chunk, &arena->chunks_dirty, link_dirty) { - assert(chunk->dirtied); - ndirty += chunk->ndirty; - } + arena_chunk_dirty_iter(&arena->chunks_dirty, NULL, + chunks_dirty_iter_cb, (void *)&ndirty); assert(ndirty == arena->ndirty); } assert(arena->ndirty > arena->npurgatory || all); - assert(arena->ndirty - arena->npurgatory > chunk_npages || all); assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - arena->npurgatory) || all); @@ -741,16 +881,24 @@ arena_purge(arena_t *arena, bool all) * purge, and add the result to arena->npurgatory. This will keep * multiple threads from racing to reduce ndirty below the threshold. */ - npurgatory = arena->ndirty - arena->npurgatory; - if (all == false) { - assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult); - npurgatory -= arena->nactive >> opt_lg_dirty_mult; + { + size_t npurgeable = arena->ndirty - arena->npurgatory; + + if (all == false) { + size_t threshold = (arena->nactive >> + opt_lg_dirty_mult); + + npurgatory = npurgeable - threshold; + } else + npurgatory = npurgeable; } arena->npurgatory += npurgatory; while (npurgatory > 0) { + size_t npurgeable, npurged, nunpurged; + /* Get next chunk with dirty pages. */ - chunk = ql_first(&arena->chunks_dirty); + chunk = arena_chunk_dirty_first(&arena->chunks_dirty); if (chunk == NULL) { /* * This thread was unable to purge as many pages as @@ -761,23 +909,15 @@ arena_purge(arena_t *arena, bool all) arena->npurgatory -= npurgatory; return; } - while (chunk->ndirty == 0) { - ql_remove(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = false; - chunk = ql_first(&arena->chunks_dirty); - if (chunk == NULL) { - /* Same logic as for above. */ - arena->npurgatory -= npurgatory; - return; - } - } + npurgeable = chunk->ndirty; + assert(npurgeable != 0); - if (chunk->ndirty > npurgatory) { + if (npurgeable > npurgatory && chunk->nruns_adjac == 0) { /* - * This thread will, at a minimum, purge all the dirty - * pages in chunk, so set npurgatory to reflect this - * thread's commitment to purge the pages. This tends - * to reduce the chances of the following scenario: + * This thread will purge all the dirty pages in chunk, + * so set npurgatory to reflect this thread's intent to + * purge the pages. This tends to reduce the chances + * of the following scenario: * * 1) This thread sets arena->npurgatory such that * (arena->ndirty - arena->npurgatory) is at the @@ -791,13 +931,20 @@ arena_purge(arena_t *arena, bool all) * because all of the purging work being done really * needs to happen. */ - arena->npurgatory += chunk->ndirty - npurgatory; - npurgatory = chunk->ndirty; + arena->npurgatory += npurgeable - npurgatory; + npurgatory = npurgeable; } - arena->npurgatory -= chunk->ndirty; - npurgatory -= chunk->ndirty; - arena_chunk_purge(arena, chunk); + /* + * Keep track of how many pages are purgeable, versus how many + * actually get purged, and adjust counters accordingly. + */ + arena->npurgatory -= npurgeable; + npurgatory -= npurgeable; + npurged = arena_chunk_purge(arena, chunk, all); + nunpurged = npurgeable - npurged; + arena->npurgatory += nunpurged; + npurgatory += nunpurged; } } @@ -811,11 +958,10 @@ arena_purge_all(arena_t *arena) } static void -arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) +arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned) { arena_chunk_t *chunk; size_t size, run_ind, run_pages, flag_dirty; - arena_avail_tree_t *runs_avail; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); @@ -846,15 +992,14 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) /* * The run is dirty if the caller claims to have dirtied it, as well as - * if it was already dirty before being allocated. + * if it was already dirty before being allocated and the caller + * doesn't claim to have cleaned it. */ assert(arena_mapbits_dirty_get(chunk, run_ind) == arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - if (arena_mapbits_dirty_get(chunk, run_ind) != 0) + if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0) dirty = true; flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; - runs_avail = dirty ? &arena->runs_avail_dirty : - &arena->runs_avail_clean; /* Mark pages as unallocated in the chunk map. */ if (dirty) { @@ -862,9 +1007,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) CHUNK_MAP_DIRTY); arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, CHUNK_MAP_DIRTY); - - chunk->ndirty += run_pages; - arena->ndirty += run_pages; } else { arena_mapbits_unallocated_set(chunk, run_ind, size, arena_mapbits_unzeroed_get(chunk, run_ind)); @@ -888,8 +1030,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) run_ind+run_pages+nrun_pages-1) == nrun_size); assert(arena_mapbits_dirty_get(chunk, run_ind+run_pages+nrun_pages-1) == flag_dirty); - arena_avail_tree_remove(runs_avail, - arena_mapp_get(chunk, run_ind+run_pages)); + arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages, + false, true); size += nrun_size; run_pages += nrun_pages; @@ -915,8 +1057,8 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == prun_size); assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); - arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, - run_ind)); + arena_avail_remove(arena, chunk, run_ind, prun_pages, true, + false); size += prun_size; run_pages += prun_pages; @@ -931,19 +1073,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); assert(arena_mapbits_dirty_get(chunk, run_ind) == arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, run_ind)); - - if (dirty) { - /* - * Insert into chunks_dirty before potentially calling - * arena_chunk_dealloc(), so that chunks_dirty and - * arena->ndirty are consistent. - */ - if (chunk->dirtied == false) { - ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = true; - } - } + arena_avail_insert(arena, chunk, run_ind, run_pages, true, true); /* Deallocate chunk if it is now completely unused. */ if (size == arena_maxclass) { @@ -992,7 +1122,7 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_mapbits_large_set(chunk, pageind+head_npages, newsize, flag_dirty); - arena_run_dalloc(arena, run, false); + arena_run_dalloc(arena, run, false, false); } static void @@ -1025,7 +1155,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, flag_dirty); arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), - dirty); + dirty, false); } static arena_run_t * @@ -1536,7 +1666,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, ((past - run_ind) << LG_PAGE), false); /* npages = past - run_ind; */ } - arena_run_dalloc(arena, run, true); + arena_run_dalloc(arena, run, true, false); malloc_mutex_unlock(&arena->lock); /****************************/ malloc_mutex_lock(&bin->lock); @@ -1629,52 +1759,6 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, mapelm = arena_mapp_get(chunk, pageind); arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm); } -void -arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats) -{ - unsigned i; - - malloc_mutex_lock(&arena->lock); - *nactive += arena->nactive; - *ndirty += arena->ndirty; - - astats->mapped += arena->stats.mapped; - astats->npurge += arena->stats.npurge; - astats->nmadvise += arena->stats.nmadvise; - astats->purged += arena->stats.purged; - astats->allocated_large += arena->stats.allocated_large; - astats->nmalloc_large += arena->stats.nmalloc_large; - astats->ndalloc_large += arena->stats.ndalloc_large; - astats->nrequests_large += arena->stats.nrequests_large; - - for (i = 0; i < nlclasses; i++) { - lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; - lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; - lstats[i].nrequests += arena->stats.lstats[i].nrequests; - lstats[i].curruns += arena->stats.lstats[i].curruns; - } - malloc_mutex_unlock(&arena->lock); - - for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - - malloc_mutex_lock(&bin->lock); - bstats[i].allocated += bin->stats.allocated; - bstats[i].nmalloc += bin->stats.nmalloc; - bstats[i].ndalloc += bin->stats.ndalloc; - bstats[i].nrequests += bin->stats.nrequests; - if (config_tcache) { - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; - } - bstats[i].nruns += bin->stats.nruns; - bstats[i].reruns += bin->stats.reruns; - bstats[i].curruns += bin->stats.curruns; - malloc_mutex_unlock(&bin->lock); - } -} void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) @@ -1694,7 +1778,7 @@ arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr) } } - arena_run_dalloc(arena, (arena_run_t *)ptr, true); + arena_run_dalloc(arena, (arena_run_t *)ptr, true, false); } void @@ -1887,8 +1971,9 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, } void * -arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache) +arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, + bool try_tcache_dalloc) { void *ret; size_t copysize; @@ -1907,9 +1992,9 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - ret = ipalloc(usize, alignment, zero); + ret = ipallocx(usize, alignment, zero, try_tcache_alloc, arena); } else - ret = arena_malloc(NULL, size + extra, zero, try_tcache); + ret = arena_malloc(arena, size + extra, zero, try_tcache_alloc); if (ret == NULL) { if (extra == 0) @@ -1919,9 +2004,10 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t usize = sa2u(size, alignment); if (usize == 0) return (NULL); - ret = ipalloc(usize, alignment, zero); + ret = ipallocx(usize, alignment, zero, try_tcache_alloc, + arena); } else - ret = arena_malloc(NULL, size, zero, try_tcache); + ret = arena_malloc(arena, size, zero, try_tcache_alloc); if (ret == NULL) return (NULL); @@ -1936,10 +2022,78 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, copysize = (size < oldsize) ? size : oldsize; VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - iqalloc(ptr); + iqallocx(ptr, try_tcache_dalloc); + return (ret); +} + +dss_prec_t +arena_dss_prec_get(arena_t *arena) +{ + dss_prec_t ret; + + malloc_mutex_lock(&arena->lock); + ret = arena->dss_prec; + malloc_mutex_unlock(&arena->lock); return (ret); } +void +arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) +{ + + malloc_mutex_lock(&arena->lock); + arena->dss_prec = dss_prec; + malloc_mutex_unlock(&arena->lock); +} + +void +arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, + size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats) +{ + unsigned i; + + malloc_mutex_lock(&arena->lock); + *dss = dss_prec_names[arena->dss_prec]; + *nactive += arena->nactive; + *ndirty += arena->ndirty; + + astats->mapped += arena->stats.mapped; + astats->npurge += arena->stats.npurge; + astats->nmadvise += arena->stats.nmadvise; + astats->purged += arena->stats.purged; + astats->allocated_large += arena->stats.allocated_large; + astats->nmalloc_large += arena->stats.nmalloc_large; + astats->ndalloc_large += arena->stats.ndalloc_large; + astats->nrequests_large += arena->stats.nrequests_large; + + for (i = 0; i < nlclasses; i++) { + lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; + lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; + lstats[i].nrequests += arena->stats.lstats[i].nrequests; + lstats[i].curruns += arena->stats.lstats[i].curruns; + } + malloc_mutex_unlock(&arena->lock); + + for (i = 0; i < NBINS; i++) { + arena_bin_t *bin = &arena->bins[i]; + + malloc_mutex_lock(&bin->lock); + bstats[i].allocated += bin->stats.allocated; + bstats[i].nmalloc += bin->stats.nmalloc; + bstats[i].ndalloc += bin->stats.ndalloc; + bstats[i].nrequests += bin->stats.nrequests; + if (config_tcache) { + bstats[i].nfills += bin->stats.nfills; + bstats[i].nflushes += bin->stats.nflushes; + } + bstats[i].nruns += bin->stats.nruns; + bstats[i].reruns += bin->stats.reruns; + bstats[i].curruns += bin->stats.curruns; + malloc_mutex_unlock(&bin->lock); + } +} + bool arena_new(arena_t *arena, unsigned ind) { @@ -1968,16 +2122,17 @@ arena_new(arena_t *arena, unsigned ind) if (config_prof) arena->prof_accumbytes = 0; + arena->dss_prec = chunk_dss_prec_get(); + /* Initialize chunks. */ - ql_new(&arena->chunks_dirty); + arena_chunk_dirty_new(&arena->chunks_dirty); arena->spare = NULL; arena->nactive = 0; arena->ndirty = 0; arena->npurgatory = 0; - arena_avail_tree_new(&arena->runs_avail_clean); - arena_avail_tree_new(&arena->runs_avail_dirty); + arena_avail_tree_new(&arena->runs_avail); /* Initialize bins. */ for (i = 0; i < NBINS; i++) { diff --git a/deps/jemalloc/src/base.c b/deps/jemalloc/src/base.c index bafaa7438..b1a5945ef 100644 --- a/deps/jemalloc/src/base.c +++ b/deps/jemalloc/src/base.c @@ -32,7 +32,8 @@ base_pages_alloc(size_t minsize) assert(minsize != 0); csize = CHUNK_CEILING(minsize); zero = false; - base_pages = chunk_alloc(csize, chunksize, true, &zero); + base_pages = chunk_alloc(csize, chunksize, true, &zero, + chunk_dss_prec_get()); if (base_pages == NULL) return (true); base_next_addr = base_pages; diff --git a/deps/jemalloc/src/chunk.c b/deps/jemalloc/src/chunk.c index 6bc245447..1a3bb4f67 100644 --- a/deps/jemalloc/src/chunk.c +++ b/deps/jemalloc/src/chunk.c @@ -4,7 +4,8 @@ /******************************************************************************/ /* Data. */ -size_t opt_lg_chunk = LG_CHUNK_DEFAULT; +const char *opt_dss = DSS_DEFAULT; +size_t opt_lg_chunk = LG_CHUNK_DEFAULT; malloc_mutex_t chunks_mtx; chunk_stats_t stats_chunks; @@ -15,8 +16,10 @@ chunk_stats_t stats_chunks; * address space. Depending on function, different tree orderings are needed, * which is why there are two trees with the same contents. */ -static extent_tree_t chunks_szad; -static extent_tree_t chunks_ad; +static extent_tree_t chunks_szad_mmap; +static extent_tree_t chunks_ad_mmap; +static extent_tree_t chunks_szad_dss; +static extent_tree_t chunks_ad_dss; rtree_t *chunks_rtree; @@ -30,19 +33,23 @@ size_t arena_maxclass; /* Max size class for arenas. */ /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void *chunk_recycle(size_t size, size_t alignment, bool base, +static void *chunk_recycle(extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, size_t size, size_t alignment, bool base, bool *zero); -static void chunk_record(void *chunk, size_t size); +static void chunk_record(extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, void *chunk, size_t size); /******************************************************************************/ static void * -chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) +chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size, + size_t alignment, bool base, bool *zero) { void *ret; extent_node_t *node; extent_node_t key; size_t alloc_size, leadsize, trailsize; + bool zeroed; if (base) { /* @@ -61,7 +68,7 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) key.addr = NULL; key.size = alloc_size; malloc_mutex_lock(&chunks_mtx); - node = extent_tree_szad_nsearch(&chunks_szad, &key); + node = extent_tree_szad_nsearch(chunks_szad, &key); if (node == NULL) { malloc_mutex_unlock(&chunks_mtx); return (NULL); @@ -72,13 +79,13 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) trailsize = node->size - leadsize - size; ret = (void *)((uintptr_t)node->addr + leadsize); /* Remove node from the tree. */ - extent_tree_szad_remove(&chunks_szad, node); - extent_tree_ad_remove(&chunks_ad, node); + extent_tree_szad_remove(chunks_szad, node); + extent_tree_ad_remove(chunks_ad, node); if (leadsize != 0) { /* Insert the leading space as a smaller chunk. */ node->size = leadsize; - extent_tree_szad_insert(&chunks_szad, node); - extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(chunks_szad, node); + extent_tree_ad_insert(chunks_ad, node); node = NULL; } if (trailsize != 0) { @@ -101,23 +108,24 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) } node->addr = (void *)((uintptr_t)(ret) + size); node->size = trailsize; - extent_tree_szad_insert(&chunks_szad, node); - extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(chunks_szad, node); + extent_tree_ad_insert(chunks_ad, node); node = NULL; } malloc_mutex_unlock(&chunks_mtx); - if (node != NULL) + zeroed = false; + if (node != NULL) { + if (node->zeroed) { + zeroed = true; + *zero = true; + } base_node_dealloc(node); -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED - /* Pages are zeroed as a side effect of pages_purge(). */ - *zero = true; -#else - if (*zero) { + } + if (zeroed == false && *zero) { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); } -#endif return (ret); } @@ -128,7 +136,8 @@ chunk_recycle(size_t size, size_t alignment, bool base, bool *zero) * advantage of them if they are returned. */ void * -chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) +chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, + dss_prec_t dss_prec) { void *ret; @@ -137,17 +146,26 @@ chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = chunk_recycle(size, alignment, base, zero); - if (ret != NULL) + /* "primary" dss. */ + if (config_dss && dss_prec == dss_prec_primary) { + if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, + alignment, base, zero)) != NULL) + goto label_return; + if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) + goto label_return; + } + /* mmap. */ + if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size, + alignment, base, zero)) != NULL) goto label_return; - - ret = chunk_alloc_mmap(size, alignment, zero); - if (ret != NULL) + if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL) goto label_return; - - if (config_dss) { - ret = chunk_alloc_dss(size, alignment, zero); - if (ret != NULL) + /* "secondary" dss. */ + if (config_dss && dss_prec == dss_prec_secondary) { + if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size, + alignment, base, zero)) != NULL) + goto label_return; + if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL) goto label_return; } @@ -189,11 +207,13 @@ label_return: } static void -chunk_record(void *chunk, size_t size) +chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk, + size_t size) { + bool unzeroed; extent_node_t *xnode, *node, *prev, key; - pages_purge(chunk, size); + unzeroed = pages_purge(chunk, size); /* * Allocate a node before acquiring chunks_mtx even though it might not @@ -205,7 +225,7 @@ chunk_record(void *chunk, size_t size) malloc_mutex_lock(&chunks_mtx); key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&chunks_ad, &key); + node = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. */ if (node != NULL && node->addr == key.addr) { /* @@ -213,10 +233,11 @@ chunk_record(void *chunk, size_t size) * not change the position within chunks_ad, so only * remove/insert from/into chunks_szad. */ - extent_tree_szad_remove(&chunks_szad, node); + extent_tree_szad_remove(chunks_szad, node); node->addr = chunk; node->size += size; - extent_tree_szad_insert(&chunks_szad, node); + node->zeroed = (node->zeroed && (unzeroed == false)); + extent_tree_szad_insert(chunks_szad, node); if (xnode != NULL) base_node_dealloc(xnode); } else { @@ -234,12 +255,13 @@ chunk_record(void *chunk, size_t size) node = xnode; node->addr = chunk; node->size = size; - extent_tree_ad_insert(&chunks_ad, node); - extent_tree_szad_insert(&chunks_szad, node); + node->zeroed = (unzeroed == false); + extent_tree_ad_insert(chunks_ad, node); + extent_tree_szad_insert(chunks_szad, node); } /* Try to coalesce backward. */ - prev = extent_tree_ad_prev(&chunks_ad, node); + prev = extent_tree_ad_prev(chunks_ad, node); if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == chunk) { /* @@ -247,13 +269,14 @@ chunk_record(void *chunk, size_t size) * not change the position within chunks_ad, so only * remove/insert node from/into chunks_szad. */ - extent_tree_szad_remove(&chunks_szad, prev); - extent_tree_ad_remove(&chunks_ad, prev); + extent_tree_szad_remove(chunks_szad, prev); + extent_tree_ad_remove(chunks_ad, prev); - extent_tree_szad_remove(&chunks_szad, node); + extent_tree_szad_remove(chunks_szad, node); node->addr = prev->addr; node->size += prev->size; - extent_tree_szad_insert(&chunks_szad, node); + node->zeroed = (node->zeroed && prev->zeroed); + extent_tree_szad_insert(chunks_szad, node); base_node_dealloc(prev); } @@ -261,6 +284,20 @@ chunk_record(void *chunk, size_t size) } void +chunk_unmap(void *chunk, size_t size) +{ + assert(chunk != NULL); + assert(CHUNK_ADDR2BASE(chunk) == chunk); + assert(size != 0); + assert((size & chunksize_mask) == 0); + + if (config_dss && chunk_in_dss(chunk)) + chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size); + else if (chunk_dealloc_mmap(chunk, size)) + chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size); +} + +void chunk_dealloc(void *chunk, size_t size, bool unmap) { @@ -273,15 +310,13 @@ chunk_dealloc(void *chunk, size_t size, bool unmap) rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); if (config_stats || config_prof) { malloc_mutex_lock(&chunks_mtx); + assert(stats_chunks.curchunks >= (size / chunksize)); stats_chunks.curchunks -= (size / chunksize); malloc_mutex_unlock(&chunks_mtx); } - if (unmap) { - if ((config_dss && chunk_in_dss(chunk)) || - chunk_dealloc_mmap(chunk, size)) - chunk_record(chunk, size); - } + if (unmap) + chunk_unmap(chunk, size); } bool @@ -301,8 +336,10 @@ chunk_boot(void) } if (config_dss && chunk_dss_boot()) return (true); - extent_tree_szad_new(&chunks_szad); - extent_tree_ad_new(&chunks_ad); + extent_tree_szad_new(&chunks_szad_mmap); + extent_tree_ad_new(&chunks_ad_mmap); + extent_tree_szad_new(&chunks_szad_dss); + extent_tree_ad_new(&chunks_ad_dss); if (config_ivsalloc) { chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk); @@ -312,3 +349,33 @@ chunk_boot(void) return (false); } + +void +chunk_prefork(void) +{ + + malloc_mutex_lock(&chunks_mtx); + if (config_ivsalloc) + rtree_prefork(chunks_rtree); + chunk_dss_prefork(); +} + +void +chunk_postfork_parent(void) +{ + + chunk_dss_postfork_parent(); + if (config_ivsalloc) + rtree_postfork_parent(chunks_rtree); + malloc_mutex_postfork_parent(&chunks_mtx); +} + +void +chunk_postfork_child(void) +{ + + chunk_dss_postfork_child(); + if (config_ivsalloc) + rtree_postfork_child(chunks_rtree); + malloc_mutex_postfork_child(&chunks_mtx); +} diff --git a/deps/jemalloc/src/chunk_dss.c b/deps/jemalloc/src/chunk_dss.c index 2d68e4804..24781cc52 100644 --- a/deps/jemalloc/src/chunk_dss.c +++ b/deps/jemalloc/src/chunk_dss.c @@ -3,6 +3,16 @@ /******************************************************************************/ /* Data. */ +const char *dss_prec_names[] = { + "disabled", + "primary", + "secondary", + "N/A" +}; + +/* Current dss precedence default, used when creating new arenas. */ +static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; + /* * Protects sbrk() calls. This avoids malloc races among threads, though it * does not protect against races with threads that call sbrk() directly. @@ -29,6 +39,31 @@ sbrk(intptr_t increment) } #endif +dss_prec_t +chunk_dss_prec_get(void) +{ + dss_prec_t ret; + + if (config_dss == false) + return (dss_prec_disabled); + malloc_mutex_lock(&dss_mtx); + ret = dss_prec_default; + malloc_mutex_unlock(&dss_mtx); + return (ret); +} + +bool +chunk_dss_prec_set(dss_prec_t dss_prec) +{ + + if (config_dss == false) + return (true); + malloc_mutex_lock(&dss_mtx); + dss_prec_default = dss_prec; + malloc_mutex_unlock(&dss_mtx); + return (false); +} + void * chunk_alloc_dss(size_t size, size_t alignment, bool *zero) { @@ -88,7 +123,7 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) dss_max = dss_next; malloc_mutex_unlock(&dss_mtx); if (cpad_size != 0) - chunk_dealloc(cpad, cpad_size, true); + chunk_unmap(cpad, cpad_size); if (*zero) { VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); diff --git a/deps/jemalloc/src/chunk_mmap.c b/deps/jemalloc/src/chunk_mmap.c index c8da6556b..8a42e7591 100644 --- a/deps/jemalloc/src/chunk_mmap.c +++ b/deps/jemalloc/src/chunk_mmap.c @@ -113,22 +113,30 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) #endif } -void +bool pages_purge(void *addr, size_t length) { + bool unzeroed; #ifdef _WIN32 VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE); + unzeroed = true; #else # ifdef JEMALLOC_PURGE_MADVISE_DONTNEED # define JEMALLOC_MADV_PURGE MADV_DONTNEED +# define JEMALLOC_MADV_ZEROS true # elif defined(JEMALLOC_PURGE_MADVISE_FREE) # define JEMALLOC_MADV_PURGE MADV_FREE +# define JEMALLOC_MADV_ZEROS false # else # error "No method defined for purging unused dirty pages." # endif - madvise(addr, length, JEMALLOC_MADV_PURGE); + int err = madvise(addr, length, JEMALLOC_MADV_PURGE); + unzeroed = (JEMALLOC_MADV_ZEROS == false || err != 0); +# undef JEMALLOC_MADV_PURGE +# undef JEMALLOC_MADV_ZEROS #endif + return (unzeroed); } static void * diff --git a/deps/jemalloc/src/ctl.c b/deps/jemalloc/src/ctl.c index 55e766777..6e01b1e27 100644 --- a/deps/jemalloc/src/ctl.c +++ b/deps/jemalloc/src/ctl.c @@ -48,8 +48,8 @@ static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -const ctl_named_node_t *n##_index(const size_t *mib, size_t miblen, \ - size_t i); +static const ctl_named_node_t *n##_index(const size_t *mib, \ + size_t miblen, size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); static void ctl_arena_clear(ctl_arena_stats_t *astats); @@ -58,6 +58,7 @@ static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats); static void ctl_arena_refresh(arena_t *arena, unsigned i); +static bool ctl_grow(void); static void ctl_refresh(void); static bool ctl_init(void); static int ctl_lookup(const char *name, ctl_node_t const **nodesp, @@ -88,6 +89,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_valgrind) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) +CTL_PROTO(opt_dss) CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) CTL_PROTO(opt_lg_dirty_mult) @@ -110,6 +112,10 @@ CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) +CTL_PROTO(arena_i_purge) +static void arena_purge(unsigned arena_ind); +CTL_PROTO(arena_i_dss) +INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) CTL_PROTO(arenas_bin_i_run_size) @@ -125,6 +131,7 @@ CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlruns) CTL_PROTO(arenas_purge) +CTL_PROTO(arenas_extend) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) CTL_PROTO(prof_interval) @@ -158,6 +165,7 @@ CTL_PROTO(stats_arenas_i_lruns_j_nrequests) CTL_PROTO(stats_arenas_i_lruns_j_curruns) INDEX_PROTO(stats_arenas_i_lruns_j) CTL_PROTO(stats_arenas_i_nthreads) +CTL_PROTO(stats_arenas_i_dss) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_mapped) @@ -223,6 +231,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, + {NAME("dss"), CTL(opt_dss)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, @@ -247,6 +256,18 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_accum"), CTL(opt_prof_accum)} }; +static const ctl_named_node_t arena_i_node[] = { + {NAME("purge"), CTL(arena_i_purge)}, + {NAME("dss"), CTL(arena_i_dss)} +}; +static const ctl_named_node_t super_arena_i_node[] = { + {NAME(""), CHILD(named, arena_i)} +}; + +static const ctl_indexed_node_t arena_node[] = { + {INDEX(arena_i)} +}; + static const ctl_named_node_t arenas_bin_i_node[] = { {NAME("size"), CTL(arenas_bin_i_size)}, {NAME("nregs"), CTL(arenas_bin_i_nregs)}, @@ -282,7 +303,8 @@ static const ctl_named_node_t arenas_node[] = { {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlruns"), CTL(arenas_nlruns)}, {NAME("lrun"), CHILD(indexed, arenas_lrun)}, - {NAME("purge"), CTL(arenas_purge)} + {NAME("purge"), CTL(arenas_purge)}, + {NAME("extend"), CTL(arenas_extend)} }; static const ctl_named_node_t prof_node[] = { @@ -352,6 +374,7 @@ static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, + {NAME("dss"), CTL(stats_arenas_i_dss)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, @@ -387,6 +410,7 @@ static const ctl_named_node_t root_node[] = { {NAME("thread"), CHILD(named, thread)}, {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, + {NAME("arena"), CHILD(indexed, arena)}, {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)}, {NAME("stats"), CHILD(named, stats)} @@ -420,6 +444,7 @@ static void ctl_arena_clear(ctl_arena_stats_t *astats) { + astats->dss = dss_prec_names[dss_prec_limit]; astats->pactive = 0; astats->pdirty = 0; if (config_stats) { @@ -439,8 +464,8 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; - arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty, - &cstats->astats, cstats->bstats, cstats->lstats); + arena_stats_merge(arena, &cstats->dss, &cstats->pactive, + &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats); for (i = 0; i < NBINS; i++) { cstats->allocated_small += cstats->bstats[i].allocated; @@ -500,7 +525,7 @@ static void ctl_arena_refresh(arena_t *arena, unsigned i) { ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; - ctl_arena_stats_t *sstats = &ctl_stats.arenas[narenas]; + ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; ctl_arena_clear(astats); @@ -518,11 +543,72 @@ ctl_arena_refresh(arena_t *arena, unsigned i) } } +static bool +ctl_grow(void) +{ + size_t astats_size; + ctl_arena_stats_t *astats; + arena_t **tarenas; + + /* Extend arena stats and arenas arrays. */ + astats_size = (ctl_stats.narenas + 2) * sizeof(ctl_arena_stats_t); + if (ctl_stats.narenas == narenas_auto) { + /* ctl_stats.arenas and arenas came from base_alloc(). */ + astats = (ctl_arena_stats_t *)imalloc(astats_size); + if (astats == NULL) + return (true); + memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * + sizeof(ctl_arena_stats_t)); + + tarenas = (arena_t **)imalloc((ctl_stats.narenas + 1) * + sizeof(arena_t *)); + if (tarenas == NULL) { + idalloc(astats); + return (true); + } + memcpy(tarenas, arenas, ctl_stats.narenas * sizeof(arena_t *)); + } else { + astats = (ctl_arena_stats_t *)iralloc(ctl_stats.arenas, + astats_size, 0, 0, false, false); + if (astats == NULL) + return (true); + + tarenas = (arena_t **)iralloc(arenas, (ctl_stats.narenas + 1) * + sizeof(arena_t *), 0, 0, false, false); + if (tarenas == NULL) + return (true); + } + /* Initialize the new astats and arenas elements. */ + memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); + if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) + return (true); + tarenas[ctl_stats.narenas] = NULL; + /* Swap merged stats to their new location. */ + { + ctl_arena_stats_t tstats; + memcpy(&tstats, &astats[ctl_stats.narenas], + sizeof(ctl_arena_stats_t)); + memcpy(&astats[ctl_stats.narenas], + &astats[ctl_stats.narenas + 1], sizeof(ctl_arena_stats_t)); + memcpy(&astats[ctl_stats.narenas + 1], &tstats, + sizeof(ctl_arena_stats_t)); + } + ctl_stats.arenas = astats; + ctl_stats.narenas++; + malloc_mutex_lock(&arenas_lock); + arenas = tarenas; + narenas_total++; + arenas_extend(narenas_total - 1); + malloc_mutex_unlock(&arenas_lock); + + return (false); +} + static void ctl_refresh(void) { unsigned i; - VARIABLE_ARRAY(arena_t *, tarenas, narenas); + VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); if (config_stats) { malloc_mutex_lock(&chunks_mtx); @@ -542,19 +628,19 @@ ctl_refresh(void) * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). */ - ctl_stats.arenas[narenas].nthreads = 0; - ctl_arena_clear(&ctl_stats.arenas[narenas]); + ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; + ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); - for (i = 0; i < narenas; i++) { + memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); + for (i = 0; i < ctl_stats.narenas; i++) { if (arenas[i] != NULL) ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; else ctl_stats.arenas[i].nthreads = 0; } malloc_mutex_unlock(&arenas_lock); - for (i = 0; i < narenas; i++) { + for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); ctl_stats.arenas[i].initialized = initialized; @@ -563,11 +649,13 @@ ctl_refresh(void) } if (config_stats) { - ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small - + ctl_stats.arenas[narenas].astats.allocated_large + ctl_stats.allocated = + ctl_stats.arenas[ctl_stats.narenas].allocated_small + + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large + + ctl_stats.huge.allocated; + ctl_stats.active = + (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE) + ctl_stats.huge.allocated; - ctl_stats.active = (ctl_stats.arenas[narenas].pactive << - LG_PAGE) + ctl_stats.huge.allocated; ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); } @@ -585,13 +673,15 @@ ctl_init(void) * Allocate space for one extra arena stats element, which * contains summed stats across all arenas. */ + assert(narenas_auto == narenas_total_get()); + ctl_stats.narenas = narenas_auto; ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc( - (narenas + 1) * sizeof(ctl_arena_stats_t)); + (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); if (ctl_stats.arenas == NULL) { ret = true; goto label_return; } - memset(ctl_stats.arenas, 0, (narenas + 1) * + memset(ctl_stats.arenas, 0, (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); /* @@ -601,14 +691,14 @@ ctl_init(void) */ if (config_stats) { unsigned i; - for (i = 0; i <= narenas; i++) { + for (i = 0; i <= ctl_stats.narenas; i++) { if (ctl_arena_init(&ctl_stats.arenas[i])) { ret = true; goto label_return; } } } - ctl_stats.arenas[narenas].initialized = true; + ctl_stats.arenas[ctl_stats.narenas].initialized = true; ctl_epoch = 0; ctl_refresh(); @@ -827,6 +917,27 @@ ctl_boot(void) return (false); } +void +ctl_prefork(void) +{ + + malloc_mutex_lock(&ctl_mtx); +} + +void +ctl_postfork_parent(void) +{ + + malloc_mutex_postfork_parent(&ctl_mtx); +} + +void +ctl_postfork_child(void) +{ + + malloc_mutex_postfork_child(&ctl_mtx); +} + /******************************************************************************/ /* *_ctl() functions. */ @@ -1032,8 +1143,8 @@ thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, } READ(oldval, bool); -label_return: ret = 0; +label_return: return (ret); } @@ -1063,13 +1174,14 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; unsigned newind, oldind; + malloc_mutex_lock(&ctl_mtx); newind = oldind = choose_arena(NULL)->ind; WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { arena_t *arena; - if (newind >= narenas) { + if (newind >= ctl_stats.narenas) { /* New arena index is out of range. */ ret = EFAULT; goto label_return; @@ -1102,6 +1214,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: + malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1135,6 +1248,7 @@ CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) +CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) @@ -1160,10 +1274,121 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) /******************************************************************************/ +/* ctl_mutex must be held during execution of this function. */ +static void +arena_purge(unsigned arena_ind) +{ + VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); + + malloc_mutex_lock(&arenas_lock); + memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas); + malloc_mutex_unlock(&arenas_lock); + + if (arena_ind == ctl_stats.narenas) { + unsigned i; + for (i = 0; i < ctl_stats.narenas; i++) { + if (tarenas[i] != NULL) + arena_purge_all(tarenas[i]); + } + } else { + assert(arena_ind < ctl_stats.narenas); + if (tarenas[arena_ind] != NULL) + arena_purge_all(tarenas[arena_ind]); + } +} + +static int +arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + + READONLY(); + WRITEONLY(); + malloc_mutex_lock(&ctl_mtx); + arena_purge(mib[1]); + malloc_mutex_unlock(&ctl_mtx); + + ret = 0; +label_return: + return (ret); +} + +static int +arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret, i; + bool match, err; + const char *dss; + unsigned arena_ind = mib[1]; + dss_prec_t dss_prec_old = dss_prec_limit; + dss_prec_t dss_prec = dss_prec_limit; + + malloc_mutex_lock(&ctl_mtx); + WRITE(dss, const char *); + match = false; + for (i = 0; i < dss_prec_limit; i++) { + if (strcmp(dss_prec_names[i], dss) == 0) { + dss_prec = i; + match = true; + break; + } + } + if (match == false) { + ret = EINVAL; + goto label_return; + } + + if (arena_ind < ctl_stats.narenas) { + arena_t *arena = arenas[arena_ind]; + if (arena != NULL) { + dss_prec_old = arena_dss_prec_get(arena); + arena_dss_prec_set(arena, dss_prec); + err = false; + } else + err = true; + } else { + dss_prec_old = chunk_dss_prec_get(); + err = chunk_dss_prec_set(dss_prec); + } + dss = dss_prec_names[dss_prec_old]; + READ(dss, const char *); + if (err) { + ret = EFAULT; + goto label_return; + } + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static const ctl_named_node_t * +arena_i_index(const size_t *mib, size_t miblen, size_t i) +{ + const ctl_named_node_t * ret; + + malloc_mutex_lock(&ctl_mtx); + if (i > ctl_stats.narenas) { + ret = NULL; + goto label_return; + } + + ret = super_arena_i_node; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + + +/******************************************************************************/ + CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) -const ctl_named_node_t * +static const ctl_named_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1173,7 +1398,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) -const ctl_named_node_t * +static const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1182,7 +1407,27 @@ arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) return (super_arenas_lrun_i_node); } -CTL_RO_NL_GEN(arenas_narenas, narenas, unsigned) +static int +arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned narenas; + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (*oldlenp != sizeof(unsigned)) { + ret = EINVAL; + goto label_return; + } + narenas = ctl_stats.narenas; + READ(narenas, unsigned); + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} static int arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, @@ -1193,13 +1438,13 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, malloc_mutex_lock(&ctl_mtx); READONLY(); - if (*oldlenp != narenas * sizeof(bool)) { + if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { ret = EINVAL; - nread = (*oldlenp < narenas * sizeof(bool)) - ? (*oldlenp / sizeof(bool)) : narenas; + nread = (*oldlenp < ctl_stats.narenas * sizeof(bool)) + ? (*oldlenp / sizeof(bool)) : ctl_stats.narenas; } else { ret = 0; - nread = narenas; + nread = ctl_stats.narenas; } for (i = 0; i < nread; i++) @@ -1222,36 +1467,43 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena; + unsigned arena_ind; + malloc_mutex_lock(&ctl_mtx); WRITEONLY(); - arena = UINT_MAX; - WRITE(arena, unsigned); - if (newp != NULL && arena >= narenas) { + arena_ind = UINT_MAX; + WRITE(arena_ind, unsigned); + if (newp != NULL && arena_ind >= ctl_stats.narenas) ret = EFAULT; - goto label_return; - } else { - VARIABLE_ARRAY(arena_t *, tarenas, narenas); + else { + if (arena_ind == UINT_MAX) + arena_ind = ctl_stats.narenas; + arena_purge(arena_ind); + ret = 0; + } - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); - malloc_mutex_unlock(&arenas_lock); +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} - if (arena == UINT_MAX) { - unsigned i; - for (i = 0; i < narenas; i++) { - if (tarenas[i] != NULL) - arena_purge_all(tarenas[i]); - } - } else { - assert(arena < narenas); - if (tarenas[arena] != NULL) - arena_purge_all(tarenas[arena]); - } +static int +arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (ctl_grow()) { + ret = EAGAIN; + goto label_return; } + READ(ctl_stats.narenas - 1, unsigned); ret = 0; label_return: + malloc_mutex_unlock(&ctl_mtx); return (ret); } @@ -1356,7 +1608,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) -const ctl_named_node_t * +static const ctl_named_node_t * stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1374,7 +1626,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) -const ctl_named_node_t * +static const ctl_named_node_t * stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) { @@ -1384,6 +1636,7 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) } CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) +CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, @@ -1395,13 +1648,13 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, CTL_RO_CGEN(config_stats, stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, uint64_t) -const ctl_named_node_t * +static const ctl_named_node_t * stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t * ret; malloc_mutex_lock(&ctl_mtx); - if (ctl_stats.arenas[i].initialized == false) { + if (i > ctl_stats.narenas || ctl_stats.arenas[i].initialized == false) { ret = NULL; goto label_return; } diff --git a/deps/jemalloc/src/huge.c b/deps/jemalloc/src/huge.c index 8a4ec9424..aa08d43d3 100644 --- a/deps/jemalloc/src/huge.c +++ b/deps/jemalloc/src/huge.c @@ -48,7 +48,8 @@ huge_palloc(size_t size, size_t alignment, bool zero) * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - ret = chunk_alloc(csize, alignment, false, &is_zeroed); + ret = chunk_alloc(csize, alignment, false, &is_zeroed, + chunk_dss_prec_get()); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ -101,7 +102,7 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) void * huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) + size_t alignment, bool zero, bool try_tcache_dalloc) { void *ret; size_t copysize; @@ -180,7 +181,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, #endif { memcpy(ret, ptr, copysize); - iqalloc(ptr); + iqallocx(ptr, try_tcache_dalloc); } return (ret); } diff --git a/deps/jemalloc/src/jemalloc.c b/deps/jemalloc/src/jemalloc.c index bc54cd7ca..8a667b62e 100644 --- a/deps/jemalloc/src/jemalloc.c +++ b/deps/jemalloc/src/jemalloc.c @@ -33,7 +33,8 @@ unsigned ncpus; malloc_mutex_t arenas_lock; arena_t **arenas; -unsigned narenas; +unsigned narenas_total; +unsigned narenas_auto; /* Set to true once the allocator has been initialized. */ static bool malloc_initialized = false; @@ -144,14 +145,14 @@ choose_arena_hard(void) { arena_t *ret; - if (narenas > 1) { + if (narenas_auto > 1) { unsigned i, choose, first_null; choose = 0; - first_null = narenas; + first_null = narenas_auto; malloc_mutex_lock(&arenas_lock); assert(arenas[0] != NULL); - for (i = 1; i < narenas; i++) { + for (i = 1; i < narenas_auto; i++) { if (arenas[i] != NULL) { /* * Choose the first arena that has the lowest @@ -160,7 +161,7 @@ choose_arena_hard(void) if (arenas[i]->nthreads < arenas[choose]->nthreads) choose = i; - } else if (first_null == narenas) { + } else if (first_null == narenas_auto) { /* * Record the index of the first uninitialized * arena, in case all extant arenas are in use. @@ -174,7 +175,8 @@ choose_arena_hard(void) } } - if (arenas[choose]->nthreads == 0 || first_null == narenas) { + if (arenas[choose]->nthreads == 0 + || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded arena if * all arenas are already initialized. @@ -203,7 +205,7 @@ stats_print_atexit(void) { if (config_tcache && config_stats) { - unsigned i; + unsigned narenas, i; /* * Merge stats from extant threads. This is racy, since @@ -212,7 +214,7 @@ stats_print_atexit(void) * out of date by the time they are reported, if other threads * continue to allocate. */ - for (i = 0; i < narenas; i++) { + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena = arenas[i]; if (arena != NULL) { tcache_t *tcache; @@ -254,12 +256,13 @@ malloc_ncpus(void) result = si.dwNumberOfProcessors; #else result = sysconf(_SC_NPROCESSORS_ONLN); +#endif if (result == -1) { /* Error. */ ret = 1; - } -#endif - ret = (unsigned)result; + } else { + ret = (unsigned)result; + } return (ret); } @@ -377,6 +380,22 @@ malloc_conf_init(void) const char *opts, *k, *v; size_t klen, vlen; + /* + * Automatically configure valgrind before processing options. The + * valgrind option remains in jemalloc 3.x for compatibility reasons. + */ + if (config_valgrind) { + opt_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false; + if (config_fill && opt_valgrind) { + opt_junk = false; + assert(opt_zero == false); + opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; + opt_redzone = true; + } + if (config_tcache && opt_valgrind) + opt_tcache = false; + } + for (i = 0; i < 3; i++) { /* Get runtime configuration. */ switch (i) { @@ -537,6 +556,30 @@ malloc_conf_init(void) */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1) + if (strncmp("dss", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < dss_prec_limit; i++) { + if (strncmp(dss_prec_names[i], v, vlen) + == 0) { + if (chunk_dss_prec_set(i)) { + malloc_conf_error( + "Error setting dss", + k, klen, v, vlen); + } else { + opt_dss = + dss_prec_names[i]; + match = true; + break; + } + } + } + if (match == false) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, SIZE_T_MAX) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", @@ -553,20 +596,7 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_utrace, "utrace") } if (config_valgrind) { - bool hit; - CONF_HANDLE_BOOL_HIT(opt_valgrind, - "valgrind", hit) - if (config_fill && opt_valgrind && hit) { - opt_junk = false; - opt_zero = false; - if (opt_quarantine == 0) { - opt_quarantine = - JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; - } - opt_redzone = true; - } - if (hit) - continue; + CONF_HANDLE_BOOL(opt_valgrind, "valgrind") } if (config_xmalloc) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") @@ -695,9 +725,9 @@ malloc_init_hard(void) * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). */ - narenas = 1; + narenas_total = narenas_auto = 1; arenas = init_arenas; - memset(arenas, 0, sizeof(arena_t *) * narenas); + memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* * Initialize one arena here. The rest are lazily created in @@ -755,20 +785,21 @@ malloc_init_hard(void) else opt_narenas = 1; } - narenas = opt_narenas; + narenas_auto = opt_narenas; /* * Make sure that the arenas array can be allocated. In practice, this * limit is enough to allow the allocator to function, but the ctl * machinery will fail to allocate memory at far lower limits. */ - if (narenas > chunksize / sizeof(arena_t *)) { - narenas = chunksize / sizeof(arena_t *); + if (narenas_auto > chunksize / sizeof(arena_t *)) { + narenas_auto = chunksize / sizeof(arena_t *); malloc_printf("<jemalloc>: Reducing narenas to limit (%d)\n", - narenas); + narenas_auto); } + narenas_total = narenas_auto; /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); + arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total); if (arenas == NULL) { malloc_mutex_unlock(&init_lock); return (true); @@ -777,7 +808,7 @@ malloc_init_hard(void) * Zero the array. In practice, this should always be pre-zeroed, * since it was just mmap()ed, but let's be sure. */ - memset(arenas, 0, sizeof(arena_t *) * narenas); + memset(arenas, 0, sizeof(arena_t *) * narenas_total); /* Copy the pointer to the one arena that was already initialized. */ arenas[0] = init_arenas[0]; @@ -1262,11 +1293,10 @@ je_valloc(size_t size) * passed an extra argument for the caller return address, which will be * ignored. */ -JEMALLOC_EXPORT void (* const __free_hook)(void *ptr) = je_free; -JEMALLOC_EXPORT void *(* const __malloc_hook)(size_t size) = je_malloc; -JEMALLOC_EXPORT void *(* const __realloc_hook)(void *ptr, size_t size) = - je_realloc; -JEMALLOC_EXPORT void *(* const __memalign_hook)(size_t alignment, size_t size) = +JEMALLOC_EXPORT void (* __free_hook)(void *ptr) = je_free; +JEMALLOC_EXPORT void *(* __malloc_hook)(size_t size) = je_malloc; +JEMALLOC_EXPORT void *(* __realloc_hook)(void *ptr, size_t size) = je_realloc; +JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) = je_memalign; #endif @@ -1279,7 +1309,7 @@ JEMALLOC_EXPORT void *(* const __memalign_hook)(size_t alignment, size_t size) = */ size_t -je_malloc_usable_size(const void *ptr) +je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; @@ -1343,18 +1373,19 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, #ifdef JEMALLOC_EXPERIMENTAL JEMALLOC_INLINE void * -iallocm(size_t usize, size_t alignment, bool zero) +iallocm(size_t usize, size_t alignment, bool zero, bool try_tcache, + arena_t *arena) { assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment))); if (alignment != 0) - return (ipalloc(usize, alignment, zero)); + return (ipallocx(usize, alignment, zero, try_tcache, arena)); else if (zero) - return (icalloc(usize)); + return (icallocx(usize, try_tcache, arena)); else - return (imalloc(usize)); + return (imallocx(usize, try_tcache, arena)); } int @@ -1365,6 +1396,9 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + arena_t *arena; + bool try_tcache; assert(ptr != NULL); assert(size != 0); @@ -1372,6 +1406,14 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) if (malloc_init()) goto label_oom; + if (arena_ind != UINT_MAX) { + arena = arenas[arena_ind]; + try_tcache = false; + } else { + arena = NULL; + try_tcache = true; + } + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); if (usize == 0) goto label_oom; @@ -1388,18 +1430,19 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment); assert(usize_promoted != 0); - p = iallocm(usize_promoted, alignment, zero); + p = iallocm(usize_promoted, alignment, zero, + try_tcache, arena); if (p == NULL) goto label_oom; arena_prof_promoted(p, usize); } else { - p = iallocm(usize, alignment, zero); + p = iallocm(usize, alignment, zero, try_tcache, arena); if (p == NULL) goto label_oom; } prof_malloc(p, usize, cnt); } else { - p = iallocm(usize, alignment, zero); + p = iallocm(usize, alignment, zero, try_tcache, arena); if (p == NULL) goto label_oom; } @@ -1436,6 +1479,9 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) & (SIZE_T_MAX-1)); bool zero = flags & ALLOCM_ZERO; bool no_move = flags & ALLOCM_NO_MOVE; + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache_alloc, try_tcache_dalloc; + arena_t *arena; assert(ptr != NULL); assert(*ptr != NULL); @@ -1443,6 +1489,19 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) assert(SIZE_T_MAX - size >= extra); assert(malloc_initialized || IS_INITIALIZER); + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk; + try_tcache_alloc = true; + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(*ptr); + try_tcache_dalloc = (chunk == *ptr || chunk->arena != + arenas[arena_ind]); + arena = arenas[arena_ind]; + } else { + try_tcache_alloc = true; + try_tcache_dalloc = true; + arena = NULL; + } + p = *ptr; if (config_prof && opt_prof) { prof_thr_cnt_t *cnt; @@ -1469,9 +1528,10 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) <= SMALL_MAXCLASS) { - q = iralloc(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + q = irallocx(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), - alignment, zero, no_move); + alignment, zero, no_move, try_tcache_alloc, + try_tcache_dalloc, arena); if (q == NULL) goto label_err; if (max_usize < PAGE) { @@ -1480,7 +1540,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) } else usize = isalloc(q, config_prof); } else { - q = iralloc(p, size, extra, alignment, zero, no_move); + q = irallocx(p, size, extra, alignment, zero, no_move, + try_tcache_alloc, try_tcache_dalloc, arena); if (q == NULL) goto label_err; usize = isalloc(q, config_prof); @@ -1497,7 +1558,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) old_size = isalloc(p, false); old_rzsize = u2rz(old_size); } - q = iralloc(p, size, extra, alignment, zero, no_move); + q = irallocx(p, size, extra, alignment, zero, no_move, + try_tcache_alloc, try_tcache_dalloc, arena); if (q == NULL) goto label_err; if (config_stats) @@ -1558,10 +1620,19 @@ je_dallocm(void *ptr, int flags) { size_t usize; size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + unsigned arena_ind = ((unsigned)(flags >> 8)) - 1; + bool try_tcache; assert(ptr != NULL); assert(malloc_initialized || IS_INITIALIZER); + if (arena_ind != UINT_MAX) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + try_tcache = (chunk == ptr || chunk->arena != + arenas[arena_ind]); + } else + try_tcache = true; + UTRACE(ptr, 0, 0); if (config_stats || config_valgrind) usize = isalloc(ptr, config_prof); @@ -1574,7 +1645,7 @@ je_dallocm(void *ptr, int flags) thread_allocated_tsd_get()->deallocated += usize; if (config_valgrind && opt_valgrind) rzsize = p2rz(ptr); - iqalloc(ptr); + iqallocx(ptr, try_tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); return (ALLOCM_SUCCESS); @@ -1611,6 +1682,27 @@ je_nallocm(size_t *rsize, size_t size, int flags) * malloc during fork(). */ +/* + * If an application creates a thread before doing any allocation in the main + * thread, then calls fork(2) in the main thread followed by memory allocation + * in the child process, a race can occur that results in deadlock within the + * child: the main thread may have forked while the created thread had + * partially initialized the allocator. Ordinarily jemalloc prevents + * fork/malloc races via the following functions it registers during + * initialization using pthread_atfork(), but of course that does no good if + * the allocator isn't fully initialized at fork time. The following library + * constructor is a partial solution to this problem. It may still possible to + * trigger the deadlock described above, but doing so would involve forking via + * a library constructor that runs before jemalloc's runs. + */ +JEMALLOC_ATTR(constructor) +static void +jemalloc_constructor(void) +{ + + malloc_init(); +} + #ifndef JEMALLOC_MUTEX_INIT_CB void jemalloc_prefork(void) @@ -1628,14 +1720,16 @@ _malloc_prefork(void) assert(malloc_initialized); /* Acquire all mutexes in a safe order. */ + ctl_prefork(); malloc_mutex_prefork(&arenas_lock); - for (i = 0; i < narenas; i++) { + for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_prefork(arenas[i]); } + prof_prefork(); + chunk_prefork(); base_prefork(); huge_prefork(); - chunk_dss_prefork(); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -1655,14 +1749,16 @@ _malloc_postfork(void) assert(malloc_initialized); /* Release all mutexes, now that fork() has completed. */ - chunk_dss_postfork_parent(); huge_postfork_parent(); base_postfork_parent(); - for (i = 0; i < narenas; i++) { + chunk_postfork_parent(); + prof_postfork_parent(); + for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_postfork_parent(arenas[i]); } malloc_mutex_postfork_parent(&arenas_lock); + ctl_postfork_parent(); } void @@ -1673,14 +1769,16 @@ jemalloc_postfork_child(void) assert(malloc_initialized); /* Release all mutexes, now that fork() has completed. */ - chunk_dss_postfork_child(); huge_postfork_child(); base_postfork_child(); - for (i = 0; i < narenas; i++) { + chunk_postfork_child(); + prof_postfork_child(); + for (i = 0; i < narenas_total; i++) { if (arenas[i] != NULL) arena_postfork_child(arenas[i]); } malloc_mutex_postfork_child(&arenas_lock); + ctl_postfork_child(); } /******************************************************************************/ diff --git a/deps/jemalloc/src/mutex.c b/deps/jemalloc/src/mutex.c index 37a843e6e..55e18c237 100644 --- a/deps/jemalloc/src/mutex.c +++ b/deps/jemalloc/src/mutex.c @@ -64,7 +64,7 @@ pthread_create(pthread_t *__restrict thread, /******************************************************************************/ #ifdef JEMALLOC_MUTEX_INIT_CB -int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, +JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)); #endif diff --git a/deps/jemalloc/src/prof.c b/deps/jemalloc/src/prof.c index de1d39299..04964ef7c 100644 --- a/deps/jemalloc/src/prof.c +++ b/deps/jemalloc/src/prof.c @@ -1270,4 +1270,46 @@ prof_boot2(void) return (false); } +void +prof_prefork(void) +{ + + if (opt_prof) { + unsigned i; + + malloc_mutex_lock(&bt2ctx_mtx); + malloc_mutex_lock(&prof_dump_seq_mtx); + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_lock(&ctx_locks[i]); + } +} + +void +prof_postfork_parent(void) +{ + + if (opt_prof) { + unsigned i; + + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_postfork_parent(&ctx_locks[i]); + malloc_mutex_postfork_parent(&prof_dump_seq_mtx); + malloc_mutex_postfork_parent(&bt2ctx_mtx); + } +} + +void +prof_postfork_child(void) +{ + + if (opt_prof) { + unsigned i; + + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_postfork_child(&ctx_locks[i]); + malloc_mutex_postfork_child(&prof_dump_seq_mtx); + malloc_mutex_postfork_child(&bt2ctx_mtx); + } +} + /******************************************************************************/ diff --git a/deps/jemalloc/src/rtree.c b/deps/jemalloc/src/rtree.c index eb0ff1e24..90c6935a0 100644 --- a/deps/jemalloc/src/rtree.c +++ b/deps/jemalloc/src/rtree.c @@ -44,3 +44,24 @@ rtree_new(unsigned bits) return (ret); } + +void +rtree_prefork(rtree_t *rtree) +{ + + malloc_mutex_prefork(&rtree->mutex); +} + +void +rtree_postfork_parent(rtree_t *rtree) +{ + + malloc_mutex_postfork_parent(&rtree->mutex); +} + +void +rtree_postfork_child(rtree_t *rtree) +{ + + malloc_mutex_postfork_child(&rtree->mutex); +} diff --git a/deps/jemalloc/src/stats.c b/deps/jemalloc/src/stats.c index 433b80d12..43f87af67 100644 --- a/deps/jemalloc/src/stats.c +++ b/deps/jemalloc/src/stats.c @@ -206,6 +206,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i, bool bins, bool large) { unsigned nthreads; + const char *dss; size_t page, pactive, pdirty, mapped; uint64_t npurge, nmadvise, purged; size_t small_allocated; @@ -218,6 +219,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); malloc_cprintf(write_cb, cbopaque, "assigned threads: %u\n", nthreads); + CTL_I_GET("stats.arenas.0.dss", &dss, const char *); + malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", + dss); CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); @@ -370,6 +374,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "Run-time option settings:\n"); OPT_WRITE_BOOL(abort) OPT_WRITE_SIZE_T(lg_chunk) + OPT_WRITE_CHAR_P(dss) OPT_WRITE_SIZE_T(narenas) OPT_WRITE_SSIZE_T(lg_dirty_mult) OPT_WRITE_BOOL(stats_print) @@ -400,7 +405,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); CTL_GET("arenas.narenas", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "Max arenas: %u\n", uv); + malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", sizeof(void *)); @@ -472,7 +477,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.chunks.current", &chunks_current, size_t); malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " "highchunks curchunks\n"); - malloc_cprintf(write_cb, cbopaque, " %13"PRIu64"%13zu%13zu\n", + malloc_cprintf(write_cb, cbopaque, + " %13"PRIu64" %12zu %12zu\n", chunks_total, chunks_high, chunks_current); /* Print huge stats. */ diff --git a/deps/jemalloc/src/tcache.c b/deps/jemalloc/src/tcache.c index 60244c45f..47e14f30b 100644 --- a/deps/jemalloc/src/tcache.c +++ b/deps/jemalloc/src/tcache.c @@ -288,7 +288,7 @@ tcache_create(arena_t *arena) else if (size <= tcache_maxclass) tcache = (tcache_t *)arena_malloc_large(arena, size, true); else - tcache = (tcache_t *)icalloc(size); + tcache = (tcache_t *)icallocx(size, false, arena); if (tcache == NULL) return (NULL); @@ -364,7 +364,7 @@ tcache_destroy(tcache_t *tcache) arena_dalloc_large(arena, chunk, tcache); } else - idalloc(tcache); + idallocx(tcache, false); } void diff --git a/deps/jemalloc/src/util.c b/deps/jemalloc/src/util.c index 9b73c3ec0..b3a011436 100644 --- a/deps/jemalloc/src/util.c +++ b/deps/jemalloc/src/util.c @@ -377,7 +377,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) case '\0': goto label_out; case '%': { bool alt_form = false; - bool zero_pad = false; bool left_justify = false; bool plus_space = false; bool plus_plus = false; @@ -398,10 +397,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) assert(alt_form == false); alt_form = true; break; - case '0': - assert(zero_pad == false); - zero_pad = true; - break; case '-': assert(left_justify == false); left_justify = true; diff --git a/deps/jemalloc/src/zone.c b/deps/jemalloc/src/zone.c index cde5d49a4..c62c183f6 100644 --- a/deps/jemalloc/src/zone.c +++ b/deps/jemalloc/src/zone.c @@ -171,6 +171,16 @@ void register_zone(void) { + /* + * If something else replaced the system default zone allocator, don't + * register jemalloc's. + */ + malloc_zone_t *default_zone = malloc_default_zone(); + if (!default_zone->zone_name || + strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { + return; + } + zone.size = (void *)zone_size; zone.malloc = (void *)zone_malloc; zone.calloc = (void *)zone_calloc; @@ -241,7 +251,7 @@ register_zone(void) * then becomes the default. */ do { - malloc_zone_t *default_zone = malloc_default_zone(); + default_zone = malloc_default_zone(); malloc_zone_unregister(default_zone); malloc_zone_register(default_zone); } while (malloc_default_zone() != &zone); |