author     antirez <antirez@gmail.com>              2015-10-06 16:18:30 +0200
committer  antirez <antirez@gmail.com>              2015-10-06 16:55:37 +0200
commit     a9951b1b6a326532163e0fe4ee1a26e972258a1e
tree       ca555f37238537175cc1b34aa62a9f873026047f /deps/jemalloc/include/jemalloc/internal
parent     e3ded0273c43986a49ddd9d5fb4a20d187d015de
download   redis-a9951b1b6a326532163e0fe4ee1a26e972258a1e.tar.gz
Jemalloc updated to 4.0.3.
Diffstat (limited to 'deps/jemalloc/include/jemalloc/internal')
32 files changed, 3962 insertions, 2005 deletions
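
Note on the arena.h changes below: the updated chunk-map layout moves binind up to bit 5, adds a decommitted flag, and replaces the old "mapbits & ~PAGE_MASK" size extraction with an explicit encode/decode pair keyed off CHUNK_MAP_SIZE_SHIFT. As a rough standalone illustration of that round trip (not part of the commit), the sketch below copies the new CHUNK_MAP_* constants from the diff and assumes LG_PAGE is 12 (4 KiB pages); the mapbits_size_* helper names and the main() driver are invented for the example.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define LG_PAGE                 12  /* Assumption: 4 KiB pages. */

/* Constants copied from the new arena.h layout in the hunks below. */
#define CHUNK_MAP_ALLOCATED     ((size_t)0x01U)
#define CHUNK_MAP_LARGE         ((size_t)0x02U)
#define CHUNK_MAP_STATE_MASK    ((size_t)0x3U)
#define CHUNK_MAP_DECOMMITTED   ((size_t)0x04U)
#define CHUNK_MAP_UNZEROED      ((size_t)0x08U)
#define CHUNK_MAP_DIRTY         ((size_t)0x10U)
#define CHUNK_MAP_FLAGS_MASK    ((size_t)0x1cU)
#define CHUNK_MAP_BININD_SHIFT  5
#define BININD_INVALID          ((size_t)0xffU)
#define CHUNK_MAP_BININD_MASK   (BININD_INVALID << CHUNK_MAP_BININD_SHIFT)
#define CHUNK_MAP_RUNIND_SHIFT  (CHUNK_MAP_BININD_SHIFT + 8)
#define CHUNK_MAP_SIZE_SHIFT    (CHUNK_MAP_RUNIND_SHIFT - LG_PAGE)
#define CHUNK_MAP_SIZE_MASK \
	(~(CHUNK_MAP_BININD_MASK | CHUNK_MAP_FLAGS_MASK | CHUNK_MAP_STATE_MASK))

/* Pack a page-aligned run size into the high bits of a map word. */
static size_t
mapbits_size_encode(size_t size)
{
	size_t mapbits;

#if CHUNK_MAP_SIZE_SHIFT > 0
	mapbits = size << CHUNK_MAP_SIZE_SHIFT;
#elif CHUNK_MAP_SIZE_SHIFT == 0
	mapbits = size;
#else
	mapbits = size >> -CHUNK_MAP_SIZE_SHIFT;
#endif
	/* The encoded size must not spill into the binind/flag/state bits. */
	assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0);
	return (mapbits);
}

/* Recover the run size from a map word, ignoring the low flag bits. */
static size_t
mapbits_size_decode(size_t mapbits)
{

#if CHUNK_MAP_SIZE_SHIFT > 0
	return ((mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT);
#elif CHUNK_MAP_SIZE_SHIFT == 0
	return (mapbits & CHUNK_MAP_SIZE_MASK);
#else
	return ((mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT);
#endif
}

int
main(void)
{
	size_t size = (size_t)8 << LG_PAGE;	/* An 8-page large run. */
	size_t bits = mapbits_size_encode(size) | CHUNK_MAP_BININD_INVALID |
	    CHUNK_MAP_DIRTY | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;

	/* The flag and binind bits do not disturb the encoded size. */
	assert(mapbits_size_decode(bits) == size);
	printf("size %zu -> mapbits %#zx -> size %zu\n", size, bits,
	    mapbits_size_decode(bits));
	return (0);
}

#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK	/* As in the diff. */
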
diff --git a/deps/jemalloc/include/jemalloc/internal/arena.h b/deps/jemalloc/include/jemalloc/internal/arena.h index 9d000c03d..12c617979 100644 --- a/deps/jemalloc/include/jemalloc/internal/arena.h +++ b/deps/jemalloc/include/jemalloc/internal/arena.h @@ -1,30 +1,10 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -/* - * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized - * as small as possible such that this setting is still honored, without - * violating other constraints. The goal is to make runs as small as possible - * without exceeding a per run external fragmentation threshold. - * - * We use binary fixed point math for overhead computations, where the binary - * point is implicitly RUN_BFP bits to the left. - * - * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be - * honored for some/all object sizes, since when heap profiling is enabled - * there is one pointer of header overhead per object (plus a constant). This - * constraint is relaxed (ignored) for runs that are so small that the - * per-region overhead is greater than: - * - * (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)) - */ -#define RUN_BFP 12 -/* \/ Implicit binary fixed point. */ -#define RUN_MAX_OVRHD 0x0000003dU -#define RUN_MAX_OVRHD_RELAX 0x00001800U +#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) /* Maximum number of regions in one run. */ -#define LG_RUN_MAXREGS 11 +#define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN) #define RUN_MAXREGS (1U << LG_RUN_MAXREGS) /* @@ -36,16 +16,18 @@ /* * The minimum ratio of active:dirty pages per arena is computed as: * - * (nactive >> opt_lg_dirty_mult) >= ndirty + * (nactive >> lg_dirty_mult) >= ndirty * - * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times - * as many active pages as dirty pages. + * So, supposing that lg_dirty_mult is 3, there can be no less than 8 times as + * many active pages as dirty pages. */ #define LG_DIRTY_MULT_DEFAULT 3 -typedef struct arena_chunk_map_s arena_chunk_map_t; -typedef struct arena_chunk_s arena_chunk_t; +typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; typedef struct arena_run_s arena_run_t; +typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; +typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; +typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; @@ -54,54 +36,34 @@ typedef struct arena_s arena_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS -/* Each element of the chunk map corresponds to one page within the chunk. */ -struct arena_chunk_map_s { -#ifndef JEMALLOC_PROF - /* - * Overlay prof_ctx in order to allow it to be referenced by dead code. - * Such antics aren't warranted for per arena data structures, but - * chunk map overhead accounts for a percentage of memory, rather than - * being just a fixed cost. - */ - union { -#endif - union { - /* - * Linkage for run trees. There are two disjoint uses: - * - * 1) arena_t's runs_avail tree. - * 2) arena_run_t conceptually uses this linkage for in-use - * non-full runs, rather than directly embedding linkage. - */ - rb_node(arena_chunk_map_t) rb_link; - /* - * List of runs currently in purgatory. 
arena_chunk_purge() - * temporarily allocates runs that contain dirty pages while - * purging, so that other threads cannot use the runs while the - * purging thread is operating without the arena lock held. - */ - ql_elm(arena_chunk_map_t) ql_link; - } u; +#ifdef JEMALLOC_ARENA_STRUCTS_A +struct arena_run_s { + /* Index of bin this run is associated with. */ + szind_t binind; - /* Profile counters, used for large object runs. */ - prof_ctx_t *prof_ctx; -#ifndef JEMALLOC_PROF - }; /* union { ... }; */ -#endif + /* Number of free regions in run. */ + unsigned nfree; + /* Per region allocated/deallocated bitmap. */ + bitmap_t bitmap[BITMAP_GROUPS_MAX]; +}; + +/* Each element of the chunk map corresponds to one page within the chunk. */ +struct arena_chunk_map_bits_s { /* * Run address (or size) and various flags are stored together. The bit * layout looks like (assuming 32-bit system): * - * ???????? ???????? ????nnnn nnnndula + * ???????? ???????? ???nnnnn nnndumla * * ? : Unallocated: Run address for first/last pages, unset for internal * pages. * Small: Run page offset. - * Large: Run size for first page, unset for trailing pages. + * Large: Run page count for first page, unset for trailing pages. * n : binind for small size class, BININD_INVALID for large size class. * d : dirty? * u : unzeroed? + * m : decommitted? * l : large? * a : allocated? * @@ -110,78 +72,109 @@ struct arena_chunk_map_s { * p : run page offset * s : run size * n : binind for size class; large objects set these to BININD_INVALID - * except for promoted allocations (see prof_promote) * x : don't care * - : 0 * + : 1 - * [DULA] : bit set - * [dula] : bit unset + * [DUMLA] : bit set + * [dumla] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss++++ ++++du-a - * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx - * ssssssss ssssssss ssss++++ ++++dU-a + * ssssssss ssssssss sss+++++ +++dum-a + * xxxxxxxx xxxxxxxx xxxxxxxx xxx-Uxxx + * ssssssss ssssssss sss+++++ +++dUm-a * * Unallocated (dirty): - * ssssssss ssssssss ssss++++ ++++D--a + * ssssssss ssssssss sss+++++ +++D-m-a * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * ssssssss ssssssss ssss++++ ++++D--a + * ssssssss ssssssss sss+++++ +++D-m-a * * Small: - * pppppppp pppppppp ppppnnnn nnnnd--A - * pppppppp pppppppp ppppnnnn nnnn---A - * pppppppp pppppppp ppppnnnn nnnnd--A + * pppppppp pppppppp pppnnnnn nnnd---A + * pppppppp pppppppp pppnnnnn nnn----A + * pppppppp pppppppp pppnnnnn nnnd---A * * Large: - * ssssssss ssssssss ssss++++ ++++D-LA + * ssssssss ssssssss sss+++++ +++D--LA * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ----++++ ++++D-LA + * -------- -------- ---+++++ +++D--LA * - * Large (sampled, size <= PAGE): - * ssssssss ssssssss ssssnnnn nnnnD-LA + * Large (sampled, size <= LARGE_MINCLASS): + * ssssssss ssssssss sssnnnnn nnnD--LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ---+++++ +++D--LA * - * Large (not sampled, size == PAGE): - * ssssssss ssssssss ssss++++ ++++D-LA + * Large (not sampled, size == LARGE_MINCLASS): + * ssssssss ssssssss sss+++++ +++D--LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ---+++++ +++D--LA */ size_t bits; -#define CHUNK_MAP_BININD_SHIFT 4 +#define CHUNK_MAP_ALLOCATED ((size_t)0x01U) +#define CHUNK_MAP_LARGE ((size_t)0x02U) +#define CHUNK_MAP_STATE_MASK ((size_t)0x3U) + +#define CHUNK_MAP_DECOMMITTED ((size_t)0x04U) +#define CHUNK_MAP_UNZEROED ((size_t)0x08U) +#define CHUNK_MAP_DIRTY ((size_t)0x10U) +#define CHUNK_MAP_FLAGS_MASK ((size_t)0x1cU) + +#define CHUNK_MAP_BININD_SHIFT 5 #define 
BININD_INVALID ((size_t)0xffU) -/* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */ -#define CHUNK_MAP_BININD_MASK ((size_t)0xff0U) +#define CHUNK_MAP_BININD_MASK (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) #define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK -#define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU) -#define CHUNK_MAP_DIRTY ((size_t)0x8U) -#define CHUNK_MAP_UNZEROED ((size_t)0x4U) -#define CHUNK_MAP_LARGE ((size_t)0x2U) -#define CHUNK_MAP_ALLOCATED ((size_t)0x1U) -#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED + +#define CHUNK_MAP_RUNIND_SHIFT (CHUNK_MAP_BININD_SHIFT + 8) +#define CHUNK_MAP_SIZE_SHIFT (CHUNK_MAP_RUNIND_SHIFT - LG_PAGE) +#define CHUNK_MAP_SIZE_MASK \ + (~(CHUNK_MAP_BININD_MASK | CHUNK_MAP_FLAGS_MASK | CHUNK_MAP_STATE_MASK)) }; -typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t; -typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; -typedef ql_head(arena_chunk_map_t) arena_chunk_mapelms_t; -/* Arena chunk header. */ -struct arena_chunk_s { - /* Arena that owns the chunk. */ - arena_t *arena; +struct arena_runs_dirty_link_s { + qr(arena_runs_dirty_link_t) rd_link; +}; - /* Linkage for tree of arena chunks that contain dirty runs. */ - rb_node(arena_chunk_t) dirty_link; +/* + * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just + * like arena_chunk_map_bits_t. Two separate arrays are stored within each + * chunk header in order to improve cache locality. + */ +struct arena_chunk_map_misc_s { + /* + * Linkage for run trees. There are two disjoint uses: + * + * 1) arena_t's runs_avail tree. + * 2) arena_run_t conceptually uses this linkage for in-use non-full + * runs, rather than directly embedding linkage. + */ + rb_node(arena_chunk_map_misc_t) rb_link; - /* Number of dirty pages. */ - size_t ndirty; + union { + /* Linkage for list of dirty runs. */ + arena_runs_dirty_link_t rd; - /* Number of available runs. */ - size_t nruns_avail; + /* Profile counters, used for large object runs. */ + union { + void *prof_tctx_pun; + prof_tctx_t *prof_tctx; + }; + /* Small region run metadata. */ + arena_run_t run; + }; +}; +typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t; +typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; +#endif /* JEMALLOC_ARENA_STRUCTS_A */ + +#ifdef JEMALLOC_ARENA_STRUCTS_B +/* Arena chunk header. */ +struct arena_chunk_s { /* - * Number of available run adjacencies that purging could coalesce. - * Clean and dirty available runs are not coalesced, which causes - * virtual memory fragmentation. The ratio of - * (nruns_avail-nruns_adjac):nruns_adjac is used for tracking this - * fragmentation. + * A pointer to the arena that owns the chunk is stored within the node. + * This field as a whole is used by chunks_rtree to support both + * ivsalloc() and core-based debugging. */ - size_t nruns_adjac; + extent_node_t node; /* * Map of pages within chunk that keeps track of free/large/small. The @@ -189,19 +182,7 @@ struct arena_chunk_s { * need to be tracked in the map. This omission saves a header page * for common chunk sizes (e.g. 4 MiB). */ - arena_chunk_map_t map[1]; /* Dynamically sized. */ -}; -typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; - -struct arena_run_s { - /* Bin this run is associated with. */ - arena_bin_t *bin; - - /* Index of next region that has never been allocated, or nregs. */ - uint32_t nextind; - - /* Number of free regions in run. */ - unsigned nfree; + arena_chunk_map_bits_t map_bits[1]; /* Dynamically sized. 
*/ }; /* @@ -212,12 +193,7 @@ struct arena_run_s { * Each run has the following layout: * * /--------------------\ - * | arena_run_t header | - * | ... | - * bitmap_offset | bitmap | - * | ... | - * ctx0_offset | ctx map | - * | ... | + * | pad? | * |--------------------| * | redzone | * reg0_offset | region 0 | @@ -259,23 +235,11 @@ struct arena_bin_info_s { uint32_t nregs; /* - * Offset of first bitmap_t element in a run header for this bin's size - * class. - */ - uint32_t bitmap_offset; - - /* * Metadata used to manipulate bitmaps for runs associated with this * bin. */ bitmap_info_t bitmap_info; - /* - * Offset of first (prof_ctx_t *) in a run header for this bin's size - * class, or 0 if (config_prof == false || opt_prof == false). - */ - uint32_t ctx0_offset; - /* Offset of first region in a run for this bin's size class. */ uint32_t reg0_offset; }; @@ -321,8 +285,7 @@ struct arena_s { /* * There are three classes of arena operations from a locking * perspective: - * 1) Thread asssignment (modifies nthreads) is protected by - * arenas_lock. + * 1) Thread assignment (modifies nthreads) is protected by arenas_lock. * 2) Bin-related operations are protected by bin locks. * 3) Chunk- and run-related operations are protected by this mutex. */ @@ -331,16 +294,20 @@ struct arena_s { arena_stats_t stats; /* * List of tcaches for extant threads associated with this arena. - * Stats from these are merged incrementally, and at exit. + * Stats from these are merged incrementally, and at exit if + * opt_stats_print is enabled. */ ql_head(tcache_t) tcache_ql; uint64_t prof_accumbytes; - dss_prec_t dss_prec; + /* + * PRNG state for cache index randomization of large allocation base + * pointers. + */ + uint64_t offset_state; - /* Tree of dirty-page-containing chunks this arena manages. */ - arena_chunk_tree_t chunks_dirty; + dss_prec_t dss_prec; /* * In order to avoid rapid chunk allocation/deallocation when an arena @@ -354,7 +321,13 @@ struct arena_s { */ arena_chunk_t *spare; - /* Number of pages in active runs. */ + /* Minimum ratio (log base 2) of nactive:ndirty. */ + ssize_t lg_dirty_mult; + + /* True if a thread is currently executing arena_purge(). */ + bool purging; + + /* Number of pages in active runs and huge regions. */ size_t nactive; /* @@ -366,44 +339,116 @@ struct arena_s { size_t ndirty; /* - * Approximate number of pages being purged. It is possible for - * multiple threads to purge dirty pages concurrently, and they use - * npurgatory to indicate the total number of pages all threads are - * attempting to purge. + * Size/address-ordered tree of this arena's available runs. The tree + * is used for first-best-fit run allocation. */ - size_t npurgatory; + arena_avail_tree_t runs_avail; /* - * Size/address-ordered trees of this arena's available runs. The trees - * are used for first-best-fit run allocation. + * Unused dirty memory this arena manages. Dirty memory is conceptually + * tracked as an arbitrarily interleaved LRU of dirty runs and cached + * chunks, but the list linkage is actually semi-duplicated in order to + * avoid extra arena_chunk_map_misc_t space overhead. + * + * LRU-----------------------------------------------------------MRU + * + * /-- arena ---\ + * | | + * | | + * |------------| /- chunk -\ + * ...->|chunks_cache|<--------------------------->| /----\ |<--... 
+ * |------------| | |node| | + * | | | | | | + * | | /- run -\ /- run -\ | | | | + * | | | | | | | | | | + * | | | | | | | | | | + * |------------| |-------| |-------| | |----| | + * ...->|runs_dirty |<-->|rd |<-->|rd |<---->|rd |<----... + * |------------| |-------| |-------| | |----| | + * | | | | | | | | | | + * | | | | | | | \----/ | + * | | \-------/ \-------/ | | + * | | | | + * | | | | + * \------------/ \---------/ */ - arena_avail_tree_t runs_avail; + arena_runs_dirty_link_t runs_dirty; + extent_node_t chunks_cache; + + /* Extant huge allocations. */ + ql_head(extent_node_t) huge; + /* Synchronizes all huge allocation/update/deallocation. */ + malloc_mutex_t huge_mtx; + + /* + * Trees of chunks that were previously allocated (trees differ only in + * node ordering). These are used when allocating chunks, in an attempt + * to re-use address space. Depending on function, different tree + * orderings are needed, which is why there are two trees with the same + * contents. + */ + extent_tree_t chunks_szad_cached; + extent_tree_t chunks_ad_cached; + extent_tree_t chunks_szad_retained; + extent_tree_t chunks_ad_retained; + + malloc_mutex_t chunks_mtx; + /* Cache of nodes that were allocated via base_alloc(). */ + ql_head(extent_node_t) node_cache; + malloc_mutex_t node_cache_mtx; + + /* User-configurable chunk hook functions. */ + chunk_hooks_t chunk_hooks; /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; }; +#endif /* JEMALLOC_ARENA_STRUCTS_B */ #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern ssize_t opt_lg_dirty_mult; -/* - * small_size2bin is a compact lookup table that rounds request sizes up to - * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via the SMALL_SIZE2BIN macro. - */ -extern uint8_t const small_size2bin[]; -#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) +static const size_t large_pad = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + PAGE +#else + 0 +#endif + ; -extern arena_bin_info_t arena_bin_info[NBINS]; +extern ssize_t opt_lg_dirty_mult; -/* Number of large size classes. */ -#define nlclasses (chunk_npages - map_bias) +extern arena_bin_info_t arena_bin_info[NBINS]; +extern size_t map_bias; /* Number of arena chunk header pages. */ +extern size_t map_misc_offset; +extern size_t arena_maxrun; /* Max run size for arenas. */ +extern size_t large_maxclass; /* Max large size class. */ +extern unsigned nlclasses; /* Number of large size classes. */ +extern unsigned nhclasses; /* Number of huge size classes. 
*/ + +void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, + bool cache); +void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, + bool cache); +extent_node_t *arena_node_alloc(arena_t *arena); +void arena_node_dalloc(arena_t *arena, extent_node_t *node); +void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, + bool *zero); +void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize); +void arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, + size_t oldsize, size_t usize); +void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, + size_t oldsize, size_t usize); +bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, + size_t oldsize, size_t usize, bool *zero); +ssize_t arena_lg_dirty_mult_get(arena_t *arena); +bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); +void arena_maybe_purge(arena_t *arena); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - size_t binind, uint64_t prof_accumbytes); + szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET @@ -418,19 +463,22 @@ void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void arena_quarantine_junk_small(void *ptr, size_t usize); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); +void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache); void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_t *mapelm); +void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr, arena_chunk_map_bits_t *bitselm); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind, arena_chunk_map_t *mapelm); + size_t pageind, arena_chunk_map_bits_t *bitselm); void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind); #ifdef JEMALLOC_JET typedef void (arena_dalloc_junk_large_t)(void *, size_t); extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; +#else +void arena_dalloc_junk_large(void *ptr, size_t usize); #endif -void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, +void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); #ifdef JEMALLOC_JET @@ -439,16 +487,18 @@ extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); -void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, bool try_tcache_alloc, - bool try_tcache_dalloc); +void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, + size_t size, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(arena_t *arena); -void arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); -void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive, - size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats); -bool arena_new(arena_t *arena, unsigned ind); -void arena_boot(void); 
+bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +ssize_t arena_lg_dirty_mult_default_get(void); +bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); +void arena_stats_merge(arena_t *arena, const char **dss, + ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); +arena_t *arena_new(unsigned ind); +bool arena_boot(void); void arena_prefork(arena_t *arena); void arena_postfork_parent(arena_t *arena); void arena_postfork_child(arena_t *arena); @@ -458,64 +508,138 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); +arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, + size_t pageind); +arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, + size_t pageind); +size_t arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm); +void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); +arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); +arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbitsp_read(size_t *mapbitsp); size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_size_decode(size_t mapbits); size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); +size_t arena_mapbits_size_encode(size_t size); void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, size_t size); +void arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, + size_t flags); void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind); + szind_t binind); void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, - size_t runind, size_t binind, size_t flags); -void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, - size_t unzeroed); + size_t runind, szind_t binind, size_t flags); +void arena_metadata_allocated_add(arena_t *arena, size_t size); +void arena_metadata_allocated_sub(arena_t *arena, size_t size); +size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); -size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); -size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +szind_t 
arena_ptr_small_binind_get(const void *ptr, size_t mapbits); +szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); -prof_ctx_t *arena_prof_ctx_get(const void *ptr); -void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); -void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); +prof_tctx_t *arena_prof_tctx_get(const void *ptr); +void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); +void arena_prof_tctx_reset(const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *old_tctx); +void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache); +arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(const void *ptr, bool demote); -void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, - bool try_tcache); +void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_ALWAYS_INLINE arena_chunk_map_t * -arena_mapp_get(arena_chunk_t *chunk, size_t pageind) +JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * +arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) { assert(pageind >= map_bias); assert(pageind < chunk_npages); - return (&chunk->map[pageind-map_bias]); + return (&chunk->map_bits[pageind-map_bias]); +} + +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) +{ + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return ((arena_chunk_map_misc_t *)((uintptr_t)chunk + + (uintptr_t)map_misc_offset) + pageind-map_bias); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + + map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias; + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return (pageind); +} + +JEMALLOC_ALWAYS_INLINE void * +arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + size_t pageind = arena_miscelm_to_pageind(miscelm); + + return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))); +} + +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) +{ + arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t + *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd)); + + assert(arena_miscelm_to_pageind(miscelm) >= map_bias); + assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); + + return (miscelm); +} + +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_run_to_miscelm(arena_run_t *run) +{ + arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t + *)((uintptr_t)run - offsetof(arena_chunk_map_misc_t, run)); + + assert(arena_miscelm_to_pageind(miscelm) >= map_bias); + assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); + + return (miscelm); } JEMALLOC_ALWAYS_INLINE size_t * arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) { - return (&arena_mapp_get(chunk, pageind)->bits); + return (&arena_bitselm_get(chunk, pageind)->bits); } JEMALLOC_ALWAYS_INLINE size_t @@ -533,13 +657,29 @@ arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t 
+arena_mapbits_size_decode(size_t mapbits) +{ + size_t size; + +#if CHUNK_MAP_SIZE_SHIFT > 0 + size = (mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT; +#elif CHUNK_MAP_SIZE_SHIFT == 0 + size = mapbits & CHUNK_MAP_SIZE_MASK; +#else + size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT; +#endif + + return (size); +} + +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - return (mapbits & ~PAGE_MASK); + return (arena_mapbits_size_decode(mapbits)); } JEMALLOC_ALWAYS_INLINE size_t @@ -550,7 +690,7 @@ arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); - return (mapbits & ~PAGE_MASK); + return (arena_mapbits_size_decode(mapbits)); } JEMALLOC_ALWAYS_INLINE size_t @@ -561,14 +701,14 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == CHUNK_MAP_ALLOCATED); - return (mapbits >> LG_PAGE); + return (mapbits >> CHUNK_MAP_RUNIND_SHIFT); } -JEMALLOC_ALWAYS_INLINE size_t +JEMALLOC_ALWAYS_INLINE szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; - size_t binind; + szind_t binind; mapbits = arena_mapbits_get(chunk, pageind); binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -582,6 +722,8 @@ arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) size_t mapbits; mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); return (mapbits & CHUNK_MAP_DIRTY); } @@ -591,10 +733,23 @@ arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) size_t mapbits; mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); return (mapbits & CHUNK_MAP_UNZEROED); } JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); + return (mapbits & CHUNK_MAP_DECOMMITTED); +} + +JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -619,6 +774,23 @@ arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits) *mapbitsp = mapbits; } +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_size_encode(size_t size) +{ + size_t mapbits; + +#if CHUNK_MAP_SIZE_SHIFT > 0 + mapbits = size << CHUNK_MAP_SIZE_SHIFT; +#elif CHUNK_MAP_SIZE_SHIFT == 0 + mapbits = size; +#else + mapbits = size >> -CHUNK_MAP_SIZE_SHIFT; +#endif + + assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0); + return (mapbits); +} + JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) @@ -626,9 +798,11 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); - assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); - assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags); - arena_mapbitsp_write(mapbitsp, 
size | CHUNK_MAP_BININD_INVALID | flags); + assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); + assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | + CHUNK_MAP_BININD_INVALID | flags); } JEMALLOC_ALWAYS_INLINE void @@ -640,7 +814,17 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, assert((size & PAGE_MASK) == 0); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - arena_mapbitsp_write(mapbitsp, size | (mapbits & PAGE_MASK)); + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | + (mapbits & ~CHUNK_MAP_SIZE_MASK)); +} + +JEMALLOC_ALWAYS_INLINE void +arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags) +{ + size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + + assert((flags & CHUNK_MAP_UNZEROED) == flags); + arena_mapbitsp_write(mapbitsp, flags); } JEMALLOC_ALWAYS_INLINE void @@ -648,54 +832,62 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - size_t mapbits = arena_mapbitsp_read(mapbitsp); - size_t unzeroed; assert((size & PAGE_MASK) == 0); - assert((flags & CHUNK_MAP_DIRTY) == flags); - unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ - arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags - | unzeroed | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED); + assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); + assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & + (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | + CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - size_t binind) + szind_t binind) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert(binind <= BININD_INVALID); - assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); + assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS + + large_pad); arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) | (binind << CHUNK_MAP_BININD_SHIFT)); } JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, - size_t binind, size_t flags) + szind_t binind, size_t flags) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - size_t mapbits = arena_mapbitsp_read(mapbitsp); - size_t unzeroed; assert(binind < BININD_INVALID); assert(pageind - runind >= map_bias); - assert((flags & CHUNK_MAP_DIRTY) == flags); - unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. 
*/ - arena_mapbitsp_write(mapbitsp, (runind << LG_PAGE) | (binind << - CHUNK_MAP_BININD_SHIFT) | flags | unzeroed | CHUNK_MAP_ALLOCATED); + assert((flags & CHUNK_MAP_UNZEROED) == flags); + arena_mapbitsp_write(mapbitsp, (runind << CHUNK_MAP_RUNIND_SHIFT) | + (binind << CHUNK_MAP_BININD_SHIFT) | flags | CHUNK_MAP_ALLOCATED); } -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, - size_t unzeroed) +JEMALLOC_INLINE void +arena_metadata_allocated_add(arena_t *arena, size_t size) +{ + + atomic_add_z(&arena->stats.metadata_allocated, size); +} + +JEMALLOC_INLINE void +arena_metadata_allocated_sub(arena_t *arena, size_t size) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - size_t mapbits = arena_mapbitsp_read(mapbitsp); - arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_UNZEROED) | - unzeroed); + atomic_sub_z(&arena->stats.metadata_allocated, size); +} + +JEMALLOC_INLINE size_t +arena_metadata_allocated_get(arena_t *arena) +{ + + return (atomic_read_z(&arena->stats.metadata_allocated)); } JEMALLOC_INLINE bool @@ -719,7 +911,7 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) cassert(config_prof); - if (prof_interval == 0) + if (likely(prof_interval == 0)) return (false); return (arena_prof_accum_impl(arena, accumbytes)); } @@ -730,7 +922,7 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) cassert(config_prof); - if (prof_interval == 0) + if (likely(prof_interval == 0)) return (false); { @@ -743,10 +935,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) } } -JEMALLOC_ALWAYS_INLINE size_t +JEMALLOC_ALWAYS_INLINE szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { - size_t binind; + szind_t binind; binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -755,27 +947,34 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) arena_t *arena; size_t pageind; size_t actual_mapbits; + size_t rpages_ind; arena_run_t *run; arena_bin_t *bin; - size_t actual_binind; + szind_t run_binind, actual_binind; arena_bin_info_t *bin_info; + arena_chunk_map_misc_t *miscelm; + void *rpages; assert(binind != BININD_INVALID); assert(binind < NBINS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; + arena = extent_node_arena_get(&chunk->node); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; actual_mapbits = arena_mapbits_get(chunk, pageind); assert(mapbits == actual_mapbits); assert(arena_mapbits_large_get(chunk, pageind) == 0); assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (actual_mapbits >> LG_PAGE)) << LG_PAGE)); - bin = run->bin; + rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, + pageind); + miscelm = arena_miscelm_get(chunk, rpages_ind); + run = &miscelm->run; + run_binind = run->binind; + bin = &arena->bins[run_binind]; actual_binind = bin - arena->bins; - assert(binind == actual_binind); + assert(run_binind == actual_binind); bin_info = &arena_bin_info[actual_binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + + rpages = arena_miscelm_to_rpages(miscelm); + assert(((uintptr_t)ptr - ((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval == 0); } @@ -785,10 +984,10 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) # endif /* JEMALLOC_ARENA_INLINE_A */ # ifdef JEMALLOC_ARENA_INLINE_B -JEMALLOC_INLINE size_t +JEMALLOC_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { - size_t binind = bin - arena->bins; + 
szind_t binind = bin - arena->bins; assert(binind < NBINS); return (binind); } @@ -798,24 +997,26 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) { unsigned shift, diff, regind; size_t interval; + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); + void *rpages = arena_miscelm_to_rpages(miscelm); /* * Freeing a pointer lower than region zero can cause assertion * failure. */ - assert((uintptr_t)ptr >= (uintptr_t)run + + assert((uintptr_t)ptr >= (uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset); /* * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - + diff = (unsigned)((uintptr_t)ptr - (uintptr_t)rpages - bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ interval = bin_info->reg_interval; - shift = ffs(interval) - 1; + shift = jemalloc_ffs(interval) - 1; diff >>= shift; interval >>= shift; @@ -850,8 +1051,8 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) }; - if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) + - 2)) { + if (likely(interval <= ((sizeof(interval_invs) / + sizeof(unsigned)) + 2))) { regind = (diff * interval_invs[interval - 3]) >> SIZE_INV_SHIFT; } else @@ -865,113 +1066,138 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) return (regind); } -JEMALLOC_INLINE prof_ctx_t * -arena_prof_ctx_get(const void *ptr) +JEMALLOC_INLINE prof_tctx_t * +arena_prof_tctx_get(const void *ptr) { - prof_ctx_t *ret; + prof_tctx_t *ret; arena_chunk_t *chunk; - size_t pageind, mapbits; cassert(config_prof); assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - if (prof_promote) - ret = (prof_ctx_t *)(uintptr_t)1U; + if (likely(chunk != ptr)) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) + ret = (prof_tctx_t *)(uintptr_t)1U; else { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << - LG_PAGE)); - size_t binind = arena_ptr_small_binind_get(ptr, - mapbits); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind; - - regind = arena_run_regind(run, bin_info, ptr); - ret = *(prof_ctx_t **)((uintptr_t)run + - bin_info->ctx0_offset + (regind * - sizeof(prof_ctx_t *))); + arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk, + pageind); + ret = atomic_read_p(&elm->prof_tctx_pun); } } else - ret = arena_mapp_get(chunk, pageind)->prof_ctx; + ret = huge_prof_tctx_get(ptr); return (ret); } JEMALLOC_INLINE void -arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) +arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) { arena_chunk_t *chunk; - size_t pageind; cassert(config_prof); assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - - if (usize > 
SMALL_MAXCLASS || (prof_promote && - ((uintptr_t)ctx != (uintptr_t)1U || arena_mapbits_large_get(chunk, - pageind) != 0))) { - assert(arena_mapbits_large_get(chunk, pageind) != 0); - arena_mapp_get(chunk, pageind)->prof_ctx = ctx; - } else { - assert(arena_mapbits_large_get(chunk, pageind) == 0); - if (prof_promote == false) { - size_t mapbits = arena_mapbits_get(chunk, pageind); - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << - LG_PAGE)); - size_t binind; - arena_bin_info_t *bin_info; - unsigned regind; - - binind = arena_ptr_small_binind_get(ptr, mapbits); - bin_info = &arena_bin_info[binind]; - regind = arena_run_regind(run, bin_info, ptr); - - *((prof_ctx_t **)((uintptr_t)run + - bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t - *)))) = ctx; + if (likely(chunk != ptr)) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + + if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx > + (uintptr_t)1U)) { + arena_chunk_map_misc_t *elm; + + assert(arena_mapbits_large_get(chunk, pageind) != 0); + + elm = arena_miscelm_get(chunk, pageind); + atomic_write_p(&elm->prof_tctx_pun, tctx); + } else { + /* + * tctx must always be initialized for large runs. + * Assert that the surrounding conditional logic is + * equivalent to checking whether ptr refers to a large + * run. + */ + assert(arena_mapbits_large_get(chunk, pageind) == 0); } + } else + huge_prof_tctx_set(ptr, tctx); +} + +JEMALLOC_INLINE void +arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *old_tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr && + (uintptr_t)old_tctx > (uintptr_t)1U))) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + size_t pageind; + arena_chunk_map_misc_t *elm; + + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> + LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != + 0); + assert(arena_mapbits_large_get(chunk, pageind) != 0); + + elm = arena_miscelm_get(chunk, pageind); + atomic_write_p(&elm->prof_tctx_pun, + (prof_tctx_t *)(uintptr_t)1U); + } else + huge_prof_tctx_reset(ptr); } } JEMALLOC_ALWAYS_INLINE void * -arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) +arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache) { - tcache_t *tcache; assert(size != 0); - assert(size <= arena_maxclass); - if (size <= SMALL_MAXCLASS) { - if (try_tcache && (tcache = tcache_get(true)) != NULL) - return (tcache_alloc_small(tcache, size, zero)); - else { - return (arena_malloc_small(choose_arena(arena), size, + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + if (likely(size <= SMALL_MAXCLASS)) { + if (likely(tcache != NULL)) { + return (tcache_alloc_small(tsd, arena, tcache, size, zero)); - } - } else { + } else + return (arena_malloc_small(arena, size, zero)); + } else if (likely(size <= large_maxclass)) { /* * Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. 
*/ - if (try_tcache && size <= tcache_maxclass && (tcache = - tcache_get(true)) != NULL) - return (tcache_alloc_large(tcache, size, zero)); - else { - return (arena_malloc_large(choose_arena(arena), size, + if (likely(tcache != NULL) && size <= tcache_maxclass) { + return (tcache_alloc_large(tsd, arena, tcache, size, zero)); - } - } + } else + return (arena_malloc_large(arena, size, zero)); + } else + return (huge_malloc(tsd, arena, size, zero, tcache)); +} + +JEMALLOC_ALWAYS_INLINE arena_t * +arena_aalloc(const void *ptr) +{ + arena_chunk_t *chunk; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) + return (extent_node_arena_get(&chunk->node)); + else + return (huge_aalloc(ptr)); } /* Return the size of the allocation pointed to by ptr. */ @@ -980,81 +1206,139 @@ arena_salloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; - size_t pageind, binind; + size_t pageind; + szind_t binind; assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - binind = arena_mapbits_binind_get(chunk, pageind); - if (binind == BININD_INVALID || (config_prof && demote == false && - prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) { - /* - * Large allocation. In the common case (demote == true), and - * as this is an inline function, most callers will only end up - * looking at binind to determine that ptr is a small - * allocation. - */ - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = arena_mapbits_large_size_get(chunk, pageind); - assert(ret != 0); - assert(pageind + (ret>>LG_PAGE) <= chunk_npages); - assert(ret == PAGE || arena_mapbits_large_size_get(chunk, - pageind+(ret>>LG_PAGE)-1) == 0); - assert(binind == arena_mapbits_binind_get(chunk, - pageind+(ret>>LG_PAGE)-1)); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); - } else { - /* - * Small allocation (possibly promoted to a large object due to - * prof_promote). - */ - assert(arena_mapbits_large_get(chunk, pageind) != 0 || - arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, - pageind)) == binind); - ret = arena_bin_info[binind].reg_size; - } + if (likely(chunk != ptr)) { + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + binind = arena_mapbits_binind_get(chunk, pageind); + if (unlikely(binind == BININD_INVALID || (config_prof && !demote + && arena_mapbits_large_get(chunk, pageind) != 0))) { + /* + * Large allocation. In the common case (demote), and + * as this is an inline function, most callers will only + * end up looking at binind to determine that ptr is a + * small allocation. + */ + assert(config_cache_oblivious || ((uintptr_t)ptr & + PAGE_MASK) == 0); + ret = arena_mapbits_large_size_get(chunk, pageind) - + large_pad; + assert(ret != 0); + assert(pageind + ((ret+large_pad)>>LG_PAGE) <= + chunk_npages); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, + pageind+((ret+large_pad)>>LG_PAGE)-1)); + } else { + /* + * Small allocation (possibly promoted to a large + * object). 
+ */ + assert(arena_mapbits_large_get(chunk, pageind) != 0 || + arena_ptr_small_binind_get(ptr, + arena_mapbits_get(chunk, pageind)) == binind); + ret = index2size(binind); + } + } else + ret = huge_salloc(ptr); return (ret); } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) +arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { + arena_chunk_t *chunk; size_t pageind, mapbits; - tcache_t *tcache; - assert(arena != NULL); - assert(chunk->arena == arena); assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - /* Small allocation. */ - if (try_tcache && (tcache = tcache_get(false)) != NULL) { - size_t binind; - - binind = arena_ptr_small_binind_get(ptr, mapbits); - tcache_dalloc_small(tcache, ptr, binind); - } else - arena_dalloc_small(arena, chunk, ptr, pageind); - } else { - size_t size = arena_mapbits_large_size_get(chunk, pageind); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = arena_mapbits_get(chunk, pageind); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { + /* Small allocation. */ + if (likely(tcache != NULL)) { + szind_t binind = arena_ptr_small_binind_get(ptr, + mapbits); + tcache_dalloc_small(tsd, tcache, ptr, binind); + } else { + arena_dalloc_small(extent_node_arena_get( + &chunk->node), chunk, ptr, pageind); + } + } else { + size_t size = arena_mapbits_large_size_get(chunk, + pageind); + + assert(config_cache_oblivious || ((uintptr_t)ptr & + PAGE_MASK) == 0); + + if (likely(tcache != NULL) && size - large_pad <= + tcache_maxclass) { + tcache_dalloc_large(tsd, tcache, ptr, size - + large_pad); + } else { + arena_dalloc_large(extent_node_arena_get( + &chunk->node), chunk, ptr); + } + } + } else + huge_dalloc(tsd, ptr, tcache); +} - assert(((uintptr_t)ptr & PAGE_MASK) == 0); +JEMALLOC_ALWAYS_INLINE void +arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +{ + arena_chunk_t *chunk; - if (try_tcache && size <= tcache_maxclass && (tcache = - tcache_get(false)) != NULL) { - tcache_dalloc_large(tcache, ptr, size); - } else - arena_dalloc_large(arena, chunk, ptr); - } + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + if (config_prof && opt_prof) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> + LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if (arena_mapbits_large_get(chunk, pageind) != 0) { + /* + * Make sure to use promoted size, not request + * size. + */ + size = arena_mapbits_large_size_get(chunk, + pageind) - large_pad; + } + } + assert(s2u(size) == s2u(arena_salloc(ptr, false))); + + if (likely(size <= SMALL_MAXCLASS)) { + /* Small allocation. 
*/ + if (likely(tcache != NULL)) { + szind_t binind = size2index(size); + tcache_dalloc_small(tsd, tcache, ptr, binind); + } else { + size_t pageind = ((uintptr_t)ptr - + (uintptr_t)chunk) >> LG_PAGE; + arena_dalloc_small(extent_node_arena_get( + &chunk->node), chunk, ptr, pageind); + } + } else { + assert(config_cache_oblivious || ((uintptr_t)ptr & + PAGE_MASK) == 0); + + if (likely(tcache != NULL) && size <= tcache_maxclass) + tcache_dalloc_large(tsd, tcache, ptr, size); + else { + arena_dalloc_large(extent_node_arena_get( + &chunk->node), chunk, ptr); + } + } + } else + huge_dalloc(tsd, ptr, tcache); } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/deps/jemalloc/include/jemalloc/internal/atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic.h index 11a7b47fe..a9aad35d1 100644 --- a/deps/jemalloc/include/jemalloc/internal/atomic.h +++ b/deps/jemalloc/include/jemalloc/internal/atomic.h @@ -11,6 +11,7 @@ #define atomic_read_uint64(p) atomic_add_uint64(p, 0) #define atomic_read_uint32(p) atomic_add_uint32(p, 0) +#define atomic_read_p(p) atomic_add_p(p, NULL) #define atomic_read_z(p) atomic_add_z(p, 0) #define atomic_read_u(p) atomic_add_u(p, 0) @@ -18,113 +19,244 @@ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +/* + * All arithmetic functions return the arithmetic result of the atomic + * operation. Some atomic operation APIs return the value prior to mutation, in + * which case the following functions must redundantly compute the result so + * that it can be returned. These functions are normally inlined, so the extra + * operations can be optimized away if the return values aren't used by the + * callers. + * + * <t> atomic_read_<t>(<t> *p) { return (*p); } + * <t> atomic_add_<t>(<t> *p, <t> x) { return (*p + x); } + * <t> atomic_sub_<t>(<t> *p, <t> x) { return (*p - x); } + * bool atomic_cas_<t>(<t> *p, <t> c, <t> s) + * { + * if (*p != c) + * return (true); + * *p = s; + * return (false); + * } + * void atomic_write_<t>(<t> *p, <t> x) { *p = x; } + */ + #ifndef JEMALLOC_ENABLE_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); +bool atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s); +void atomic_write_uint64(uint64_t *p, uint64_t x); uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); +bool atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s); +void atomic_write_uint32(uint32_t *p, uint32_t x); +void *atomic_add_p(void **p, void *x); +void *atomic_sub_p(void **p, void *x); +bool atomic_cas_p(void **p, void *c, void *s); +void atomic_write_p(void **p, const void *x); size_t atomic_add_z(size_t *p, size_t x); size_t atomic_sub_z(size_t *p, size_t x); +bool atomic_cas_z(size_t *p, size_t c, size_t s); +void atomic_write_z(size_t *p, size_t x); unsigned atomic_add_u(unsigned *p, unsigned x); unsigned atomic_sub_u(unsigned *p, unsigned x); +bool atomic_cas_u(unsigned *p, unsigned c, unsigned s); +void atomic_write_u(unsigned *p, unsigned x); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. 
*/ #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +# if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { + uint64_t t = x; - return (__sync_add_and_fetch(p, x)); + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); } JEMALLOC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) { + uint64_t t; - return (__sync_sub_and_fetch(p, x)); + x = (uint64_t)(-(int64_t)x); + t = x; + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); } -#elif (defined(_MSC_VER)) + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + uint8_t success; + + asm volatile ( + "lock; cmpxchgq %4, %0;" + "sete %1;" + : "=m" (*p), "=a" (success) /* Outputs. */ + : "m" (*p), "a" (c), "r" (s) /* Inputs. */ + : "memory" /* Clobbers. */ + ); + + return (!(bool)success); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + asm volatile ( + "xchgq %1, %0;" /* Lock is implied by xchgq. */ + : "=m" (*p), "+r" (x) /* Outputs. */ + : "m" (*p) /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +# elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { - - return (InterlockedExchangeAdd64(p, x)); + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (atomic_fetch_add(a, x) + x); } JEMALLOC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) { + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (atomic_fetch_sub(a, x) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + return (!atomic_compare_exchange_strong(a, &c, s)); +} - return (InterlockedExchangeAdd64(p, -((int64_t)x))); +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; + atomic_store(a, x); } -#elif (defined(JEMALLOC_OSATOMIC)) +# elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { - return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); + /* + * atomic_fetchadd_64() doesn't exist, but we only ever use this + * function on LP64 systems, so atomic_fetchadd_long() will do. + */ + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)x) + x); } JEMALLOC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) { - return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s)); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + atomic_store_rel_long(p, x); } -# elif (defined(__amd64__) || defined(__x86_64__)) +# elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. 
*/ - ); - - return (x); + return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); } JEMALLOC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) { - x = (uint64_t)(-(int64_t)x); - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); + return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); +} + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ - return (x); + return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p)); } -# elif (defined(JEMALLOC_ATOMIC9)) + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + uint64_t o; + + /*The documented OSAtomic*() API does not expose an atomic exchange. */ + do { + o = atomic_read_uint64(p); + } while (atomic_cas_uint64(p, o, x)); +} +# elif (defined(_MSC_VER)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { - /* - * atomic_fetchadd_64() doesn't exist, but we only ever use this - * function on LP64 systems, so atomic_fetchadd_long() will do. - */ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (atomic_fetchadd_long(p, (unsigned long)x) + x); + return (InterlockedExchangeAdd64(p, x) + x); } JEMALLOC_INLINE uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x) { - assert(sizeof(uint64_t) == sizeof(unsigned long)); + return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); +} - return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + uint64_t o; + + o = InterlockedCompareExchange64(p, s, c); + return (o != c); } -# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + InterlockedExchange64(p, x); +} +# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ + defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -138,6 +270,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (__sync_sub_and_fetch(p, x)); } + +JEMALLOC_INLINE bool +atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +{ + + return (!__sync_bool_compare_and_swap(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint64(uint64_t *p, uint64_t x) +{ + + __sync_lock_test_and_set(p, x); +} # else # error "Missing implementation for 64-bit atomic operations" # endif @@ -145,90 +291,184 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) /******************************************************************************/ /* 32-bit operations. */ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { + uint32_t t = x; - return (__sync_add_and_fetch(p, x)); + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); } JEMALLOC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) { + uint32_t t; - return (__sync_sub_and_fetch(p, x)); + x = (uint32_t)(-(int32_t)x); + t = x; + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (t), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (t + x); } -#elif (defined(_MSC_VER)) + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + uint8_t success; + + asm volatile ( + "lock; cmpxchgl %4, %0;" + "sete %1;" + : "=m" (*p), "=a" (success) /* Outputs. */ + : "m" (*p), "a" (c), "r" (s) /* Inputs. 
*/ + : "memory" + ); + + return (!(bool)success); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + asm volatile ( + "xchgl %1, %0;" /* Lock is implied by xchgl. */ + : "=m" (*p), "+r" (x) /* Outputs. */ + : "m" (*p) /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +# elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { - - return (InterlockedExchangeAdd(p, x)); + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (atomic_fetch_add(a, x) + x); } JEMALLOC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) { + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (atomic_fetch_sub(a, x) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + return (!atomic_compare_exchange_strong(a, &c, s)); +} - return (InterlockedExchangeAdd(p, -((int32_t)x))); +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; + atomic_store(a, x); } -#elif (defined(JEMALLOC_OSATOMIC)) +#elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { - return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); + return (atomic_fetchadd_32(p, x) + x); } JEMALLOC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) { - return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); + return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!atomic_cmpset_32(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + atomic_store_rel_32(p, x); } -#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +#elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); + return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); } JEMALLOC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) { - x = (uint32_t)(-(int32_t)x); - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); + return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); +} - return (x); +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p)); } -#elif (defined(JEMALLOC_ATOMIC9)) + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + uint32_t o; + + /*The documented OSAtomic*() API does not expose an atomic exchange. 
*/ + do { + o = atomic_read_uint32(p); + } while (atomic_cas_uint32(p, o, x)); +} +#elif (defined(_MSC_VER)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { - return (atomic_fetchadd_32(p, x) + x); + return (InterlockedExchangeAdd(p, x) + x); } JEMALLOC_INLINE uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x) { - return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); + return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); +} + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + uint32_t o; + + o = InterlockedCompareExchange(p, s, c); + return (o != c); } -#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + InterlockedExchange(p, x); +} +#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ + defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { @@ -242,11 +482,73 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (__sync_sub_and_fetch(p, x)); } + +JEMALLOC_INLINE bool +atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +{ + + return (!__sync_bool_compare_and_swap(p, c, s)); +} + +JEMALLOC_INLINE void +atomic_write_uint32(uint32_t *p, uint32_t x) +{ + + __sync_lock_test_and_set(p, x); +} #else # error "Missing implementation for 32-bit atomic operations" #endif /******************************************************************************/ +/* Pointer operations. */ +JEMALLOC_INLINE void * +atomic_add_p(void **p, void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((void *)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_PTR == 2) + return ((void *)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE void * +atomic_sub_p(void **p, void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((void *)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_PTR == 2) + return ((void *)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} + +JEMALLOC_INLINE bool +atomic_cas_p(void **p, void *c, void *s) +{ + +#if (LG_SIZEOF_PTR == 3) + return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_PTR == 2) + return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_p(void **p, const void *x) +{ + +#if (LG_SIZEOF_PTR == 3) + atomic_write_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 2) + atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + +/******************************************************************************/ /* size_t operations. */ JEMALLOC_INLINE size_t atomic_add_z(size_t *p, size_t x) @@ -272,6 +574,28 @@ atomic_sub_z(size_t *p, size_t x) #endif } +JEMALLOC_INLINE bool +atomic_cas_z(size_t *p, size_t c, size_t s) +{ + +#if (LG_SIZEOF_PTR == 3) + return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_PTR == 2) + return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + atomic_write_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_PTR == 2) + atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + /******************************************************************************/ /* unsigned operations. 
*/ JEMALLOC_INLINE unsigned @@ -297,6 +621,29 @@ atomic_sub_u(unsigned *p, unsigned x) (uint32_t)-((int32_t)x))); #endif } + +JEMALLOC_INLINE bool +atomic_cas_u(unsigned *p, unsigned c, unsigned s) +{ + +#if (LG_SIZEOF_INT == 3) + return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); +#elif (LG_SIZEOF_INT == 2) + return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); +#endif +} + +JEMALLOC_INLINE void +atomic_write_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + atomic_write_uint64((uint64_t *)p, (uint64_t)x); +#elif (LG_SIZEOF_INT == 2) + atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#endif +} + /******************************************************************************/ #endif diff --git a/deps/jemalloc/include/jemalloc/internal/base.h b/deps/jemalloc/include/jemalloc/internal/base.h index 9cf75ffb0..39e46ee44 100644 --- a/deps/jemalloc/include/jemalloc/internal/base.h +++ b/deps/jemalloc/include/jemalloc/internal/base.h @@ -10,9 +10,7 @@ #ifdef JEMALLOC_H_EXTERNS void *base_alloc(size_t size); -void *base_calloc(size_t number, size_t size); -extent_node_t *base_node_alloc(void); -void base_node_dealloc(extent_node_t *node); +void base_stats_get(size_t *allocated, size_t *resident, size_t *mapped); bool base_boot(void); void base_prefork(void); void base_postfork_parent(void); diff --git a/deps/jemalloc/include/jemalloc/internal/bitmap.h b/deps/jemalloc/include/jemalloc/internal/bitmap.h index 605ebac58..fcc6005c7 100644 --- a/deps/jemalloc/include/jemalloc/internal/bitmap.h +++ b/deps/jemalloc/include/jemalloc/internal/bitmap.h @@ -3,6 +3,7 @@ /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ #define LG_BITMAP_MAXBITS LG_RUN_MAXREGS +#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) typedef struct bitmap_level_s bitmap_level_t; typedef struct bitmap_info_s bitmap_info_t; @@ -14,6 +15,51 @@ typedef unsigned long bitmap_t; #define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) #define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +/* Number of groups required to store a given number of bits. */ +#define BITMAP_BITS2GROUPS(nbits) \ + ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) + +/* + * Number of groups required at a particular level for a given number of bits. + */ +#define BITMAP_GROUPS_L0(nbits) \ + BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_L1(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) +#define BITMAP_GROUPS_L2(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) +#define BITMAP_GROUPS_L3(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS((nbits))))) + +/* + * Assuming the number of levels, number of groups required for a given number + * of bits. + */ +#define BITMAP_GROUPS_1_LEVEL(nbits) \ + BITMAP_GROUPS_L0(nbits) +#define BITMAP_GROUPS_2_LEVEL(nbits) \ + (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) +#define BITMAP_GROUPS_3_LEVEL(nbits) \ + (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) +#define BITMAP_GROUPS_4_LEVEL(nbits) \ + (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) + +/* + * Maximum number of groups required to support LG_BITMAP_MAXBITS. 
+ */ +#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +#else +# error "Unsupported bitmap size" +#endif + /* Maximum number of levels possible. */ #define BITMAP_MAX_LEVELS \ (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ @@ -93,7 +139,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) bitmap_t g; assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit) == false); + assert(!bitmap_get(bitmap, binfo, bit)); goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[goff]; g = *gp; @@ -126,15 +172,15 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) bitmap_t g; unsigned i; - assert(bitmap_full(bitmap, binfo) == false); + assert(!bitmap_full(bitmap, binfo)); i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; - bit = ffsl(g) - 1; + bit = jemalloc_ffsl(g) - 1; while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1); + bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1); } bitmap_set(bitmap, binfo, bit); @@ -158,7 +204,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; - assert(bitmap_get(bitmap, binfo, bit) == false); + assert(!bitmap_get(bitmap, binfo, bit)); /* Propagate group state transitions up the tree. */ if (propagate) { unsigned i; @@ -172,7 +218,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) == 0); g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; - if (propagate == false) + if (!propagate) break; } } diff --git a/deps/jemalloc/include/jemalloc/internal/chunk.h b/deps/jemalloc/include/jemalloc/internal/chunk.h index 87d8700da..5d1938353 100644 --- a/deps/jemalloc/include/jemalloc/internal/chunk.h +++ b/deps/jemalloc/include/jemalloc/internal/chunk.h @@ -5,7 +5,7 @@ * Size and alignment of memory chunks that are allocated by the OS's virtual * memory system. */ -#define LG_CHUNK_DEFAULT 22 +#define LG_CHUNK_DEFAULT 21 /* Return the chunk address for allocation address a. */ #define CHUNK_ADDR2BASE(a) \ @@ -19,6 +19,16 @@ #define CHUNK_CEILING(s) \ (((s) + chunksize_mask) & ~chunksize_mask) +#define CHUNK_HOOKS_INITIALIZER { \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + NULL \ +} + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -30,23 +40,36 @@ extern size_t opt_lg_chunk; extern const char *opt_dss; -/* Protects stats_chunks; currently not used for any other purpose. */ -extern malloc_mutex_t chunks_mtx; -/* Chunk statistics. */ -extern chunk_stats_t stats_chunks; - -extern rtree_t *chunks_rtree; +extern rtree_t chunks_rtree; extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; -extern size_t map_bias; /* Number of arena chunk header pages. */ -extern size_t arena_maxclass; /* Max size class for arenas. 
*/ -void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero, - dss_prec_t dss_prec); -void chunk_unmap(void *chunk, size_t size); -void chunk_dealloc(void *chunk, size_t size, bool unmap); +extern const chunk_hooks_t chunk_hooks_default; + +chunk_hooks_t chunk_hooks_get(arena_t *arena); +chunk_hooks_t chunk_hooks_set(arena_t *arena, + const chunk_hooks_t *chunk_hooks); + +bool chunk_register(const void *chunk, const extent_node_t *node); +void chunk_deregister(const void *chunk, const extent_node_t *node); +void *chunk_alloc_base(size_t size); +void *chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, + bool dalloc_node); +void *chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); +void chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool committed); +void chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool zeroed, bool committed); +void chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool committed); +bool chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, + size_t length); +bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); void chunk_prefork(void); void chunk_postfork_parent(void); @@ -56,6 +79,19 @@ void chunk_postfork_child(void); /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +extent_node_t *chunk_lookup(const void *chunk, bool dependent); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_)) +JEMALLOC_INLINE extent_node_t * +chunk_lookup(const void *ptr, bool dependent) +{ + + return (rtree_get(&chunks_rtree, (uintptr_t)ptr, dependent)); +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h index 4535ce09c..388f46be0 100644 --- a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h +++ b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h @@ -23,7 +23,8 @@ extern const char *dss_prec_names[]; dss_prec_t chunk_dss_prec_get(void); bool chunk_dss_prec_set(dss_prec_t dss_prec); -void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero); +void *chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit); bool chunk_in_dss(void *chunk); bool chunk_dss_boot(void); void chunk_dss_prefork(void); diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h index f24abac75..7d8014c58 100644 --- a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h +++ b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h @@ -9,10 +9,9 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -bool pages_purge(void *addr, size_t length); - -void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero); -bool chunk_dealloc_mmap(void *chunk, size_t size); +void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, + bool *commit); +bool chunk_dalloc_mmap(void *chunk, size_t size); #endif /* JEMALLOC_H_EXTERNS */ 
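As a side note on the chunk.h hunk above: with the new default of LG_CHUNK_DEFAULT == 21, chunks are 2 MiB, and CHUNK_CEILING() simply rounds a size up to the next multiple of that. The standalone sketch below is not part of the diff; the demo_* names are invented, and it only reproduces the same mask arithmetic so the rounding can be checked in isolation:

#include <stdio.h>
#include <stddef.h>

/* Assumed: the new default chunk size, 1 << 21 == 2 MiB. */
#define DEMO_LG_CHUNK		21
#define DEMO_CHUNKSIZE		((size_t)1 << DEMO_LG_CHUNK)
#define DEMO_CHUNKSIZE_MASK	(DEMO_CHUNKSIZE - 1)
/* Same arithmetic as CHUNK_CEILING(s) in chunk.h. */
#define DEMO_CHUNK_CEILING(s)	(((s) + DEMO_CHUNKSIZE_MASK) & ~DEMO_CHUNKSIZE_MASK)

int
main(void)
{
	/* 3 MiB is not a chunk multiple, so it rounds up to 4 MiB. */
	printf("%zu\n", DEMO_CHUNK_CEILING((size_t)3 << 20));
	/* An exact multiple of the chunk size is returned unchanged. */
	printf("%zu\n", DEMO_CHUNK_CEILING(DEMO_CHUNKSIZE));
	return (0);
}

Under those assumptions the program prints 4194304 and 2097152.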
/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/ckh.h b/deps/jemalloc/include/jemalloc/internal/ckh.h index 58712a6a7..75c1c979f 100644 --- a/deps/jemalloc/include/jemalloc/internal/ckh.h +++ b/deps/jemalloc/include/jemalloc/internal/ckh.h @@ -66,13 +66,13 @@ struct ckh_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp); -void ckh_delete(ckh_t *ckh); +void ckh_delete(tsd_t *tsd, ckh_t *ckh); size_t ckh_count(ckh_t *ckh); bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key, +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); diff --git a/deps/jemalloc/include/jemalloc/internal/ctl.h b/deps/jemalloc/include/jemalloc/internal/ctl.h index 0ffecc5f2..751c14b5b 100644 --- a/deps/jemalloc/include/jemalloc/internal/ctl.h +++ b/deps/jemalloc/include/jemalloc/internal/ctl.h @@ -34,6 +34,7 @@ struct ctl_arena_stats_s { bool initialized; unsigned nthreads; const char *dss; + ssize_t lg_dirty_mult; size_t pactive; size_t pdirty; arena_stats_t astats; @@ -46,22 +47,15 @@ struct ctl_arena_stats_s { malloc_bin_stats_t bstats[NBINS]; malloc_large_stats_t *lstats; /* nlclasses elements. */ + malloc_huge_stats_t *hstats; /* nhclasses elements. */ }; struct ctl_stats_s { size_t allocated; size_t active; + size_t metadata; + size_t resident; size_t mapped; - struct { - size_t current; /* stats_chunks.curchunks */ - uint64_t total; /* stats_chunks.nchunks */ - size_t high; /* stats_chunks.highchunks */ - } chunks; - struct { - size_t allocated; /* huge_allocated */ - uint64_t nmalloc; /* huge_nmalloc */ - uint64_t ndalloc; /* huge_ndalloc */ - } huge; unsigned narenas; ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ }; diff --git a/deps/jemalloc/include/jemalloc/internal/extent.h b/deps/jemalloc/include/jemalloc/internal/extent.h index ba95ca816..386d50ef4 100644 --- a/deps/jemalloc/include/jemalloc/internal/extent.h +++ b/deps/jemalloc/include/jemalloc/internal/extent.h @@ -7,25 +7,53 @@ typedef struct extent_node_s extent_node_t; /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS -/* Tree of extents. */ +/* Tree of extents. Use accessor functions for en_* fields. */ struct extent_node_s { - /* Linkage for the size/address-ordered tree. */ - rb_node(extent_node_t) link_szad; + /* Arena from which this extent came, if any. */ + arena_t *en_arena; - /* Linkage for the address-ordered tree. */ - rb_node(extent_node_t) link_ad; + /* Pointer to the extent that this tree node is responsible for. */ + void *en_addr; + + /* Total region size. */ + size_t en_size; + + /* + * The zeroed flag is used by chunk recycling code to track whether + * memory is zero-filled. 
+ */ + bool en_zeroed; + + /* + * True if physical memory is committed to the extent, whether + * explicitly or implicitly as on a system that overcommits and + * satisfies physical memory needs on demand via soft page faults. + */ + bool en_committed; + + /* + * The achunk flag is used to validate that huge allocation lookups + * don't return arena chunks. + */ + bool en_achunk; /* Profile counters, used for huge objects. */ - prof_ctx_t *prof_ctx; + prof_tctx_t *en_prof_tctx; - /* Pointer to the extent that this tree node is responsible for. */ - void *addr; + /* Linkage for arena's runs_dirty and chunks_cache rings. */ + arena_runs_dirty_link_t rd; + qr(extent_node_t) cc_link; - /* Total region size. */ - size_t size; + union { + /* Linkage for the size/address-ordered tree. */ + rb_node(extent_node_t) szad_link; + + /* Linkage for arena's huge and node_cache lists. */ + ql_elm(extent_node_t) ql_link; + }; - /* True if zero-filled; used by chunk recycling code. */ - bool zeroed; + /* Linkage for the address-ordered tree. */ + rb_node(extent_node_t) ad_link; }; typedef rb_tree(extent_node_t) extent_tree_t; @@ -41,6 +69,171 @@ rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +arena_t *extent_node_arena_get(const extent_node_t *node); +void *extent_node_addr_get(const extent_node_t *node); +size_t extent_node_size_get(const extent_node_t *node); +bool extent_node_zeroed_get(const extent_node_t *node); +bool extent_node_committed_get(const extent_node_t *node); +bool extent_node_achunk_get(const extent_node_t *node); +prof_tctx_t *extent_node_prof_tctx_get(const extent_node_t *node); +void extent_node_arena_set(extent_node_t *node, arena_t *arena); +void extent_node_addr_set(extent_node_t *node, void *addr); +void extent_node_size_set(extent_node_t *node, size_t size); +void extent_node_zeroed_set(extent_node_t *node, bool zeroed); +void extent_node_committed_set(extent_node_t *node, bool committed); +void extent_node_achunk_set(extent_node_t *node, bool achunk); +void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); +void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, + size_t size, bool zeroed, bool committed); +void extent_node_dirty_linkage_init(extent_node_t *node); +void extent_node_dirty_insert(extent_node_t *node, + arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty); +void extent_node_dirty_remove(extent_node_t *node); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) +JEMALLOC_INLINE arena_t * +extent_node_arena_get(const extent_node_t *node) +{ + + return (node->en_arena); +} + +JEMALLOC_INLINE void * +extent_node_addr_get(const extent_node_t *node) +{ + + return (node->en_addr); +} + +JEMALLOC_INLINE size_t +extent_node_size_get(const extent_node_t *node) +{ + + return (node->en_size); +} + +JEMALLOC_INLINE bool +extent_node_zeroed_get(const extent_node_t *node) +{ + + return (node->en_zeroed); +} + +JEMALLOC_INLINE bool +extent_node_committed_get(const extent_node_t *node) +{ + + assert(!node->en_achunk); + return (node->en_committed); +} + +JEMALLOC_INLINE bool +extent_node_achunk_get(const extent_node_t *node) +{ + + return (node->en_achunk); +} + +JEMALLOC_INLINE prof_tctx_t * +extent_node_prof_tctx_get(const extent_node_t *node) +{ + + return (node->en_prof_tctx); +} + +JEMALLOC_INLINE void +extent_node_arena_set(extent_node_t *node, arena_t *arena) 
+{ + + node->en_arena = arena; +} + +JEMALLOC_INLINE void +extent_node_addr_set(extent_node_t *node, void *addr) +{ + + node->en_addr = addr; +} + +JEMALLOC_INLINE void +extent_node_size_set(extent_node_t *node, size_t size) +{ + + node->en_size = size; +} + +JEMALLOC_INLINE void +extent_node_zeroed_set(extent_node_t *node, bool zeroed) +{ + + node->en_zeroed = zeroed; +} + +JEMALLOC_INLINE void +extent_node_committed_set(extent_node_t *node, bool committed) +{ + + node->en_committed = committed; +} + +JEMALLOC_INLINE void +extent_node_achunk_set(extent_node_t *node, bool achunk) +{ + + node->en_achunk = achunk; +} + +JEMALLOC_INLINE void +extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx) +{ + + node->en_prof_tctx = tctx; +} + +JEMALLOC_INLINE void +extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, + bool zeroed, bool committed) +{ + + extent_node_arena_set(node, arena); + extent_node_addr_set(node, addr); + extent_node_size_set(node, size); + extent_node_zeroed_set(node, zeroed); + extent_node_committed_set(node, committed); + extent_node_achunk_set(node, false); + if (config_prof) + extent_node_prof_tctx_set(node, NULL); +} + +JEMALLOC_INLINE void +extent_node_dirty_linkage_init(extent_node_t *node) +{ + + qr_new(&node->rd, rd_link); + qr_new(node, cc_link); +} + +JEMALLOC_INLINE void +extent_node_dirty_insert(extent_node_t *node, + arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty) +{ + + qr_meld(runs_dirty, &node->rd, rd_link); + qr_meld(chunks_dirty, node, cc_link); +} + +JEMALLOC_INLINE void +extent_node_dirty_remove(extent_node_t *node) +{ + + qr_remove(&node->rd, rd_link); + qr_remove(node, cc_link); +} + +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/hash.h b/deps/jemalloc/include/jemalloc/internal/hash.h index c7183ede8..bcead337a 100644 --- a/deps/jemalloc/include/jemalloc/internal/hash.h +++ b/deps/jemalloc/include/jemalloc/internal/hash.h @@ -35,13 +35,14 @@ JEMALLOC_INLINE uint32_t hash_rotl_32(uint32_t x, int8_t r) { - return (x << r) | (x >> (32 - r)); + return ((x << r) | (x >> (32 - r))); } JEMALLOC_INLINE uint64_t hash_rotl_64(uint64_t x, int8_t r) { - return (x << r) | (x >> (64 - r)); + + return ((x << r) | (x >> (64 - r))); } JEMALLOC_INLINE uint32_t @@ -76,9 +77,9 @@ hash_fmix_64(uint64_t k) { k ^= k >> 33; - k *= QU(0xff51afd7ed558ccdLLU); + k *= KQU(0xff51afd7ed558ccd); k ^= k >> 33; - k *= QU(0xc4ceb9fe1a85ec53LLU); + k *= KQU(0xc4ceb9fe1a85ec53); k ^= k >> 33; return (k); @@ -247,8 +248,8 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t h1 = seed; uint64_t h2 = seed; - const uint64_t c1 = QU(0x87c37b91114253d5LLU); - const uint64_t c2 = QU(0x4cf5ad432745937fLLU); + const uint64_t c1 = KQU(0x87c37b91114253d5); + const uint64_t c2 = KQU(0x4cf5ad432745937f); /* body */ { diff --git a/deps/jemalloc/include/jemalloc/internal/huge.h b/deps/jemalloc/include/jemalloc/internal/huge.h index a2b9c7791..ece7af980 100644 --- a/deps/jemalloc/include/jemalloc/internal/huge.h +++ b/deps/jemalloc/include/jemalloc/internal/huge.h @@ -9,34 +9,24 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -/* Huge allocation statistics. */ -extern uint64_t huge_nmalloc; -extern uint64_t huge_ndalloc; -extern size_t huge_allocated; - -/* Protects chunk-related data structures. 
*/ -extern malloc_mutex_t huge_mtx; - -void *huge_malloc(size_t size, bool zero, dss_prec_t dss_prec); -void *huge_palloc(size_t size, size_t alignment, bool zero, - dss_prec_t dss_prec); -bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, - size_t extra); -void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec); +void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, + tcache_t *tcache); +void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, + bool zero, tcache_t *tcache); +bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero); +void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, + size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(void *ptr, bool unmap); +void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +arena_t *huge_aalloc(const void *ptr); size_t huge_salloc(const void *ptr); -dss_prec_t huge_dss_prec_get(arena_t *arena); -prof_ctx_t *huge_prof_ctx_get(const void *ptr); -void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -bool huge_boot(void); -void huge_prefork(void); -void huge_postfork_parent(void); -void huge_postfork_child(void); +prof_tctx_t *huge_prof_tctx_get(const void *ptr); +void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); +void huge_prof_tctx_reset(const void *ptr); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in index df266abb7..8536a3eda 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,70 +1,13 @@ #ifndef JEMALLOC_INTERNAL_H #define JEMALLOC_INTERNAL_H -#include <math.h> -#ifdef _WIN32 -# include <windows.h> -# define ENOENT ERROR_PATH_NOT_FOUND -# define EINVAL ERROR_BAD_ARGUMENTS -# define EAGAIN ERROR_OUTOFMEMORY -# define EPERM ERROR_WRITE_FAULT -# define EFAULT ERROR_INVALID_ADDRESS -# define ENOMEM ERROR_NOT_ENOUGH_MEMORY -# undef ERANGE -# define ERANGE ERROR_INVALID_DATA -#else -# include <sys/param.h> -# include <sys/mman.h> -# include <sys/syscall.h> -# if !defined(SYS_write) && defined(__NR_write) -# define SYS_write __NR_write -# endif -# include <sys/uio.h> -# include <pthread.h> -# include <errno.h> -#endif -#include <sys/types.h> - -#include <limits.h> -#ifndef SIZE_T_MAX -# define SIZE_T_MAX SIZE_MAX -#endif -#include <stdarg.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <stddef.h> -#ifndef offsetof -# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) -#endif -#include <inttypes.h> -#include <string.h> -#include <strings.h> -#include <ctype.h> -#ifdef _MSC_VER -# include <io.h> -typedef intptr_t ssize_t; -# define PATH_MAX 1024 -# define STDERR_FILENO 2 -# define __func__ __FUNCTION__ -/* Disable warnings about deprecated system functions */ -# pragma warning(disable: 4996) -#else -# include <unistd.h> -#endif -#include <fcntl.h> #include "jemalloc_internal_defs.h" +#include "jemalloc/internal/jemalloc_internal_decls.h" #ifdef JEMALLOC_UTRACE #include <sys/ktrace.h> #endif -#ifdef JEMALLOC_VALGRIND 
-#include <valgrind/valgrind.h> -#include <valgrind/memcheck.h> -#endif - #define JEMALLOC_NO_DEMANGLE #ifdef JEMALLOC_JET # define JEMALLOC_N(n) jet_##n @@ -85,7 +28,7 @@ static const bool config_debug = false #endif ; -static const bool config_dss = +static const bool have_dss = #ifdef JEMALLOC_DSS true #else @@ -127,8 +70,8 @@ static const bool config_prof_libunwind = false #endif ; -static const bool config_mremap = -#ifdef JEMALLOC_MREMAP +static const bool maps_coalesce = +#ifdef JEMALLOC_MAPS_COALESCE true #else false @@ -190,6 +133,17 @@ static const bool config_ivsalloc = false #endif ; +static const bool config_cache_oblivious = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + true +#else + false +#endif + ; + +#ifdef JEMALLOC_C11ATOMICS +#include <stdatomic.h> +#endif #ifdef JEMALLOC_ATOMIC9 #include <machine/atomic.h> @@ -229,20 +183,48 @@ static const bool config_ivsalloc = #include "jemalloc/internal/jemalloc_internal_macros.h" +/* Size class index type. */ +typedef unsigned szind_t; + +/* + * Flags bits: + * + * a: arena + * t: tcache + * 0: unused + * z: zero + * n: alignment + * + * aaaaaaaa aaaatttt tttttttt 0znnnnnn + */ +#define MALLOCX_ARENA_MASK ((int)~0xfffff) +#define MALLOCX_ARENA_MAX 0xffe +#define MALLOCX_TCACHE_MASK ((int)~0xfff000ffU) +#define MALLOCX_TCACHE_MAX 0xffd #define MALLOCX_LG_ALIGN_MASK ((int)0x3f) -#define ALLOCM_LG_ALIGN_MASK ((int)0x3f) +/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ +#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ + (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) +#define MALLOCX_ALIGN_GET(flags) \ + (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) +#define MALLOCX_ZERO_GET(flags) \ + ((bool)(flags & MALLOCX_ZERO)) + +#define MALLOCX_TCACHE_GET(flags) \ + (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> 8)) - 2) +#define MALLOCX_ARENA_GET(flags) \ + (((unsigned)(((unsigned)flags) >> 20)) - 1) /* Smallest size class to support. */ -#define LG_TINY_MIN 3 #define TINY_MIN (1U << LG_TINY_MIN) /* - * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size * classes). */ #ifndef LG_QUANTUM # if (defined(__i386__) || defined(_M_IX86)) -# define LG_QUANTUM 3 +# define LG_QUANTUM 4 # endif # ifdef __ia64__ # define LG_QUANTUM 4 @@ -250,11 +232,11 @@ static const bool config_ivsalloc = # ifdef __alpha__ # define LG_QUANTUM 4 # endif -# ifdef __sparc64__ +# if (defined(__sparc64__) || defined(__sparcv9)) # define LG_QUANTUM 4 # endif # if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) -# define LG_QUANTUM 3 +# define LG_QUANTUM 4 # endif # ifdef __arm__ # define LG_QUANTUM 3 @@ -268,6 +250,9 @@ static const bool config_ivsalloc = # ifdef __mips__ # define LG_QUANTUM 3 # endif +# ifdef __or1k__ +# define LG_QUANTUM 3 +# endif # ifdef __powerpc__ # define LG_QUANTUM 4 # endif @@ -280,8 +265,12 @@ static const bool config_ivsalloc = # ifdef __tile__ # define LG_QUANTUM 4 # endif +# ifdef __le32__ +# define LG_QUANTUM 4 +# endif # ifndef LG_QUANTUM -# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" +# error "Unknown minimum alignment for architecture; specify via " + "--with-lg-quantum" # endif #endif @@ -321,12 +310,11 @@ static const bool config_ivsalloc = #define CACHELINE_CEILING(s) \ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) -/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */ +/* Page size. LG_PAGE is determined by the configure script. 
*/ #ifdef PAGE_MASK # undef PAGE_MASK #endif -#define LG_PAGE STATIC_PAGE_SHIFT -#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT)) +#define PAGE ((size_t)(1U << LG_PAGE)) #define PAGE_MASK ((size_t)(PAGE - 1)) /* Return the smallest pagesize multiple that is >= s. */ @@ -345,7 +333,7 @@ static const bool config_ivsalloc = #define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & (-(alignment))) -/* Declare a variable length array */ +/* Declare a variable-length array. */ #if __STDC_VERSION__ < 199901L # ifdef _MSC_VER # include <malloc.h> @@ -358,86 +346,12 @@ static const bool config_ivsalloc = # endif # endif # define VARIABLE_ARRAY(type, name, count) \ - type *name = alloca(sizeof(type) * count) -#else -# define VARIABLE_ARRAY(type, name, count) type name[count] -#endif - -#ifdef JEMALLOC_VALGRIND -/* - * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions - * so that when Valgrind reports errors, there are no extra stack frames - * in the backtraces. - * - * The size that is reported to valgrind must be consistent through a chain of - * malloc..realloc..realloc calls. Request size isn't recorded anywhere in - * jemalloc, so it is critical that all callers of these macros provide usize - * rather than request size. As a result, buffer overflow detection is - * technically weakened for the standard API, though it is generally accepted - * practice to consider any extra bytes reported by malloc_usable_size() as - * usable space. - */ -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ - if (config_valgrind && opt_valgrind && cond) \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ -} while (0) -#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ - old_rzsize, zero) do { \ - if (config_valgrind && opt_valgrind) { \ - size_t rzsize = p2rz(ptr); \ - \ - if (ptr == old_ptr) { \ - VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ - usize, rzsize); \ - if (zero && old_usize < usize) { \ - VALGRIND_MAKE_MEM_DEFINED( \ - (void *)((uintptr_t)ptr + \ - old_usize), usize - old_usize); \ - } \ - } else { \ - if (old_ptr != NULL) { \ - VALGRIND_FREELIKE_BLOCK(old_ptr, \ - old_rzsize); \ - } \ - if (ptr != NULL) { \ - size_t copy_size = (old_usize < usize) \ - ? 
old_usize : usize; \ - size_t tail_size = usize - copy_size; \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ - rzsize, false); \ - if (copy_size > 0) { \ - VALGRIND_MAKE_MEM_DEFINED(ptr, \ - copy_size); \ - } \ - if (zero && tail_size > 0) { \ - VALGRIND_MAKE_MEM_DEFINED( \ - (void *)((uintptr_t)ptr + \ - copy_size), tail_size); \ - } \ - } \ - } \ - } \ -} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ - if (config_valgrind && opt_valgrind) \ - VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \ -} while (0) + type *name = alloca(sizeof(type) * (count)) #else -#define RUNNING_ON_VALGRIND ((unsigned)0) -#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ - do {} while (0) -#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \ - do {} while (0) -#define VALGRIND_FREELIKE_BLOCK(addr, rzB) do {} while (0) -#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) do {} while (0) -#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) do {} while (0) -#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ - old_rzsize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) +# define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif +#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -452,9 +366,10 @@ static const bool config_ivsalloc = #include "jemalloc/internal/arena.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" @@ -464,6 +379,7 @@ static const bool config_ivsalloc = /******************************************************************************/ #define JEMALLOC_H_STRUCTS +#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -472,68 +388,83 @@ static const bool config_ivsalloc = #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" +#define JEMALLOC_ARENA_STRUCTS_A +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_STRUCTS_A #include "jemalloc/internal/extent.h" +#define JEMALLOC_ARENA_STRUCTS_B #include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_STRUCTS_B #include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" -typedef struct { - uint64_t allocated; - uint64_t deallocated; -} thread_allocated_t; -/* - * The JEMALLOC_ARG_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro - * argument. 
- */ -#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_ARG_CONCAT({0, 0}) +#include "jemalloc/internal/tsd.h" #undef JEMALLOC_H_STRUCTS /******************************************************************************/ #define JEMALLOC_H_EXTERNS extern bool opt_abort; -extern bool opt_junk; +extern const char *opt_junk; +extern bool opt_junk_alloc; +extern bool opt_junk_free; extern size_t opt_quarantine; extern bool opt_redzone; extern bool opt_utrace; -extern bool opt_valgrind; extern bool opt_xmalloc; extern bool opt_zero; extern size_t opt_narenas; +extern bool in_valgrind; + /* Number of CPUs. */ extern unsigned ncpus; -/* Protects arenas initialization (arenas, arenas_total). */ -extern malloc_mutex_t arenas_lock; /* - * Arenas that are used to service external requests. Not all elements of the - * arenas array are necessarily used; arenas are created lazily as needed. - * - * arenas[0..narenas_auto) are used for automatic multiplexing of threads and - * arenas. arenas[narenas_auto..narenas_total) are only used if the application - * takes some action to create them and allocate from them. + * index2size_tab encodes the same information as could be computed (at + * unacceptable cost in some code paths) by index2size_compute(). */ -extern arena_t **arenas; -extern unsigned narenas_total; -extern unsigned narenas_auto; /* Read-only after initialization. */ - +extern size_t const index2size_tab[NSIZES]; +/* + * size2index_tab is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via size2index(). + */ +extern uint8_t const size2index_tab[]; + +arena_t *a0get(void); +void *a0malloc(size_t size); +void a0dalloc(void *ptr); +void *bootstrap_malloc(size_t size); +void *bootstrap_calloc(size_t num, size_t size); +void bootstrap_free(void *ptr); arena_t *arenas_extend(unsigned ind); -void arenas_cleanup(void *arg); -arena_t *choose_arena_hard(void); +arena_t *arena_init(unsigned ind); +unsigned narenas_total_get(void); +arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing); +arena_t *arena_choose_hard(tsd_t *tsd); +void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); +unsigned arena_nbound(unsigned ind); +void thread_allocated_cleanup(tsd_t *tsd); +void thread_deallocated_cleanup(tsd_t *tsd); +void arena_cleanup(tsd_t *tsd); +void arenas_cache_cleanup(tsd_t *tsd); +void narenas_cache_cleanup(tsd_t *tsd); +void arenas_cache_bypass_cleanup(tsd_t *tsd); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); +#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -542,24 +473,26 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" +#include "jemalloc/internal/tsd.h" #undef 
JEMALLOC_H_EXTERNS /******************************************************************************/ #define JEMALLOC_H_INLINES +#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -572,26 +505,158 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) - +szind_t size2index_compute(size_t size); +szind_t size2index_lookup(size_t size); +szind_t size2index(size_t size); +size_t index2size_compute(szind_t index); +size_t index2size_lookup(szind_t index); +size_t index2size(szind_t index); +size_t s2u_compute(size_t size); +size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); -unsigned narenas_total_get(void); -arena_t *choose_arena(arena_t *arena); +arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, + bool refresh_if_missing); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -/* - * Map of pthread_self() --> arenas[???], used for selecting an arena to use - * for allocations. - */ -malloc_tsd_externs(arenas, arena_t *) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, arenas, arena_t *, NULL, - arenas_cleanup) +JEMALLOC_INLINE szind_t +size2index_compute(size_t size) +{ + +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); + } +#endif + { + size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? + (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) + : lg_floor((size<<1)-1); + size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + size_t grp = shift << LG_SIZE_CLASS_GROUP; + + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t index = NTBINS + grp + mod; + return (index); + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +size2index_lookup(size_t size) +{ + + assert(size <= LOOKUP_MAXCLASS); + { + size_t ret = ((size_t)(size2index_tab[(size-1) >> + LG_TINY_MIN])); + assert(ret == size2index_compute(size)); + return (ret); + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +size2index(size_t size) +{ + + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (size2index_lookup(size)); + return (size2index_compute(size)); +} + +JEMALLOC_INLINE size_t +index2size_compute(szind_t index) +{ + +#if (NTBINS > 0) + if (index < NTBINS) + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); +#endif + { + size_t reduced_index = index - NTBINS; + size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 
1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size_lookup(szind_t index) +{ + size_t ret = (size_t)index2size_tab[index]; + assert(ret == index2size_compute(index)); + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size(szind_t index) +{ + + assert(index < NSIZES); + return (index2size_lookup(index)); +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_compute(size_t size) +{ + +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil(size)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } +#endif + { + size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? + (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) + : lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return (usize); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_lookup(size_t size) +{ + size_t ret = index2size_lookup(size2index_lookup(size)); + + assert(ret == s2u_compute(size)); + return (ret); +} /* * Compute usable size that would result from allocating an object with the @@ -601,11 +666,10 @@ JEMALLOC_ALWAYS_INLINE size_t s2u(size_t size) { - if (size <= SMALL_MAXCLASS) - return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); - if (size <= arena_maxclass) - return (PAGE_CEILING(size)); - return (CHUNK_CEILING(size)); + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) + return (s2u_lookup(size)); + return (s2u_compute(size)); } /* @@ -619,108 +683,128 @@ sa2u(size_t size, size_t alignment) assert(alignment != 0 && ((alignment - 1) & alignment) == 0); - /* - * Round size up to the nearest multiple of alignment. - * - * This done, we can take advantage of the fact that for each small - * size class, every object is aligned at the smallest power of two - * that is non-zero in the base two representation of the size. For - * example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - */ - usize = ALIGNMENT_CEILING(size, alignment); - /* - * (usize < size) protects against the combination of maximal - * alignment and size greater than maximal alignment. - */ - if (usize < size) { - /* size_t overflow. */ - return (0); + /* Try for a small size class. */ + if (size <= SMALL_MAXCLASS && alignment < PAGE) { + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each + * small size class, every object is aligned at the smallest + * power of two that is non-zero in the base two representation + * of the size. For example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = s2u(ALIGNMENT_CEILING(size, alignment)); + if (usize < LARGE_MINCLASS) + return (usize); } - if (usize <= arena_maxclass && alignment <= PAGE) { - if (usize <= SMALL_MAXCLASS) - return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); - return (PAGE_CEILING(usize)); - } else { - size_t run_size; - + /* Try for a large size class. 
*/ + if (likely(size <= large_maxclass) && likely(alignment < chunksize)) { /* * We can't achieve subpage alignment, so round up alignment - * permanently; it makes later calculations simpler. + * to the minimum that can actually be supported. */ alignment = PAGE_CEILING(alignment); - usize = PAGE_CEILING(size); - /* - * (usize < size) protects against very large sizes within - * PAGE of SIZE_T_MAX. - * - * (usize + alignment < usize) protects against the - * combination of maximal alignment and usize large enough - * to cause overflow. This is similar to the first overflow - * check above, but it needs to be repeated due to the new - * usize value, which may now be *equal* to maximal - * alignment, whereas before we only detected overflow if the - * original size was *greater* than maximal alignment. - */ - if (usize < size || usize + alignment < usize) { - /* size_t overflow. */ - return (0); - } + + /* Make sure result is a large size class. */ + usize = (size <= LARGE_MINCLASS) ? LARGE_MINCLASS : s2u(size); /* * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. - * If the run wouldn't fit within a chunk, round up to a huge - * allocation size. */ - run_size = usize + alignment - PAGE; - if (run_size <= arena_maxclass) - return (PAGE_CEILING(usize)); - return (CHUNK_CEILING(usize)); + if (usize + large_pad + alignment - PAGE <= arena_maxrun) + return (usize); } -} -JEMALLOC_INLINE unsigned -narenas_total_get(void) -{ - unsigned narenas; + /* Huge size class. Beware of size_t overflow. */ - malloc_mutex_lock(&arenas_lock); - narenas = narenas_total; - malloc_mutex_unlock(&arenas_lock); + /* + * We can't achieve subchunk alignment, so round up alignment to the + * minimum that can actually be supported. + */ + alignment = CHUNK_CEILING(alignment); + if (alignment == 0) { + /* size_t overflow. */ + return (0); + } + + /* Make sure result is a huge size class. */ + if (size <= chunksize) + usize = chunksize; + else { + usize = s2u(size); + if (usize < size) { + /* size_t overflow. */ + return (0); + } + } - return (narenas); + /* + * Calculate the multi-chunk mapping that huge_palloc() would need in + * order to guarantee the alignment. + */ + if (usize + alignment - PAGE < usize) { + /* size_t overflow. */ + return (0); + } + return (usize); } /* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * -choose_arena(arena_t *arena) +arena_choose(tsd_t *tsd, arena_t *arena) { arena_t *ret; if (arena != NULL) return (arena); - if ((ret = *arenas_tsd_get()) == NULL) { - ret = choose_arena_hard(); - assert(ret != NULL); - } + if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) + ret = arena_choose_hard(tsd); return (ret); } + +JEMALLOC_INLINE arena_t * +arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, + bool refresh_if_missing) +{ + arena_t *arena; + arena_t **arenas_cache = tsd_arenas_cache_get(tsd); + + /* init_if_missing requires refresh_if_missing. */ + assert(!init_if_missing || refresh_if_missing); + + if (unlikely(arenas_cache == NULL)) { + /* arenas_cache hasn't been initialized yet. */ + return (arena_get_hard(tsd, ind, init_if_missing)); + } + if (unlikely(ind >= tsd_narenas_cache_get(tsd))) { + /* + * ind is invalid, cache is old (too small), or arena to be + * initialized. + */ + return (refresh_if_missing ? 
arena_get_hard(tsd, ind, + init_if_missing) : NULL); + } + arena = arenas_cache[ind]; + if (likely(arena != NULL) || !refresh_if_missing) + return (arena); + return (arena_get_hard(tsd, ind, init_if_missing)); +} #endif #include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/rtree.h" /* - * Include arena.h twice in order to resolve circular dependencies with - * tcache.h. + * Include portions of arena.h interleaved with tcache.h in order to resolve + * circular dependencies. */ #define JEMALLOC_ARENA_INLINE_A #include "jemalloc/internal/arena.h" @@ -733,133 +817,155 @@ choose_arena(arena_t *arena) #include "jemalloc/internal/quarantine.h" #ifndef JEMALLOC_ENABLE_INLINE -void *imalloct(size_t size, bool try_tcache, arena_t *arena); -void *imalloc(size_t size); -void *icalloct(size_t size, bool try_tcache, arena_t *arena); -void *icalloc(size_t size); -void *ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena); -void *ipalloc(size_t usize, size_t alignment, bool zero); +arena_t *iaalloc(const void *ptr); size_t isalloc(const void *ptr, bool demote); +void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, + bool is_metadata, arena_t *arena); +void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); +void *imalloc(tsd_t *tsd, size_t size); +void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); +void *icalloc(tsd_t *tsd, size_t size); +void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_metadata, arena_t *arena); +void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena); +void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); -void idalloct(void *ptr, bool try_tcache); -void idalloc(void *ptr); -void iqalloct(void *ptr, bool try_tcache); -void iqalloc(void *ptr); -void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, +void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata); +void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); +void idalloc(tsd_t *tsd, void *ptr); +void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); -void *iralloct(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena); -void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero); -bool ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero); -malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) +void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); +void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero); +bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_ALWAYS_INLINE arena_t * +iaalloc(const void *ptr) +{ + + 
assert(ptr != NULL); + + return (arena_aalloc(ptr)); +} + +/* + * Typical usage: + * void *ptr = [...] + * size_t sz = isalloc(ptr, config_prof); + */ +JEMALLOC_ALWAYS_INLINE size_t +isalloc(const void *ptr, bool demote) +{ + + assert(ptr != NULL); + /* Demotion only makes sense if config_prof is true. */ + assert(config_prof || !demote); + + return (arena_salloc(ptr, demote)); +} + JEMALLOC_ALWAYS_INLINE void * -imalloct(size_t size, bool try_tcache, arena_t *arena) +iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata, + arena_t *arena) { + void *ret; assert(size != 0); - if (size <= arena_maxclass) - return (arena_malloc(arena, size, false, try_tcache)); - else - return (huge_malloc(size, false, huge_dss_prec_get(arena))); + ret = arena_malloc(tsd, arena, size, zero, tcache); + if (config_stats && is_metadata && likely(ret != NULL)) { + arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, + config_prof)); + } + return (ret); } JEMALLOC_ALWAYS_INLINE void * -imalloc(size_t size) +imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) { - return (imalloct(size, true, NULL)); + return (iallocztm(tsd, size, false, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * -icalloct(size_t size, bool try_tcache, arena_t *arena) +imalloc(tsd_t *tsd, size_t size) { - if (size <= arena_maxclass) - return (arena_malloc(arena, size, true, try_tcache)); - else - return (huge_malloc(size, true, huge_dss_prec_get(arena))); + return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL)); } JEMALLOC_ALWAYS_INLINE void * -icalloc(size_t size) +icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) { - return (icalloct(size, true, NULL)); + return (iallocztm(tsd, size, true, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * -ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache, - arena_t *arena) +icalloc(tsd_t *tsd, size_t size) +{ + + return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL)); +} + +JEMALLOC_ALWAYS_INLINE void * +ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_metadata, arena_t *arena) { void *ret; assert(usize != 0); assert(usize == sa2u(usize, alignment)); - if (usize <= arena_maxclass && alignment <= PAGE) - ret = arena_malloc(arena, usize, zero, try_tcache); - else { - if (usize <= arena_maxclass) { - ret = arena_palloc(choose_arena(arena), usize, - alignment, zero); - } else if (alignment <= chunksize) - ret = huge_malloc(usize, zero, huge_dss_prec_get(arena)); - else - ret = huge_palloc(usize, alignment, zero, huge_dss_prec_get(arena)); - } - + ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); + if (config_stats && is_metadata && likely(ret != NULL)) { + arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, + config_prof)); + } return (ret); } JEMALLOC_ALWAYS_INLINE void * -ipalloc(size_t usize, size_t alignment, bool zero) +ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) { - return (ipalloct(usize, alignment, zero, true, NULL)); + return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena)); } -/* - * Typical usage: - * void *ptr = [...] 
- * size_t sz = isalloc(ptr, config_prof); - */ -JEMALLOC_ALWAYS_INLINE size_t -isalloc(const void *ptr, bool demote) +JEMALLOC_ALWAYS_INLINE void * +ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - size_t ret; - arena_chunk_t *chunk; - - assert(ptr != NULL); - /* Demotion only makes sense if config_prof is true. */ - assert(config_prof || demote == false); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) - ret = arena_salloc(ptr, demote); - else - ret = huge_salloc(ptr); - - return (ret); + return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, + NULL), false, NULL)); } JEMALLOC_ALWAYS_INLINE size_t ivsalloc(const void *ptr, bool demote) { + extent_node_t *node; /* Return 0 if ptr is not within a chunk managed by jemalloc. */ - if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0) + node = chunk_lookup(ptr, false); + if (node == NULL) return (0); + /* Only arena chunks should be looked up via interior pointers. */ + assert(extent_node_addr_get(node) == ptr || + extent_node_achunk_get(node)); return (isalloc(ptr, demote)); } @@ -870,7 +976,7 @@ u2rz(size_t usize) size_t ret; if (usize <= SMALL_MAXCLASS) { - size_t binind = SMALL_SIZE2BIN(usize); + szind_t binind = size2index(usize); ret = arena_bin_info[binind].redzone_size; } else ret = 0; @@ -887,47 +993,62 @@ p2rz(const void *ptr) } JEMALLOC_ALWAYS_INLINE void -idalloct(void *ptr, bool try_tcache) +idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) { - arena_chunk_t *chunk; assert(ptr != NULL); + if (config_stats && is_metadata) { + arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr, + config_prof)); + } - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr, try_tcache); - else - huge_dalloc(ptr, true); + arena_dalloc(tsd, ptr, tcache); +} + +JEMALLOC_ALWAYS_INLINE void +idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache) +{ + + idalloctm(tsd, ptr, tcache, false); } JEMALLOC_ALWAYS_INLINE void -idalloc(void *ptr) +idalloc(tsd_t *tsd, void *ptr) { - idalloct(ptr, true); + idalloctm(tsd, ptr, tcache_get(tsd, false), false); } JEMALLOC_ALWAYS_INLINE void -iqalloct(void *ptr, bool try_tcache) +iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) { - if (config_fill && opt_quarantine) - quarantine(ptr); + if (config_fill && unlikely(opt_quarantine)) + quarantine(tsd, ptr); else - idalloct(ptr, try_tcache); + idalloctm(tsd, ptr, tcache, false); +} + +JEMALLOC_ALWAYS_INLINE void +isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +{ + + arena_sdalloc(tsd, ptr, size, tcache); } JEMALLOC_ALWAYS_INLINE void -iqalloc(void *ptr) +isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) { - iqalloct(ptr, true); + if (config_fill && unlikely(opt_quarantine)) + quarantine(tsd, ptr); + else + isdalloct(tsd, ptr, size, tcache); } JEMALLOC_ALWAYS_INLINE void * -iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, - arena_t *arena) +iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { void *p; size_t usize, copysize; @@ -935,7 +1056,7 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); - p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) { if 
(extra == 0) return (NULL); @@ -943,7 +1064,7 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, usize = sa2u(size, alignment); if (usize == 0) return (NULL); - p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena); + p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) return (NULL); } @@ -953,72 +1074,57 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); - iqalloct(ptr, try_tcache_dalloc); + isqalloc(tsd, ptr, oldsize, tcache); return (p); } JEMALLOC_ALWAYS_INLINE void * -iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, - bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena) +iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero, tcache_t *tcache, arena_t *arena) { - size_t oldsize; assert(ptr != NULL); assert(size != 0); - oldsize = isalloc(ptr, config_prof); - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { /* * Existing object alignment is inadequate; allocate new space * and copy. */ - return (iralloct_realign(ptr, oldsize, size, extra, alignment, - zero, try_tcache_alloc, try_tcache_dalloc, arena)); + return (iralloct_realign(tsd, ptr, oldsize, size, 0, alignment, + zero, tcache, arena)); } - if (size + extra <= arena_maxclass) { - return (arena_ralloc(arena, ptr, oldsize, size, extra, - alignment, zero, try_tcache_alloc, - try_tcache_dalloc)); - } else { - return (huge_ralloc(ptr, oldsize, size, extra, - alignment, zero, try_tcache_dalloc, huge_dss_prec_get(arena))); - } + return (arena_ralloc(tsd, arena, ptr, oldsize, size, alignment, zero, + tcache)); } JEMALLOC_ALWAYS_INLINE void * -iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) +iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero) { - return (iralloct(ptr, size, extra, alignment, zero, true, true, NULL)); + return (iralloct(tsd, ptr, oldsize, size, alignment, zero, + tcache_get(tsd, true), NULL)); } JEMALLOC_ALWAYS_INLINE bool -ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero) +ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, + bool zero) { - size_t oldsize; assert(ptr != NULL); assert(size != 0); - oldsize = isalloc(ptr, config_prof); if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { /* Existing object alignment is inadequate. 
*/ return (true); } - if (size <= arena_maxclass) - return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); - else - return (huge_ralloc_no_move(ptr, oldsize, size, extra)); + return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); } - -malloc_tsd_externs(thread_allocated, thread_allocated_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, thread_allocated, thread_allocated_t, - THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup) #endif #include "jemalloc/internal/prof.h" diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h new file mode 100644 index 000000000..a601d6ebb --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h @@ -0,0 +1,64 @@ +#ifndef JEMALLOC_INTERNAL_DECLS_H +#define JEMALLOC_INTERNAL_DECLS_H + +#include <math.h> +#ifdef _WIN32 +# include <windows.h> +# include "msvc_compat/windows_extra.h" + +#else +# include <sys/param.h> +# include <sys/mman.h> +# if !defined(__pnacl__) && !defined(__native_client__) +# include <sys/syscall.h> +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# include <sys/uio.h> +# endif +# include <pthread.h> +# include <errno.h> +#endif +#include <sys/types.h> + +#include <limits.h> +#ifndef SIZE_T_MAX +# define SIZE_T_MAX SIZE_MAX +#endif +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stddef.h> +#ifndef offsetof +# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) +#endif +#include <string.h> +#include <strings.h> +#include <ctype.h> +#ifdef _MSC_VER +# include <io.h> +typedef intptr_t ssize_t; +# define PATH_MAX 1024 +# define STDERR_FILENO 2 +# define __func__ __FUNCTION__ +# ifdef JEMALLOC_HAS_RESTRICT +# define restrict __restrict +# endif +/* Disable warnings about deprecated system functions. */ +# pragma warning(disable: 4996) +#if _MSC_VER < 1800 +static int +isblank(int c) +{ + + return (c == '\t' || c == ' '); +} +#endif +#else +# include <unistd.h> +#endif +#include <fcntl.h> + +#endif /* JEMALLOC_INTERNAL_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in index c166fbd9e..b0f8caaf8 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -22,6 +22,9 @@ */ #undef CPU_SPINWAIT +/* Defined if C11 atomics are available. */ +#undef JEMALLOC_C11ATOMICS + /* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */ #undef JEMALLOC_ATOMIC9 @@ -35,7 +38,7 @@ * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the - * functions are defined in libgcc instead of being inlines) + * functions are defined in libgcc instead of being inlines). */ #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 @@ -43,17 +46,37 @@ * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the - * functions are defined in libgcc instead of being inlines) + * functions are defined in libgcc instead of being inlines). */ #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 /* + * Defined if __builtin_clz() and __builtin_clzl() are available. 
+ */ +#undef JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if madvise(2) is available. + */ +#undef JEMALLOC_HAVE_MADVISE + +/* * Defined if OSSpin*() functions are available, as provided by Darwin, and * documented in the spinlock(3) manual page. */ #undef JEMALLOC_OSSPIN /* + * Defined if secure_getenv(3) is available. + */ +#undef JEMALLOC_HAVE_SECURE_GETENV + +/* + * Defined if issetugid(2) is available. + */ +#undef JEMALLOC_HAVE_ISSETUGID + +/* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc * bootstrapping will cause recursion into the pthreads library. Therefore, if @@ -76,9 +99,6 @@ */ #undef JEMALLOC_MUTEX_INIT_CB -/* Defined if sbrk() is supported. */ -#undef JEMALLOC_HAVE_SBRK - /* Non-empty if the tls_model attribute is supported. */ #undef JEMALLOC_TLS_MODEL @@ -137,8 +157,26 @@ /* Support lazy locking (avoid locking unless a second thread is launched). */ #undef JEMALLOC_LAZY_LOCK -/* One page is 2^STATIC_PAGE_SHIFT bytes. */ -#undef STATIC_PAGE_SHIFT +/* Minimum size class to support is 2^LG_TINY_MIN bytes. */ +#undef LG_TINY_MIN + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#undef LG_QUANTUM + +/* One page is 2^LG_PAGE bytes. */ +#undef LG_PAGE + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#undef JEMALLOC_MAPS_COALESCE /* * If defined, use munmap() to unmap freed chunks, rather than storing them for @@ -147,23 +185,29 @@ */ #undef JEMALLOC_MUNMAP -/* - * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is - * disabled by default because it is Linux-specific and it will cause virtual - * memory map holes, much like munmap(2) does. - */ -#undef JEMALLOC_MREMAP - /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS /* + * ffs()/ffsl() functions to use for bitmapping. Don't use these directly; + * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h. + */ +#undef JEMALLOC_INTERNAL_FFSL +#undef JEMALLOC_INTERNAL_FFS + +/* * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside * within jemalloc-owned chunks before dereferencing them. */ #undef JEMALLOC_IVSALLOC /* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#undef JEMALLOC_CACHE_OBLIVIOUS + +/* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ #undef JEMALLOC_ZONE @@ -182,9 +226,7 @@ #undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_FREE -/* - * Define if operating system has alloca.h header. - */ +/* Define if operating system has alloca.h header. */ #undef JEMALLOC_HAS_ALLOCA_H /* C99 restrict keyword supported. */ @@ -202,4 +244,19 @@ /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ #undef LG_SIZEOF_INTMAX_T +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +#undef JEMALLOC_GLIBC_MALLOC_HOOK + +/* glibc memalign hook. */ +#undef JEMALLOC_GLIBC_MEMALIGN_HOOK + +/* Adaptive mutex support in pthreads. 
*/ +#undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +#undef JEMALLOC_EXPORT + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h index 4e2392302..a08ba772e 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h @@ -39,9 +39,15 @@ #endif #define ZU(z) ((size_t)z) +#define ZI(z) ((ssize_t)z) #define QU(q) ((uint64_t)q) #define QI(q) ((int64_t)q) +#define KZU(z) ZU(z##ULL) +#define KZI(z) ZI(z##LL) +#define KQU(q) QU(q##ULL) +#define KQI(q) QI(q##LL) + #ifndef __DECONST # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) #endif diff --git a/deps/jemalloc/include/jemalloc/internal/mutex.h b/deps/jemalloc/include/jemalloc/internal/mutex.h index de44e1435..f051f2917 100644 --- a/deps/jemalloc/include/jemalloc/internal/mutex.h +++ b/deps/jemalloc/include/jemalloc/internal/mutex.h @@ -10,7 +10,7 @@ typedef struct malloc_mutex_s malloc_mutex_t; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) # define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} #else -# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ +# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP # define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} @@ -26,7 +26,11 @@ typedef struct malloc_mutex_s malloc_mutex_t; struct malloc_mutex_s { #ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + SRWLOCK lock; +# else CRITICAL_SECTION lock; +# endif #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -70,7 +74,11 @@ malloc_mutex_lock(malloc_mutex_t *mutex) if (isthreaded) { #ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + AcquireSRWLockExclusive(&mutex->lock); +# else EnterCriticalSection(&mutex->lock); +# endif #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else @@ -85,7 +93,11 @@ malloc_mutex_unlock(malloc_mutex_t *mutex) if (isthreaded) { #ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + ReleaseSRWLockExclusive(&mutex->lock); +# else LeaveCriticalSection(&mutex->lock); +# endif #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else diff --git a/deps/jemalloc/include/jemalloc/internal/pages.h b/deps/jemalloc/include/jemalloc/internal/pages.h new file mode 100644 index 000000000..da7eb9686 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/pages.h @@ -0,0 +1,26 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *pages_map(void *addr, size_t size); +void pages_unmap(void *addr, size_t size); +void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, + size_t size); +bool pages_commit(void *addr, size_t size); +bool pages_decommit(void *addr, size_t size); +bool pages_purge(void *addr, size_t size); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + 
+#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt b/deps/jemalloc/include/jemalloc/internal/private_symbols.txt index 93516d242..a90021aa6 100644 --- a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt +++ b/deps/jemalloc/include/jemalloc/internal/private_symbols.txt @@ -1,44 +1,76 @@ -a0calloc -a0free +a0dalloc +a0get a0malloc +arena_aalloc arena_alloc_junk_small arena_bin_index arena_bin_info +arena_bitselm_get arena_boot +arena_choose +arena_choose_hard +arena_chunk_alloc_huge +arena_chunk_cache_maybe_insert +arena_chunk_cache_maybe_remove +arena_chunk_dalloc_huge +arena_chunk_ralloc_huge_expand +arena_chunk_ralloc_huge_shrink +arena_chunk_ralloc_huge_similar +arena_cleanup arena_dalloc arena_dalloc_bin -arena_dalloc_bin_locked +arena_dalloc_bin_junked_locked arena_dalloc_junk_large arena_dalloc_junk_small arena_dalloc_large -arena_dalloc_large_locked +arena_dalloc_large_junked_locked arena_dalloc_small arena_dss_prec_get arena_dss_prec_set +arena_get +arena_get_hard +arena_init +arena_lg_dirty_mult_default_get +arena_lg_dirty_mult_default_set +arena_lg_dirty_mult_get +arena_lg_dirty_mult_set arena_malloc arena_malloc_large arena_malloc_small arena_mapbits_allocated_get arena_mapbits_binind_get +arena_mapbits_decommitted_get arena_mapbits_dirty_get arena_mapbits_get +arena_mapbits_internal_set arena_mapbits_large_binind_set arena_mapbits_large_get arena_mapbits_large_set arena_mapbits_large_size_get +arena_mapbitsp_get +arena_mapbitsp_read +arena_mapbitsp_write +arena_mapbits_size_decode +arena_mapbits_size_encode arena_mapbits_small_runind_get arena_mapbits_small_set arena_mapbits_unallocated_set arena_mapbits_unallocated_size_get arena_mapbits_unallocated_size_set arena_mapbits_unzeroed_get -arena_mapbits_unzeroed_set -arena_mapbitsp_get -arena_mapbitsp_read -arena_mapbitsp_write -arena_mapp_get -arena_maxclass +arena_maxrun +arena_maybe_purge +arena_metadata_allocated_add +arena_metadata_allocated_get +arena_metadata_allocated_sub +arena_migrate +arena_miscelm_get +arena_miscelm_to_pageind +arena_miscelm_to_rpages +arena_nbound arena_new +arena_node_alloc +arena_node_dalloc arena_palloc arena_postfork_child arena_postfork_parent @@ -46,50 +78,47 @@ arena_prefork arena_prof_accum arena_prof_accum_impl arena_prof_accum_locked -arena_prof_ctx_get -arena_prof_ctx_set arena_prof_promoted +arena_prof_tctx_get +arena_prof_tctx_reset +arena_prof_tctx_set arena_ptr_small_binind_get arena_purge_all arena_quarantine_junk_small arena_ralloc arena_ralloc_junk_large arena_ralloc_no_move +arena_rd_to_miscelm arena_redzone_corruption arena_run_regind +arena_run_to_miscelm arena_salloc +arenas_cache_bypass_cleanup +arenas_cache_cleanup +arena_sdalloc arena_stats_merge arena_tcache_fill_small -arenas -arenas_booted -arenas_cleanup -arenas_extend -arenas_initialized -arenas_lock -arenas_tls -arenas_tsd -arenas_tsd_boot -arenas_tsd_cleanup_wrapper -arenas_tsd_get -arenas_tsd_get_wrapper -arenas_tsd_init_head -arenas_tsd_set +atomic_add_p atomic_add_u atomic_add_uint32 atomic_add_uint64 atomic_add_z +atomic_cas_p +atomic_cas_u +atomic_cas_uint32 +atomic_cas_uint64 +atomic_cas_z +atomic_sub_p atomic_sub_u atomic_sub_uint32 atomic_sub_uint64 atomic_sub_z base_alloc base_boot -base_calloc -base_node_alloc -base_node_dealloc base_postfork_child base_postfork_parent base_prefork +base_stats_get bitmap_full bitmap_get bitmap_info_init @@ -99,49 +128,54 @@ 
bitmap_set bitmap_sfu bitmap_size bitmap_unset +bootstrap_calloc +bootstrap_free +bootstrap_malloc bt_init buferror -choose_arena -choose_arena_hard -chunk_alloc +chunk_alloc_base +chunk_alloc_cache chunk_alloc_dss chunk_alloc_mmap +chunk_alloc_wrapper chunk_boot -chunk_dealloc -chunk_dealloc_mmap +chunk_dalloc_arena +chunk_dalloc_cache +chunk_dalloc_mmap +chunk_dalloc_wrapper +chunk_deregister chunk_dss_boot chunk_dss_postfork_child chunk_dss_postfork_parent chunk_dss_prec_get chunk_dss_prec_set chunk_dss_prefork +chunk_hooks_default +chunk_hooks_get +chunk_hooks_set chunk_in_dss +chunk_lookup chunk_npages chunk_postfork_child chunk_postfork_parent chunk_prefork -chunk_unmap -chunks_mtx -chunks_rtree +chunk_purge_arena +chunk_purge_wrapper +chunk_register chunksize chunksize_mask -ckh_bucket_search +chunks_rtree ckh_count ckh_delete -ckh_evict_reloc_insert ckh_insert -ckh_isearch ckh_iter ckh_new ckh_pointer_hash ckh_pointer_keycomp -ckh_rebuild ckh_remove ckh_search ckh_string_hash ckh_string_keycomp -ckh_try_bucket_insert -ckh_try_insert ctl_boot ctl_bymib ctl_byname @@ -150,6 +184,23 @@ ctl_postfork_child ctl_postfork_parent ctl_prefork dss_prec_names +extent_node_achunk_get +extent_node_achunk_set +extent_node_addr_get +extent_node_addr_set +extent_node_arena_get +extent_node_arena_set +extent_node_dirty_insert +extent_node_dirty_linkage_init +extent_node_dirty_remove +extent_node_init +extent_node_prof_tctx_get +extent_node_prof_tctx_set +extent_node_size_get +extent_node_size_set +extent_node_zeroed_get +extent_node_zeroed_set +extent_tree_ad_empty extent_tree_ad_first extent_tree_ad_insert extent_tree_ad_iter @@ -166,6 +217,7 @@ extent_tree_ad_reverse_iter extent_tree_ad_reverse_iter_recurse extent_tree_ad_reverse_iter_start extent_tree_ad_search +extent_tree_szad_empty extent_tree_szad_first extent_tree_szad_insert extent_tree_szad_iter @@ -193,45 +245,49 @@ hash_rotl_64 hash_x64_128 hash_x86_128 hash_x86_32 -huge_allocated -huge_boot +huge_aalloc huge_dalloc huge_dalloc_junk -huge_dss_prec_get huge_malloc -huge_mtx -huge_ndalloc -huge_nmalloc huge_palloc -huge_postfork_child -huge_postfork_parent -huge_prefork -huge_prof_ctx_get -huge_prof_ctx_set +huge_prof_tctx_get +huge_prof_tctx_reset +huge_prof_tctx_set huge_ralloc huge_ralloc_no_move huge_salloc -iallocm +iaalloc +iallocztm icalloc icalloct idalloc idalloct +idalloctm imalloc imalloct +index2size +index2size_compute +index2size_lookup +index2size_tab +in_valgrind ipalloc ipalloct +ipallocztm iqalloc -iqalloct iralloc iralloct iralloct_realign isalloc +isdalloct +isqalloc isthreaded ivsalloc ixalloc jemalloc_postfork_child jemalloc_postfork_parent jemalloc_prefork +large_maxclass +lg_floor malloc_cprintf malloc_mutex_init malloc_mutex_lock @@ -242,7 +298,8 @@ malloc_mutex_unlock malloc_printf malloc_snprintf malloc_strtoumax -malloc_tsd_boot +malloc_tsd_boot0 +malloc_tsd_boot1 malloc_tsd_cleanup_register malloc_tsd_dalloc malloc_tsd_malloc @@ -251,16 +308,18 @@ malloc_vcprintf malloc_vsnprintf malloc_write map_bias +map_misc_offset mb_write mutex_boot -narenas_auto -narenas_total +narenas_cache_cleanup narenas_total_get ncpus nhbins opt_abort opt_dss opt_junk +opt_junk_alloc +opt_junk_free opt_lg_chunk opt_lg_dirty_mult opt_lg_prof_interval @@ -274,84 +333,99 @@ opt_prof_final opt_prof_gdump opt_prof_leak opt_prof_prefix +opt_prof_thread_active_init opt_quarantine opt_redzone opt_stats_print opt_tcache opt_utrace -opt_valgrind opt_xmalloc opt_zero p2rz +pages_commit +pages_decommit +pages_map pages_purge +pages_trim 
+pages_unmap pow2_ceil +prof_active_get +prof_active_get_unlocked +prof_active_set +prof_alloc_prep +prof_alloc_rollback prof_backtrace prof_boot0 prof_boot1 prof_boot2 -prof_bt_count -prof_ctx_get -prof_ctx_set +prof_dump_header prof_dump_open prof_free +prof_free_sampled_object prof_gdump +prof_gdump_get +prof_gdump_get_unlocked +prof_gdump_set +prof_gdump_val prof_idump prof_interval prof_lookup prof_malloc +prof_malloc_sample_object prof_mdump prof_postfork_child prof_postfork_parent prof_prefork -prof_promote prof_realloc +prof_reset prof_sample_accum_update prof_sample_threshold_update -prof_tdata_booted +prof_tctx_get +prof_tctx_reset +prof_tctx_set prof_tdata_cleanup prof_tdata_get prof_tdata_init -prof_tdata_initialized -prof_tdata_tls -prof_tdata_tsd -prof_tdata_tsd_boot -prof_tdata_tsd_cleanup_wrapper -prof_tdata_tsd_get -prof_tdata_tsd_get_wrapper -prof_tdata_tsd_init_head -prof_tdata_tsd_set +prof_tdata_reinit +prof_thread_active_get +prof_thread_active_init_get +prof_thread_active_init_set +prof_thread_active_set +prof_thread_name_get +prof_thread_name_set quarantine quarantine_alloc_hook -quarantine_boot -quarantine_booted +quarantine_alloc_hook_work quarantine_cleanup -quarantine_init -quarantine_tls -quarantine_tsd -quarantine_tsd_boot -quarantine_tsd_cleanup_wrapper -quarantine_tsd_get -quarantine_tsd_get_wrapper -quarantine_tsd_init_head -quarantine_tsd_set register_zone +rtree_child_read +rtree_child_read_hard +rtree_child_tryread rtree_delete rtree_get -rtree_get_locked rtree_new -rtree_postfork_child -rtree_postfork_parent -rtree_prefork +rtree_node_valid rtree_set +rtree_start_level +rtree_subkey +rtree_subtree_read +rtree_subtree_read_hard +rtree_subtree_tryread +rtree_val_read +rtree_val_write s2u +s2u_compute +s2u_lookup sa2u set_errno -small_size2bin +size2index +size2index_compute +size2index_lookup +size2index_tab stats_cactive stats_cactive_add stats_cactive_get stats_cactive_sub -stats_chunks stats_print tcache_alloc_easy tcache_alloc_large @@ -359,55 +433,67 @@ tcache_alloc_small tcache_alloc_small_hard tcache_arena_associate tcache_arena_dissociate +tcache_arena_reassociate tcache_bin_flush_large tcache_bin_flush_small tcache_bin_info -tcache_boot0 -tcache_boot1 -tcache_booted +tcache_boot +tcache_cleanup tcache_create tcache_dalloc_large tcache_dalloc_small -tcache_destroy -tcache_enabled_booted +tcache_enabled_cleanup tcache_enabled_get -tcache_enabled_initialized tcache_enabled_set -tcache_enabled_tls -tcache_enabled_tsd -tcache_enabled_tsd_boot -tcache_enabled_tsd_cleanup_wrapper -tcache_enabled_tsd_get -tcache_enabled_tsd_get_wrapper -tcache_enabled_tsd_init_head -tcache_enabled_tsd_set tcache_event tcache_event_hard tcache_flush tcache_get -tcache_initialized +tcache_get_hard tcache_maxclass +tcaches tcache_salloc +tcaches_create +tcaches_destroy +tcaches_flush +tcaches_get tcache_stats_merge -tcache_thread_cleanup -tcache_tls -tcache_tsd -tcache_tsd_boot -tcache_tsd_cleanup_wrapper -tcache_tsd_get -tcache_tsd_get_wrapper -tcache_tsd_init_head -tcache_tsd_set -thread_allocated_booted -thread_allocated_initialized -thread_allocated_tls -thread_allocated_tsd -thread_allocated_tsd_boot -thread_allocated_tsd_cleanup_wrapper -thread_allocated_tsd_get -thread_allocated_tsd_get_wrapper -thread_allocated_tsd_init_head -thread_allocated_tsd_set +thread_allocated_cleanup +thread_deallocated_cleanup +tsd_arena_get +tsd_arena_set +tsd_boot +tsd_boot0 +tsd_boot1 +tsd_booted +tsd_cleanup +tsd_cleanup_wrapper +tsd_fetch +tsd_get +tsd_wrapper_get +tsd_wrapper_set 
+tsd_initialized tsd_init_check_recursion tsd_init_finish +tsd_init_head +tsd_nominal +tsd_quarantine_get +tsd_quarantine_set +tsd_set +tsd_tcache_enabled_get +tsd_tcache_enabled_set +tsd_tcache_get +tsd_tcache_set +tsd_tls +tsd_tsd +tsd_prof_tdata_get +tsd_prof_tdata_set +tsd_thread_allocated_get +tsd_thread_allocated_set +tsd_thread_deallocated_get +tsd_thread_deallocated_set u2rz +valgrind_freelike_block +valgrind_make_mem_defined +valgrind_make_mem_noaccess +valgrind_make_mem_undefined diff --git a/deps/jemalloc/include/jemalloc/internal/prng.h b/deps/jemalloc/include/jemalloc/internal/prng.h index 7b2b06512..216d0ef47 100644 --- a/deps/jemalloc/include/jemalloc/internal/prng.h +++ b/deps/jemalloc/include/jemalloc/internal/prng.h @@ -15,7 +15,7 @@ * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. * * This choice of m has the disadvantage that the quality of the bits is - * proportional to bit position. For example. the lowest bit has a cycle of 2, + * proportional to bit position. For example, the lowest bit has a cycle of 2, * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. * @@ -26,22 +26,22 @@ * const uint32_t a, c : See above discussion. */ #define prng32(r, lg_range, state, a, c) do { \ - assert(lg_range > 0); \ - assert(lg_range <= 32); \ + assert((lg_range) > 0); \ + assert((lg_range) <= 32); \ \ r = (state * (a)) + (c); \ state = r; \ - r >>= (32 - lg_range); \ + r >>= (32 - (lg_range)); \ } while (false) /* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ #define prng64(r, lg_range, state, a, c) do { \ - assert(lg_range > 0); \ - assert(lg_range <= 64); \ + assert((lg_range) > 0); \ + assert((lg_range) <= 64); \ \ r = (state * (a)) + (c); \ state = r; \ - r >>= (64 - lg_range); \ + r >>= (64 - (lg_range)); \ } while (false) #endif /* JEMALLOC_H_TYPES */ diff --git a/deps/jemalloc/include/jemalloc/internal/prof.h b/deps/jemalloc/include/jemalloc/internal/prof.h index 6f162d21e..e5198c3e8 100644 --- a/deps/jemalloc/include/jemalloc/internal/prof.h +++ b/deps/jemalloc/include/jemalloc/internal/prof.h @@ -3,8 +3,8 @@ typedef struct prof_bt_s prof_bt_t; typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_thr_cnt_s prof_thr_cnt_t; -typedef struct prof_ctx_s prof_ctx_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_gctx_s prof_gctx_t; typedef struct prof_tdata_s prof_tdata_t; /* Option defaults. */ @@ -23,9 +23,6 @@ typedef struct prof_tdata_s prof_tdata_t; */ #define PROF_BT_MAX 128 -/* Maximum number of backtraces to store in each per thread LRU cache. */ -#define PROF_TCMAX 1024 - /* Initial hash table size. */ #define PROF_CKH_MINITEMS 64 @@ -36,12 +33,18 @@ typedef struct prof_tdata_s prof_tdata_t; #define PROF_PRINTF_BUFSIZE 128 /* - * Number of mutexes shared among all ctx's. No space is allocated for these + * Number of mutexes shared among all gctx's. No space is allocated for these * unless profiling is enabled, so it's okay to over-provision. */ #define PROF_NCTX_LOCKS 1024 /* + * Number of mutexes shared among all tdata's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NTDATA_LOCKS 256 + +/* * prof_tdata pointers close to NULL are used to encode state information that * is used for cleaning up during thread shutdown. */ @@ -63,141 +66,186 @@ struct prof_bt_s { /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. 
*/ typedef struct { prof_bt_t *bt; - unsigned nignore; unsigned max; } prof_unwind_data_t; #endif struct prof_cnt_s { - /* - * Profiling counters. An allocation/deallocation pair can operate on - * different prof_thr_cnt_t objects that are linked into the same - * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go - * negative. In principle it is possible for the *bytes counters to - * overflow/underflow, but a general solution would require something - * like 128-bit counters; this implementation doesn't bother to solve - * that problem. - */ - int64_t curobjs; - int64_t curbytes; + /* Profiling counters. */ + uint64_t curobjs; + uint64_t curbytes; uint64_t accumobjs; uint64_t accumbytes; }; -struct prof_thr_cnt_s { - /* Linkage into prof_ctx_t's cnts_ql. */ - ql_elm(prof_thr_cnt_t) cnts_link; +typedef enum { + prof_tctx_state_initializing, + prof_tctx_state_nominal, + prof_tctx_state_dumping, + prof_tctx_state_purgatory /* Dumper must finish destroying. */ +} prof_tctx_state_t; - /* Linkage into thread's LRU. */ - ql_elm(prof_thr_cnt_t) lru_link; +struct prof_tctx_s { + /* Thread data for thread that performed the allocation. */ + prof_tdata_t *tdata; /* - * Associated context. If a thread frees an object that it did not - * allocate, it is possible that the context is not cached in the - * thread's hash table, in which case it must be able to look up the - * context, insert a new prof_thr_cnt_t into the thread's hash table, - * and link it into the prof_ctx_t's cnts_ql. + * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be + * defunct during teardown. */ - prof_ctx_t *ctx; + uint64_t thr_uid; + uint64_t thr_discrim; + + /* Profiling counters, protected by tdata->lock. */ + prof_cnt_t cnts; + + /* Associated global context. */ + prof_gctx_t *gctx; /* - * Threads use memory barriers to update the counters. Since there is - * only ever one writer, the only challenge is for the reader to get a - * consistent read of the counters. - * - * The writer uses this series of operations: - * - * 1) Increment epoch to an odd number. - * 2) Update counters. - * 3) Increment epoch to an even number. - * - * The reader must assure 1) that the epoch is even while it reads the - * counters, and 2) that the epoch doesn't change between the time it - * starts and finishes reading the counters. + * UID that distinguishes multiple tctx's created by the same thread, + * but coexisting in gctx->tctxs. There are two ways that such + * coexistence can occur: + * - A dumper thread can cause a tctx to be retained in the purgatory + * state. + * - Although a single "producer" thread must create all tctx's which + * share the same thr_uid, multiple "consumers" can each concurrently + * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only + * gets called once each time cnts.cur{objs,bytes} drop to 0, but this + * threshold can be hit again before the first consumer finishes + * executing prof_tctx_destroy(). */ - unsigned epoch; + uint64_t tctx_uid; - /* Profiling counters. */ - prof_cnt_t cnts; -}; + /* Linkage into gctx's tctxs. */ + rb_node(prof_tctx_t) tctx_link; -struct prof_ctx_s { - /* Associated backtrace. */ - prof_bt_t *bt; + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + + /* Current dump-related state, protected by gctx->lock. */ + prof_tctx_state_t state; + + /* + * Copy of cnts snapshotted during early dump phase, protected by + * dump_mtx. 
+ */ + prof_cnt_t dump_cnts; +}; +typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; - /* Protects nlimbo, cnt_merged, and cnts_ql. */ +struct prof_gctx_s { + /* Protects nlimbo, cnt_summed, and tctxs. */ malloc_mutex_t *lock; /* - * Number of threads that currently cause this ctx to be in a state of + * Number of threads that currently cause this gctx to be in a state of * limbo due to one of: - * - Initializing per thread counters associated with this ctx. - * - Preparing to destroy this ctx. - * - Dumping a heap profile that includes this ctx. + * - Initializing this gctx. + * - Initializing per thread counters associated with this gctx. + * - Preparing to destroy this gctx. + * - Dumping a heap profile that includes this gctx. * nlimbo must be 1 (single destroyer) in order to safely destroy the - * ctx. + * gctx. */ unsigned nlimbo; - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* When threads exit, they merge their stats into cnt_merged. */ - prof_cnt_t cnt_merged; - /* - * List of profile counters, one for each thread that has allocated in + * Tree of profile counters, one for each thread that has allocated in * this context. */ - ql_head(prof_thr_cnt_t) cnts_ql; + prof_tctx_tree_t tctxs; + + /* Linkage for tree of contexts to be dumped. */ + rb_node(prof_gctx_t) dump_link; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Associated backtrace. */ + prof_bt_t bt; - /* Linkage for list of contexts to be dumped. */ - ql_elm(prof_ctx_t) dump_link; + /* Backtrace vector, variable size, referred to by bt. */ + void *vec[1]; }; -typedef ql_head(prof_ctx_t) prof_ctx_list_t; +typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; struct prof_tdata_s { + malloc_mutex_t *lock; + + /* Monotonically increasing unique thread identifier. */ + uint64_t thr_uid; + /* - * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a - * cache of backtraces, with associated thread-specific prof_thr_cnt_t - * objects. Other threads may read the prof_thr_cnt_t contents, but no - * others will ever write them. - * - * Upon thread exit, the thread must merge all the prof_thr_cnt_t - * counter data into the associated prof_ctx_t objects, and unlink/free - * the prof_thr_cnt_t objects. + * Monotonically increasing discriminator among tdata structures + * associated with the same thr_uid. */ - ckh_t bt2cnt; + uint64_t thr_discrim; - /* LRU for contents of bt2cnt. */ - ql_head(prof_thr_cnt_t) lru_ql; + /* Included in heap profile dumps if non-NULL. */ + char *thread_name; - /* Backtrace vector, used for calls to prof_backtrace(). */ - void **vec; + bool attached; + bool expired; + + rb_node(prof_tdata_t) tdata_link; + + /* + * Counter used to initialize prof_tctx_t's tctx_uid. No locking is + * necessary when incrementing this field, because only one thread ever + * does so. + */ + uint64_t tctx_uid_next; + + /* + * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks + * backtraces for which it has non-zero allocation/deallocation counters + * associated with thread-specific prof_tctx_t objects. Other threads + * may write to prof_tctx_t contents when freeing associated objects. + */ + ckh_t bt2tctx; /* Sampling state. */ uint64_t prng_state; - uint64_t threshold; - uint64_t accum; + uint64_t bytes_until_sample; /* State used to avoid dumping while operating on prof internals. */ bool enq; bool enq_idump; bool enq_gdump; + + /* + * Set to true during an early dump phase for tdata's which are + * currently being dumped. 
New threads' tdata's have this initialized + * to false so that they aren't accidentally included in later dump + * phases. + */ + bool dumping; + + /* + * True if profiling is active for this tdata's thread + * (thread.prof.active mallctl). + */ + bool active; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void *vec[PROF_BT_MAX]; }; +typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS extern bool opt_prof; -/* - * Even if opt_prof is true, sampling can be temporarily disabled by setting - * opt_prof_active to false. No locking is used when updating opt_prof_active, - * so there are no guarantees regarding how long it will take for all threads - * to notice state changes. - */ extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ @@ -211,6 +259,12 @@ extern char opt_prof_prefix[ #endif 1]; +/* Accessed via prof_active_[gs]et{_unlocked,}(). */ +extern bool prof_active; + +/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ +extern bool prof_gdump_val; + /* * Profile dump interval, measured in bytes allocated. Each arena triggers a * profile dump when it reaches this threshold. The effect is that the @@ -221,391 +275,269 @@ extern char opt_prof_prefix[ extern uint64_t prof_interval; /* - * If true, promote small sampled objects to large objects, since small run - * headers do not have embedded profile context pointers. + * Initialized as opt_lg_prof_sample, and potentially modified during profiling + * resets. 
*/ -extern bool prof_promote; +extern size_t lg_prof_sample; +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); +void prof_malloc_sample_object(const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt, unsigned nignore); -prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); +void prof_backtrace(prof_bt_t *bt); +prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); #ifdef JEMALLOC_JET +size_t prof_tdata_count(void); size_t prof_bt_count(void); +const prof_cnt_t *prof_cnt_all(void); typedef int (prof_dump_open_t)(bool, const char *); extern prof_dump_open_t *prof_dump_open; +typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *); +extern prof_dump_header_t *prof_dump_header; #endif void prof_idump(void); bool prof_mdump(const char *filename); void prof_gdump(void); -prof_tdata_t *prof_tdata_init(void); -void prof_tdata_cleanup(void *arg); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); +void prof_reset(tsd_t *tsd, size_t lg_sample); +void prof_tdata_cleanup(tsd_t *tsd); +const char *prof_thread_name_get(void); +bool prof_active_get(void); +bool prof_active_set(bool active); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(void); +bool prof_thread_active_set(bool active); +bool prof_thread_active_init_get(void); +bool prof_thread_active_init_set(bool active_init); +bool prof_gdump_get(void); +bool prof_gdump_set(bool active); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(void); void prof_prefork(void); void prof_postfork_parent(void); void prof_postfork_child(void); +void prof_sample_threshold_update(prof_tdata_t *tdata); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#define PROF_ALLOC_PREP(nignore, size, ret) do { \ - prof_tdata_t *prof_tdata; \ - prof_bt_t bt; \ - \ - assert(size == s2u(size)); \ - \ - prof_tdata = prof_tdata_get(true); \ - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \ - if (prof_tdata != NULL) \ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - else \ - ret = NULL; \ - break; \ - } \ - \ - if (opt_prof_active == false) { \ - /* Sampling is currently inactive, so avoid sampling. */\ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } else if (opt_lg_prof_sample == 0) { \ - /* Don't bother with sampling logic, since sampling */\ - /* interval is 1. */\ - bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore); \ - ret = prof_lookup(&bt); \ - } else { \ - if (prof_tdata->threshold == 0) { \ - /* Initialize. Seed the prng differently for */\ - /* each thread. */\ - prof_tdata->prng_state = \ - (uint64_t)(uintptr_t)&size; \ - prof_sample_threshold_update(prof_tdata); \ - } \ - \ - /* Determine whether to capture a backtrace based on */\ - /* whether size is enough for prof_accum to reach */\ - /* prof_tdata->threshold. However, delay updating */\ - /* these variables until prof_{m,re}alloc(), because */\ - /* we don't know for sure that the allocation will */\ - /* succeed. */\ - /* */\ - /* Use subtraction rather than addition to avoid */\ - /* potential integer overflow. 
*/\ - if (size >= prof_tdata->threshold - \ - prof_tdata->accum) { \ - bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore); \ - ret = prof_lookup(&bt); \ - } else \ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } \ -} while (0) - #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) - -prof_tdata_t *prof_tdata_get(bool create); -void prof_sample_threshold_update(prof_tdata_t *prof_tdata); -prof_ctx_t *prof_ctx_get(const void *ptr); -void prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); -bool prof_sample_accum_update(size_t size); -void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt); -void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, - size_t old_usize, prof_ctx_t *old_ctx); -void prof_free(const void *ptr, size_t size); +bool prof_active_get_unlocked(void); +bool prof_gdump_get_unlocked(void); +prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); +bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, + prof_tdata_t **tdata_out); +prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, + bool update); +prof_tctx_t *prof_tctx_get(const void *ptr); +void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *tctx); +void prof_malloc_sample_object(const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, + size_t old_usize, prof_tctx_t *old_tctx); +void prof_free(tsd_t *tsd, const void *ptr, size_t usize); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ -malloc_tsd_externs(prof_tdata, prof_tdata_t *) -malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, - prof_tdata_cleanup) +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) +{ + + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. + */ + return (prof_active); +} -JEMALLOC_INLINE prof_tdata_t * -prof_tdata_get(bool create) +JEMALLOC_ALWAYS_INLINE bool +prof_gdump_get_unlocked(void) { - prof_tdata_t *prof_tdata; + + /* + * No locking is used when reading prof_gdump_val in the fast path, so + * there are no guarantees regarding how long it will take for all + * threads to notice state changes. 
+ */ + return (prof_gdump_val); +} + +JEMALLOC_ALWAYS_INLINE prof_tdata_t * +prof_tdata_get(tsd_t *tsd, bool create) +{ + prof_tdata_t *tdata; cassert(config_prof); - prof_tdata = *prof_tdata_tsd_get(); - if (create && prof_tdata == NULL) - prof_tdata = prof_tdata_init(); + tdata = tsd_prof_tdata_get(tsd); + if (create) { + if (unlikely(tdata == NULL)) { + if (tsd_nominal(tsd)) { + tdata = prof_tdata_init(tsd); + tsd_prof_tdata_set(tsd, tdata); + } + } else if (unlikely(tdata->expired)) { + tdata = prof_tdata_reinit(tsd, tdata); + tsd_prof_tdata_set(tsd, tdata); + } + assert(tdata == NULL || tdata->attached); + } - return (prof_tdata); + return (tdata); } -JEMALLOC_INLINE void -prof_sample_threshold_update(prof_tdata_t *prof_tdata) +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_tctx_get(const void *ptr) { - /* - * The body of this function is compiled out unless heap profiling is - * enabled, so that it is possible to compile jemalloc with floating - * point support completely disabled. Avoiding floating point code is - * important on memory-constrained systems, but it also enables a - * workaround for versions of glibc that don't properly save/restore - * floating point registers during dynamic lazy symbol loading (which - * internally calls into whatever malloc implementation happens to be - * integrated into the application). Note that some compilers (e.g. - * gcc 4.8) may use floating point registers for fast memory moves, so - * jemalloc must be compiled with such optimizations disabled (e.g. - * -mno-sse) in order for the workaround to be complete. - */ -#ifdef JEMALLOC_PROF - uint64_t r; - double u; cassert(config_prof); + assert(ptr != NULL); - /* - * Compute sample threshold as a geometrically distributed random - * variable with mean (2^opt_lg_prof_sample). - * - * __ __ - * | log(u) | 1 - * prof_tdata->threshold = | -------- |, where p = ------------------- - * | log(1-p) | opt_lg_prof_sample - * 2 - * - * For more information on the math, see: - * - * Non-Uniform Random Variate Generation - * Luc Devroye - * Springer-Verlag, New York, 1986 - * pp 500 - * (http://luc.devroye.org/rnbookindex.html) - */ - prng64(r, 53, prof_tdata->prng_state, - UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); - u = (double)r * (1.0/9007199254740992.0L); - prof_tdata->threshold = (uint64_t)(log(u) / - log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) - + (uint64_t)1U; -#endif + return (arena_prof_tctx_get(ptr)); } -JEMALLOC_INLINE prof_ctx_t * -prof_ctx_get(const void *ptr) +JEMALLOC_ALWAYS_INLINE void +prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_ctx_t *ret; - arena_chunk_t *chunk; cassert(config_prof); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - ret = arena_prof_ctx_get(ptr); - } else - ret = huge_prof_ctx_get(ptr); - - return (ret); + arena_prof_tctx_set(ptr, usize, tctx); } -JEMALLOC_INLINE void -prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx) +JEMALLOC_ALWAYS_INLINE void +prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *old_tctx) { - arena_chunk_t *chunk; cassert(config_prof); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. 
*/ - arena_prof_ctx_set(ptr, usize, ctx); - } else - huge_prof_ctx_set(ptr, ctx); + arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx); } -JEMALLOC_INLINE bool -prof_sample_accum_update(size_t size) +JEMALLOC_ALWAYS_INLINE bool +prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, + prof_tdata_t **tdata_out) { - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; cassert(config_prof); - /* Sampling logic is unnecessary if the interval is 1. */ - assert(opt_lg_prof_sample != 0); - prof_tdata = prof_tdata_get(false); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = prof_tdata_get(tsd, true); + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = NULL; + + if (tdata_out != NULL) + *tdata_out = tdata; + + if (tdata == NULL) return (true); - /* Take care to avoid integer overflow. */ - if (size >= prof_tdata->threshold - prof_tdata->accum) { - prof_tdata->accum -= (prof_tdata->threshold - size); - /* Compute new sample threshold. */ - prof_sample_threshold_update(prof_tdata); - while (prof_tdata->accum >= prof_tdata->threshold) { - prof_tdata->accum -= prof_tdata->threshold; - prof_sample_threshold_update(prof_tdata); - } - return (false); - } else { - prof_tdata->accum += size; + if (tdata->bytes_until_sample >= usize) { + if (update) + tdata->bytes_until_sample -= usize; return (true); + } else { + /* Compute new sample threshold. */ + if (update) + prof_sample_threshold_update(tdata); + return (!tdata->active); } } -JEMALLOC_INLINE void -prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) +{ + prof_tctx_t *ret; + prof_tdata_t *tdata; + prof_bt_t bt; + + assert(usize == s2u(usize)); + + if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, + &tdata))) + ret = (prof_tctx_t *)(uintptr_t)1U; + else { + bt_init(&bt, tdata->vec); + prof_backtrace(&bt); + ret = prof_lookup(tsd, &bt); + } + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void +prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); assert(usize == isalloc(ptr, true)); - if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(usize)) { - /* - * Don't sample. For malloc()-like allocation, it is - * always possible to tell in advance how large an - * object's usable size will be, so there should never - * be a difference between the usize passed to - * PROF_ALLOC_PREP() and prof_malloc(). 
- */ - assert((uintptr_t)cnt == (uintptr_t)1U); - } - } - - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, usize, cnt->ctx); - - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - cnt->cnts.curobjs++; - cnt->cnts.curbytes += usize; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += usize; - } - /*********/ - mb_write(); - /*********/ - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else - prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_malloc_sample_object(ptr, usize, tctx); + else + prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } -JEMALLOC_INLINE void -prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, - size_t old_usize, prof_ctx_t *old_ctx) +JEMALLOC_ALWAYS_INLINE void +prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, + bool prof_active, bool updated, const void *old_ptr, size_t old_usize, + prof_tctx_t *old_tctx) { - prof_thr_cnt_t *told_cnt; + bool sampled, old_sampled; cassert(config_prof); - assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); + assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - if (ptr != NULL) { + if (prof_active && !updated && ptr != NULL) { assert(usize == isalloc(ptr, true)); - if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(usize)) { - /* - * Don't sample. The usize passed to - * PROF_ALLOC_PREP() was larger than what - * actually got allocated, so a backtrace was - * captured for this allocation, even though - * its actual usize was insufficient to cross - * the sample threshold. - */ - cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - } - } - } - - if ((uintptr_t)old_ctx > (uintptr_t)1U) { - told_cnt = prof_lookup(old_ctx->bt); - if (told_cnt == NULL) { + if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* - * It's too late to propagate OOM for this realloc(), - * so operate directly on old_cnt->ctx->cnt_merged. + * Don't sample. The usize passed to prof_alloc_prep() + * was larger than what actually got allocated, so a + * backtrace was captured for this allocation, even + * though its actual usize was insufficient to cross the + * sample threshold. */ - malloc_mutex_lock(old_ctx->lock); - old_ctx->cnt_merged.curobjs--; - old_ctx->cnt_merged.curbytes -= old_usize; - malloc_mutex_unlock(old_ctx->lock); - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + tctx = (prof_tctx_t *)(uintptr_t)1U; } - } else - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, usize, cnt->ctx); - cnt->epoch++; - } else if (ptr != NULL) - prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U); - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) { - told_cnt->cnts.curobjs--; - told_cnt->cnts.curbytes -= old_usize; } - if ((uintptr_t)cnt > (uintptr_t)1U) { - cnt->cnts.curobjs++; - cnt->cnts.curbytes += usize; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += usize; - } - } - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) - cnt->epoch++; - /*********/ - mb_write(); /* Not strictly necessary. 
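
For context, the new prof_alloc_prep()/prof_malloc() pair above is meant to bracket an allocation: the sample decision is taken before the allocation and the attribution is recorded after it. A hedged sketch of that call protocol (not the actual jemalloc_internal.h allocation path; some_alloc() and the unconditional prof_active argument are placeholders) would be:

/*
 * Assumes usize == s2u(usize) and that some_alloc() returns exactly usize
 * usable bytes, matching the asserts in prof_alloc_prep()/prof_malloc().
 * (The real allocation paths also unwind the sample on failure.)
 */
static void *
prof_wrapped_alloc(tsd_t *tsd, size_t usize)
{
	prof_tctx_t *tctx;
	void *p;

	tctx = prof_alloc_prep(tsd, usize, true, true);
	p = some_alloc(usize);		/* hypothetical allocator */
	if (p == NULL)
		return (NULL);
	prof_malloc(p, usize, tctx);
	return (p);
}
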
*/ + + sampled = ((uintptr_t)tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + + if (unlikely(sampled)) + prof_malloc_sample_object(ptr, usize, tctx); + else + prof_tctx_reset(ptr, usize, old_ptr, old_tctx); + + if (unlikely(old_sampled)) + prof_free_sampled_object(tsd, old_usize, old_tctx); } -JEMALLOC_INLINE void -prof_free(const void *ptr, size_t size) +JEMALLOC_ALWAYS_INLINE void +prof_free(tsd_t *tsd, const void *ptr, size_t usize) { - prof_ctx_t *ctx = prof_ctx_get(ptr); + prof_tctx_t *tctx = prof_tctx_get(ptr); cassert(config_prof); + assert(usize == isalloc(ptr, true)); - if ((uintptr_t)ctx > (uintptr_t)1) { - prof_thr_cnt_t *tcnt; - assert(size == isalloc(ptr, true)); - tcnt = prof_lookup(ctx->bt); - - if (tcnt != NULL) { - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - tcnt->cnts.curobjs--; - tcnt->cnts.curbytes -= size; - /*********/ - mb_write(); - /*********/ - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else { - /* - * OOM during free() cannot be propagated, so operate - * directly on cnt->ctx->cnt_merged. - */ - malloc_mutex_lock(ctx->lock); - ctx->cnt_merged.curobjs--; - ctx->cnt_merged.curbytes -= size; - malloc_mutex_unlock(ctx->lock); - } - } + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_free_sampled_object(tsd, usize, tctx); } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/ql.h b/deps/jemalloc/include/jemalloc/internal/ql.h index f70c5f6f3..1834bb855 100644 --- a/deps/jemalloc/include/jemalloc/internal/ql.h +++ b/deps/jemalloc/include/jemalloc/internal/ql.h @@ -1,6 +1,4 @@ -/* - * List definitions. - */ +/* List definitions. */ #define ql_head(a_type) \ struct { \ a_type *qlh_first; \ diff --git a/deps/jemalloc/include/jemalloc/internal/qr.h b/deps/jemalloc/include/jemalloc/internal/qr.h index 602944b9b..0fbaec25e 100644 --- a/deps/jemalloc/include/jemalloc/internal/qr.h +++ b/deps/jemalloc/include/jemalloc/internal/qr.h @@ -40,8 +40,10 @@ struct { \ (a_qr_b)->a_field.qre_prev = t; \ } while (0) -/* qr_meld() and qr_split() are functionally equivalent, so there's no need to - * have two copies of the code. */ +/* + * qr_meld() and qr_split() are functionally equivalent, so there's no need to + * have two copies of the code. 
+ */ #define qr_split(a_qr_a, a_qr_b, a_field) \ qr_meld((a_qr_a), (a_qr_b), a_field) diff --git a/deps/jemalloc/include/jemalloc/internal/quarantine.h b/deps/jemalloc/include/jemalloc/internal/quarantine.h index 16f677f73..ae607399f 100644 --- a/deps/jemalloc/include/jemalloc/internal/quarantine.h +++ b/deps/jemalloc/include/jemalloc/internal/quarantine.h @@ -29,36 +29,29 @@ struct quarantine_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -quarantine_t *quarantine_init(size_t lg_maxobjs); -void quarantine(void *ptr); -void quarantine_cleanup(void *arg); -bool quarantine_boot(void); +void quarantine_alloc_hook_work(tsd_t *tsd); +void quarantine(tsd_t *tsd, void *ptr); +void quarantine_cleanup(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), quarantine, quarantine_t *) - void quarantine_alloc_hook(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_)) -malloc_tsd_externs(quarantine, quarantine_t *) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, quarantine, quarantine_t *, NULL, - quarantine_cleanup) - JEMALLOC_ALWAYS_INLINE void quarantine_alloc_hook(void) { - quarantine_t *quarantine; + tsd_t *tsd; assert(config_fill && opt_quarantine); - quarantine = *quarantine_tsd_get(); - if (quarantine == NULL) - quarantine_init(LG_MAXOBJS_INIT); + tsd = tsd_fetch(); + if (tsd_quarantine_get(tsd) == NULL) + quarantine_alloc_hook_work(tsd); } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/rb.h b/deps/jemalloc/include/jemalloc/internal/rb.h index 423802eb2..2ca8e5933 100644 --- a/deps/jemalloc/include/jemalloc/internal/rb.h +++ b/deps/jemalloc/include/jemalloc/internal/rb.h @@ -158,6 +158,8 @@ struct { \ #define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ a_attr void \ a_prefix##new(a_rbt_type *rbtree); \ +a_attr bool \ +a_prefix##empty(a_rbt_type *rbtree); \ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree); \ a_attr a_type * \ @@ -198,7 +200,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * int (a_cmp *)(a_type *a_node, a_type *a_other); * ^^^^^^ * or a_key - * Interpretation of comparision function return values: + * Interpretation of comparison function return values: * -1 : a_node < a_other * 0 : a_node == a_other * 1 : a_node > a_other @@ -224,6 +226,13 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * Args: * tree: Pointer to an uninitialized red-black tree object. * + * static bool + * ex_empty(ex_t *tree); + * Description: Determine whether tree is empty. + * Args: + * tree: Pointer to an initialized red-black tree object. + * Ret: True if tree is empty, false otherwise. + * * static ex_node_t * * ex_first(ex_t *tree); * static ex_node_t * @@ -309,6 +318,10 @@ a_attr void \ a_prefix##new(a_rbt_type *rbtree) { \ rb_new(a_type, a_field, rbtree); \ } \ +a_attr bool \ +a_prefix##empty(a_rbt_type *rbtree) { \ + return (rbtree->rbt_root == &rbtree->rbt_nil); \ +} \ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree) { \ a_type *ret; \ @@ -580,7 +593,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ if (left != &rbtree->rbt_nil) { \ /* node has no successor, but it has a left child. */\ /* Splice node out, without losing the left child. 
*/\ - assert(rbtn_red_get(a_type, a_field, node) == false); \ + assert(!rbtn_red_get(a_type, a_field, node)); \ assert(rbtn_red_get(a_type, a_field, left)); \ rbtn_black_set(a_type, a_field, left); \ if (pathp == path) { \ @@ -616,8 +629,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ if (pathp->cmp < 0) { \ rbtn_left_set(a_type, a_field, pathp->node, \ pathp[1].node); \ - assert(rbtn_red_get(a_type, a_field, pathp[1].node) \ - == false); \ + assert(!rbtn_red_get(a_type, a_field, pathp[1].node)); \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *right = rbtn_right_get(a_type, a_field, \ pathp->node); \ @@ -681,7 +693,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_rotate_left(a_type, a_field, pathp->node, \ tnode); \ /* Balance restored, but rotation modified */\ - /* subree root, which may actually be the tree */\ + /* subtree root, which may actually be the tree */\ /* root. */\ if (pathp == path) { \ /* Set root. */ \ @@ -849,7 +861,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ /* Set root. */ \ rbtree->rbt_root = path->node; \ - assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \ + assert(!rbtn_red_get(a_type, a_field, rbtree->rbt_root)); \ } \ a_attr a_type * \ a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ diff --git a/deps/jemalloc/include/jemalloc/internal/rtree.h b/deps/jemalloc/include/jemalloc/internal/rtree.h index bc74769f5..28ae9d1dd 100644 --- a/deps/jemalloc/include/jemalloc/internal/rtree.h +++ b/deps/jemalloc/include/jemalloc/internal/rtree.h @@ -1,170 +1,292 @@ /* * This radix tree implementation is tailored to the singular purpose of - * tracking which chunks are currently owned by jemalloc. This functionality - * is mandatory for OS X, where jemalloc must be able to respond to object - * ownership queries. + * associating metadata with chunks that are currently owned by jemalloc. * ******************************************************************************* */ #ifdef JEMALLOC_H_TYPES +typedef struct rtree_node_elm_s rtree_node_elm_t; +typedef struct rtree_level_s rtree_level_t; typedef struct rtree_s rtree_t; /* - * Size of each radix tree node (must be a power of 2). This impacts tree - * depth. + * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the + * machine address width. */ -#define RTREE_NODESIZE (1U << 16) +#define LG_RTREE_BITS_PER_LEVEL 4 +#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL) +#define RTREE_HEIGHT_MAX \ + ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) -typedef void *(rtree_alloc_t)(size_t); -typedef void (rtree_dalloc_t)(void *); +/* Used for two-stage lock-free node initialization. */ +#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) + +/* + * The node allocation callback function's argument is the number of contiguous + * rtree_node_elm_t structures to allocate, and the resulting memory must be + * zeroed. + */ +typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t); +typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *); #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +struct rtree_node_elm_s { + union { + void *pun; + rtree_node_elm_t *child; + extent_node_t *val; + }; +}; + +struct rtree_level_s { + /* + * A non-NULL subtree points to a subtree rooted along the hypothetical + * path to the leaf node corresponding to key 0. 
Depending on what keys + * have been used to store to the tree, an arbitrary combination of + * subtree pointers may remain NULL. + * + * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. + * This results in a 3-level tree, and the leftmost leaf can be directly + * accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding + * 0x00000000) can be accessed via subtrees[1], and the remainder of the + * tree can be accessed via subtrees[0]. + * + * levels[0] : [<unused> | 0x0001******** | 0x0002******** | ...] + * + * levels[1] : [<unused> | 0x00000001**** | 0x00000002**** | ... ] + * + * levels[2] : [val(0x000000000000) | val(0x000000000001) | ...] + * + * This has practical implications on x64, which currently uses only the + * lower 47 bits of virtual address space in userland, thus leaving + * subtrees[0] unused and avoiding a level of tree traversal. + */ + union { + void *subtree_pun; + rtree_node_elm_t *subtree; + }; + /* Number of key bits distinguished by this level. */ + unsigned bits; + /* + * Cumulative number of key bits distinguished by traversing to + * corresponding tree level. + */ + unsigned cumbits; +}; + struct rtree_s { - rtree_alloc_t *alloc; - rtree_dalloc_t *dalloc; - malloc_mutex_t mutex; - void **root; - unsigned height; - unsigned level2bits[1]; /* Dynamically sized. */ + rtree_node_alloc_t *alloc; + rtree_node_dalloc_t *dalloc; + unsigned height; + /* + * Precomputed table used to convert from the number of leading 0 key + * bits to which subtree level to start at. + */ + unsigned start_level[RTREE_HEIGHT_MAX]; + rtree_level_t levels[RTREE_HEIGHT_MAX]; }; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -rtree_t *rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc); +bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, + rtree_node_dalloc_t *dalloc); void rtree_delete(rtree_t *rtree); -void rtree_prefork(rtree_t *rtree); -void rtree_postfork_parent(rtree_t *rtree); -void rtree_postfork_child(rtree_t *rtree); +rtree_node_elm_t *rtree_subtree_read_hard(rtree_t *rtree, + unsigned level); +rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree, + rtree_node_elm_t *elm, unsigned level); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -#ifdef JEMALLOC_DEBUG -uint8_t rtree_get_locked(rtree_t *rtree, uintptr_t key); -#endif -uint8_t rtree_get(rtree_t *rtree, uintptr_t key); -bool rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val); +unsigned rtree_start_level(rtree_t *rtree, uintptr_t key); +uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); + +bool rtree_node_valid(rtree_node_elm_t *node); +rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm); +rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, + unsigned level); +extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, + bool dependent); +void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, + const extent_node_t *val); +rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level); +rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level); + +extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent); +bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) 
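
To make the level/subkey layout described in that comment concrete, here is a small self-contained sketch (illustrative constants only, not the in-tree rtree_subkey() helper) that splits a 48-bit key into the three 16-bit subkeys a 3-level tree with RTREE_BITS_PER_LEVEL == 16 would consume:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uintptr_t key = (uintptr_t)0x00007f1234500000;	/* 48 significant bits */
	unsigned bits_per_level = 16, height = 3;

	for (unsigned i = 0; i < height; i++) {
		unsigned shift = bits_per_level * (height - 1 - i);
		uintptr_t subkey = (key >> shift) &
		    (((uintptr_t)1 << bits_per_level) - 1);
		printf("level %u subkey: 0x%04zx\n", i, (size_t)subkey);
	}
	return (0);
}

This prints 0x7f12, 0x3450, 0x0000: the most significant bits select the outermost level, exactly as the levels[0..2] diagram above describes.
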
-#define RTREE_GET_GENERATE(f) \ -/* The least significant bits of the key are ignored. */ \ -JEMALLOC_INLINE uint8_t \ -f(rtree_t *rtree, uintptr_t key) \ -{ \ - uint8_t ret; \ - uintptr_t subkey; \ - unsigned i, lshift, height, bits; \ - void **node, **child; \ - \ - RTREE_LOCK(&rtree->mutex); \ - for (i = lshift = 0, height = rtree->height, node = rtree->root;\ - i < height - 1; \ - i++, lshift += bits, node = child) { \ - bits = rtree->level2bits[i]; \ - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ - 3)) - bits); \ - child = (void**)node[subkey]; \ - if (child == NULL) { \ - RTREE_UNLOCK(&rtree->mutex); \ - return (0); \ - } \ - } \ - \ - /* \ - * node is a leaf, so it contains values rather than node \ - * pointers. \ - */ \ - bits = rtree->level2bits[i]; \ - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ - bits); \ - { \ - uint8_t *leaf = (uint8_t *)node; \ - ret = leaf[subkey]; \ - } \ - RTREE_UNLOCK(&rtree->mutex); \ - \ - RTREE_GET_VALIDATE \ - return (ret); \ +JEMALLOC_INLINE unsigned +rtree_start_level(rtree_t *rtree, uintptr_t key) +{ + unsigned start_level; + + if (unlikely(key == 0)) + return (rtree->height - 1); + + start_level = rtree->start_level[lg_floor(key) >> + LG_RTREE_BITS_PER_LEVEL]; + assert(start_level < rtree->height); + return (start_level); } -#ifdef JEMALLOC_DEBUG -# define RTREE_LOCK(l) malloc_mutex_lock(l) -# define RTREE_UNLOCK(l) malloc_mutex_unlock(l) -# define RTREE_GET_VALIDATE -RTREE_GET_GENERATE(rtree_get_locked) -# undef RTREE_LOCK -# undef RTREE_UNLOCK -# undef RTREE_GET_VALIDATE -#endif +JEMALLOC_INLINE uintptr_t +rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) +{ -#define RTREE_LOCK(l) -#define RTREE_UNLOCK(l) -#ifdef JEMALLOC_DEBUG - /* - * Suppose that it were possible for a jemalloc-allocated chunk to be - * munmap()ped, followed by a different allocator in another thread re-using - * overlapping virtual memory, all without invalidating the cached rtree - * value. The result would be a false positive (the rtree would claim that - * jemalloc owns memory that it had actually discarded). This scenario - * seems impossible, but the following assertion is a prudent sanity check. - */ -# define RTREE_GET_VALIDATE \ - assert(rtree_get_locked(rtree, key) == ret); -#else -# define RTREE_GET_VALIDATE -#endif -RTREE_GET_GENERATE(rtree_get) -#undef RTREE_LOCK -#undef RTREE_UNLOCK -#undef RTREE_GET_VALIDATE + return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - + rtree->levels[level].cumbits)) & ((ZU(1) << + rtree->levels[level].bits) - 1)); +} JEMALLOC_INLINE bool -rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val) +rtree_node_valid(rtree_node_elm_t *node) +{ + + return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING); +} + +JEMALLOC_INLINE rtree_node_elm_t * +rtree_child_tryread(rtree_node_elm_t *elm) +{ + rtree_node_elm_t *child; + + /* Double-checked read (first read may be stale. 
*/ + child = elm->child; + if (!rtree_node_valid(child)) + child = atomic_read_p(&elm->pun); + return (child); +} + +JEMALLOC_INLINE rtree_node_elm_t * +rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) +{ + rtree_node_elm_t *child; + + child = rtree_child_tryread(elm); + if (unlikely(!rtree_node_valid(child))) + child = rtree_child_read_hard(rtree, elm, level); + return (child); +} + +JEMALLOC_INLINE extent_node_t * +rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent) +{ + + if (dependent) { + /* + * Reading a val on behalf of a pointer to a valid allocation is + * guaranteed to be a clean read even without synchronization, + * because the rtree update became visible in memory before the + * pointer came into existence. + */ + return (elm->val); + } else { + /* + * An arbitrary read, e.g. on behalf of ivsalloc(), may not be + * dependent on a previous rtree write, which means a stale read + * could result if synchronization were omitted here. + */ + return (atomic_read_p(&elm->pun)); + } +} + +JEMALLOC_INLINE void +rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) +{ + + atomic_write_p(&elm->pun, val); +} + +JEMALLOC_INLINE rtree_node_elm_t * +rtree_subtree_tryread(rtree_t *rtree, unsigned level) +{ + rtree_node_elm_t *subtree; + + /* Double-checked read (first read may be stale. */ + subtree = rtree->levels[level].subtree; + if (!rtree_node_valid(subtree)) + subtree = atomic_read_p(&rtree->levels[level].subtree_pun); + return (subtree); +} + +JEMALLOC_INLINE rtree_node_elm_t * +rtree_subtree_read(rtree_t *rtree, unsigned level) +{ + rtree_node_elm_t *subtree; + + subtree = rtree_subtree_tryread(rtree, level); + if (unlikely(!rtree_node_valid(subtree))) + subtree = rtree_subtree_read_hard(rtree, level); + return (subtree); +} + +JEMALLOC_INLINE extent_node_t * +rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) { uintptr_t subkey; - unsigned i, lshift, height, bits; - void **node, **child; - - malloc_mutex_lock(&rtree->mutex); - for (i = lshift = 0, height = rtree->height, node = rtree->root; - i < height - 1; - i++, lshift += bits, node = child) { - bits = rtree->level2bits[i]; - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - - bits); - child = (void**)node[subkey]; - if (child == NULL) { - size_t size = ((i + 1 < height - 1) ? sizeof(void *) - : (sizeof(uint8_t))) << rtree->level2bits[i+1]; - child = (void**)rtree->alloc(size); - if (child == NULL) { - malloc_mutex_unlock(&rtree->mutex); - return (true); - } - memset(child, 0, size); - node[subkey] = child; + unsigned i, start_level; + rtree_node_elm_t *node, *child; + + start_level = rtree_start_level(rtree, key); + + for (i = start_level, node = rtree_subtree_tryread(rtree, start_level); + /**/; i++, node = child) { + if (!dependent && unlikely(!rtree_node_valid(node))) + return (NULL); + subkey = rtree_subkey(rtree, key, i); + if (i == rtree->height - 1) { + /* + * node is a leaf, so it contains values rather than + * child pointers. + */ + return (rtree_val_read(rtree, &node[subkey], + dependent)); } + assert(i < rtree->height - 1); + child = rtree_child_tryread(&node[subkey]); } + not_reached(); +} - /* node is a leaf, so it contains values rather than node pointers. 
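
Putting the pieces above together, a hedged usage sketch (assuming the "bits" argument to rtree_new() is the number of significant key bits, and using a calloc-backed node allocator to satisfy the zeroed-memory contract stated earlier) might look like:

#include <stdlib.h>

static rtree_node_elm_t *
node_alloc(size_t nelms)
{
	/* Callback contract: zeroed, contiguous rtree_node_elm_t array. */
	return ((rtree_node_elm_t *)calloc(nelms, sizeof(rtree_node_elm_t)));
}

static void
node_dalloc(rtree_node_elm_t *node)
{
	free(node);
}

static rtree_t chunk_rtree;

static bool
chunk_rtree_boot(unsigned lg_chunk)
{
	unsigned key_bits = (unsigned)(sizeof(void *) * 8) - lg_chunk;

	return (rtree_new(&chunk_rtree, key_bits, node_alloc, node_dalloc));
}

/*
 * Registration and lookup would then be, e.g.:
 *   rtree_set(&chunk_rtree, (uintptr_t)chunk, node);
 *   node = rtree_get(&chunk_rtree, (uintptr_t)ptr, true);
 * where the final argument indicates the lookup is on behalf of a pointer
 * known to be a live allocation (see rtree_val_read() above).
 */
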
*/ - bits = rtree->level2bits[i]; - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); - { - uint8_t *leaf = (uint8_t *)node; - leaf[subkey] = val; - } - malloc_mutex_unlock(&rtree->mutex); +JEMALLOC_INLINE bool +rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) +{ + uintptr_t subkey; + unsigned i, start_level; + rtree_node_elm_t *node, *child; - return (false); + start_level = rtree_start_level(rtree, key); + + node = rtree_subtree_read(rtree, start_level); + if (node == NULL) + return (true); + for (i = start_level; /**/; i++, node = child) { + subkey = rtree_subkey(rtree, key, i); + if (i == rtree->height - 1) { + /* + * node is a leaf, so it contains values rather than + * child pointers. + */ + rtree_val_write(rtree, &node[subkey], val); + return (false); + } + assert(i + 1 < rtree->height); + child = rtree_child_read(rtree, &node[subkey], i); + if (child == NULL) + return (true); + } + not_reached(); } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/size_classes.sh b/deps/jemalloc/include/jemalloc/internal/size_classes.sh index 29c80c1fb..fc82036d3 100755 --- a/deps/jemalloc/include/jemalloc/internal/size_classes.sh +++ b/deps/jemalloc/include/jemalloc/internal/size_classes.sh @@ -1,17 +1,26 @@ #!/bin/sh +# +# Usage: size_classes.sh <lg_qarr> <lg_tmin> <lg_parr> <lg_g> # The following limits are chosen such that they cover all supported platforms. -# Range of quanta. -lg_qmin=3 -lg_qmax=4 +# Pointer sizes. +lg_zarr="2 3" + +# Quanta. +lg_qarr=$1 # The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)]. -lg_tmin=3 +lg_tmin=$2 + +# Maximum lookup size. +lg_kmax=12 + +# Page sizes. +lg_parr=`echo $3 | tr ',' ' '` -# Range of page sizes. -lg_pmin=12 -lg_pmax=16 +# Size class group size (number of size classes for each size doubling). +lg_g=$4 pow2() { e=$1 @@ -22,68 +31,224 @@ pow2() { done } +lg() { + x=$1 + lg_result=0 + while [ ${x} -gt 1 ] ; do + lg_result=$((${lg_result} + 1)) + x=$((${x} / 2)) + done +} + +size_class() { + index=$1 + lg_grp=$2 + lg_delta=$3 + ndelta=$4 + lg_p=$5 + lg_kmax=$6 + + lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta} + if [ ${pow2_result} -lt ${ndelta} ] ; then + rem="yes" + else + rem="no" + fi + + lg_size=${lg_grp} + if [ $((${lg_delta} + ${lg_ndelta})) -eq ${lg_grp} ] ; then + lg_size=$((${lg_grp} + 1)) + else + lg_size=${lg_grp} + rem="yes" + fi + + if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then + bin="yes" + else + bin="no" + fi + if [ ${lg_size} -lt ${lg_kmax} \ + -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then + lg_delta_lookup=${lg_delta} + else + lg_delta_lookup="no" + fi + printf ' SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup} + # Defined upon return: + # - lg_delta_lookup (${lg_delta} or "no") + # - bin ("yes" or "no") +} + +sep_line() { + echo " \\" +} + +size_classes() { + lg_z=$1 + lg_q=$2 + lg_t=$3 + lg_p=$4 + lg_g=$5 + + pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result} + pow2 ${lg_g}; g=${pow2_result} + + echo "#define SIZE_CLASSES \\" + echo " /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\" + + ntbins=0 + nlbins=0 + lg_tiny_maxclass='"NA"' + nbins=0 + + # Tiny size classes. 
+ ndelta=0 + index=0 + lg_grp=${lg_t} + lg_delta=${lg_grp} + while [ ${lg_grp} -lt ${lg_q} ] ; do + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + if [ ${lg_delta_lookup} != "no" ] ; then + nlbins=$((${index} + 1)) + fi + if [ ${bin} != "no" ] ; then + nbins=$((${index} + 1)) + fi + ntbins=$((${ntbins} + 1)) + lg_tiny_maxclass=${lg_grp} # Final written value is correct. + index=$((${index} + 1)) + lg_delta=${lg_grp} + lg_grp=$((${lg_grp} + 1)) + done + + # First non-tiny group. + if [ ${ntbins} -gt 0 ] ; then + sep_line + # The first size class has an unusual encoding, because the size has to be + # split between grp and delta*ndelta. + lg_grp=$((${lg_grp} - 1)) + ndelta=1 + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + index=$((${index} + 1)) + lg_grp=$((${lg_grp} + 1)) + lg_delta=$((${lg_delta} + 1)) + fi + while [ ${ndelta} -lt ${g} ] ; do + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + index=$((${index} + 1)) + ndelta=$((${ndelta} + 1)) + done + + # All remaining groups. + lg_grp=$((${lg_grp} + ${lg_g})) + while [ ${lg_grp} -lt ${ptr_bits} ] ; do + sep_line + ndelta=1 + if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then + ndelta_limit=$((${g} - 1)) + else + ndelta_limit=${g} + fi + while [ ${ndelta} -le ${ndelta_limit} ] ; do + size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} + if [ ${lg_delta_lookup} != "no" ] ; then + nlbins=$((${index} + 1)) + # Final written value is correct: + lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + fi + if [ ${bin} != "no" ] ; then + nbins=$((${index} + 1)) + # Final written value is correct: + small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + if [ ${lg_g} -gt 0 ] ; then + lg_large_minclass=$((${lg_grp} + 1)) + else + lg_large_minclass=$((${lg_grp} + 2)) + fi + fi + # Final written value is correct: + huge_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + index=$((${index} + 1)) + ndelta=$((${ndelta} + 1)) + done + lg_grp=$((${lg_grp} + 1)) + lg_delta=$((${lg_delta} + 1)) + done + echo + nsizes=${index} + + # Defined upon completion: + # - ntbins + # - nlbins + # - nbins + # - nsizes + # - lg_tiny_maxclass + # - lookup_maxclass + # - small_maxclass + # - lg_large_minclass + # - huge_maxclass +} + cat <<EOF /* This file was automatically generated by size_classes.sh. */ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +/* + * This header requires LG_SIZEOF_PTR, LG_TINY_MIN, LG_QUANTUM, and LG_PAGE to + * be defined prior to inclusion, and it in turn defines: + * + * LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling. + * SIZE_CLASSES: Complete table of + * SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) + * tuples. + * index: Size class index. + * lg_grp: Lg group base size (no deltas added). + * lg_delta: Lg delta to previous size class. + * ndelta: Delta multiplier. size == 1<<lg_grp + ndelta<<lg_delta + * bin: 'yes' if a small bin size class, 'no' otherwise. + * lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no' + * otherwise. + * NTBINS: Number of tiny bins. + * NLBINS: Number of bins supported by the lookup table. + * NBINS: Number of small size class bins. + * NSIZES: Number of size classes. + * LG_TINY_MAXCLASS: Lg of maximum tiny size class. + * LOOKUP_MAXCLASS: Maximum size class included in lookup table. 
+ * SMALL_MAXCLASS: Maximum small size class. + * LG_LARGE_MINCLASS: Lg of minimum large size class. + * HUGE_MAXCLASS: Maximum (huge) size class. + */ + +#define LG_SIZE_CLASS_GROUP ${lg_g} + EOF -lg_q=${lg_qmin} -while [ ${lg_q} -le ${lg_qmax} ] ; do - lg_t=${lg_tmin} - while [ ${lg_t} -le ${lg_q} ] ; do - lg_p=${lg_pmin} - while [ ${lg_p} -le ${lg_pmax} ] ; do - echo "#if (LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})" - echo "#define SIZE_CLASSES_DEFINED" - pow2 ${lg_q}; q=${pow2_result} - pow2 ${lg_t}; t=${pow2_result} - pow2 ${lg_p}; p=${pow2_result} - bin=0 - psz=0 - sz=${t} - delta=$((${sz} - ${psz})) - echo "/* SIZE_CLASS(bin, delta, sz) */" - echo "#define SIZE_CLASSES \\" - - # Tiny size classes. - while [ ${sz} -lt ${q} ] ; do - echo " SIZE_CLASS(${bin}, ${delta}, ${sz}) \\" - bin=$((${bin} + 1)) - psz=${sz} - sz=$((${sz} + ${sz})) - delta=$((${sz} - ${psz})) - done - # Quantum-multiple size classes. For each doubling of sz, as many as 4 - # size classes exist. Their spacing is the greater of: - # - q - # - sz/4, where sz is a power of 2 - while [ ${sz} -lt ${p} ] ; do - if [ ${sz} -ge $((${q} * 4)) ] ; then - i=$((${sz} / 4)) - else - i=${q} - fi - next_2pow=$((${sz} * 2)) - while [ ${sz} -lt $next_2pow ] ; do - echo " SIZE_CLASS(${bin}, ${delta}, ${sz}) \\" - bin=$((${bin} + 1)) - psz=${sz} - sz=$((${sz} + ${i})) - delta=$((${sz} - ${psz})) - done +for lg_z in ${lg_zarr} ; do + for lg_q in ${lg_qarr} ; do + lg_t=${lg_tmin} + while [ ${lg_t} -le ${lg_q} ] ; do + # Iterate through page sizes and compute how many bins there are. + for lg_p in ${lg_parr} ; do + echo "#if (LG_SIZEOF_PTR == ${lg_z} && LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})" + size_classes ${lg_z} ${lg_q} ${lg_t} ${lg_p} ${lg_g} + echo "#define SIZE_CLASSES_DEFINED" + echo "#define NTBINS ${ntbins}" + echo "#define NLBINS ${nlbins}" + echo "#define NBINS ${nbins}" + echo "#define NSIZES ${nsizes}" + echo "#define LG_TINY_MAXCLASS ${lg_tiny_maxclass}" + echo "#define LOOKUP_MAXCLASS ${lookup_maxclass}" + echo "#define SMALL_MAXCLASS ${small_maxclass}" + echo "#define LG_LARGE_MINCLASS ${lg_large_minclass}" + echo "#define HUGE_MAXCLASS ${huge_maxclass}" + echo "#endif" + echo done - echo - echo "#define NBINS ${bin}" - echo "#define SMALL_MAXCLASS ${psz}" - echo "#endif" - echo - lg_p=$((${lg_p} + 1)) + lg_t=$((${lg_t} + 1)) done - lg_t=$((${lg_t} + 1)) done - lg_q=$((${lg_q} + 1)) done cat <<EOF @@ -92,11 +257,10 @@ cat <<EOF #endif #undef SIZE_CLASSES_DEFINED /* - * The small_size2bin lookup table uses uint8_t to encode each bin index, so we + * The size2index_tab lookup table uses uint8_t to encode each bin index, so we * cannot support more than 256 small size classes. Further constrain NBINS to - * 255 to support prof_promote, since all small size classes, plus a "not - * small" size class must be stored in 8 bits of arena_chunk_map_t's bits - * field. + * 255 since all small size classes, plus a "not small" size class must be + * stored in 8 bits of arena_chunk_map_bits_t's bits field. 
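
The (lg_grp, lg_delta, ndelta) encoding documented above reduces to simple arithmetic; the following hedged helper (not part of the generated header) shows how a size class' byte size falls out of one SC() tuple:

#include <stddef.h>
#include <stdio.h>

/* size == (1 << lg_grp) + (ndelta << lg_delta), per the SC() tuple docs. */
static size_t
sc_size(unsigned lg_grp, unsigned lg_delta, unsigned ndelta)
{
	return (((size_t)1 << lg_grp) + ((size_t)ndelta << lg_delta));
}

int
main(void)
{
	/* e.g. the group based at 64 bytes with 16-byte deltas: 80, 96, 112, 128. */
	for (unsigned ndelta = 1; ndelta <= 4; ndelta++)
		printf("%zu\n", sc_size(6, 4, ndelta));
	return (0);
}
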
*/ #if (NBINS > 255) # error "Too many small size classes" diff --git a/deps/jemalloc/include/jemalloc/internal/stats.h b/deps/jemalloc/include/jemalloc/internal/stats.h index 27f68e368..c91dba99d 100644 --- a/deps/jemalloc/include/jemalloc/internal/stats.h +++ b/deps/jemalloc/include/jemalloc/internal/stats.h @@ -4,6 +4,7 @@ typedef struct tcache_bin_stats_s tcache_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t; typedef struct malloc_large_stats_s malloc_large_stats_t; +typedef struct malloc_huge_stats_s malloc_huge_stats_t; typedef struct arena_stats_s arena_stats_t; typedef struct chunk_stats_s chunk_stats_t; @@ -21,12 +22,6 @@ struct tcache_bin_stats_s { struct malloc_bin_stats_s { /* - * Current number of bytes allocated, including objects currently - * cached by tcache. - */ - size_t allocated; - - /* * Total number of allocation/deallocation requests served directly by * the bin. Note that tcache may allocate an object, then recycle it * many times, resulting many increments to nrequests, but only one @@ -42,6 +37,12 @@ struct malloc_bin_stats_s { */ uint64_t nrequests; + /* + * Current number of regions of this size class, including regions + * currently cached by tcache. + */ + size_t curregs; + /* Number of tcache fills from this bin. */ uint64_t nfills; @@ -78,10 +79,25 @@ struct malloc_large_stats_s { */ uint64_t nrequests; - /* Current number of runs of this size class. */ + /* + * Current number of runs of this size class, including runs currently + * cached by tcache. + */ size_t curruns; }; +struct malloc_huge_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* Current number of (multi-)chunk allocations of this size class. */ + size_t curhchunks; +}; + struct arena_stats_s { /* Number of bytes currently mapped. */ size_t mapped; @@ -95,34 +111,28 @@ struct arena_stats_s { uint64_t nmadvise; uint64_t purged; + /* + * Number of bytes currently mapped purely for metadata purposes, and + * number of bytes currently allocated for internal metadata. + */ + size_t metadata_mapped; + size_t metadata_allocated; /* Protected via atomic_*_z(). */ + /* Per-size-category statistics. */ size_t allocated_large; uint64_t nmalloc_large; uint64_t ndalloc_large; uint64_t nrequests_large; - /* - * One element for each possible size class, including sizes that - * overlap with bin size classes. This is necessary because ipalloc() - * sometimes has to use such large objects in order to assure proper - * alignment. - */ - malloc_large_stats_t *lstats; -}; - -struct chunk_stats_s { - /* Number of chunks that were allocated. */ - uint64_t nchunks; + size_t allocated_huge; + uint64_t nmalloc_huge; + uint64_t ndalloc_huge; - /* High-water mark for number of chunks allocated. */ - size_t highchunks; + /* One element for each large size class. */ + malloc_large_stats_t *lstats; - /* - * Current number of chunks allocated. This value isn't maintained for - * any other purpose, so keep track of it in order to be able to set - * highchunks. - */ - size_t curchunks; + /* One element for each huge size class. 
*/ + malloc_huge_stats_t *hstats; }; #endif /* JEMALLOC_H_STRUCTS */ diff --git a/deps/jemalloc/include/jemalloc/internal/tcache.h b/deps/jemalloc/include/jemalloc/internal/tcache.h index c3d4b58d4..5079cd266 100644 --- a/deps/jemalloc/include/jemalloc/internal/tcache.h +++ b/deps/jemalloc/include/jemalloc/internal/tcache.h @@ -4,6 +4,7 @@ typedef struct tcache_bin_info_s tcache_bin_info_t; typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; /* * tcache pointers close to NULL are used to encode state information that is @@ -16,6 +17,11 @@ typedef struct tcache_s tcache_t; #define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY /* + * Absolute minimum number of cache slots for each small bin. + */ +#define TCACHE_NSLOTS_SMALL_MIN 20 + +/* * Absolute maximum number of cache slots for each small bin in the thread * cache. This is an additional constraint beyond that imposed as: twice the * number of regions per run for this size class. @@ -69,10 +75,9 @@ struct tcache_bin_s { struct tcache_s { ql_elm(tcache_t) link; /* Used for aggregating stats. */ - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */ - arena_t *arena; /* This thread's arena. */ + uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ unsigned ev_cnt; /* Event count since incremental GC. */ - unsigned next_gc_bin; /* Next bin to GC. */ + szind_t next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ /* * The pointer stacks associated with tbins follow as a contiguous @@ -82,6 +87,14 @@ struct tcache_s { */ }; +/* Linkage for list of available (previously used) explicit tcache IDs. */ +struct tcaches_s { + union { + tcache_t *tcache; + tcaches_t *next; + }; +}; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS @@ -95,84 +108,90 @@ extern tcache_bin_info_t *tcache_bin_info; * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. */ -extern size_t nhbins; +extern size_t nhbins; /* Maximum cached size class. */ -extern size_t tcache_maxclass; +extern size_t tcache_maxclass; + +/* + * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and + * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are + * completely disjoint from this data structure. tcaches starts off as a sparse + * array, so it has no physical memory footprint until individual pages are + * touched. This allows the entire array to be allocated the first time an + * explicit tcache is created without a disproportionate impact on memory usage. 
+ */ +extern tcaches_t *tcaches; size_t tcache_salloc(const void *ptr); -void tcache_event_hard(tcache_t *tcache); -void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, - size_t binind); -void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, - tcache_t *tcache); -void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, - tcache_t *tcache); +void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, + unsigned rem, tcache_t *tcache); void tcache_arena_associate(tcache_t *tcache, arena_t *arena); -void tcache_arena_dissociate(tcache_t *tcache); -tcache_t *tcache_create(arena_t *arena); -void tcache_destroy(tcache_t *tcache); -void tcache_thread_cleanup(void *arg); +void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, + arena_t *newarena); +void tcache_arena_dissociate(tcache_t *tcache, arena_t *arena); +tcache_t *tcache_get_hard(tsd_t *tsd); +tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); +void tcache_cleanup(tsd_t *tsd); +void tcache_enabled_cleanup(tsd_t *tsd); void tcache_stats_merge(tcache_t *tcache, arena_t *arena); -bool tcache_boot0(void); -bool tcache_boot1(void); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *) -malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t) - -void tcache_event(tcache_t *tcache); +void tcache_event(tsd_t *tsd, tcache_t *tcache); void tcache_flush(void); bool tcache_enabled_get(void); -tcache_t *tcache_get(bool create); +tcache_t *tcache_get(tsd_t *tsd, bool create); void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); -void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); -void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); -void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind); -void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); +void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + size_t size, bool zero); +void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + size_t size, bool zero); +void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, + szind_t binind); +void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, + size_t size); +tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) -/* Map of thread-specific caches. */ -malloc_tsd_externs(tcache, tcache_t *) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL, - tcache_thread_cleanup) -/* Per thread flag that allows thread caches to be disabled. 
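
From the application side, the explicit tcaches described above are driven through the public API; a hedged example (mirroring the tcache.create/destroy mallctl names mentioned in the comment, with error handling elided) is:

#include <jemalloc/jemalloc.h>

int
main(void)
{
	unsigned tc;
	size_t sz = sizeof(tc);
	void *p;

	/* Reserve an explicit tcache and get back its index. */
	mallctl("tcache.create", &tc, &sz, NULL, 0);

	/* Route allocation and deallocation through that tcache. */
	p = mallocx(4096, MALLOCX_TCACHE(tc));
	dallocx(p, MALLOCX_TCACHE(tc));

	/* Flush the tcache and release its index once it is no longer needed. */
	mallctl("tcache.destroy", NULL, NULL, &tc, sizeof(tc));
	return (0);
}
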
*/ -malloc_tsd_externs(tcache_enabled, tcache_enabled_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t, - tcache_enabled_default, malloc_tsd_no_cleanup) - JEMALLOC_INLINE void tcache_flush(void) { - tcache_t *tcache; + tsd_t *tsd; cassert(config_tcache); - tcache = *tcache_tsd_get(); - if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) - return; - tcache_destroy(tcache); - tcache = NULL; - tcache_tsd_set(&tcache); + tsd = tsd_fetch(); + tcache_cleanup(tsd); } JEMALLOC_INLINE bool tcache_enabled_get(void) { + tsd_t *tsd; tcache_enabled_t tcache_enabled; cassert(config_tcache); - tcache_enabled = *tcache_enabled_tsd_get(); + tsd = tsd_fetch(); + tcache_enabled = tsd_tcache_enabled_get(tsd); if (tcache_enabled == tcache_enabled_default) { tcache_enabled = (tcache_enabled_t)opt_tcache; - tcache_enabled_tsd_set(&tcache_enabled); + tsd_tcache_enabled_set(tsd, tcache_enabled); } return ((bool)tcache_enabled); @@ -181,85 +200,41 @@ tcache_enabled_get(void) JEMALLOC_INLINE void tcache_enabled_set(bool enabled) { + tsd_t *tsd; tcache_enabled_t tcache_enabled; - tcache_t *tcache; cassert(config_tcache); + tsd = tsd_fetch(); + tcache_enabled = (tcache_enabled_t)enabled; - tcache_enabled_tsd_set(&tcache_enabled); - tcache = *tcache_tsd_get(); - if (enabled) { - if (tcache == TCACHE_STATE_DISABLED) { - tcache = NULL; - tcache_tsd_set(&tcache); - } - } else /* disabled */ { - if (tcache > TCACHE_STATE_MAX) { - tcache_destroy(tcache); - tcache = NULL; - } - if (tcache == NULL) { - tcache = TCACHE_STATE_DISABLED; - tcache_tsd_set(&tcache); - } - } + tsd_tcache_enabled_set(tsd, tcache_enabled); + + if (!enabled) + tcache_cleanup(tsd); } JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get(bool create) +tcache_get(tsd_t *tsd, bool create) { tcache_t *tcache; - if (config_tcache == false) - return (NULL); - if (config_lazy_lock && isthreaded == false) + if (!config_tcache) return (NULL); - tcache = *tcache_tsd_get(); - if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) { - if (tcache == TCACHE_STATE_DISABLED) - return (NULL); - if (tcache == NULL) { - if (create == false) { - /* - * Creating a tcache here would cause - * allocation as a side effect of free(). - * Ordinarily that would be okay since - * tcache_create() failure is a soft failure - * that doesn't propagate. However, if TLS - * data are freed via free() as in glibc, - * subtle corruption could result from setting - * a TLS variable after its backing memory is - * freed. - */ - return (NULL); - } - if (tcache_enabled_get() == false) { - tcache_enabled_set(false); /* Memoize. */ - return (NULL); - } - return (tcache_create(choose_arena(NULL))); - } - if (tcache == TCACHE_STATE_PURGATORY) { - /* - * Make a note that an allocator function was called - * after tcache_thread_cleanup() was called. 
- */ - tcache = TCACHE_STATE_REINCARNATED; - tcache_tsd_set(&tcache); - return (NULL); - } - if (tcache == TCACHE_STATE_REINCARNATED) - return (NULL); - not_reached(); + tcache = tsd_tcache_get(tsd); + if (!create) + return (tcache); + if (unlikely(tcache == NULL) && tsd_nominal(tsd)) { + tcache = tcache_get_hard(tsd); + tsd_tcache_set(tsd, tcache); } return (tcache); } JEMALLOC_ALWAYS_INLINE void -tcache_event(tcache_t *tcache) +tcache_event(tsd_t *tsd, tcache_t *tcache) { if (TCACHE_GC_INCR == 0) @@ -267,8 +242,8 @@ tcache_event(tcache_t *tcache) tcache->ev_cnt++; assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (tcache->ev_cnt == TCACHE_GC_INCR) - tcache_event_hard(tcache); + if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR)) + tcache_event_hard(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void * @@ -276,85 +251,87 @@ tcache_alloc_easy(tcache_bin_t *tbin) { void *ret; - if (tbin->ncached == 0) { + if (unlikely(tbin->ncached == 0)) { tbin->low_water = -1; return (NULL); } tbin->ncached--; - if ((int)tbin->ncached < tbin->low_water) + if (unlikely((int)tbin->ncached < tbin->low_water)) tbin->low_water = tbin->ncached; ret = tbin->avail[tbin->ncached]; return (ret); } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) +tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + bool zero) { void *ret; - size_t binind; + szind_t binind; + size_t usize; tcache_bin_t *tbin; - binind = SMALL_SIZE2BIN(size); + binind = size2index(size); assert(binind < NBINS); tbin = &tcache->tbins[binind]; - size = arena_bin_info[binind].reg_size; + usize = index2size(binind); ret = tcache_alloc_easy(tbin); - if (ret == NULL) { - ret = tcache_alloc_small_hard(tcache, tbin, binind); + if (unlikely(ret == NULL)) { + ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind); if (ret == NULL) return (NULL); } - assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size); + assert(tcache_salloc(ret) == usize); - if (zero == false) { + if (likely(!zero)) { if (config_fill) { - if (opt_junk) { + if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); - } else if (opt_zero) - memset(ret, 0, size); + } else if (unlikely(opt_zero)) + memset(ret, 0, usize); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); } else { - if (config_fill && opt_junk) { + if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - memset(ret, 0, size); + memset(ret, 0, usize); } if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += arena_bin_info[binind].reg_size; - tcache_event(tcache); + tcache->prof_accumbytes += usize; + tcache_event(tsd, tcache); return (ret); } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) +tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + bool zero) { void *ret; - size_t binind; + szind_t binind; + size_t usize; tcache_bin_t *tbin; - size = PAGE_CEILING(size); - assert(size <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; + binind = size2index(size); + usize = index2size(binind); + assert(usize <= tcache_maxclass); assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); - if (ret == NULL) { + if (unlikely(ret == NULL)) { /* * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. 
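
The tbin fast path that tcache_alloc_easy()/tcache_dalloc_small() implement above is just a bounded LIFO stack of cached pointers; a stripped-down, self-contained sketch of that idea (not the real tcache_bin_t, which also tracks low_water and stats) is:

#include <stdbool.h>
#include <stddef.h>

typedef struct {
	unsigned ncached;	/* current number of cached pointers */
	unsigned ncached_max;	/* capacity before a flush is needed */
	void **avail;		/* stack of cached objects */
} bin_t;

/* Pop the most recently cached object, or NULL to fall back to the arena. */
static void *
bin_alloc(bin_t *bin)
{
	if (bin->ncached == 0)
		return (NULL);
	bin->ncached--;
	return (bin->avail[bin->ncached]);
}

/* Push a freed object; returns true if the caller must flush first. */
static bool
bin_dalloc(bin_t *bin, void *ptr)
{
	if (bin->ncached == bin->ncached_max)
		return (true);
	bin->avail[bin->ncached++] = ptr;
	return (false);
}
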
*/ - ret = arena_malloc_large(tcache->arena, size, zero); + ret = arena_malloc_large(arena, usize, zero); if (ret == NULL) return (NULL); } else { - if (config_prof && prof_promote && size == PAGE) { + if (config_prof && usize == LARGE_MINCLASS) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> @@ -362,57 +339,54 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) arena_mapbits_large_binind_set(chunk, pageind, BININD_INVALID); } - if (zero == false) { + if (likely(!zero)) { if (config_fill) { - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); + if (unlikely(opt_junk_alloc)) + memset(ret, 0xa5, usize); + else if (unlikely(opt_zero)) + memset(ret, 0, usize); } - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - } else { - VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - memset(ret, 0, size); - } + } else + memset(ret, 0, usize); if (config_stats) tbin->tstats.nrequests++; if (config_prof) - tcache->prof_accumbytes += size; + tcache->prof_accumbytes += usize; } - tcache_event(tcache); + tcache_event(tsd, tcache); return (ret); } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - if (config_fill && opt_junk) + if (config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { - tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> - 1), tcache); + if (unlikely(tbin->ncached == tbin_info->ncached_max)) { + tcache_bin_flush_small(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; tbin->ncached++; - tcache_event(tcache); + tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) { - size_t binind; + szind_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -420,22 +394,31 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) assert(tcache_salloc(ptr) > SMALL_MAXCLASS); assert(tcache_salloc(ptr) <= tcache_maxclass); - binind = NBINS + (size >> LG_PAGE) - 1; + binind = size2index(size); - if (config_fill && opt_junk) - memset(ptr, 0x5a, size); + if (config_fill && unlikely(opt_junk_free)) + arena_dalloc_junk_large(ptr, size); tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { - tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> - 1), tcache); + if (unlikely(tbin->ncached == tbin_info->ncached_max)) { + tcache_bin_flush_large(tsd, tbin, binind, + (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; tbin->ncached++; - tcache_event(tcache); + tcache_event(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcaches_get(tsd_t *tsd, unsigned ind) +{ + tcaches_t *elm = &tcaches[ind]; + if (unlikely(elm->tcache == NULL)) + elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); + return (elm->tcache); } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h index 
9fb4a23ec..eed7aa013 100644 --- a/deps/jemalloc/include/jemalloc/internal/tsd.h +++ b/deps/jemalloc/include/jemalloc/internal/tsd.h @@ -2,7 +2,7 @@ #ifdef JEMALLOC_H_TYPES /* Maximum number of malloc_tsd users with cleanup functions. */ -#define MALLOC_TSD_CLEANUPS_MAX 8 +#define MALLOC_TSD_CLEANUPS_MAX 2 typedef bool (*malloc_tsd_cleanup_t)(void); @@ -12,9 +12,18 @@ typedef struct tsd_init_block_s tsd_init_block_t; typedef struct tsd_init_head_s tsd_init_head_t; #endif +typedef struct tsd_s tsd_t; + +typedef enum { + tsd_state_uninitialized, + tsd_state_nominal, + tsd_state_purgatory, + tsd_state_reincarnated +} tsd_state_t; + /* * TLS/TSD-agnostic macro-based implementation of thread-specific data. There - * are four macros that support (at least) three use cases: file-private, + * are five macros that support (at least) three use cases: file-private, * library-private, and library-private inlined. Following is an example * library-private tsd variable: * @@ -24,34 +33,36 @@ typedef struct tsd_init_head_s tsd_init_head_t; * int y; * } example_t; * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) - * malloc_tsd_protos(, example, example_t *) - * malloc_tsd_externs(example, example_t *) + * malloc_tsd_types(example_, example_t) + * malloc_tsd_protos(, example_, example_t) + * malloc_tsd_externs(example_, example_t) * In example.c: - * malloc_tsd_data(, example, example_t *, EX_INITIALIZER) - * malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER, + * malloc_tsd_data(, example_, example_t, EX_INITIALIZER) + * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER, * example_tsd_cleanup) * * The result is a set of generated functions, e.g.: * * bool example_tsd_boot(void) {...} - * example_t **example_tsd_get() {...} - * void example_tsd_set(example_t **val) {...} + * example_t *example_tsd_get() {...} + * void example_tsd_set(example_t *val) {...} * * Note that all of the functions deal in terms of (a_type *) rather than - * (a_type) so that it is possible to support non-pointer types (unlike + * (a_type) so that it is possible to support non-pointer types (unlike * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is - * cast to (void *). This means that the cleanup function needs to cast *and* - * dereference the function argument, e.g.: + * cast to (void *). This means that the cleanup function needs to cast the + * function argument to (a_type *), then dereference the resulting pointer to + * access fields, e.g. * * void * example_tsd_cleanup(void *arg) * { - * example_t *example = *(example_t **)arg; + * example_t *example = (example_t *)arg; * + * example->x = 42; * [...] - * if ([want the cleanup function to be called again]) { - * example_tsd_set(&example); - * } + * if ([want the cleanup function to be called again]) + * example_tsd_set(example); * } * * If example_tsd_set() is called within example_tsd_cleanup(), it will be @@ -60,63 +71,96 @@ typedef struct tsd_init_head_s tsd_init_head_t; * non-NULL. */ +/* malloc_tsd_types(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_types(a_name, a_type) +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_types(a_name, a_type) +#elif (defined(_WIN32)) +#define malloc_tsd_types(a_name, a_type) \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##tsd_wrapper_t; +#else +#define malloc_tsd_types(a_name, a_type) \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##tsd_wrapper_t; +#endif + /* malloc_tsd_protos(). 
*/ #define malloc_tsd_protos(a_attr, a_name, a_type) \ a_attr bool \ -a_name##_tsd_boot(void); \ +a_name##tsd_boot0(void); \ +a_attr void \ +a_name##tsd_boot1(void); \ +a_attr bool \ +a_name##tsd_boot(void); \ a_attr a_type * \ -a_name##_tsd_get(void); \ +a_name##tsd_get(void); \ a_attr void \ -a_name##_tsd_set(a_type *val); +a_name##tsd_set(a_type *val); /* malloc_tsd_externs(). */ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP #define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##_tls; \ -extern __thread bool a_name##_initialized; \ -extern bool a_name##_booted; +extern __thread a_type a_name##tsd_tls; \ +extern __thread bool a_name##tsd_initialized; \ +extern bool a_name##tsd_booted; #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##_tls; \ -extern pthread_key_t a_name##_tsd; \ -extern bool a_name##_booted; +extern __thread a_type a_name##tsd_tls; \ +extern pthread_key_t a_name##tsd_tsd; \ +extern bool a_name##tsd_booted; #elif (defined(_WIN32)) #define malloc_tsd_externs(a_name, a_type) \ -extern DWORD a_name##_tsd; \ -extern bool a_name##_booted; +extern DWORD a_name##tsd_tsd; \ +extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ +extern bool a_name##tsd_booted; #else #define malloc_tsd_externs(a_name, a_type) \ -extern pthread_key_t a_name##_tsd; \ -extern tsd_init_head_t a_name##_tsd_init_head; \ -extern bool a_name##_booted; +extern pthread_key_t a_name##tsd_tsd; \ +extern tsd_init_head_t a_name##tsd_init_head; \ +extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ +extern bool a_name##tsd_booted; #endif /* malloc_tsd_data(). */ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##_tls = a_initializer; \ + a_name##tsd_tls = a_initializer; \ a_attr __thread bool JEMALLOC_TLS_MODEL \ - a_name##_initialized = false; \ -a_attr bool a_name##_booted = false; + a_name##tsd_initialized = false; \ +a_attr bool a_name##tsd_booted = false; #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##_tls = a_initializer; \ -a_attr pthread_key_t a_name##_tsd; \ -a_attr bool a_name##_booted = false; + a_name##tsd_tls = a_initializer; \ +a_attr pthread_key_t a_name##tsd_tsd; \ +a_attr bool a_name##tsd_booted = false; #elif (defined(_WIN32)) #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr DWORD a_name##_tsd; \ -a_attr bool a_name##_booted = false; +a_attr DWORD a_name##tsd_tsd; \ +a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ + false, \ + a_initializer \ +}; \ +a_attr bool a_name##tsd_booted = false; #else #define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr pthread_key_t a_name##_tsd; \ -a_attr tsd_init_head_t a_name##_tsd_init_head = { \ +a_attr pthread_key_t a_name##tsd_tsd; \ +a_attr tsd_init_head_t a_name##tsd_init_head = { \ ql_head_initializer(blocks), \ MALLOC_MUTEX_INITIALIZER \ }; \ -a_attr bool a_name##_booted = false; +a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ + false, \ + a_initializer \ +}; \ +a_attr bool a_name##tsd_booted = false; #endif /* malloc_tsd_funcs(). */ @@ -125,75 +169,100 @@ a_attr bool a_name##_booted = false; a_cleanup) \ /* Initialization/cleanup. 
*/ \ a_attr bool \ -a_name##_tsd_cleanup_wrapper(void) \ +a_name##tsd_cleanup_wrapper(void) \ { \ \ - if (a_name##_initialized) { \ - a_name##_initialized = false; \ - a_cleanup(&a_name##_tls); \ + if (a_name##tsd_initialized) { \ + a_name##tsd_initialized = false; \ + a_cleanup(&a_name##tsd_tls); \ } \ - return (a_name##_initialized); \ + return (a_name##tsd_initialized); \ } \ a_attr bool \ -a_name##_tsd_boot(void) \ +a_name##tsd_boot0(void) \ { \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ malloc_tsd_cleanup_register( \ - &a_name##_tsd_cleanup_wrapper); \ + &a_name##tsd_cleanup_wrapper); \ } \ - a_name##_booted = true; \ + a_name##tsd_booted = true; \ return (false); \ } \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + \ + /* Do nothing. */ \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (a_name##tsd_boot0()); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##_tsd_get(void) \ +a_name##tsd_get(void) \ { \ \ - assert(a_name##_booted); \ - return (&a_name##_tls); \ + assert(a_name##tsd_booted); \ + return (&a_name##tsd_tls); \ } \ a_attr void \ -a_name##_tsd_set(a_type *val) \ +a_name##tsd_set(a_type *val) \ { \ \ - assert(a_name##_booted); \ - a_name##_tls = (*val); \ + assert(a_name##tsd_booted); \ + a_name##tsd_tls = (*val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ - a_name##_initialized = true; \ + a_name##tsd_initialized = true; \ } #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. */ \ a_attr bool \ -a_name##_tsd_boot(void) \ +a_name##tsd_boot0(void) \ { \ \ if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \ + if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ + 0) \ return (true); \ } \ - a_name##_booted = true; \ + a_name##tsd_booted = true; \ return (false); \ } \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + \ + /* Do nothing. */ \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + return (a_name##tsd_boot0()); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##_tsd_get(void) \ +a_name##tsd_get(void) \ { \ \ - assert(a_name##_booted); \ - return (&a_name##_tls); \ + assert(a_name##tsd_booted); \ + return (&a_name##tsd_tls); \ } \ a_attr void \ -a_name##_tsd_set(a_type *val) \ +a_name##tsd_set(a_type *val) \ { \ \ - assert(a_name##_booted); \ - a_name##_tls = (*val); \ + assert(a_name##tsd_booted); \ + a_name##tsd_tls = (*val); \ if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_setspecific(a_name##_tsd, \ - (void *)(&a_name##_tls))) { \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)(&a_name##tsd_tls))) { \ malloc_write("<jemalloc>: Error" \ " setting TSD for "#a_name"\n"); \ if (opt_abort) \ @@ -204,27 +273,21 @@ a_name##_tsd_set(a_type *val) \ #elif (defined(_WIN32)) #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ -/* Data structure. */ \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##_tsd_wrapper_t; \ /* Initialization/cleanup. 
*/ \ a_attr bool \ -a_name##_tsd_cleanup_wrapper(void) \ +a_name##tsd_cleanup_wrapper(void) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + DWORD error = GetLastError(); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + TlsGetValue(a_name##tsd_tsd); \ + SetLastError(error); \ \ - wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \ if (wrapper == NULL) \ return (false); \ if (a_cleanup != malloc_tsd_no_cleanup && \ wrapper->initialized) { \ - a_type val = wrapper->val; \ - a_type tsd_static_data = a_initializer; \ wrapper->initialized = false; \ - wrapper->val = tsd_static_data; \ - a_cleanup(&val); \ + a_cleanup(&wrapper->val); \ if (wrapper->initialized) { \ /* Trigger another cleanup round. */ \ return (true); \ @@ -233,63 +296,95 @@ a_name##_tsd_cleanup_wrapper(void) \ malloc_tsd_dalloc(wrapper); \ return (false); \ } \ -a_attr bool \ -a_name##_tsd_boot(void) \ +a_attr void \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ { \ \ - a_name##_tsd = TlsAlloc(); \ - if (a_name##_tsd == TLS_OUT_OF_INDEXES) \ - return (true); \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##_tsd_cleanup_wrapper); \ + if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ } \ - a_name##_booted = true; \ - return (false); \ } \ -/* Get/set. */ \ -a_attr a_name##_tsd_wrapper_t * \ -a_name##_tsd_get_wrapper(void) \ +a_attr a_name##tsd_wrapper_t * \ +a_name##tsd_wrapper_get(void) \ { \ - a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ - TlsGetValue(a_name##_tsd); \ + DWORD error = GetLastError(); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + TlsGetValue(a_name##tsd_tsd); \ + SetLastError(error); \ \ - if (wrapper == NULL) { \ - wrapper = (a_name##_tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (unlikely(wrapper == NULL)) { \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ if (wrapper == NULL) { \ malloc_write("<jemalloc>: Error allocating" \ " TSD for "#a_name"\n"); \ abort(); \ } else { \ - static a_type tsd_static_data = a_initializer; \ wrapper->initialized = false; \ - wrapper->val = tsd_static_data; \ - } \ - if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \ - malloc_write("<jemalloc>: Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ + wrapper->val = a_initializer; \ } \ + a_name##tsd_wrapper_set(wrapper); \ } \ return (wrapper); \ } \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + a_name##tsd_tsd = TlsAlloc(); \ + if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ + return (true); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##tsd_cleanup_wrapper); \ + } \ + a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + memcpy(wrapper, &a_name##tsd_boot_wrapper, \ + sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_wrapper_set(wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + if (a_name##tsd_boot0()) \ + return (true); \ + a_name##tsd_boot1(); \ + return (false); \ +} \ +/* Get/set. 
*/ \ a_attr a_type * \ -a_name##_tsd_get(void) \ +a_name##tsd_get(void) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(); \ return (&wrapper->val); \ } \ a_attr void \ -a_name##_tsd_set(a_type *val) \ +a_name##tsd_set(a_type *val) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -297,16 +392,11 @@ a_name##_tsd_set(a_type *val) \ #else #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ -/* Data structure. */ \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##_tsd_wrapper_t; \ /* Initialization/cleanup. */ \ a_attr void \ -a_name##_tsd_cleanup_wrapper(void *arg) \ +a_name##tsd_cleanup_wrapper(void *arg) \ { \ - a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \ \ if (a_cleanup != malloc_tsd_no_cleanup && \ wrapper->initialized) { \ @@ -314,7 +404,7 @@ a_name##_tsd_cleanup_wrapper(void *arg) \ a_cleanup(&wrapper->val); \ if (wrapper->initialized) { \ /* Trigger another cleanup round. */ \ - if (pthread_setspecific(a_name##_tsd, \ + if (pthread_setspecific(a_name##tsd_tsd, \ (void *)wrapper)) { \ malloc_write("<jemalloc>: Error" \ " setting TSD for "#a_name"\n"); \ @@ -326,67 +416,97 @@ a_name##_tsd_cleanup_wrapper(void *arg) \ } \ malloc_tsd_dalloc(wrapper); \ } \ -a_attr bool \ -a_name##_tsd_boot(void) \ +a_attr void \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ { \ \ - if (pthread_key_create(&a_name##_tsd, \ - a_name##_tsd_cleanup_wrapper) != 0) \ - return (true); \ - a_name##_booted = true; \ - return (false); \ + if (pthread_setspecific(a_name##tsd_tsd, \ + (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ } \ -/* Get/set. 
*/ \ -a_attr a_name##_tsd_wrapper_t * \ -a_name##_tsd_get_wrapper(void) \ +a_attr a_name##tsd_wrapper_t * \ +a_name##tsd_wrapper_get(void) \ { \ - a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ - pthread_getspecific(a_name##_tsd); \ + a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ + pthread_getspecific(a_name##tsd_tsd); \ \ - if (wrapper == NULL) { \ + if (unlikely(wrapper == NULL)) { \ tsd_init_block_t block; \ wrapper = tsd_init_check_recursion( \ - &a_name##_tsd_init_head, &block); \ + &a_name##tsd_init_head, &block); \ if (wrapper) \ return (wrapper); \ - wrapper = (a_name##_tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ block.data = wrapper; \ if (wrapper == NULL) { \ malloc_write("<jemalloc>: Error allocating" \ " TSD for "#a_name"\n"); \ abort(); \ } else { \ - static a_type tsd_static_data = a_initializer; \ wrapper->initialized = false; \ - wrapper->val = tsd_static_data; \ - } \ - if (pthread_setspecific(a_name##_tsd, \ - (void *)wrapper)) { \ - malloc_write("<jemalloc>: Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ + wrapper->val = a_initializer; \ } \ - tsd_init_finish(&a_name##_tsd_init_head, &block); \ + a_name##tsd_wrapper_set(wrapper); \ + tsd_init_finish(&a_name##tsd_init_head, &block); \ } \ return (wrapper); \ } \ +a_attr bool \ +a_name##tsd_boot0(void) \ +{ \ + \ + if (pthread_key_create(&a_name##tsd_tsd, \ + a_name##tsd_cleanup_wrapper) != 0) \ + return (true); \ + a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ + a_name##tsd_booted = true; \ + return (false); \ +} \ +a_attr void \ +a_name##tsd_boot1(void) \ +{ \ + a_name##tsd_wrapper_t *wrapper; \ + wrapper = (a_name##tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + memcpy(wrapper, &a_name##tsd_boot_wrapper, \ + sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_wrapper_set(wrapper); \ +} \ +a_attr bool \ +a_name##tsd_boot(void) \ +{ \ + \ + if (a_name##tsd_boot0()) \ + return (true); \ + a_name##tsd_boot1(); \ + return (false); \ +} \ +/* Get/set. 
*/ \ a_attr a_type * \ -a_name##_tsd_get(void) \ +a_name##tsd_get(void) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(); \ return (&wrapper->val); \ } \ a_attr void \ -a_name##_tsd_set(a_type *val) \ +a_name##tsd_set(a_type *val) \ { \ - a_name##_tsd_wrapper_t *wrapper; \ + a_name##tsd_wrapper_t *wrapper; \ \ - assert(a_name##_booted); \ - wrapper = a_name##_tsd_get_wrapper(); \ + assert(a_name##tsd_booted); \ + wrapper = a_name##tsd_wrapper_get(); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -410,25 +530,136 @@ struct tsd_init_head_s { }; #endif +#define MALLOC_TSD \ +/* O(name, type) */ \ + O(tcache, tcache_t *) \ + O(thread_allocated, uint64_t) \ + O(thread_deallocated, uint64_t) \ + O(prof_tdata, prof_tdata_t *) \ + O(arena, arena_t *) \ + O(arenas_cache, arena_t **) \ + O(narenas_cache, unsigned) \ + O(arenas_cache_bypass, bool) \ + O(tcache_enabled, tcache_enabled_t) \ + O(quarantine, quarantine_t *) \ + +#define TSD_INITIALIZER { \ + tsd_state_uninitialized, \ + NULL, \ + 0, \ + 0, \ + NULL, \ + NULL, \ + NULL, \ + 0, \ + false, \ + tcache_enabled_default, \ + NULL \ +} + +struct tsd_s { + tsd_state_t state; +#define O(n, t) \ + t n; +MALLOC_TSD +#undef O +}; + +static const tsd_t tsd_initializer = TSD_INITIALIZER; + +malloc_tsd_types(, tsd_t) + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_no_cleanup(void *); +void malloc_tsd_no_cleanup(void *arg); void malloc_tsd_cleanup_register(bool (*f)(void)); -void malloc_tsd_boot(void); +bool malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ !defined(_WIN32)) void *tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block); void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); #endif +void tsd_cleanup(void *arg); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) + +tsd_t *tsd_fetch(void); +bool tsd_nominal(tsd_t *tsd); +#define O(n, t) \ +t *tsd_##n##p_get(tsd_t *tsd); \ +t tsd_##n##_get(tsd_t *tsd); \ +void tsd_##n##_set(tsd_t *tsd, t n); +MALLOC_TSD +#undef O +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) +malloc_tsd_externs(, tsd_t) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) +{ + tsd_t *tsd = tsd_get(); + + if (unlikely(tsd->state != tsd_state_nominal)) { + if (tsd->state == tsd_state_uninitialized) { + tsd->state = tsd_state_nominal; + /* Trigger cleanup handler registration. 
*/ + tsd_set(tsd); + } else if (tsd->state == tsd_state_purgatory) { + tsd->state = tsd_state_reincarnated; + tsd_set(tsd); + } else + assert(tsd->state == tsd_state_reincarnated); + } + + return (tsd); +} + +JEMALLOC_INLINE bool +tsd_nominal(tsd_t *tsd) +{ + + return (tsd->state == tsd_state_nominal); +} + +#define O(n, t) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get(tsd_t *tsd) \ +{ \ + \ + return (&tsd->n); \ +} \ + \ +JEMALLOC_ALWAYS_INLINE t \ +tsd_##n##_get(tsd_t *tsd) \ +{ \ + \ + return (*tsd_##n##p_get(tsd)); \ +} \ + \ +JEMALLOC_ALWAYS_INLINE void \ +tsd_##n##_set(tsd_t *tsd, t n) \ +{ \ + \ + assert(tsd->state == tsd_state_nominal); \ + tsd->n = n; \ +} +MALLOC_TSD +#undef O +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/util.h b/deps/jemalloc/include/jemalloc/internal/util.h index 6b938f746..b2ea740fd 100644 --- a/deps/jemalloc/include/jemalloc/internal/util.h +++ b/deps/jemalloc/include/jemalloc/internal/util.h @@ -1,6 +1,36 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +#ifdef _WIN32 +# ifdef _WIN64 +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "ll" +# else +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "" +# endif +# define FMTd32 "d" +# define FMTu32 "u" +# define FMTx32 "x" +# define FMTd64 FMT64_PREFIX "d" +# define FMTu64 FMT64_PREFIX "u" +# define FMTx64 FMT64_PREFIX "x" +# define FMTdPTR FMTPTR_PREFIX "d" +# define FMTuPTR FMTPTR_PREFIX "u" +# define FMTxPTR FMTPTR_PREFIX "x" +#else +# include <inttypes.h> +# define FMTd32 PRId32 +# define FMTu32 PRIu32 +# define FMTx32 PRIx32 +# define FMTd64 PRId64 +# define FMTu64 PRIu64 +# define FMTx64 PRIx64 +# define FMTdPTR PRIdPTR +# define FMTuPTR PRIuPTR +# define FMTxPTR PRIxPTR +#endif + /* Size of stack-allocated buffer passed to buferror(). */ #define BUFERROR_BUF 64 @@ -22,9 +52,33 @@ * uninitialized. */ #ifdef JEMALLOC_CC_SILENCE -# define JEMALLOC_CC_SILENCE_INIT(v) = v +# define JEMALLOC_CC_SILENCE_INIT(v) = v #else -# define JEMALLOC_CC_SILENCE_INIT(v) +# define JEMALLOC_CC_SILENCE_INIT(v) +#endif + +#define JEMALLOC_GNUC_PREREQ(major, minor) \ + (!defined(__clang__) && \ + (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) +#ifndef __has_builtin +# define __has_builtin(builtin) (0) +#endif +#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \ + (defined(__clang__) && __has_builtin(builtin)) + +#ifdef __GNUC__ +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +# if JEMALLOC_GNUC_PREREQ(4, 6) || \ + JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable) +# define unreachable() __builtin_unreachable() +# else +# define unreachable() +# endif +#else +# define likely(x) !!(x) +# define unlikely(x) !!(x) +# define unreachable() #endif /* @@ -33,7 +87,7 @@ */ #ifndef assert #define assert(e) do { \ - if (config_debug && !(e)) { \ + if (unlikely(config_debug && !(e))) { \ malloc_printf( \ "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \ __FILE__, __LINE__, #e); \ @@ -50,6 +104,7 @@ __FILE__, __LINE__); \ abort(); \ } \ + unreachable(); \ } while (0) #endif @@ -65,14 +120,14 @@ #ifndef assert_not_implemented #define assert_not_implemented(e) do { \ - if (config_debug && !(e)) \ + if (unlikely(config_debug && !(e))) \ not_implemented(); \ } while (0) #endif /* Use to assert a particular configuration, e.g., cassert(config_debug). 
*/ #define cassert(c) do { \ - if ((c) == false) \ + if (unlikely(!(c))) \ not_reached(); \ } while (0) @@ -96,25 +151,47 @@ void malloc_write(const char *s); int malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); int malloc_snprintf(char *str, size_t size, const char *format, ...) - JEMALLOC_ATTR(format(printf, 3, 4)); + JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); -void malloc_printf(const char *format, ...) - JEMALLOC_ATTR(format(printf, 1, 2)); + const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +int jemalloc_ffsl(long bitmap); +int jemalloc_ffs(int bitmap); size_t pow2_ceil(size_t x); +size_t lg_floor(size_t x); void set_errno(int errnum); int get_errno(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) + +/* Sanity check. */ +#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS) +# error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure +#endif + +JEMALLOC_ALWAYS_INLINE int +jemalloc_ffsl(long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE int +jemalloc_ffs(int bitmap) +{ + + return (JEMALLOC_INTERNAL_FFS(bitmap)); +} + /* Compute the smallest power of 2 that is >= x. */ JEMALLOC_INLINE size_t pow2_ceil(size_t x) @@ -133,7 +210,82 @@ pow2_ceil(size_t x) return (x); } -/* Sets error code */ +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE size_t +lg_floor(size_t x) +{ + size_t ret; + + assert(x != 0); + + asm ("bsr %1, %0" + : "=r"(ret) // Outputs. + : "r"(x) // Inputs. + ); + return (ret); +} +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE size_t +lg_floor(size_t x) +{ + unsigned long ret; + + assert(x != 0); + +#if (LG_SIZEOF_PTR == 3) + _BitScanReverse64(&ret, x); +#elif (LG_SIZEOF_PTR == 2) + _BitScanReverse(&ret, x); +#else +# error "Unsupported type sizes for lg_floor()" +#endif + return (ret); +} +#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) +JEMALLOC_INLINE size_t +lg_floor(size_t x) +{ + + assert(x != 0); + +#if (LG_SIZEOF_PTR == LG_SIZEOF_INT) + return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x)); +#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) + return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); +#else +# error "Unsupported type sizes for lg_floor()" +#endif +} +#else +JEMALLOC_INLINE size_t +lg_floor(size_t x) +{ + + assert(x != 0); + + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); +#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG) + x |= (x >> 32); + if (x == KZU(0xffffffffffffffff)) + return (63); + x++; + return (jemalloc_ffsl(x) - 2); +#elif (LG_SIZEOF_PTR == 2) + if (x == KZU(0xffffffff)) + return (31); + x++; + return (jemalloc_ffs(x) - 2); +#else +# error "Unsupported type sizes for lg_floor()" +#endif +} +#endif + +/* Set error code. */ JEMALLOC_INLINE void set_errno(int errnum) { @@ -145,7 +297,7 @@ set_errno(int errnum) #endif } -/* Get last error code */ +/* Get last error code. 
*/ JEMALLOC_INLINE int get_errno(void) { diff --git a/deps/jemalloc/include/jemalloc/internal/valgrind.h b/deps/jemalloc/include/jemalloc/internal/valgrind.h new file mode 100644 index 000000000..a3380df92 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/valgrind.h @@ -0,0 +1,112 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#ifdef JEMALLOC_VALGRIND +#include <valgrind/valgrind.h> + +/* + * The size that is reported to Valgrind must be consistent through a chain of + * malloc..realloc..realloc calls. Request size isn't recorded anywhere in + * jemalloc, so it is critical that all callers of these macros provide usize + * rather than request size. As a result, buffer overflow detection is + * technically weakened for the standard API, though it is generally accepted + * practice to consider any extra bytes reported by malloc_usable_size() as + * usable space. + */ +#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do { \ + if (unlikely(in_valgrind)) \ + valgrind_make_mem_noaccess(ptr, usize); \ +} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do { \ + if (unlikely(in_valgrind)) \ + valgrind_make_mem_undefined(ptr, usize); \ +} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do { \ + if (unlikely(in_valgrind)) \ + valgrind_make_mem_defined(ptr, usize); \ +} while (0) +/* + * The VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_RESIZEINPLACE_BLOCK() macro + * calls must be embedded in macros rather than in functions so that when + * Valgrind reports errors, there are no extra stack frames in the backtraces. + */ +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ + if (unlikely(in_valgrind && cond)) \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ +} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ + ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ + zero) do { \ + if (unlikely(in_valgrind)) { \ + size_t rzsize = p2rz(ptr); \ + \ + if (!maybe_moved || ptr == old_ptr) { \ + VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ + usize, rzsize); \ + if (zero && old_usize < usize) { \ + valgrind_make_mem_defined( \ + (void *)((uintptr_t)ptr + \ + old_usize), usize - old_usize); \ + } \ + } else { \ + if (!old_ptr_maybe_null || old_ptr != NULL) { \ + valgrind_freelike_block(old_ptr, \ + old_rzsize); \ + } \ + if (!ptr_maybe_null || ptr != NULL) { \ + size_t copy_size = (old_usize < usize) \ + ? 
old_usize : usize; \ + size_t tail_size = usize - copy_size; \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ + rzsize, false); \ + if (copy_size > 0) { \ + valgrind_make_mem_defined(ptr, \ + copy_size); \ + } \ + if (zero && tail_size > 0) { \ + valgrind_make_mem_defined( \ + (void *)((uintptr_t)ptr + \ + copy_size), tail_size); \ + } \ + } \ + } \ + } \ +} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ + if (unlikely(in_valgrind)) \ + valgrind_freelike_block(ptr, rzsize); \ +} while (0) +#else +#define RUNNING_ON_VALGRIND ((unsigned)0) +#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) +#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ + ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ + zero) do {} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#ifdef JEMALLOC_VALGRIND +void valgrind_make_mem_noaccess(void *ptr, size_t usize); +void valgrind_make_mem_undefined(void *ptr, size_t usize); +void valgrind_make_mem_defined(void *ptr, size_t usize); +void valgrind_freelike_block(void *ptr, size_t usize); +#endif + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + |
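A note on the tsd.h rewrite above: the new MALLOC_TSD list is an X-macro. Each O(name, type) entry is expanded once into a field of struct tsd_s and again into the tsd_<name>p_get()/tsd_<name>_get()/tsd_<name>_set() inline accessors, so the set of thread-specific fields is declared in exactly one place. The stand-alone sketch below (plain C; the EXAMPLE_TSD list, its two fields, and the example_tsd_* names are invented for illustration and are not jemalloc code) shows the same expansion technique on a much smaller scale:

/*
 * Minimal sketch of the X-macro pattern used by MALLOC_TSD: one list,
 * expanded once into struct fields and once into typed accessors.
 */
#include <stdio.h>
#include <stdint.h>

#define EXAMPLE_TSD \
/* O(name, type) */ \
	O(thread_allocated,	uint64_t) \
	O(thread_deallocated,	uint64_t)

typedef struct {
#define O(n, t)	t n;
EXAMPLE_TSD
#undef O
} example_tsd_t;

/* Generate example_tsd_<name>_get()/_set() for every listed field. */
#define O(n, t) \
static t \
example_tsd_##n##_get(example_tsd_t *tsd) \
{ \
	return (tsd->n); \
} \
static void \
example_tsd_##n##_set(example_tsd_t *tsd, t val) \
{ \
	tsd->n = val; \
}
EXAMPLE_TSD
#undef O

int main(void)
{
	example_tsd_t tsd = {0, 0};

	example_tsd_thread_allocated_set(&tsd, 4096);
	example_tsd_thread_deallocated_set(&tsd, 1024);
	printf("allocated=%llu deallocated=%llu\n",
	    (unsigned long long)example_tsd_thread_allocated_get(&tsd),
	    (unsigned long long)example_tsd_thread_deallocated_get(&tsd));
	return (0);
}

This single-list approach is also why TSD_INITIALIZER in the diff is a plain brace list: adding a thread-specific field means adding one O() entry plus one corresponding initializer value, with the struct definition and the accessors kept in sync automatically by the macro expansion.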