diff options
author | antirez <antirez@gmail.com> | 2018-05-24 17:17:37 +0200 |
---|---|---|
committer | antirez <antirez@gmail.com> | 2018-05-24 17:17:37 +0200 |
commit | 08e1c8e820031751bd58736cc334dda1bc55cb5c (patch) | |
tree | 2ee6ccfc7a5d7986b7a55bca5cf80ecbb6c08fc6 /deps/jemalloc/include | |
parent | 8f4e2075a703acce08056321f49defd44dae19ed (diff) | |
download | redis-08e1c8e820031751bd58736cc334dda1bc55cb5c.tar.gz |
Jemalloc upgraded to version 5.0.1.
Diffstat (limited to 'deps/jemalloc/include')
111 files changed, 10635 insertions, 7525 deletions
diff --git a/deps/jemalloc/include/jemalloc/internal/arena.h b/deps/jemalloc/include/jemalloc/internal/arena.h deleted file mode 100644 index 12c617979..000000000 --- a/deps/jemalloc/include/jemalloc/internal/arena.h +++ /dev/null @@ -1,1347 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) - -/* Maximum number of regions in one run. */ -#define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN) -#define RUN_MAXREGS (1U << LG_RUN_MAXREGS) - -/* - * Minimum redzone size. Redzones may be larger than this if necessary to - * preserve region alignment. - */ -#define REDZONE_MINSIZE 16 - -/* - * The minimum ratio of active:dirty pages per arena is computed as: - * - * (nactive >> lg_dirty_mult) >= ndirty - * - * So, supposing that lg_dirty_mult is 3, there can be no less than 8 times as - * many active pages as dirty pages. - */ -#define LG_DIRTY_MULT_DEFAULT 3 - -typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; -typedef struct arena_run_s arena_run_t; -typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; -typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; -typedef struct arena_chunk_s arena_chunk_t; -typedef struct arena_bin_info_s arena_bin_info_t; -typedef struct arena_bin_s arena_bin_t; -typedef struct arena_s arena_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#ifdef JEMALLOC_ARENA_STRUCTS_A -struct arena_run_s { - /* Index of bin this run is associated with. */ - szind_t binind; - - /* Number of free regions in run. */ - unsigned nfree; - - /* Per region allocated/deallocated bitmap. */ - bitmap_t bitmap[BITMAP_GROUPS_MAX]; -}; - -/* Each element of the chunk map corresponds to one page within the chunk. */ -struct arena_chunk_map_bits_s { - /* - * Run address (or size) and various flags are stored together. The bit - * layout looks like (assuming 32-bit system): - * - * ???????? ???????? ???nnnnn nnndumla - * - * ? : Unallocated: Run address for first/last pages, unset for internal - * pages. - * Small: Run page offset. - * Large: Run page count for first page, unset for trailing pages. - * n : binind for small size class, BININD_INVALID for large size class. - * d : dirty? - * u : unzeroed? - * m : decommitted? - * l : large? - * a : allocated? - * - * Following are example bit patterns for the three types of runs. - * - * p : run page offset - * s : run size - * n : binind for size class; large objects set these to BININD_INVALID - * x : don't care - * - : 0 - * + : 1 - * [DUMLA] : bit set - * [dumla] : bit unset - * - * Unallocated (clean): - * ssssssss ssssssss sss+++++ +++dum-a - * xxxxxxxx xxxxxxxx xxxxxxxx xxx-Uxxx - * ssssssss ssssssss sss+++++ +++dUm-a - * - * Unallocated (dirty): - * ssssssss ssssssss sss+++++ +++D-m-a - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * ssssssss ssssssss sss+++++ +++D-m-a - * - * Small: - * pppppppp pppppppp pppnnnnn nnnd---A - * pppppppp pppppppp pppnnnnn nnn----A - * pppppppp pppppppp pppnnnnn nnnd---A - * - * Large: - * ssssssss ssssssss sss+++++ +++D--LA - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ---+++++ +++D--LA - * - * Large (sampled, size <= LARGE_MINCLASS): - * ssssssss ssssssss sssnnnnn nnnD--LA - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ---+++++ +++D--LA - * - * Large (not sampled, size == LARGE_MINCLASS): - * ssssssss ssssssss sss+++++ +++D--LA - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ---+++++ +++D--LA - */ - size_t bits; -#define CHUNK_MAP_ALLOCATED ((size_t)0x01U) -#define CHUNK_MAP_LARGE ((size_t)0x02U) -#define CHUNK_MAP_STATE_MASK ((size_t)0x3U) - -#define CHUNK_MAP_DECOMMITTED ((size_t)0x04U) -#define CHUNK_MAP_UNZEROED ((size_t)0x08U) -#define CHUNK_MAP_DIRTY ((size_t)0x10U) -#define CHUNK_MAP_FLAGS_MASK ((size_t)0x1cU) - -#define CHUNK_MAP_BININD_SHIFT 5 -#define BININD_INVALID ((size_t)0xffU) -#define CHUNK_MAP_BININD_MASK (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) -#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK - -#define CHUNK_MAP_RUNIND_SHIFT (CHUNK_MAP_BININD_SHIFT + 8) -#define CHUNK_MAP_SIZE_SHIFT (CHUNK_MAP_RUNIND_SHIFT - LG_PAGE) -#define CHUNK_MAP_SIZE_MASK \ - (~(CHUNK_MAP_BININD_MASK | CHUNK_MAP_FLAGS_MASK | CHUNK_MAP_STATE_MASK)) -}; - -struct arena_runs_dirty_link_s { - qr(arena_runs_dirty_link_t) rd_link; -}; - -/* - * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just - * like arena_chunk_map_bits_t. Two separate arrays are stored within each - * chunk header in order to improve cache locality. - */ -struct arena_chunk_map_misc_s { - /* - * Linkage for run trees. There are two disjoint uses: - * - * 1) arena_t's runs_avail tree. - * 2) arena_run_t conceptually uses this linkage for in-use non-full - * runs, rather than directly embedding linkage. - */ - rb_node(arena_chunk_map_misc_t) rb_link; - - union { - /* Linkage for list of dirty runs. */ - arena_runs_dirty_link_t rd; - - /* Profile counters, used for large object runs. */ - union { - void *prof_tctx_pun; - prof_tctx_t *prof_tctx; - }; - - /* Small region run metadata. */ - arena_run_t run; - }; -}; -typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t; -typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; -#endif /* JEMALLOC_ARENA_STRUCTS_A */ - -#ifdef JEMALLOC_ARENA_STRUCTS_B -/* Arena chunk header. */ -struct arena_chunk_s { - /* - * A pointer to the arena that owns the chunk is stored within the node. - * This field as a whole is used by chunks_rtree to support both - * ivsalloc() and core-based debugging. - */ - extent_node_t node; - - /* - * Map of pages within chunk that keeps track of free/large/small. The - * first map_bias entries are omitted, since the chunk header does not - * need to be tracked in the map. This omission saves a header page - * for common chunk sizes (e.g. 4 MiB). - */ - arena_chunk_map_bits_t map_bits[1]; /* Dynamically sized. */ -}; - -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. - * - * Each run has the following layout: - * - * /--------------------\ - * | pad? | - * |--------------------| - * | redzone | - * reg0_offset | region 0 | - * | redzone | - * |--------------------| \ - * | redzone | | - * | region 1 | > reg_interval - * | redzone | / - * |--------------------| - * | ... | - * | ... | - * | ... | - * |--------------------| - * | redzone | - * | region nregs-1 | - * | redzone | - * |--------------------| - * | alignment pad? | - * \--------------------/ - * - * reg_interval has at least the same minimum alignment as reg_size; this - * preserves the alignment constraint that sa2u() depends on. Alignment pad is - * either 0 or redzone_size; it is present only if needed to align reg0_offset. - */ -struct arena_bin_info_s { - /* Size of regions in a run for this bin's size class. */ - size_t reg_size; - - /* Redzone size. */ - size_t redzone_size; - - /* Interval between regions (reg_size + (redzone_size << 1)). */ - size_t reg_interval; - - /* Total size of a run for this bin's size class. */ - size_t run_size; - - /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; - - /* - * Metadata used to manipulate bitmaps for runs associated with this - * bin. - */ - bitmap_info_t bitmap_info; - - /* Offset of first region in a run for this bin's size class. */ - uint32_t reg0_offset; -}; - -struct arena_bin_s { - /* - * All operations on runcur, runs, and stats require that lock be - * locked. Run allocation/deallocation are protected by the arena lock, - * which may be acquired while holding one or more bin locks, but not - * vise versa. - */ - malloc_mutex_t lock; - - /* - * Current run being used to service allocations of this bin's size - * class. - */ - arena_run_t *runcur; - - /* - * Tree of non-full runs. This tree is used when looking for an - * existing run when runcur is no longer usable. We choose the - * non-full run that is lowest in memory; this policy tends to keep - * objects packed well, and it can also help reduce the number of - * almost-empty chunks. - */ - arena_run_tree_t runs; - - /* Bin statistics. */ - malloc_bin_stats_t stats; -}; - -struct arena_s { - /* This arena's index within the arenas array. */ - unsigned ind; - - /* - * Number of threads currently assigned to this arena. This field is - * protected by arenas_lock. - */ - unsigned nthreads; - - /* - * There are three classes of arena operations from a locking - * perspective: - * 1) Thread assignment (modifies nthreads) is protected by arenas_lock. - * 2) Bin-related operations are protected by bin locks. - * 3) Chunk- and run-related operations are protected by this mutex. - */ - malloc_mutex_t lock; - - arena_stats_t stats; - /* - * List of tcaches for extant threads associated with this arena. - * Stats from these are merged incrementally, and at exit if - * opt_stats_print is enabled. - */ - ql_head(tcache_t) tcache_ql; - - uint64_t prof_accumbytes; - - /* - * PRNG state for cache index randomization of large allocation base - * pointers. - */ - uint64_t offset_state; - - dss_prec_t dss_prec; - - /* - * In order to avoid rapid chunk allocation/deallocation when an arena - * oscillates right on the cusp of needing a new chunk, cache the most - * recently freed chunk. The spare is left in the arena's chunk trees - * until it is deleted. - * - * There is one spare chunk per arena, rather than one spare total, in - * order to avoid interactions between multiple threads that could make - * a single spare inadequate. - */ - arena_chunk_t *spare; - - /* Minimum ratio (log base 2) of nactive:ndirty. */ - ssize_t lg_dirty_mult; - - /* True if a thread is currently executing arena_purge(). */ - bool purging; - - /* Number of pages in active runs and huge regions. */ - size_t nactive; - - /* - * Current count of pages within unused runs that are potentially - * dirty, and for which madvise(... MADV_DONTNEED) has not been called. - * By tracking this, we can institute a limit on how much dirty unused - * memory is mapped for each arena. - */ - size_t ndirty; - - /* - * Size/address-ordered tree of this arena's available runs. The tree - * is used for first-best-fit run allocation. - */ - arena_avail_tree_t runs_avail; - - /* - * Unused dirty memory this arena manages. Dirty memory is conceptually - * tracked as an arbitrarily interleaved LRU of dirty runs and cached - * chunks, but the list linkage is actually semi-duplicated in order to - * avoid extra arena_chunk_map_misc_t space overhead. - * - * LRU-----------------------------------------------------------MRU - * - * /-- arena ---\ - * | | - * | | - * |------------| /- chunk -\ - * ...->|chunks_cache|<--------------------------->| /----\ |<--... - * |------------| | |node| | - * | | | | | | - * | | /- run -\ /- run -\ | | | | - * | | | | | | | | | | - * | | | | | | | | | | - * |------------| |-------| |-------| | |----| | - * ...->|runs_dirty |<-->|rd |<-->|rd |<---->|rd |<----... - * |------------| |-------| |-------| | |----| | - * | | | | | | | | | | - * | | | | | | | \----/ | - * | | \-------/ \-------/ | | - * | | | | - * | | | | - * \------------/ \---------/ - */ - arena_runs_dirty_link_t runs_dirty; - extent_node_t chunks_cache; - - /* Extant huge allocations. */ - ql_head(extent_node_t) huge; - /* Synchronizes all huge allocation/update/deallocation. */ - malloc_mutex_t huge_mtx; - - /* - * Trees of chunks that were previously allocated (trees differ only in - * node ordering). These are used when allocating chunks, in an attempt - * to re-use address space. Depending on function, different tree - * orderings are needed, which is why there are two trees with the same - * contents. - */ - extent_tree_t chunks_szad_cached; - extent_tree_t chunks_ad_cached; - extent_tree_t chunks_szad_retained; - extent_tree_t chunks_ad_retained; - - malloc_mutex_t chunks_mtx; - /* Cache of nodes that were allocated via base_alloc(). */ - ql_head(extent_node_t) node_cache; - malloc_mutex_t node_cache_mtx; - - /* User-configurable chunk hook functions. */ - chunk_hooks_t chunk_hooks; - - /* bins is used to store trees of free regions. */ - arena_bin_t bins[NBINS]; -}; -#endif /* JEMALLOC_ARENA_STRUCTS_B */ - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -static const size_t large_pad = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - PAGE -#else - 0 -#endif - ; - -extern ssize_t opt_lg_dirty_mult; - -extern arena_bin_info_t arena_bin_info[NBINS]; - -extern size_t map_bias; /* Number of arena chunk header pages. */ -extern size_t map_misc_offset; -extern size_t arena_maxrun; /* Max run size for arenas. */ -extern size_t large_maxclass; /* Max large size class. */ -extern unsigned nlclasses; /* Number of large size classes. */ -extern unsigned nhclasses; /* Number of huge size classes. */ - -void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, - bool cache); -void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, - bool cache); -extent_node_t *arena_node_alloc(arena_t *arena); -void arena_node_dalloc(arena_t *arena, extent_node_t *node); -void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, - bool *zero); -void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize); -void arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, - size_t oldsize, size_t usize); -void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, - size_t oldsize, size_t usize); -bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, - size_t oldsize, size_t usize, bool *zero); -ssize_t arena_lg_dirty_mult_get(arena_t *arena); -bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); -void arena_maybe_purge(arena_t *arena); -void arena_purge_all(arena_t *arena); -void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - szind_t binind, uint64_t prof_accumbytes); -void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, - bool zero); -#ifdef JEMALLOC_JET -typedef void (arena_redzone_corruption_t)(void *, size_t, bool, size_t, - uint8_t); -extern arena_redzone_corruption_t *arena_redzone_corruption; -typedef void (arena_dalloc_junk_small_t)(void *, arena_bin_info_t *); -extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; -#else -void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); -#endif -void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_small(arena_t *arena, size_t size, bool zero); -void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, - void *ptr, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind); -#ifdef JEMALLOC_JET -typedef void (arena_dalloc_junk_large_t)(void *, size_t); -extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; -#else -void arena_dalloc_junk_large(void *ptr, size_t usize); -#endif -void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, - void *ptr); -void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); -#ifdef JEMALLOC_JET -typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); -extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; -#endif -bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); -void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(arena_t *arena); -bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); -ssize_t arena_lg_dirty_mult_default_get(void); -bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); -void arena_stats_merge(arena_t *arena, const char **dss, - ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); -arena_t *arena_new(unsigned ind); -bool arena_boot(void); -void arena_prefork(arena_t *arena); -void arena_postfork_parent(arena_t *arena); -void arena_postfork_child(arena_t *arena); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, - size_t pageind); -arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, - size_t pageind); -size_t arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm); -void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); -arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); -arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); -size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbitsp_read(size_t *mapbitsp); -size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_size_decode(size_t mapbits); -size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); -szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); -void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); -size_t arena_mapbits_size_encode(size_t size); -void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, - size_t size, size_t flags); -void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, - size_t size); -void arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, - size_t flags); -void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, - size_t size, size_t flags); -void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - szind_t binind); -void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, - size_t runind, szind_t binind, size_t flags); -void arena_metadata_allocated_add(arena_t *arena, size_t size); -void arena_metadata_allocated_sub(arena_t *arena, size_t size); -size_t arena_metadata_allocated_get(arena_t *arena); -bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); -szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); -szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, - const void *ptr); -prof_tctx_t *arena_prof_tctx_get(const void *ptr); -void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); -void arena_prof_tctx_reset(const void *ptr, size_t usize, - const void *old_ptr, prof_tctx_t *old_tctx); -void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache); -arena_t *arena_aalloc(const void *ptr); -size_t arena_salloc(const void *ptr, bool demote); -void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); -void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) -# ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * -arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) -{ - - assert(pageind >= map_bias); - assert(pageind < chunk_npages); - - return (&chunk->map_bits[pageind-map_bias]); -} - -JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) -{ - - assert(pageind >= map_bias); - assert(pageind < chunk_npages); - - return ((arena_chunk_map_misc_t *)((uintptr_t)chunk + - (uintptr_t)map_misc_offset) + pageind-map_bias); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm) -{ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); - size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + - map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias; - - assert(pageind >= map_bias); - assert(pageind < chunk_npages); - - return (pageind); -} - -JEMALLOC_ALWAYS_INLINE void * -arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm) -{ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); - size_t pageind = arena_miscelm_to_pageind(miscelm); - - return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))); -} - -JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) -{ - arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t - *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd)); - - assert(arena_miscelm_to_pageind(miscelm) >= map_bias); - assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); - - return (miscelm); -} - -JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_run_to_miscelm(arena_run_t *run) -{ - arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t - *)((uintptr_t)run - offsetof(arena_chunk_map_misc_t, run)); - - assert(arena_miscelm_to_pageind(miscelm) >= map_bias); - assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); - - return (miscelm); -} - -JEMALLOC_ALWAYS_INLINE size_t * -arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) -{ - - return (&arena_bitselm_get(chunk, pageind)->bits); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbitsp_read(size_t *mapbitsp) -{ - - return (*mapbitsp); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) -{ - - return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind))); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_size_decode(size_t mapbits) -{ - size_t size; - -#if CHUNK_MAP_SIZE_SHIFT > 0 - size = (mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT; -#elif CHUNK_MAP_SIZE_SHIFT == 0 - size = mapbits & CHUNK_MAP_SIZE_MASK; -#else - size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT; -#endif - - return (size); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - return (arena_mapbits_size_decode(mapbits)); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == - (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); - return (arena_mapbits_size_decode(mapbits)); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == - CHUNK_MAP_ALLOCATED); - return (mapbits >> CHUNK_MAP_RUNIND_SHIFT); -} - -JEMALLOC_ALWAYS_INLINE szind_t -arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - szind_t binind; - - mapbits = arena_mapbits_get(chunk, pageind); - binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; - assert(binind < NBINS || binind == BININD_INVALID); - return (binind); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - return (mapbits & CHUNK_MAP_DIRTY); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - return (mapbits & CHUNK_MAP_UNZEROED); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - return (mapbits & CHUNK_MAP_DECOMMITTED); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - return (mapbits & CHUNK_MAP_LARGE); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - return (mapbits & CHUNK_MAP_ALLOCATED); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits) -{ - - *mapbitsp = mapbits; -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_size_encode(size_t size) -{ - size_t mapbits; - -#if CHUNK_MAP_SIZE_SHIFT > 0 - mapbits = size << CHUNK_MAP_SIZE_SHIFT; -#elif CHUNK_MAP_SIZE_SHIFT == 0 - mapbits = size; -#else - mapbits = size >> -CHUNK_MAP_SIZE_SHIFT; -#endif - - assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0); - return (mapbits); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, - size_t flags) -{ - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - - assert((size & PAGE_MASK) == 0); - assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); - assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | - CHUNK_MAP_BININD_INVALID | flags); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, - size_t size) -{ - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - size_t mapbits = arena_mapbitsp_read(mapbitsp); - - assert((size & PAGE_MASK) == 0); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | - (mapbits & ~CHUNK_MAP_SIZE_MASK)); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags) -{ - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - - assert((flags & CHUNK_MAP_UNZEROED) == flags); - arena_mapbitsp_write(mapbitsp, flags); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, - size_t flags) -{ - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - - assert((size & PAGE_MASK) == 0); - assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); - assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | - CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - szind_t binind) -{ - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - size_t mapbits = arena_mapbitsp_read(mapbitsp); - - assert(binind <= BININD_INVALID); - assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS + - large_pad); - arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) | - (binind << CHUNK_MAP_BININD_SHIFT)); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, - szind_t binind, size_t flags) -{ - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); - - assert(binind < BININD_INVALID); - assert(pageind - runind >= map_bias); - assert((flags & CHUNK_MAP_UNZEROED) == flags); - arena_mapbitsp_write(mapbitsp, (runind << CHUNK_MAP_RUNIND_SHIFT) | - (binind << CHUNK_MAP_BININD_SHIFT) | flags | CHUNK_MAP_ALLOCATED); -} - -JEMALLOC_INLINE void -arena_metadata_allocated_add(arena_t *arena, size_t size) -{ - - atomic_add_z(&arena->stats.metadata_allocated, size); -} - -JEMALLOC_INLINE void -arena_metadata_allocated_sub(arena_t *arena, size_t size) -{ - - atomic_sub_z(&arena->stats.metadata_allocated, size); -} - -JEMALLOC_INLINE size_t -arena_metadata_allocated_get(arena_t *arena) -{ - - return (atomic_read_z(&arena->stats.metadata_allocated)); -} - -JEMALLOC_INLINE bool -arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - assert(prof_interval != 0); - - arena->prof_accumbytes += accumbytes; - if (arena->prof_accumbytes >= prof_interval) { - arena->prof_accumbytes -= prof_interval; - return (true); - } - return (false); -} - -JEMALLOC_INLINE bool -arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - - if (likely(prof_interval == 0)) - return (false); - return (arena_prof_accum_impl(arena, accumbytes)); -} - -JEMALLOC_INLINE bool -arena_prof_accum(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - - if (likely(prof_interval == 0)) - return (false); - - { - bool ret; - - malloc_mutex_lock(&arena->lock); - ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(&arena->lock); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE szind_t -arena_ptr_small_binind_get(const void *ptr, size_t mapbits) -{ - szind_t binind; - - binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; - - if (config_debug) { - arena_chunk_t *chunk; - arena_t *arena; - size_t pageind; - size_t actual_mapbits; - size_t rpages_ind; - arena_run_t *run; - arena_bin_t *bin; - szind_t run_binind, actual_binind; - arena_bin_info_t *bin_info; - arena_chunk_map_misc_t *miscelm; - void *rpages; - - assert(binind != BININD_INVALID); - assert(binind < NBINS); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = extent_node_arena_get(&chunk->node); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - actual_mapbits = arena_mapbits_get(chunk, pageind); - assert(mapbits == actual_mapbits); - assert(arena_mapbits_large_get(chunk, pageind) == 0); - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, - pageind); - miscelm = arena_miscelm_get(chunk, rpages_ind); - run = &miscelm->run; - run_binind = run->binind; - bin = &arena->bins[run_binind]; - actual_binind = bin - arena->bins; - assert(run_binind == actual_binind); - bin_info = &arena_bin_info[actual_binind]; - rpages = arena_miscelm_to_rpages(miscelm); - assert(((uintptr_t)ptr - ((uintptr_t)rpages + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval - == 0); - } - - return (binind); -} -# endif /* JEMALLOC_ARENA_INLINE_A */ - -# ifdef JEMALLOC_ARENA_INLINE_B -JEMALLOC_INLINE szind_t -arena_bin_index(arena_t *arena, arena_bin_t *bin) -{ - szind_t binind = bin - arena->bins; - assert(binind < NBINS); - return (binind); -} - -JEMALLOC_INLINE unsigned -arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) -{ - unsigned shift, diff, regind; - size_t interval; - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - void *rpages = arena_miscelm_to_rpages(miscelm); - - /* - * Freeing a pointer lower than region zero can cause assertion - * failure. - */ - assert((uintptr_t)ptr >= (uintptr_t)rpages + - (uintptr_t)bin_info->reg0_offset); - - /* - * Avoid doing division with a variable divisor if possible. Using - * actual division here can reduce allocator throughput by over 20%! - */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)rpages - - bin_info->reg0_offset); - - /* Rescale (factor powers of 2 out of the numerator and denominator). */ - interval = bin_info->reg_interval; - shift = jemalloc_ffs(interval) - 1; - diff >>= shift; - interval >>= shift; - - if (interval == 1) { - /* The divisor was a power of 2. */ - regind = diff; - } else { - /* - * To divide by a number D that is not a power of two we - * multiply by (2^21 / D) and then right shift by 21 positions. - * - * X / D - * - * becomes - * - * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT - * - * We can omit the first three elements, because we never - * divide by 0, and 1 and 2 are both powers of two, which are - * handled above. - */ -#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) - static const unsigned interval_invs[] = { - SIZE_INV(3), - SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), - SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), - SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), - SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), - SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), - SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), - SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) - }; - - if (likely(interval <= ((sizeof(interval_invs) / - sizeof(unsigned)) + 2))) { - regind = (diff * interval_invs[interval - 3]) >> - SIZE_INV_SHIFT; - } else - regind = diff / interval; -#undef SIZE_INV -#undef SIZE_INV_SHIFT - } - assert(diff == regind * interval); - assert(regind < bin_info->nregs); - - return (regind); -} - -JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(const void *ptr) -{ - prof_tctx_t *ret; - arena_chunk_t *chunk; - - cassert(config_prof); - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) - ret = (prof_tctx_t *)(uintptr_t)1U; - else { - arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk, - pageind); - ret = atomic_read_p(&elm->prof_tctx_pun); - } - } else - ret = huge_prof_tctx_get(ptr); - - return (ret); -} - -JEMALLOC_INLINE void -arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) -{ - arena_chunk_t *chunk; - - cassert(config_prof); - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - - if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx > - (uintptr_t)1U)) { - arena_chunk_map_misc_t *elm; - - assert(arena_mapbits_large_get(chunk, pageind) != 0); - - elm = arena_miscelm_get(chunk, pageind); - atomic_write_p(&elm->prof_tctx_pun, tctx); - } else { - /* - * tctx must always be initialized for large runs. - * Assert that the surrounding conditional logic is - * equivalent to checking whether ptr refers to a large - * run. - */ - assert(arena_mapbits_large_get(chunk, pageind) == 0); - } - } else - huge_prof_tctx_set(ptr, tctx); -} - -JEMALLOC_INLINE void -arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, - prof_tctx_t *old_tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - - if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr && - (uintptr_t)old_tctx > (uintptr_t)1U))) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - size_t pageind; - arena_chunk_map_misc_t *elm; - - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> - LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != - 0); - assert(arena_mapbits_large_get(chunk, pageind) != 0); - - elm = arena_miscelm_get(chunk, pageind); - atomic_write_p(&elm->prof_tctx_pun, - (prof_tctx_t *)(uintptr_t)1U); - } else - huge_prof_tctx_reset(ptr); - } -} - -JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache) -{ - - assert(size != 0); - - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); - - if (likely(size <= SMALL_MAXCLASS)) { - if (likely(tcache != NULL)) { - return (tcache_alloc_small(tsd, arena, tcache, size, - zero)); - } else - return (arena_malloc_small(arena, size, zero)); - } else if (likely(size <= large_maxclass)) { - /* - * Initialize tcache after checking size in order to avoid - * infinite recursion during tcache initialization. - */ - if (likely(tcache != NULL) && size <= tcache_maxclass) { - return (tcache_alloc_large(tsd, arena, tcache, size, - zero)); - } else - return (arena_malloc_large(arena, size, zero)); - } else - return (huge_malloc(tsd, arena, size, zero, tcache)); -} - -JEMALLOC_ALWAYS_INLINE arena_t * -arena_aalloc(const void *ptr) -{ - arena_chunk_t *chunk; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) - return (extent_node_arena_get(&chunk->node)); - else - return (huge_aalloc(ptr)); -} - -/* Return the size of the allocation pointed to by ptr. */ -JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(const void *ptr, bool demote) -{ - size_t ret; - arena_chunk_t *chunk; - size_t pageind; - szind_t binind; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - binind = arena_mapbits_binind_get(chunk, pageind); - if (unlikely(binind == BININD_INVALID || (config_prof && !demote - && arena_mapbits_large_get(chunk, pageind) != 0))) { - /* - * Large allocation. In the common case (demote), and - * as this is an inline function, most callers will only - * end up looking at binind to determine that ptr is a - * small allocation. - */ - assert(config_cache_oblivious || ((uintptr_t)ptr & - PAGE_MASK) == 0); - ret = arena_mapbits_large_size_get(chunk, pageind) - - large_pad; - assert(ret != 0); - assert(pageind + ((ret+large_pad)>>LG_PAGE) <= - chunk_npages); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, - pageind+((ret+large_pad)>>LG_PAGE)-1)); - } else { - /* - * Small allocation (possibly promoted to a large - * object). - */ - assert(arena_mapbits_large_get(chunk, pageind) != 0 || - arena_ptr_small_binind_get(ptr, - arena_mapbits_get(chunk, pageind)) == binind); - ret = index2size(binind); - } - } else - ret = huge_salloc(ptr); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) -{ - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { - /* Small allocation. */ - if (likely(tcache != NULL)) { - szind_t binind = arena_ptr_small_binind_get(ptr, - mapbits); - tcache_dalloc_small(tsd, tcache, ptr, binind); - } else { - arena_dalloc_small(extent_node_arena_get( - &chunk->node), chunk, ptr, pageind); - } - } else { - size_t size = arena_mapbits_large_size_get(chunk, - pageind); - - assert(config_cache_oblivious || ((uintptr_t)ptr & - PAGE_MASK) == 0); - - if (likely(tcache != NULL) && size - large_pad <= - tcache_maxclass) { - tcache_dalloc_large(tsd, tcache, ptr, size - - large_pad); - } else { - arena_dalloc_large(extent_node_arena_get( - &chunk->node), chunk, ptr); - } - } - } else - huge_dalloc(tsd, ptr, tcache); -} - -JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) -{ - arena_chunk_t *chunk; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { - if (config_prof && opt_prof) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> - LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (arena_mapbits_large_get(chunk, pageind) != 0) { - /* - * Make sure to use promoted size, not request - * size. - */ - size = arena_mapbits_large_size_get(chunk, - pageind) - large_pad; - } - } - assert(s2u(size) == s2u(arena_salloc(ptr, false))); - - if (likely(size <= SMALL_MAXCLASS)) { - /* Small allocation. */ - if (likely(tcache != NULL)) { - szind_t binind = size2index(size); - tcache_dalloc_small(tsd, tcache, ptr, binind); - } else { - size_t pageind = ((uintptr_t)ptr - - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(extent_node_arena_get( - &chunk->node), chunk, ptr, pageind); - } - } else { - assert(config_cache_oblivious || ((uintptr_t)ptr & - PAGE_MASK) == 0); - - if (likely(tcache != NULL) && size <= tcache_maxclass) - tcache_dalloc_large(tsd, tcache, ptr, size); - else { - arena_dalloc_large(extent_node_arena_get( - &chunk->node), chunk, ptr); - } - } - } else - huge_dalloc(tsd, ptr, tcache); -} -# endif /* JEMALLOC_ARENA_INLINE_B */ -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/arena_externs.h b/deps/jemalloc/include/jemalloc/internal/arena_externs.h new file mode 100644 index 000000000..4b3732b41 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/arena_externs.h @@ -0,0 +1,94 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#define JEMALLOC_INTERNAL_ARENA_EXTERNS_H + +#include "jemalloc/internal/bin.h" +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" + +extern ssize_t opt_dirty_decay_ms; +extern ssize_t opt_muzzy_decay_ms; + +extern percpu_arena_mode_t opt_percpu_arena; +extern const char *percpu_arena_mode_names[]; + +extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; +extern malloc_mutex_t arenas_lock; + +void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, + unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, + ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); +void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, + size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, + bin_stats_t *bstats, arena_stats_large_t *lstats); +void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +#ifdef JEMALLOC_JET +size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); +#endif +extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, + size_t usize, size_t alignment, bool *zero); +void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, + extent_t *extent); +void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, size_t oldsize); +void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, size_t oldsize); +ssize_t arena_dirty_decay_ms_get(arena_t *arena); +bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); +ssize_t arena_muzzy_decay_ms_get(arena_t *arena); +bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); +void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, + bool all); +void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_destroy(tsd_t *tsd, arena_t *arena); +void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); +void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, + bool zero); + +typedef void (arena_dalloc_junk_small_t)(void *, const bin_info_t *); +extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small; + +void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, + szind_t ind, bool zero); +void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache); +void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize); +void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + bool slow_path); +void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, void *ptr); +void arena_dalloc_small(tsdn_t *tsdn, void *ptr); +bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero); +void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, + size_t size, size_t alignment, bool zero, tcache_t *tcache); +dss_prec_t arena_dss_prec_get(arena_t *arena); +bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +ssize_t arena_dirty_decay_ms_default_get(void); +bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); +ssize_t arena_muzzy_decay_ms_default_get(void); +bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); +bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, + size_t *old_limit, size_t *new_limit); +unsigned arena_nthreads_get(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); +size_t arena_extent_sn_next(arena_t *arena); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void arena_boot(void); +void arena_prefork0(tsdn_t *tsdn, arena_t *arena); +void arena_prefork1(tsdn_t *tsdn, arena_t *arena); +void arena_prefork2(tsdn_t *tsdn, arena_t *arena); +void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_prefork4(tsdn_t *tsdn, arena_t *arena); +void arena_prefork5(tsdn_t *tsdn, arena_t *arena); +void arena_prefork6(tsdn_t *tsdn, arena_t *arena); +void arena_prefork7(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); + +#endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h b/deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h new file mode 100644 index 000000000..9abf7f6ac --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h @@ -0,0 +1,57 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_A_H +#define JEMALLOC_INTERNAL_ARENA_INLINES_A_H + +static inline unsigned +arena_ind_get(const arena_t *arena) { + return base_ind_get(arena->base); +} + +static inline void +arena_internal_add(arena_t *arena, size_t size) { + atomic_fetch_add_zu(&arena->stats.internal, size, ATOMIC_RELAXED); +} + +static inline void +arena_internal_sub(arena_t *arena, size_t size) { + atomic_fetch_sub_zu(&arena->stats.internal, size, ATOMIC_RELAXED); +} + +static inline size_t +arena_internal_get(arena_t *arena) { + return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); +} + +static inline bool +arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { + cassert(config_prof); + + if (likely(prof_interval == 0 || !prof_active_get_unlocked())) { + return false; + } + + return prof_accum_add(tsdn, &arena->prof_accum, accumbytes); +} + +static inline void +percpu_arena_update(tsd_t *tsd, unsigned cpu) { + assert(have_percpu_arena); + arena_t *oldarena = tsd_arena_get(tsd); + assert(oldarena != NULL); + unsigned oldind = arena_ind_get(oldarena); + + if (oldind != cpu) { + unsigned newind = cpu; + arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true); + assert(newarena != NULL); + + /* Set new arena/tcache associations. */ + arena_migrate(tsd, oldind, newind); + tcache_t *tcache = tcache_get(tsd); + if (tcache != NULL) { + tcache_arena_reassociate(tsd_tsdn(tsd), tcache, + newarena); + } + } +} + +#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h b/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h new file mode 100644 index 000000000..2b7e77e72 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h @@ -0,0 +1,354 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H +#define JEMALLOC_INTERNAL_ARENA_INLINES_B_H + +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/ticker.h" + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { + cassert(config_prof); + assert(ptr != NULL); + + /* Static check. */ + if (alloc_ctx == NULL) { + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(!extent_slab_get(extent))) { + return large_prof_tctx_get(tsdn, extent); + } + } else { + if (unlikely(!alloc_ctx->slab)) { + return large_prof_tctx_get(tsdn, iealloc(tsdn, ptr)); + } + } + return (prof_tctx_t *)(uintptr_t)1U; +} + +JEMALLOC_ALWAYS_INLINE void +arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, UNUSED size_t usize, + alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { + cassert(config_prof); + assert(ptr != NULL); + + /* Static check. */ + if (alloc_ctx == NULL) { + extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(!extent_slab_get(extent))) { + large_prof_tctx_set(tsdn, extent, tctx); + } + } else { + if (unlikely(!alloc_ctx->slab)) { + large_prof_tctx_set(tsdn, iealloc(tsdn, ptr), tctx); + } + } +} + +static inline void +arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, UNUSED prof_tctx_t *tctx) { + cassert(config_prof); + assert(ptr != NULL); + + extent_t *extent = iealloc(tsdn, ptr); + assert(!extent_slab_get(extent)); + + large_prof_tctx_reset(tsdn, extent); +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { + tsd_t *tsd; + ticker_t *decay_ticker; + + if (unlikely(tsdn_null(tsdn))) { + return; + } + tsd = tsdn_tsd(tsdn); + decay_ticker = decay_ticker_get(tsd, arena_ind_get(arena)); + if (unlikely(decay_ticker == NULL)) { + return; + } + if (unlikely(ticker_ticks(decay_ticker, nticks))) { + arena_decay(tsdn, arena, false, false); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { + malloc_mutex_assert_not_owner(tsdn, &arena->decay_dirty.mtx); + malloc_mutex_assert_not_owner(tsdn, &arena->decay_muzzy.mtx); + + arena_decay_ticks(tsdn, arena, 1); +} + +JEMALLOC_ALWAYS_INLINE void * +arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(size != 0); + + if (likely(tcache != NULL)) { + if (likely(size <= SMALL_MAXCLASS)) { + return tcache_alloc_small(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path); + } + if (likely(size <= tcache_maxclass)) { + return tcache_alloc_large(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path); + } + /* (size > tcache_maxclass) case falls through. */ + assert(size > tcache_maxclass); + } + + return arena_malloc_hard(tsdn, arena, size, ind, zero); +} + +JEMALLOC_ALWAYS_INLINE arena_t * +arena_aalloc(tsdn_t *tsdn, const void *ptr) { + return extent_arena_get(iealloc(tsdn, ptr)); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_salloc(tsdn_t *tsdn, const void *ptr) { + assert(ptr != NULL); + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + szind_t szind = rtree_szind_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true); + assert(szind != NSIZES); + + return sz_index2size(szind); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_vsalloc(tsdn_t *tsdn, const void *ptr) { + /* + * Return 0 if ptr is not within an extent managed by jemalloc. This + * function has two extra costs relative to isalloc(): + * - The rtree calls cannot claim to be dependent lookups, which induces + * rtree lookup load dependencies. + * - The lookup may fail, so there is an extra branch to check for + * failure. + */ + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *extent; + szind_t szind; + if (rtree_extent_szind_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, false, &extent, &szind)) { + return 0; + } + + if (extent == NULL) { + return 0; + } + assert(extent_state_get(extent) == extent_state_active); + /* Only slab members should be looked up via interior pointers. */ + assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); + + assert(szind != NSIZES); + + return sz_index2size(szind); +} + +static inline void +arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { + assert(ptr != NULL); + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + szind_t szind; + bool slab; + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, + true, &szind, &slab); + + if (config_debug) { + extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(szind < NSIZES); + assert(slab == extent_slab_get(extent)); + } + + if (likely(slab)) { + /* Small allocation. */ + arena_dalloc_small(tsdn, ptr); + } else { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + alloc_ctx_t *alloc_ctx, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + + if (unlikely(tcache == NULL)) { + arena_dalloc_no_tcache(tsdn, ptr); + return; + } + + szind_t szind; + bool slab; + rtree_ctx_t *rtree_ctx; + if (alloc_ctx != NULL) { + szind = alloc_ctx->szind; + slab = alloc_ctx->slab; + assert(szind != NSIZES); + } else { + rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &szind, &slab); + } + + if (config_debug) { + rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); + extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(szind < NSIZES); + assert(slab == extent_slab_get(extent)); + } + + if (likely(slab)) { + /* Small allocation. */ + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, + slow_path); + } else { + if (szind < nhbins) { + if (config_prof && unlikely(szind < NBINS)) { + arena_dalloc_promoted(tsdn, ptr, tcache, + slow_path); + } else { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + szind, slow_path); + } + } else { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } + } +} + +static inline void +arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { + assert(ptr != NULL); + assert(size <= LARGE_MAXCLASS); + + szind_t szind; + bool slab; + if (!config_prof || !opt_prof) { + /* + * There is no risk of being confused by a promoted sampled + * object, so base szind and slab on the given size. + */ + szind = sz_size2index(size); + slab = (szind < NBINS); + } + + if ((config_prof && opt_prof) || config_debug) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, + &rtree_ctx_fallback); + + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &szind, &slab); + + assert(szind == sz_size2index(size)); + assert((config_prof && opt_prof) || slab == (szind < NBINS)); + + if (config_debug) { + extent_t *extent = rtree_extent_read(tsdn, + &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(slab == extent_slab_get(extent)); + } + } + + if (likely(slab)) { + /* Small allocation. */ + arena_dalloc_small(tsdn, ptr); + } else { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + alloc_ctx_t *alloc_ctx, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + assert(size <= LARGE_MAXCLASS); + + if (unlikely(tcache == NULL)) { + arena_sdalloc_no_tcache(tsdn, ptr, size); + return; + } + + szind_t szind; + bool slab; + UNUSED alloc_ctx_t local_ctx; + if (config_prof && opt_prof) { + if (alloc_ctx == NULL) { + /* Uncommon case and should be a static check. */ + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, + &rtree_ctx_fallback); + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &local_ctx.szind, + &local_ctx.slab); + assert(local_ctx.szind == sz_size2index(size)); + alloc_ctx = &local_ctx; + } + slab = alloc_ctx->slab; + szind = alloc_ctx->szind; + } else { + /* + * There is no risk of being confused by a promoted sampled + * object, so base szind and slab on the given size. + */ + szind = sz_size2index(size); + slab = (szind < NBINS); + } + + if (config_debug) { + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &szind, &slab); + extent_t *extent = rtree_extent_read(tsdn, + &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(slab == extent_slab_get(extent)); + } + + if (likely(slab)) { + /* Small allocation. */ + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, + slow_path); + } else { + if (szind < nhbins) { + if (config_prof && unlikely(szind < NBINS)) { + arena_dalloc_promoted(tsdn, ptr, tcache, + slow_path); + } else { + tcache_dalloc_large(tsdn_tsd(tsdn), + tcache, ptr, szind, slow_path); + } + } else { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } + } +} + +#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/arena_stats.h b/deps/jemalloc/include/jemalloc/internal/arena_stats.h new file mode 100644 index 000000000..5f3dca8b1 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/arena_stats.h @@ -0,0 +1,237 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STATS_H +#define JEMALLOC_INTERNAL_ARENA_STATS_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/size_classes.h" + +/* + * In those architectures that support 64-bit atomics, we use atomic updates for + * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize + * externally. + */ +#ifdef JEMALLOC_ATOMIC_U64 +typedef atomic_u64_t arena_stats_u64_t; +#else +/* Must hold the arena stats mutex while reading atomically. */ +typedef uint64_t arena_stats_u64_t; +#endif + +typedef struct arena_stats_large_s arena_stats_large_t; +struct arena_stats_large_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. + */ + arena_stats_u64_t nmalloc; + arena_stats_u64_t ndalloc; + + /* + * Number of allocation requests that correspond to this size class. + * This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + arena_stats_u64_t nrequests; /* Partially derived. */ + + /* Current number of allocations of this size class. */ + size_t curlextents; /* Derived. */ +}; + +typedef struct arena_stats_decay_s arena_stats_decay_t; +struct arena_stats_decay_s { + /* Total number of purge sweeps. */ + arena_stats_u64_t npurge; + /* Total number of madvise calls made. */ + arena_stats_u64_t nmadvise; + /* Total number of pages purged. */ + arena_stats_u64_t purged; +}; + +/* + * Arena stats. Note that fields marked "derived" are not directly maintained + * within the arena code; rather their values are derived during stats merge + * requests. + */ +typedef struct arena_stats_s arena_stats_t; +struct arena_stats_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; +#endif + + /* Number of bytes currently mapped, excluding retained memory. */ + atomic_zu_t mapped; /* Partially derived. */ + + /* + * Number of unused virtual memory bytes currently retained. Retained + * bytes are technically mapped (though always decommitted or purged), + * but they are excluded from the mapped statistic (above). + */ + atomic_zu_t retained; /* Derived. */ + + arena_stats_decay_t decay_dirty; + arena_stats_decay_t decay_muzzy; + + atomic_zu_t base; /* Derived. */ + atomic_zu_t internal; + atomic_zu_t resident; /* Derived. */ + atomic_zu_t metadata_thp; + + atomic_zu_t allocated_large; /* Derived. */ + arena_stats_u64_t nmalloc_large; /* Derived. */ + arena_stats_u64_t ndalloc_large; /* Derived. */ + arena_stats_u64_t nrequests_large; /* Derived. */ + + /* Number of bytes cached in tcache associated with this arena. */ + atomic_zu_t tcache_bytes; /* Derived. */ + + mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; + + /* One element for each large size class. */ + arena_stats_large_t lstats[NSIZES - NBINS]; + + /* Arena uptime. */ + nstime_t uptime; +}; + +static inline bool +arena_stats_init(UNUSED tsdn_t *tsdn, arena_stats_t *arena_stats) { + if (config_debug) { + for (size_t i = 0; i < sizeof(arena_stats_t); i++) { + assert(((char *)arena_stats)[i] == 0); + } + } +#ifndef JEMALLOC_ATOMIC_U64 + if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", + WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { + return true; + } +#endif + /* Memory is zeroed, so there is no need to clear stats. */ + return false; +} + +static inline void +arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_lock(tsdn, &arena_stats->mtx); +#endif +} + +static inline void +arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_unlock(tsdn, &arena_stats->mtx); +#endif +} + +static inline uint64_t +arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_u64(p, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + return *p; +#endif +} + +static inline void +arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p, uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p += x; +#endif +} + +UNUSED static inline void +arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p, uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p -= x; + assert(*p + x >= *p); +#endif +} + +/* + * Non-atomically sets *dst += src. *dst needs external synchronization. + * This lets us avoid the cost of a fetch_add when its unnecessary (note that + * the types here are atomic). + */ +static inline void +arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { +#ifdef JEMALLOC_ATOMIC_U64 + uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); + atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); +#else + *dst += src; +#endif +} + +static inline size_t +arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_zu(p, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + return atomic_load_zu(p, ATOMIC_RELAXED); +#endif +} + +static inline void +arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); + atomic_store_zu(p, cur + x, ATOMIC_RELAXED); +#endif +} + +static inline void +arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); + atomic_store_zu(p, cur - x, ATOMIC_RELAXED); +#endif +} + +/* Like the _u64 variant, needs an externally synchronized *dst. */ +static inline void +arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { + size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); + atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); +} + +static inline void +arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, + szind_t szind, uint64_t nrequests) { + arena_stats_lock(tsdn, arena_stats); + arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - + NBINS].nrequests, nrequests); + arena_stats_unlock(tsdn, arena_stats); +} + +static inline void +arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { + arena_stats_lock(tsdn, arena_stats); + arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); + arena_stats_unlock(tsdn, arena_stats); +} + + +#endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/arena_structs_a.h b/deps/jemalloc/include/jemalloc/internal/arena_structs_a.h new file mode 100644 index 000000000..46aa77c88 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/arena_structs_a.h @@ -0,0 +1,11 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H +#define JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H + +#include "jemalloc/internal/bitmap.h" + +struct arena_slab_data_s { + /* Per region allocated/deallocated bitmap. */ + bitmap_t bitmap[BITMAP_GROUPS_MAX]; +}; + +#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h b/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h new file mode 100644 index 000000000..38bc95962 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h @@ -0,0 +1,229 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H + +#include "jemalloc/internal/arena_stats.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bin.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" +#include "jemalloc/internal/ticker.h" + +struct arena_decay_s { + /* Synchronizes all non-atomic fields. */ + malloc_mutex_t mtx; + /* + * True if a thread is currently purging the extents associated with + * this decay structure. + */ + bool purging; + /* + * Approximate time in milliseconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + atomic_zd_t time_ms; + /* time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* Deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). + * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. + */ + nstime_t deadline; + /* + * Number of unpurged pages at beginning of current epoch. During epoch + * advancement we use the delta between arena->decay_*.nunpurged and + * extents_npages_get(&arena->extents_*) to determine how many dirty + * pages, if any, were generated. + */ + size_t nunpurged; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; + + /* + * Pointer to associated stats. These stats are embedded directly in + * the arena's stats due to how stats structures are shared between the + * arena and ctl code. + * + * Synchronization: Same as associated arena's stats field. */ + arena_stats_decay_t *stats; + /* Peak number of pages in associated extents. Used for debug only. */ + uint64_t ceil_npages; +}; + +struct arena_s { + /* + * Number of threads currently assigned to this arena. Each thread has + * two distinct assignments, one for application-serving allocation, and + * the other for internal metadata allocation. Internal metadata must + * not be allocated from arenas explicitly created via the arenas.create + * mallctl, because the arena.<i>.reset mallctl indiscriminately + * discards all allocations for the affected arena. + * + * 0: Application allocation. + * 1: Internal metadata allocation. + * + * Synchronization: atomic. + */ + atomic_u_t nthreads[2]; + + /* + * When percpu_arena is enabled, to amortize the cost of reading / + * updating the current CPU id, track the most recent thread accessing + * this arena, and only read CPU if there is a mismatch. + */ + tsdn_t *last_thd; + + /* Synchronization: internal. */ + arena_stats_t stats; + + /* + * Lists of tcaches and cache_bin_array_descriptors for extant threads + * associated with this arena. Stats from these are merged + * incrementally, and at exit if opt_stats_print is enabled. + * + * Synchronization: tcache_ql_mtx. + */ + ql_head(tcache_t) tcache_ql; + ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; + malloc_mutex_t tcache_ql_mtx; + + /* Synchronization: internal. */ + prof_accum_t prof_accum; + uint64_t prof_accumbytes; + + /* + * PRNG state for cache index randomization of large allocation base + * pointers. + * + * Synchronization: atomic. + */ + atomic_zu_t offset_state; + + /* + * Extent serial number generator state. + * + * Synchronization: atomic. + */ + atomic_zu_t extent_sn_next; + + /* + * Represents a dss_prec_t, but atomically. + * + * Synchronization: atomic. + */ + atomic_u_t dss_prec; + + /* + * Number of pages in active extents. + * + * Synchronization: atomic. + */ + atomic_zu_t nactive; + + /* + * Extant large allocations. + * + * Synchronization: large_mtx. + */ + extent_list_t large; + /* Synchronizes all large allocation/update/deallocation. */ + malloc_mutex_t large_mtx; + + /* + * Collections of extents that were previously allocated. These are + * used when allocating extents, in an attempt to re-use address space. + * + * Synchronization: internal. + */ + extents_t extents_dirty; + extents_t extents_muzzy; + extents_t extents_retained; + + /* + * Decay-based purging state, responsible for scheduling extent state + * transitions. + * + * Synchronization: internal. + */ + arena_decay_t decay_dirty; /* dirty --> muzzy */ + arena_decay_t decay_muzzy; /* muzzy --> retained */ + + /* + * Next extent size class in a growing series to use when satisfying a + * request via the extent hooks (only if opt_retain). This limits the + * number of disjoint virtual memory ranges so that extent merging can + * be effective even if multiple arenas' extent allocation requests are + * highly interleaved. + * + * retain_grow_limit is the max allowed size ind to expand (unless the + * required size is greater). Default is no limit, and controlled + * through mallctl only. + * + * Synchronization: extent_grow_mtx + */ + pszind_t extent_grow_next; + pszind_t retain_grow_limit; + malloc_mutex_t extent_grow_mtx; + + /* + * Available extent structures that were allocated via + * base_alloc_extent(). + * + * Synchronization: extent_avail_mtx. + */ + extent_tree_t extent_avail; + malloc_mutex_t extent_avail_mtx; + + /* + * bins is used to store heaps of free regions. + * + * Synchronization: internal. + */ + bin_t bins[NBINS]; + + /* + * Base allocator, from which arena metadata are allocated. + * + * Synchronization: internal. + */ + base_t *base; + /* Used to determine uptime. Read-only after initialization. */ + nstime_t create_time; +}; + +/* Used in conjunction with tsd for fast arena-related context lookup. */ +struct arena_tdata_s { + ticker_t decay_ticker; +}; + +/* Used to pass rtree lookup context down the path. */ +struct alloc_ctx_s { + szind_t szind; + bool slab; +}; + +#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/arena_types.h b/deps/jemalloc/include/jemalloc/internal/arena_types.h new file mode 100644 index 000000000..70001b5f1 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/arena_types.h @@ -0,0 +1,43 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H +#define JEMALLOC_INTERNAL_ARENA_TYPES_H + +/* Maximum number of regions in one slab. */ +#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) +#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) + +/* Default decay times in milliseconds. */ +#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) +#define MUZZY_DECAY_MS_DEFAULT ZD(10 * 1000) +/* Number of event ticks between time checks. */ +#define DECAY_NTICKS_PER_UPDATE 1000 + +typedef struct arena_slab_data_s arena_slab_data_t; +typedef struct arena_decay_s arena_decay_t; +typedef struct arena_s arena_t; +typedef struct arena_tdata_s arena_tdata_t; +typedef struct alloc_ctx_s alloc_ctx_t; + +typedef enum { + percpu_arena_mode_names_base = 0, /* Used for options processing. */ + + /* + * *_uninit are used only during bootstrapping, and must correspond + * to initialized variant plus percpu_arena_mode_enabled_base. + */ + percpu_arena_uninit = 0, + per_phycpu_arena_uninit = 1, + + /* All non-disabled modes must come after percpu_arena_disabled. */ + percpu_arena_disabled = 2, + + percpu_arena_mode_names_limit = 3, /* Used for options processing. */ + percpu_arena_mode_enabled_base = 3, + + percpu_arena = 3, + per_phycpu_arena = 4 /* Hyper threads share arena. */ +} percpu_arena_mode_t; + +#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) +#define PERCPU_ARENA_DEFAULT percpu_arena_disabled + +#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/assert.h b/deps/jemalloc/include/jemalloc/internal/assert.h new file mode 100644 index 000000000..be4d45b32 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/assert.h @@ -0,0 +1,56 @@ +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/util.h" + +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. + */ +#ifndef assert +#define assert(e) do { \ + if (unlikely(config_debug && !(e))) { \ + malloc_printf( \ + "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef not_reached +#define not_reached() do { \ + if (config_debug) { \ + malloc_printf( \ + "<jemalloc>: %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ + unreachable(); \ +} while (0) +#endif + +#ifndef not_implemented +#define not_implemented() do { \ + if (config_debug) { \ + malloc_printf("<jemalloc>: %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef assert_not_implemented +#define assert_not_implemented(e) do { \ + if (unlikely(config_debug && !(e))) { \ + not_implemented(); \ + } \ +} while (0) +#endif + +/* Use to assert a particular configuration, e.g., cassert(config_debug). */ +#ifndef cassert +#define cassert(c) do { \ + if (unlikely(!(c))) { \ + not_reached(); \ + } \ +} while (0) +#endif diff --git a/deps/jemalloc/include/jemalloc/internal/atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic.h index a9aad35d1..adadb1a3a 100644 --- a/deps/jemalloc/include/jemalloc/internal/atomic.h +++ b/deps/jemalloc/include/jemalloc/internal/atomic.h @@ -1,651 +1,77 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#define atomic_read_uint64(p) atomic_add_uint64(p, 0) -#define atomic_read_uint32(p) atomic_add_uint32(p, 0) -#define atomic_read_p(p) atomic_add_p(p, NULL) -#define atomic_read_z(p) atomic_add_z(p, 0) -#define atomic_read_u(p) atomic_add_u(p, 0) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_INTERNAL_ATOMIC_H +#define JEMALLOC_INTERNAL_ATOMIC_H + +#define ATOMIC_INLINE static inline + +#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) +# include "jemalloc/internal/atomic_gcc_atomic.h" +#elif defined(JEMALLOC_GCC_SYNC_ATOMICS) +# include "jemalloc/internal/atomic_gcc_sync.h" +#elif defined(_MSC_VER) +# include "jemalloc/internal/atomic_msvc.h" +#elif defined(JEMALLOC_C11_ATOMICS) +# include "jemalloc/internal/atomic_c11.h" +#else +# error "Don't have atomics implemented on this platform." +#endif /* - * All arithmetic functions return the arithmetic result of the atomic - * operation. Some atomic operation APIs return the value prior to mutation, in - * which case the following functions must redundantly compute the result so - * that it can be returned. These functions are normally inlined, so the extra - * operations can be optimized away if the return values aren't used by the - * callers. + * This header gives more or less a backport of C11 atomics. The user can write + * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_sizeof_type); to generate + * counterparts of the C11 atomic functions for type, as so: + * JEMALLOC_GENERATE_ATOMICS(int *, pi, 3); + * and then write things like: + * int *some_ptr; + * atomic_pi_t atomic_ptr_to_int; + * atomic_store_pi(&atomic_ptr_to_int, some_ptr, ATOMIC_RELAXED); + * int *prev_value = atomic_exchange_pi(&ptr_to_int, NULL, ATOMIC_ACQ_REL); + * assert(some_ptr == prev_value); + * and expect things to work in the obvious way. * - * <t> atomic_read_<t>(<t> *p) { return (*p); } - * <t> atomic_add_<t>(<t> *p, <t> x) { return (*p + x); } - * <t> atomic_sub_<t>(<t> *p, <t> x) { return (*p - x); } - * bool atomic_cas_<t>(<t> *p, <t> c, <t> s) - * { - * if (*p != c) - * return (true); - * *p = s; - * return (false); - * } - * void atomic_write_<t>(<t> *p, <t> x) { *p = x; } + * Also included (with naming differences to avoid conflicts with the standard + * library): + * atomic_fence(atomic_memory_order_t) (mimics C11's atomic_thread_fence). + * ATOMIC_INIT (mimics C11's ATOMIC_VAR_INIT). */ -#ifndef JEMALLOC_ENABLE_INLINE -uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); -uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); -bool atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s); -void atomic_write_uint64(uint64_t *p, uint64_t x); -uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); -uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); -bool atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s); -void atomic_write_uint32(uint32_t *p, uint32_t x); -void *atomic_add_p(void **p, void *x); -void *atomic_sub_p(void **p, void *x); -bool atomic_cas_p(void **p, void *c, void *s); -void atomic_write_p(void **p, const void *x); -size_t atomic_add_z(size_t *p, size_t x); -size_t atomic_sub_z(size_t *p, size_t x); -bool atomic_cas_z(size_t *p, size_t c, size_t s); -void atomic_write_z(size_t *p, size_t x); -unsigned atomic_add_u(unsigned *p, unsigned x); -unsigned atomic_sub_u(unsigned *p, unsigned x); -bool atomic_cas_u(unsigned *p, unsigned c, unsigned s); -void atomic_write_u(unsigned *p, unsigned x); -#endif +/* + * Pure convenience, so that we don't have to type "atomic_memory_order_" + * quite so often. + */ +#define ATOMIC_RELAXED atomic_memory_order_relaxed +#define ATOMIC_ACQUIRE atomic_memory_order_acquire +#define ATOMIC_RELEASE atomic_memory_order_release +#define ATOMIC_ACQ_REL atomic_memory_order_acq_rel +#define ATOMIC_SEQ_CST atomic_memory_order_seq_cst -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) -/******************************************************************************/ -/* 64-bit operations. */ +/* + * Not all platforms have 64-bit atomics. If we do, this #define exposes that + * fact. + */ #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# if (defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - uint64_t t = x; - - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (t + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - uint64_t t; - - x = (uint64_t)(-(int64_t)x); - t = x; - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (t + x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) -{ - uint8_t success; - - asm volatile ( - "lock; cmpxchgq %4, %0;" - "sete %1;" - : "=m" (*p), "=a" (success) /* Outputs. */ - : "m" (*p), "a" (c), "r" (s) /* Inputs. */ - : "memory" /* Clobbers. */ - ); - - return (!(bool)success); -} - -JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) -{ - - asm volatile ( - "xchgq %1, %0;" /* Lock is implied by xchgq. */ - : "=m" (*p), "+r" (x) /* Outputs. */ - : "m" (*p) /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -# elif (defined(JEMALLOC_C11ATOMICS)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (atomic_fetch_add(a, x) + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (atomic_fetch_sub(a, x) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (!atomic_compare_exchange_strong(a, &c, s)); -} - -JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) -{ - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - atomic_store(a, x); -} -# elif (defined(JEMALLOC_ATOMIC9)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - /* - * atomic_fetchadd_64() doesn't exist, but we only ever use this - * function on LP64 systems, so atomic_fetchadd_long() will do. - */ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (atomic_fetchadd_long(p, (unsigned long)x) + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) -{ - - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s)); -} - -JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) -{ - - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - atomic_store_rel_long(p, x); -} -# elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); -} - -JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) -{ - - return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p)); -} - -JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) -{ - uint64_t o; - - /*The documented OSAtomic*() API does not expose an atomic exchange. */ - do { - o = atomic_read_uint64(p); - } while (atomic_cas_uint64(p, o, x)); -} -# elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (InterlockedExchangeAdd64(p, x) + x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) -{ - uint64_t o; - - o = InterlockedCompareExchange64(p, s, c); - return (o != c); -} - -JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) -{ - - InterlockedExchange64(p, x); -} -# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ - defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} - -JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) -{ - - return (!__sync_bool_compare_and_swap(p, c, s)); -} - -JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) -{ - - __sync_lock_test_and_set(p, x); -} -# else -# error "Missing implementation for 64-bit atomic operations" -# endif +# define JEMALLOC_ATOMIC_U64 #endif -/******************************************************************************/ -/* 32-bit operations. */ -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - uint32_t t = x; - - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (t + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - uint32_t t; - - x = (uint32_t)(-(int32_t)x); - t = x; - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (t + x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) -{ - uint8_t success; - - asm volatile ( - "lock; cmpxchgl %4, %0;" - "sete %1;" - : "=m" (*p), "=a" (success) /* Outputs. */ - : "m" (*p), "a" (c), "r" (s) /* Inputs. */ - : "memory" - ); - - return (!(bool)success); -} - -JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) -{ - - asm volatile ( - "xchgl %1, %0;" /* Lock is implied by xchgl. */ - : "=m" (*p), "+r" (x) /* Outputs. */ - : "m" (*p) /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -# elif (defined(JEMALLOC_C11ATOMICS)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (atomic_fetch_add(a, x) + x); -} +JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR) -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (atomic_fetch_sub(a, x) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (!atomic_compare_exchange_strong(a, &c, s)); -} - -JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) -{ - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - atomic_store(a, x); -} -#elif (defined(JEMALLOC_ATOMIC9)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (atomic_fetchadd_32(p, x) + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) -{ - - return (!atomic_cmpset_32(p, c, s)); -} - -JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) -{ - - atomic_store_rel_32(p, x); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); -} - -JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) -{ - - return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p)); -} - -JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) -{ - uint32_t o; - - /*The documented OSAtomic*() API does not expose an atomic exchange. */ - do { - o = atomic_read_uint32(p); - } while (atomic_cas_uint32(p, o, x)); -} -#elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (InterlockedExchangeAdd(p, x) + x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); -} - -JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) -{ - uint32_t o; - - o = InterlockedCompareExchange(p, s, c); - return (o != c); -} - -JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) -{ - - InterlockedExchange(p, x); -} -#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ - defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} - -JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) -{ - - return (!__sync_bool_compare_and_swap(p, c, s)); -} - -JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) -{ - - __sync_lock_test_and_set(p, x); -} -#else -# error "Missing implementation for 32-bit atomic operations" -#endif - -/******************************************************************************/ -/* Pointer operations. */ -JEMALLOC_INLINE void * -atomic_add_p(void **p, void *x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((void *)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return ((void *)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); -#endif -} - -JEMALLOC_INLINE void * -atomic_sub_p(void **p, void *x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((void *)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x))); -#elif (LG_SIZEOF_PTR == 2) - return ((void *)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x))); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_p(void **p, void *c, void *s) -{ - -#if (LG_SIZEOF_PTR == 3) - return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); -#elif (LG_SIZEOF_PTR == 2) - return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); -#endif -} - -JEMALLOC_INLINE void -atomic_write_p(void **p, const void *x) -{ - -#if (LG_SIZEOF_PTR == 3) - atomic_write_uint64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - atomic_write_uint32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -/* size_t operations. */ -JEMALLOC_INLINE size_t -atomic_add_z(size_t *p, size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); -#endif -} - -JEMALLOC_INLINE size_t -atomic_sub_z(size_t *p, size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - return ((size_t)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x))); -#elif (LG_SIZEOF_PTR == 2) - return ((size_t)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x))); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_z(size_t *p, size_t c, size_t s) -{ - -#if (LG_SIZEOF_PTR == 3) - return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); -#elif (LG_SIZEOF_PTR == 2) - return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); -#endif -} - -JEMALLOC_INLINE void -atomic_write_z(size_t *p, size_t x) -{ - -#if (LG_SIZEOF_PTR == 3) - atomic_write_uint64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - atomic_write_uint32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -/* unsigned operations. */ -JEMALLOC_INLINE unsigned -atomic_add_u(unsigned *p, unsigned x) -{ - -#if (LG_SIZEOF_INT == 3) - return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); -#elif (LG_SIZEOF_INT == 2) - return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); -#endif -} - -JEMALLOC_INLINE unsigned -atomic_sub_u(unsigned *p, unsigned x) -{ +/* + * There's no actual guarantee that sizeof(bool) == 1, but it's true on the only + * platform that actually needs to know the size, MSVC. + */ +JEMALLOC_GENERATE_ATOMICS(bool, b, 0) -#if (LG_SIZEOF_INT == 3) - return ((unsigned)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x))); -#elif (LG_SIZEOF_INT == 2) - return ((unsigned)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x))); -#endif -} +JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT) -JEMALLOC_INLINE bool -atomic_cas_u(unsigned *p, unsigned c, unsigned s) -{ +JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) -#if (LG_SIZEOF_INT == 3) - return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); -#elif (LG_SIZEOF_INT == 2) - return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); -#endif -} +JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR) -JEMALLOC_INLINE void -atomic_write_u(unsigned *p, unsigned x) -{ +JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2) -#if (LG_SIZEOF_INT == 3) - atomic_write_uint64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_INT == 2) - atomic_write_uint32((uint32_t *)p, (uint32_t)x); +#ifdef JEMALLOC_ATOMIC_U64 +JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3) #endif -} -/******************************************************************************/ -#endif +#undef ATOMIC_INLINE -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_ATOMIC_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/atomic_c11.h b/deps/jemalloc/include/jemalloc/internal/atomic_c11.h new file mode 100644 index 000000000..a5f9313a6 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/atomic_c11.h @@ -0,0 +1,97 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_C11_H +#define JEMALLOC_INTERNAL_ATOMIC_C11_H + +#include <stdatomic.h> + +#define ATOMIC_INIT(...) ATOMIC_VAR_INIT(__VA_ARGS__) + +#define atomic_memory_order_t memory_order +#define atomic_memory_order_relaxed memory_order_relaxed +#define atomic_memory_order_acquire memory_order_acquire +#define atomic_memory_order_release memory_order_release +#define atomic_memory_order_acq_rel memory_order_acq_rel +#define atomic_memory_order_seq_cst memory_order_seq_cst + +#define atomic_fence atomic_thread_fence + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +typedef _Atomic(type) atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + /* \ + * A strict interpretation of the C standard prevents \ + * atomic_load from taking a const argument, but it's \ + * convenient for our purposes. This cast is a workaround. \ + */ \ + atomic_##short_type##_t* a_nonconst = \ + (atomic_##short_type##_t*)a; \ + return atomic_load_explicit(a_nonconst, mo); \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + atomic_store_explicit(a, val, mo); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return atomic_exchange_explicit(a, val, mo); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return atomic_compare_exchange_weak_explicit(a, expected, \ + desired, success_mo, failure_mo); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return atomic_compare_exchange_strong_explicit(a, expected, \ + desired, success_mo, failure_mo); \ +} + +/* + * Integral types have some special operations available that non-integral ones + * lack. + */ +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_add_explicit(a, val, mo); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_sub_explicit(a, val, mo); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_and_explicit(a, val, mo); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_or_explicit(a, val, mo); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_xor_explicit(a, val, mo); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_C11_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h new file mode 100644 index 000000000..6b73a14f8 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h @@ -0,0 +1,127 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H +#define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H + +#include "jemalloc/internal/assert.h" + +#define ATOMIC_INIT(...) {__VA_ARGS__} + +typedef enum { + atomic_memory_order_relaxed, + atomic_memory_order_acquire, + atomic_memory_order_release, + atomic_memory_order_acq_rel, + atomic_memory_order_seq_cst +} atomic_memory_order_t; + +ATOMIC_INLINE int +atomic_enum_to_builtin(atomic_memory_order_t mo) { + switch (mo) { + case atomic_memory_order_relaxed: + return __ATOMIC_RELAXED; + case atomic_memory_order_acquire: + return __ATOMIC_ACQUIRE; + case atomic_memory_order_release: + return __ATOMIC_RELEASE; + case atomic_memory_order_acq_rel: + return __ATOMIC_ACQ_REL; + case atomic_memory_order_seq_cst: + return __ATOMIC_SEQ_CST; + } + /* Can't happen; the switch is exhaustive. */ + not_reached(); +} + +ATOMIC_INLINE void +atomic_fence(atomic_memory_order_t mo) { + __atomic_thread_fence(atomic_enum_to_builtin(mo)); +} + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +typedef struct { \ + type repr; \ +} atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + type result; \ + __atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo)); \ + return result; \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + __atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + type result; \ + __atomic_exchange(&a->repr, &val, &result, \ + atomic_enum_to_builtin(mo)); \ + return result; \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return __atomic_compare_exchange(&a->repr, expected, &desired, \ + true, atomic_enum_to_builtin(success_mo), \ + atomic_enum_to_builtin(failure_mo)); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return __atomic_compare_exchange(&a->repr, expected, &desired, \ + false, \ + atomic_enum_to_builtin(success_mo), \ + atomic_enum_to_builtin(failure_mo)); \ +} + + +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_add(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_sub(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_and(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_or(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_xor(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h new file mode 100644 index 000000000..30846e4d2 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h @@ -0,0 +1,191 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H +#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H + +#define ATOMIC_INIT(...) {__VA_ARGS__} + +typedef enum { + atomic_memory_order_relaxed, + atomic_memory_order_acquire, + atomic_memory_order_release, + atomic_memory_order_acq_rel, + atomic_memory_order_seq_cst +} atomic_memory_order_t; + +ATOMIC_INLINE void +atomic_fence(atomic_memory_order_t mo) { + /* Easy cases first: no barrier, and full barrier. */ + if (mo == atomic_memory_order_relaxed) { + asm volatile("" ::: "memory"); + return; + } + if (mo == atomic_memory_order_seq_cst) { + asm volatile("" ::: "memory"); + __sync_synchronize(); + asm volatile("" ::: "memory"); + return; + } + asm volatile("" ::: "memory"); +# if defined(__i386__) || defined(__x86_64__) + /* This is implicit on x86. */ +# elif defined(__ppc__) + asm volatile("lwsync"); +# elif defined(__sparc__) && defined(__arch64__) + if (mo == atomic_memory_order_acquire) { + asm volatile("membar #LoadLoad | #LoadStore"); + } else if (mo == atomic_memory_order_release) { + asm volatile("membar #LoadStore | #StoreStore"); + } else { + asm volatile("membar #LoadLoad | #LoadStore | #StoreStore"); + } +# else + __sync_synchronize(); +# endif + asm volatile("" ::: "memory"); +} + +/* + * A correct implementation of seq_cst loads and stores on weakly ordered + * architectures could do either of the following: + * 1. store() is weak-fence -> store -> strong fence, load() is load -> + * strong-fence. + * 2. store() is strong-fence -> store, load() is strong-fence -> load -> + * weak-fence. + * The tricky thing is, load() and store() above can be the load or store + * portions of a gcc __sync builtin, so we have to follow GCC's lead, which + * means going with strategy 2. + * On strongly ordered architectures, the natural strategy is to stick a strong + * fence after seq_cst stores, and have naked loads. So we want the strong + * fences in different places on different architectures. + * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to + * accomplish this. + */ + +ATOMIC_INLINE void +atomic_pre_sc_load_fence() { +# if defined(__i386__) || defined(__x86_64__) || \ + (defined(__sparc__) && defined(__arch64__)) + atomic_fence(atomic_memory_order_relaxed); +# else + atomic_fence(atomic_memory_order_seq_cst); +# endif +} + +ATOMIC_INLINE void +atomic_post_sc_store_fence() { +# if defined(__i386__) || defined(__x86_64__) || \ + (defined(__sparc__) && defined(__arch64__)) + atomic_fence(atomic_memory_order_seq_cst); +# else + atomic_fence(atomic_memory_order_relaxed); +# endif + +} + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +typedef struct { \ + type volatile repr; \ +} atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_pre_sc_load_fence(); \ + } \ + type result = a->repr; \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_acquire); \ + } \ + return result; \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_release); \ + } \ + a->repr = val; \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_post_sc_store_fence(); \ + } \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + /* \ + * Because of FreeBSD, we care about gcc 4.2, which doesn't have\ + * an atomic exchange builtin. We fake it with a CAS loop. \ + */ \ + while (true) { \ + type old = a->repr; \ + if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \ + return old; \ + } \ + } \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ + desired); \ + if (prev == *expected) { \ + return true; \ + } else { \ + *expected = prev; \ + return false; \ + } \ +} \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ + desired); \ + if (prev == *expected) { \ + return true; \ + } else { \ + *expected = prev; \ + return false; \ + } \ +} + +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_add(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_sub(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_and(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_or(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_xor(&a->repr, val); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/atomic_msvc.h b/deps/jemalloc/include/jemalloc/internal/atomic_msvc.h new file mode 100644 index 000000000..67057ce50 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/atomic_msvc.h @@ -0,0 +1,158 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_MSVC_H +#define JEMALLOC_INTERNAL_ATOMIC_MSVC_H + +#define ATOMIC_INIT(...) {__VA_ARGS__} + +typedef enum { + atomic_memory_order_relaxed, + atomic_memory_order_acquire, + atomic_memory_order_release, + atomic_memory_order_acq_rel, + atomic_memory_order_seq_cst +} atomic_memory_order_t; + +typedef char atomic_repr_0_t; +typedef short atomic_repr_1_t; +typedef long atomic_repr_2_t; +typedef __int64 atomic_repr_3_t; + +ATOMIC_INLINE void +atomic_fence(atomic_memory_order_t mo) { + _ReadWriteBarrier(); +# if defined(_M_ARM) || defined(_M_ARM64) + /* ARM needs a barrier for everything but relaxed. */ + if (mo != atomic_memory_order_relaxed) { + MemoryBarrier(); + } +# elif defined(_M_IX86) || defined (_M_X64) + /* x86 needs a barrier only for seq_cst. */ + if (mo == atomic_memory_order_seq_cst) { + MemoryBarrier(); + } +# else +# error "Don't know how to create atomics for this platform for MSVC." +# endif + _ReadWriteBarrier(); +} + +#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_ ## lg_size ## _t + +#define ATOMIC_CONCAT(a, b) ATOMIC_RAW_CONCAT(a, b) +#define ATOMIC_RAW_CONCAT(a, b) a ## b + +#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) ATOMIC_CONCAT( \ + base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size)) + +#define ATOMIC_INTERLOCKED_SUFFIX(lg_size) \ + ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size) + +#define ATOMIC_INTERLOCKED_SUFFIX_0 8 +#define ATOMIC_INTERLOCKED_SUFFIX_1 16 +#define ATOMIC_INTERLOCKED_SUFFIX_2 +#define ATOMIC_INTERLOCKED_SUFFIX_3 64 + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ +typedef struct { \ + ATOMIC_INTERLOCKED_REPR(lg_size) repr; \ +} atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr; \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_acquire); \ + } \ + return (type) ret; \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_release); \ + } \ + a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size)) val; \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_fence(atomic_memory_order_seq_cst); \ + } \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange, \ + lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + ATOMIC_INTERLOCKED_REPR(lg_size) e = \ + (ATOMIC_INTERLOCKED_REPR(lg_size))*expected; \ + ATOMIC_INTERLOCKED_REPR(lg_size) d = \ + (ATOMIC_INTERLOCKED_REPR(lg_size))desired; \ + ATOMIC_INTERLOCKED_REPR(lg_size) old = \ + ATOMIC_INTERLOCKED_NAME(_InterlockedCompareExchange, \ + lg_size)(&a->repr, d, e); \ + if (old == e) { \ + return true; \ + } else { \ + *expected = (type)old; \ + return false; \ + } \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + /* We implement the weak version with strong semantics. */ \ + return atomic_compare_exchange_weak_##short_type(a, expected, \ + desired, success_mo, failure_mo); \ +} + + +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchangeAdd, \ + lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + /* \ + * MSVC warns on negation of unsigned operands, but for us it \ + * gives exactly the right semantics (MAX_TYPE + 1 - operand). \ + */ \ + __pragma(warning(push)) \ + __pragma(warning(disable: 4146)) \ + return atomic_fetch_add_##short_type(a, -val, mo); \ + __pragma(warning(pop)) \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedAnd, lg_size)( \ + &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedOr, lg_size)( \ + &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedXor, lg_size)( \ + &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_MSVC_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h b/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h new file mode 100644 index 000000000..3209aa49f --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h @@ -0,0 +1,33 @@ +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H + +extern bool opt_background_thread; +extern size_t opt_max_background_threads; +extern malloc_mutex_t background_thread_lock; +extern atomic_b_t background_thread_enabled_state; +extern size_t n_background_threads; +extern size_t max_background_threads; +extern background_thread_info_t *background_thread_info; +extern bool can_enable_background_thread; + +bool background_thread_create(tsd_t *tsd, unsigned arena_ind); +bool background_threads_enable(tsd_t *tsd); +bool background_threads_disable(tsd_t *tsd); +void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, + arena_decay_t *decay, size_t npages_new); +void background_thread_prefork0(tsdn_t *tsdn); +void background_thread_prefork1(tsdn_t *tsdn); +void background_thread_postfork_parent(tsdn_t *tsdn); +void background_thread_postfork_child(tsdn_t *tsdn); +bool background_thread_stats_read(tsdn_t *tsdn, + background_thread_stats_t *stats); +void background_thread_ctl_init(tsdn_t *tsdn); + +#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER +extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *, + void *(*)(void *), void *__restrict); +#endif +bool background_thread_boot0(void); +bool background_thread_boot1(tsdn_t *tsdn); + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h b/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h new file mode 100644 index 000000000..ef50231e8 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h @@ -0,0 +1,57 @@ +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H + +JEMALLOC_ALWAYS_INLINE bool +background_thread_enabled(void) { + return atomic_load_b(&background_thread_enabled_state, ATOMIC_RELAXED); +} + +JEMALLOC_ALWAYS_INLINE void +background_thread_enabled_set(tsdn_t *tsdn, bool state) { + malloc_mutex_assert_owner(tsdn, &background_thread_lock); + atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED); +} + +JEMALLOC_ALWAYS_INLINE background_thread_info_t * +arena_background_thread_info_get(arena_t *arena) { + unsigned arena_ind = arena_ind_get(arena); + return &background_thread_info[arena_ind % ncpus]; +} + +JEMALLOC_ALWAYS_INLINE uint64_t +background_thread_wakeup_time_get(background_thread_info_t *info) { + uint64_t next_wakeup = nstime_ns(&info->next_wakeup); + assert(atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE) == + (next_wakeup == BACKGROUND_THREAD_INDEFINITE_SLEEP)); + return next_wakeup; +} + +JEMALLOC_ALWAYS_INLINE void +background_thread_wakeup_time_set(tsdn_t *tsdn, background_thread_info_t *info, + uint64_t wakeup_time) { + malloc_mutex_assert_owner(tsdn, &info->mtx); + atomic_store_b(&info->indefinite_sleep, + wakeup_time == BACKGROUND_THREAD_INDEFINITE_SLEEP, ATOMIC_RELEASE); + nstime_init(&info->next_wakeup, wakeup_time); +} + +JEMALLOC_ALWAYS_INLINE bool +background_thread_indefinite_sleep(background_thread_info_t *info) { + return atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE); +} + +JEMALLOC_ALWAYS_INLINE void +arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, + bool is_background_thread) { + if (!background_thread_enabled() || is_background_thread) { + return; + } + background_thread_info_t *info = + arena_background_thread_info_get(arena); + if (background_thread_indefinite_sleep(info)) { + background_thread_interval_check(tsdn, arena, + &arena->decay_dirty, 0); + } +} + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h b/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h new file mode 100644 index 000000000..c1107dfe9 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h @@ -0,0 +1,53 @@ +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H + +/* This file really combines "structs" and "types", but only transitionally. */ + +#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) +# define JEMALLOC_PTHREAD_CREATE_WRAPPER +#endif + +#define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX +#define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT + +typedef enum { + background_thread_stopped, + background_thread_started, + /* Thread waits on the global lock when paused (for arena_reset). */ + background_thread_paused, +} background_thread_state_t; + +struct background_thread_info_s { +#ifdef JEMALLOC_BACKGROUND_THREAD + /* Background thread is pthread specific. */ + pthread_t thread; + pthread_cond_t cond; +#endif + malloc_mutex_t mtx; + background_thread_state_t state; + /* When true, it means no wakeup scheduled. */ + atomic_b_t indefinite_sleep; + /* Next scheduled wakeup time (absolute time in ns). */ + nstime_t next_wakeup; + /* + * Since the last background thread run, newly added number of pages + * that need to be purged by the next wakeup. This is adjusted on + * epoch advance, and is used to determine whether we should signal the + * background thread to wake up earlier. + */ + size_t npages_to_purge_new; + /* Stats: total number of runs since started. */ + uint64_t tot_n_runs; + /* Stats: total sleep time since started. */ + nstime_t tot_sleep_time; +}; +typedef struct background_thread_info_s background_thread_info_t; + +struct background_thread_stats_s { + size_t num_threads; + uint64_t num_runs; + nstime_t run_interval; +}; +typedef struct background_thread_stats_s background_thread_stats_t; + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/base.h b/deps/jemalloc/include/jemalloc/internal/base.h deleted file mode 100644 index 39e46ee44..000000000 --- a/deps/jemalloc/include/jemalloc/internal/base.h +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *base_alloc(size_t size); -void base_stats_get(size_t *allocated, size_t *resident, size_t *mapped); -bool base_boot(void); -void base_prefork(void); -void base_postfork_parent(void); -void base_postfork_child(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/base_externs.h b/deps/jemalloc/include/jemalloc/internal/base_externs.h new file mode 100644 index 000000000..7b705c9b4 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/base_externs.h @@ -0,0 +1,22 @@ +#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H +#define JEMALLOC_INTERNAL_BASE_EXTERNS_H + +extern metadata_thp_mode_t opt_metadata_thp; +extern const char *metadata_thp_mode_names[]; + +base_t *b0get(void); +base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void base_delete(tsdn_t *tsdn, base_t *base); +extent_hooks_t *base_extent_hooks_get(base_t *base); +extent_hooks_t *base_extent_hooks_set(base_t *base, + extent_hooks_t *extent_hooks); +void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); +extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base); +void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, + size_t *resident, size_t *mapped, size_t *n_thp); +void base_prefork(tsdn_t *tsdn, base_t *base); +void base_postfork_parent(tsdn_t *tsdn, base_t *base); +void base_postfork_child(tsdn_t *tsdn, base_t *base); +bool base_boot(tsdn_t *tsdn); + +#endif /* JEMALLOC_INTERNAL_BASE_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/base_inlines.h b/deps/jemalloc/include/jemalloc/internal/base_inlines.h new file mode 100644 index 000000000..aec0e2e1e --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/base_inlines.h @@ -0,0 +1,13 @@ +#ifndef JEMALLOC_INTERNAL_BASE_INLINES_H +#define JEMALLOC_INTERNAL_BASE_INLINES_H + +static inline unsigned +base_ind_get(const base_t *base) { + return base->ind; +} + +static inline bool +metadata_thp_enabled(void) { + return (opt_metadata_thp != metadata_thp_disabled); +} +#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/base_structs.h b/deps/jemalloc/include/jemalloc/internal/base_structs.h new file mode 100644 index 000000000..2102247ac --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/base_structs.h @@ -0,0 +1,59 @@ +#ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H +#define JEMALLOC_INTERNAL_BASE_STRUCTS_H + +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/size_classes.h" + +/* Embedded at the beginning of every block of base-managed virtual memory. */ +struct base_block_s { + /* Total size of block's virtual memory mapping. */ + size_t size; + + /* Next block in list of base's blocks. */ + base_block_t *next; + + /* Tracks unused trailing space. */ + extent_t extent; +}; + +struct base_s { + /* Associated arena's index within the arenas array. */ + unsigned ind; + + /* + * User-configurable extent hook functions. Points to an + * extent_hooks_t. + */ + atomic_p_t extent_hooks; + + /* Protects base_alloc() and base_stats_get() operations. */ + malloc_mutex_t mtx; + + /* Using THP when true (metadata_thp auto mode). */ + bool auto_thp_switched; + /* + * Most recent size class in the series of increasingly large base + * extents. Logarithmic spacing between subsequent allocations ensures + * that the total number of distinct mappings remains small. + */ + pszind_t pind_last; + + /* Serial number generation state. */ + size_t extent_sn_next; + + /* Chain of all blocks associated with base. */ + base_block_t *blocks; + + /* Heap of extents that track unused trailing space within blocks. */ + extent_heap_t avail[NSIZES]; + + /* Stats, only maintained if config_stats. */ + size_t allocated; + size_t resident; + size_t mapped; + /* Number of THP regions touched. */ + size_t n_thp; +}; + +#endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/base_types.h b/deps/jemalloc/include/jemalloc/internal/base_types.h new file mode 100644 index 000000000..b6db77df7 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/base_types.h @@ -0,0 +1,33 @@ +#ifndef JEMALLOC_INTERNAL_BASE_TYPES_H +#define JEMALLOC_INTERNAL_BASE_TYPES_H + +typedef struct base_block_s base_block_t; +typedef struct base_s base_t; + +#define METADATA_THP_DEFAULT metadata_thp_disabled + +/* + * In auto mode, arenas switch to huge pages for the base allocator on the + * second base block. a0 switches to thp on the 5th block (after 20 megabytes + * of metadata), since more metadata (e.g. rtree nodes) come from a0's base. + */ + +#define BASE_AUTO_THP_THRESHOLD 2 +#define BASE_AUTO_THP_THRESHOLD_A0 5 + +typedef enum { + metadata_thp_disabled = 0, + /* + * Lazily enable hugepage for metadata. To avoid high RSS caused by THP + * + low usage arena (i.e. THP becomes a significant percentage), the + * "auto" option only starts using THP after a base allocator used up + * the first THP region. Starting from the second hugepage (in a single + * arena), "auto" behaves the same as "always", i.e. madvise hugepage + * right away. + */ + metadata_thp_auto = 1, + metadata_thp_always = 2, + metadata_thp_mode_limit = 3 +} metadata_thp_mode_t; + +#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/bin.h b/deps/jemalloc/include/jemalloc/internal/bin.h new file mode 100644 index 000000000..9b416ada7 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/bin.h @@ -0,0 +1,106 @@ +#ifndef JEMALLOC_INTERNAL_BIN_H +#define JEMALLOC_INTERNAL_BIN_H + +#include "jemalloc/internal/extent_types.h" +#include "jemalloc/internal/extent_structs.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/bin_stats.h" + +/* + * A bin contains a set of extents that are currently being used for slab + * allocations. + */ + +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each slab has the following layout: + * + * /--------------------\ + * | region 0 | + * |--------------------| + * | region 1 | + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | region nregs-1 | + * \--------------------/ + */ +typedef struct bin_info_s bin_info_t; +struct bin_info_s { + /* Size of regions in a slab for this bin's size class. */ + size_t reg_size; + + /* Total size of a slab for this bin's size class. */ + size_t slab_size; + + /* Total number of regions in a slab for this bin's size class. */ + uint32_t nregs; + + /* + * Metadata used to manipulate bitmaps for slabs associated with this + * bin. + */ + bitmap_info_t bitmap_info; +}; + +extern const bin_info_t bin_infos[NBINS]; + + +typedef struct bin_s bin_t; +struct bin_s { + /* All operations on bin_t fields require lock ownership. */ + malloc_mutex_t lock; + + /* + * Current slab being used to service allocations of this bin's size + * class. slabcur is independent of slabs_{nonfull,full}; whenever + * slabcur is reassigned, the previous slab must be deallocated or + * inserted into slabs_{nonfull,full}. + */ + extent_t *slabcur; + + /* + * Heap of non-full slabs. This heap is used to assure that new + * allocations come from the non-full slab that is oldest/lowest in + * memory. + */ + extent_heap_t slabs_nonfull; + + /* List used to track full slabs. */ + extent_list_t slabs_full; + + /* Bin statistics. */ + bin_stats_t stats; +}; + +/* Initializes a bin to empty. Returns true on error. */ +bool bin_init(bin_t *bin); + +/* Forking. */ +void bin_prefork(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); + +/* Stats. */ +static inline void +bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) { + malloc_mutex_lock(tsdn, &bin->lock); + malloc_mutex_prof_read(tsdn, &dst_bin_stats->mutex_data, &bin->lock); + dst_bin_stats->nmalloc += bin->stats.nmalloc; + dst_bin_stats->ndalloc += bin->stats.ndalloc; + dst_bin_stats->nrequests += bin->stats.nrequests; + dst_bin_stats->curregs += bin->stats.curregs; + dst_bin_stats->nfills += bin->stats.nfills; + dst_bin_stats->nflushes += bin->stats.nflushes; + dst_bin_stats->nslabs += bin->stats.nslabs; + dst_bin_stats->reslabs += bin->stats.reslabs; + dst_bin_stats->curslabs += bin->stats.curslabs; + malloc_mutex_unlock(tsdn, &bin->lock); +} + +#endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/bin_stats.h b/deps/jemalloc/include/jemalloc/internal/bin_stats.h new file mode 100644 index 000000000..86e673ec4 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/bin_stats.h @@ -0,0 +1,51 @@ +#ifndef JEMALLOC_INTERNAL_BIN_STATS_H +#define JEMALLOC_INTERNAL_BIN_STATS_H + +#include "jemalloc/internal/mutex_prof.h" + +typedef struct bin_stats_s bin_stats_t; +struct bin_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the bin. Note that tcache may allocate an object, then recycle it + * many times, resulting many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to the size of this + * bin. This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* + * Current number of regions of this size class, including regions + * currently cached by tcache. + */ + size_t curregs; + + /* Number of tcache fills from this bin. */ + uint64_t nfills; + + /* Number of tcache flushes to this bin. */ + uint64_t nflushes; + + /* Total number of slabs created for this bin's size class. */ + uint64_t nslabs; + + /* + * Total number of slabs reused by extracting them from the slabs heap + * for this bin's size class. + */ + uint64_t reslabs; + + /* Current number of slabs in this bin. */ + size_t curslabs; + + mutex_prof_data_t mutex_data; +}; + +#endif /* JEMALLOC_INTERNAL_BIN_STATS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/bit_util.h b/deps/jemalloc/include/jemalloc/internal/bit_util.h new file mode 100644 index 000000000..8d078a8a3 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/bit_util.h @@ -0,0 +1,165 @@ +#ifndef JEMALLOC_INTERNAL_BIT_UTIL_H +#define JEMALLOC_INTERNAL_BIT_UTIL_H + +#include "jemalloc/internal/assert.h" + +#define BIT_UTIL_INLINE static inline + +/* Sanity check. */ +#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ + || !defined(JEMALLOC_INTERNAL_FFS) +# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure +#endif + + +BIT_UTIL_INLINE unsigned +ffs_llu(unsigned long long bitmap) { + return JEMALLOC_INTERNAL_FFSLL(bitmap); +} + +BIT_UTIL_INLINE unsigned +ffs_lu(unsigned long bitmap) { + return JEMALLOC_INTERNAL_FFSL(bitmap); +} + +BIT_UTIL_INLINE unsigned +ffs_u(unsigned bitmap) { + return JEMALLOC_INTERNAL_FFS(bitmap); +} + +BIT_UTIL_INLINE unsigned +ffs_zu(size_t bitmap) { +#if LG_SIZEOF_PTR == LG_SIZEOF_INT + return ffs_u(bitmap); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG + return ffs_lu(bitmap); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG + return ffs_llu(bitmap); +#else +#error No implementation for size_t ffs() +#endif +} + +BIT_UTIL_INLINE unsigned +ffs_u64(uint64_t bitmap) { +#if LG_SIZEOF_LONG == 3 + return ffs_lu(bitmap); +#elif LG_SIZEOF_LONG_LONG == 3 + return ffs_llu(bitmap); +#else +#error No implementation for 64-bit ffs() +#endif +} + +BIT_UTIL_INLINE unsigned +ffs_u32(uint32_t bitmap) { +#if LG_SIZEOF_INT == 2 + return ffs_u(bitmap); +#else +#error No implementation for 32-bit ffs() +#endif + return ffs_u(bitmap); +} + +BIT_UTIL_INLINE uint64_t +pow2_ceil_u64(uint64_t x) { + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x |= x >> 32; + x++; + return x; +} + +BIT_UTIL_INLINE uint32_t +pow2_ceil_u32(uint32_t x) { + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return x; +} + +/* Compute the smallest power of 2 that is >= x. */ +BIT_UTIL_INLINE size_t +pow2_ceil_zu(size_t x) { +#if (LG_SIZEOF_PTR == 3) + return pow2_ceil_u64(x); +#else + return pow2_ceil_u32(x); +#endif +} + +#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +BIT_UTIL_INLINE unsigned +lg_floor(size_t x) { + size_t ret; + assert(x != 0); + + asm ("bsr %1, %0" + : "=r"(ret) // Outputs. + : "r"(x) // Inputs. + ); + assert(ret < UINT_MAX); + return (unsigned)ret; +} +#elif (defined(_MSC_VER)) +BIT_UTIL_INLINE unsigned +lg_floor(size_t x) { + unsigned long ret; + + assert(x != 0); + +#if (LG_SIZEOF_PTR == 3) + _BitScanReverse64(&ret, x); +#elif (LG_SIZEOF_PTR == 2) + _BitScanReverse(&ret, x); +#else +# error "Unsupported type size for lg_floor()" +#endif + assert(ret < UINT_MAX); + return (unsigned)ret; +} +#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) +BIT_UTIL_INLINE unsigned +lg_floor(size_t x) { + assert(x != 0); + +#if (LG_SIZEOF_PTR == LG_SIZEOF_INT) + return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x); +#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) + return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x); +#else +# error "Unsupported type size for lg_floor()" +#endif +} +#else +BIT_UTIL_INLINE unsigned +lg_floor(size_t x) { + assert(x != 0); + + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); +#if (LG_SIZEOF_PTR == 3) + x |= (x >> 32); +#endif + if (x == SIZE_T_MAX) { + return (8 << LG_SIZEOF_PTR) - 1; + } + x++; + return ffs_zu(x) - 2; +} +#endif + +#undef BIT_UTIL_INLINE + +#endif /* JEMALLOC_INTERNAL_BIT_UTIL_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/bitmap.h b/deps/jemalloc/include/jemalloc/internal/bitmap.h index fcc6005c7..ac990290a 100644 --- a/deps/jemalloc/include/jemalloc/internal/bitmap.h +++ b/deps/jemalloc/include/jemalloc/internal/bitmap.h @@ -1,83 +1,159 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_BITMAP_H +#define JEMALLOC_INTERNAL_BITMAP_H -/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS -#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/size_classes.h" -typedef struct bitmap_level_s bitmap_level_t; -typedef struct bitmap_info_s bitmap_info_t; typedef unsigned long bitmap_t; -#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG + +/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ +#if LG_SLAB_MAXREGS > LG_CEIL_NSIZES +/* Maximum bitmap bit count is determined by maximum regions per slab. */ +# define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS +#else +/* Maximum bitmap bit count is determined by number of extent size classes. */ +# define LG_BITMAP_MAXBITS LG_CEIL_NSIZES +#endif +#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) /* Number of bits per group. */ -#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) -#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) + +/* + * Do some analysis on how big the bitmap is before we use a tree. For a brute + * force linear search, if we would have to call ffs_lu() more than 2^3 times, + * use a tree instead. + */ +#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 +# define BITMAP_USE_TREE +#endif /* Number of groups required to store a given number of bits. */ -#define BITMAP_BITS2GROUPS(nbits) \ - ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) +#define BITMAP_BITS2GROUPS(nbits) \ + (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) /* * Number of groups required at a particular level for a given number of bits. */ -#define BITMAP_GROUPS_L0(nbits) \ +#define BITMAP_GROUPS_L0(nbits) \ BITMAP_BITS2GROUPS(nbits) -#define BITMAP_GROUPS_L1(nbits) \ +#define BITMAP_GROUPS_L1(nbits) \ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) -#define BITMAP_GROUPS_L2(nbits) \ +#define BITMAP_GROUPS_L2(nbits) \ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) -#define BITMAP_GROUPS_L3(nbits) \ +#define BITMAP_GROUPS_L3(nbits) \ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ BITMAP_BITS2GROUPS((nbits))))) +#define BITMAP_GROUPS_L4(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) /* * Assuming the number of levels, number of groups required for a given number * of bits. */ -#define BITMAP_GROUPS_1_LEVEL(nbits) \ +#define BITMAP_GROUPS_1_LEVEL(nbits) \ BITMAP_GROUPS_L0(nbits) -#define BITMAP_GROUPS_2_LEVEL(nbits) \ +#define BITMAP_GROUPS_2_LEVEL(nbits) \ (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) -#define BITMAP_GROUPS_3_LEVEL(nbits) \ +#define BITMAP_GROUPS_3_LEVEL(nbits) \ (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) -#define BITMAP_GROUPS_4_LEVEL(nbits) \ +#define BITMAP_GROUPS_4_LEVEL(nbits) \ (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) +#define BITMAP_GROUPS_5_LEVEL(nbits) \ + (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) /* * Maximum number of groups required to support LG_BITMAP_MAXBITS. */ +#ifdef BITMAP_USE_TREE + #if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) #else # error "Unsupported bitmap size" #endif -/* Maximum number of levels possible. */ -#define BITMAP_MAX_LEVELS \ - (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ - + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) +/* + * Maximum number of levels possible. This could be statically computed based + * on LG_BITMAP_MAXBITS: + * + * #define BITMAP_MAX_LEVELS \ + * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + * + * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so + * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the + * various cascading macros. The only additional cost this incurs is some + * unused trailing entries in bitmap_info_t structures; the bitmaps themselves + * are not impacted. + */ +#define BITMAP_MAX_LEVELS 5 + +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* nlevels. */ \ + (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ + (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ + (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ + (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ + /* levels. */ \ + { \ + {0}, \ + {BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ + BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ + BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)} \ + } \ +} + +#else /* BITMAP_USE_TREE */ -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +#define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) -struct bitmap_level_s { +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* ngroups. */ \ + BITMAP_BITS2GROUPS(nbits) \ +} + +#endif /* BITMAP_USE_TREE */ + +typedef struct bitmap_level_s { /* Offset of this level's groups within the array of groups. */ size_t group_offset; -}; +} bitmap_level_t; -struct bitmap_info_s { +typedef struct bitmap_info_s { /* Logical number of bits in bitmap (stored at bottom level). */ size_t nbits; +#ifdef BITMAP_USE_TREE /* Number of levels necessary for nbits. */ unsigned nlevels; @@ -86,54 +162,48 @@ struct bitmap_info_s { * bottom to top (e.g. the bottom level is stored in levels[0]). */ bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -size_t bitmap_info_ngroups(const bitmap_info_t *binfo); -size_t bitmap_size(size_t nbits); -void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); -bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); -void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -#endif +#else /* BITMAP_USE_TREE */ + /* Number of groups necessary for nbits. */ + size_t ngroups; +#endif /* BITMAP_USE_TREE */ +} bitmap_info_t; -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) -JEMALLOC_INLINE bool -bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill); +size_t bitmap_size(const bitmap_info_t *binfo); + +static inline bool +bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { +#ifdef BITMAP_USE_TREE + size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; bitmap_t rg = bitmap[rgoff]; /* The bitmap is full iff the root group is 0. */ return (rg == 0); +#else + size_t i; + + for (i = 0; i < binfo->ngroups; i++) { + if (bitmap[i] != 0) { + return false; + } + } + return true; +#endif } -JEMALLOC_INLINE bool -bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ +static inline bool +bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; bitmap_t g; assert(bit < binfo->nbits); goff = bit >> LG_BITMAP_GROUP_NBITS; g = bitmap[goff]; - return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); + return !(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); } -JEMALLOC_INLINE void -bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ +static inline void +bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; bitmap_t *gp; bitmap_t g; @@ -143,10 +213,11 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[goff]; g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); +#ifdef BITMAP_USE_TREE /* Propagate group state transitions up the tree. */ if (g == 0) { unsigned i; @@ -155,45 +226,113 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[binfo->levels[i].group_offset + goff]; g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; - if (g != 0) + if (g != 0) { break; + } } } +#endif +} + +/* ffu: find first unset >= bit. */ +static inline size_t +bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { + assert(min_bit < binfo->nbits); + +#ifdef BITMAP_USE_TREE + size_t bit = 0; + for (unsigned level = binfo->nlevels; level--;) { + size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level + + 1)); + bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit + >> lg_bits_per_group)]; + unsigned group_nmask = (unsigned)(((min_bit > bit) ? (min_bit - + bit) : 0) >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS)); + assert(group_nmask <= BITMAP_GROUP_NBITS); + bitmap_t group_mask = ~((1LU << group_nmask) - 1); + bitmap_t group_masked = group & group_mask; + if (group_masked == 0LU) { + if (group == 0LU) { + return binfo->nbits; + } + /* + * min_bit was preceded by one or more unset bits in + * this group, but there are no other unset bits in this + * group. Try again starting at the first bit of the + * next sibling. This will recurse at most once per + * non-root level. + */ + size_t sib_base = bit + (ZU(1) << lg_bits_per_group); + assert(sib_base > min_bit); + assert(sib_base > bit); + if (sib_base >= binfo->nbits) { + return binfo->nbits; + } + return bitmap_ffu(bitmap, binfo, sib_base); + } + bit += ((size_t)(ffs_lu(group_masked) - 1)) << + (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); + } + assert(bit >= min_bit); + assert(bit < binfo->nbits); + return bit; +#else + size_t i = min_bit >> LG_BITMAP_GROUP_NBITS; + bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) + - 1); + size_t bit; + do { + bit = ffs_lu(g); + if (bit != 0) { + return (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); + } + i++; + g = bitmap[i]; + } while (i < binfo->ngroups); + return binfo->nbits; +#endif } /* sfu: set first unset. */ -JEMALLOC_INLINE size_t -bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ +static inline size_t +bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { size_t bit; bitmap_t g; unsigned i; assert(!bitmap_full(bitmap, binfo)); +#ifdef BITMAP_USE_TREE i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; - bit = jemalloc_ffsl(g) - 1; + bit = ffs_lu(g) - 1; while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1); + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); } - +#else + i = 0; + g = bitmap[0]; + while ((bit = ffs_lu(g)) == 0) { + i++; + g = bitmap[i]; + } + bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); +#endif bitmap_set(bitmap, binfo, bit); - return (bit); + return bit; } -JEMALLOC_INLINE void -bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ +static inline void +bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; bitmap_t *gp; bitmap_t g; - bool propagate; + UNUSED bool propagate; assert(bit < binfo->nbits); assert(bitmap_get(bitmap, binfo, bit)); @@ -201,10 +340,11 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) gp = &bitmap[goff]; g = *gp; propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); +#ifdef BITMAP_USE_TREE /* Propagate group state transitions up the tree. */ if (propagate) { unsigned i; @@ -214,17 +354,16 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) gp = &bitmap[binfo->levels[i].group_offset + goff]; g = *gp; propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; - if (!propagate) + if (!propagate) { break; + } } } +#endif /* BITMAP_USE_TREE */ } -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_BITMAP_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/cache_bin.h b/deps/jemalloc/include/jemalloc/internal/cache_bin.h new file mode 100644 index 000000000..12f3ef2dd --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/cache_bin.h @@ -0,0 +1,114 @@ +#ifndef JEMALLOC_INTERNAL_CACHE_BIN_H +#define JEMALLOC_INTERNAL_CACHE_BIN_H + +#include "jemalloc/internal/ql.h" + +/* + * The cache_bins are the mechanism that the tcache and the arena use to + * communicate. The tcache fills from and flushes to the arena by passing a + * cache_bin_t to fill/flush. When the arena needs to pull stats from the + * tcaches associated with it, it does so by iterating over its + * cache_bin_array_descriptor_t objects and reading out per-bin stats it + * contains. This makes it so that the arena need not know about the existence + * of the tcache at all. + */ + + +/* + * The count of the number of cached allocations in a bin. We make this signed + * so that negative numbers can encode "invalid" states (e.g. a low water mark + * of -1 for a cache that has been depleted). + */ +typedef int32_t cache_bin_sz_t; + +typedef struct cache_bin_stats_s cache_bin_stats_t; +struct cache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. + */ + uint64_t nrequests; +}; + +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +typedef struct cache_bin_info_s cache_bin_info_t; +struct cache_bin_info_s { + /* Upper limit on ncached. */ + cache_bin_sz_t ncached_max; +}; + +typedef struct cache_bin_s cache_bin_t; +struct cache_bin_s { + /* Min # cached since last GC. */ + cache_bin_sz_t low_water; + /* # of cached objects. */ + cache_bin_sz_t ncached; + /* + * ncached and stats are both modified frequently. Let's keep them + * close so that they have a higher chance of being on the same + * cacheline, thus less write-backs. + */ + cache_bin_stats_t tstats; + /* + * Stack of available objects. + * + * To make use of adjacent cacheline prefetch, the items in the avail + * stack goes to higher address for newer allocations. avail points + * just above the available space, which means that + * avail[-ncached, ... -1] are available items and the lowest item will + * be allocated first. + */ + void **avail; +}; + +typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; +struct cache_bin_array_descriptor_s { + /* + * The arena keeps a list of the cache bins associated with it, for + * stats collection. + */ + ql_elm(cache_bin_array_descriptor_t) link; + /* Pointers to the tcache bins. */ + cache_bin_t *bins_small; + cache_bin_t *bins_large; +}; + +static inline void +cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, + cache_bin_t *bins_small, cache_bin_t *bins_large) { + ql_elm_new(descriptor, link); + descriptor->bins_small = bins_small; + descriptor->bins_large = bins_large; +} + +JEMALLOC_ALWAYS_INLINE void * +cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { + void *ret; + + if (unlikely(bin->ncached == 0)) { + bin->low_water = -1; + *success = false; + return NULL; + } + /* + * success (instead of ret) should be checked upon the return of this + * function. We avoid checking (ret == NULL) because there is never a + * null stored on the avail stack (which is unknown to the compiler), + * and eagerly checking ret would cause pipeline stall (waiting for the + * cacheline). + */ + *success = true; + ret = *(bin->avail - bin->ncached); + bin->ncached--; + + if (unlikely(bin->ncached < bin->low_water)) { + bin->low_water = bin->ncached; + } + + return ret; +} + +#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/chunk.h b/deps/jemalloc/include/jemalloc/internal/chunk.h deleted file mode 100644 index 5d1938353..000000000 --- a/deps/jemalloc/include/jemalloc/internal/chunk.h +++ /dev/null @@ -1,99 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* - * Size and alignment of memory chunks that are allocated by the OS's virtual - * memory system. - */ -#define LG_CHUNK_DEFAULT 21 - -/* Return the chunk address for allocation address a. */ -#define CHUNK_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~chunksize_mask)) - -/* Return the chunk offset of address a. */ -#define CHUNK_ADDR2OFFSET(a) \ - ((size_t)((uintptr_t)(a) & chunksize_mask)) - -/* Return the smallest chunk multiple that is >= s. */ -#define CHUNK_CEILING(s) \ - (((s) + chunksize_mask) & ~chunksize_mask) - -#define CHUNK_HOOKS_INITIALIZER { \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL \ -} - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern size_t opt_lg_chunk; -extern const char *opt_dss; - -extern rtree_t chunks_rtree; - -extern size_t chunksize; -extern size_t chunksize_mask; /* (chunksize - 1). */ -extern size_t chunk_npages; - -extern const chunk_hooks_t chunk_hooks_default; - -chunk_hooks_t chunk_hooks_get(arena_t *arena); -chunk_hooks_t chunk_hooks_set(arena_t *arena, - const chunk_hooks_t *chunk_hooks); - -bool chunk_register(const void *chunk, const extent_node_t *node); -void chunk_deregister(const void *chunk, const extent_node_t *node); -void *chunk_alloc_base(size_t size); -void *chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, - bool dalloc_node); -void *chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -void chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool committed); -void chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool zeroed, bool committed); -void chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool committed); -bool chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, - size_t length); -bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, size_t offset, size_t length); -bool chunk_boot(void); -void chunk_prefork(void); -void chunk_postfork_parent(void); -void chunk_postfork_child(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -extent_node_t *chunk_lookup(const void *chunk, bool dependent); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_)) -JEMALLOC_INLINE extent_node_t * -chunk_lookup(const void *ptr, bool dependent) -{ - - return (rtree_get(&chunks_rtree, (uintptr_t)ptr, dependent)); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - -#include "jemalloc/internal/chunk_dss.h" -#include "jemalloc/internal/chunk_mmap.h" diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h deleted file mode 100644 index 388f46be0..000000000 --- a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h +++ /dev/null @@ -1,39 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef enum { - dss_prec_disabled = 0, - dss_prec_primary = 1, - dss_prec_secondary = 2, - - dss_prec_limit = 3 -} dss_prec_t; -#define DSS_PREC_DEFAULT dss_prec_secondary -#define DSS_DEFAULT "secondary" - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -extern const char *dss_prec_names[]; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -dss_prec_t chunk_dss_prec_get(void); -bool chunk_dss_prec_set(dss_prec_t dss_prec); -void *chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit); -bool chunk_in_dss(void *chunk); -bool chunk_dss_boot(void); -void chunk_dss_prefork(void); -void chunk_dss_postfork_parent(void); -void chunk_dss_postfork_child(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h deleted file mode 100644 index 7d8014c58..000000000 --- a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h +++ /dev/null @@ -1,21 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, - bool *commit); -bool chunk_dalloc_mmap(void *chunk, size_t size); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/ckh.h b/deps/jemalloc/include/jemalloc/internal/ckh.h index 75c1c979f..7b3850bc1 100644 --- a/deps/jemalloc/include/jemalloc/internal/ckh.h +++ b/deps/jemalloc/include/jemalloc/internal/ckh.h @@ -1,88 +1,101 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_CKH_H +#define JEMALLOC_INTERNAL_CKH_H -typedef struct ckh_s ckh_t; -typedef struct ckhc_s ckhc_t; +#include "jemalloc/internal/tsd.h" -/* Typedefs to allow easy function pointer passing. */ -typedef void ckh_hash_t (const void *, size_t[2]); -typedef bool ckh_keycomp_t (const void *, const void *); +/* Cuckoo hashing implementation. Skip to the end for the interface. */ + +/******************************************************************************/ +/* INTERNAL DEFINITIONS -- IGNORE */ +/******************************************************************************/ /* Maintain counters used to get an idea of performance. */ -/* #define CKH_COUNT */ +/* #define CKH_COUNT */ /* Print counter values in ckh_delete() (requires CKH_COUNT). */ -/* #define CKH_VERBOSE */ +/* #define CKH_VERBOSE */ /* * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit * one bucket per L1 cache line. */ -#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +/* Typedefs to allow easy function pointer passing. */ +typedef void ckh_hash_t (const void *, size_t[2]); +typedef bool ckh_keycomp_t (const void *, const void *); /* Hash table cell. */ -struct ckhc_s { - const void *key; - const void *data; -}; +typedef struct { + const void *key; + const void *data; +} ckhc_t; -struct ckh_s { +/* The hash table itself. */ +typedef struct { #ifdef CKH_COUNT /* Counters used to get an idea of performance. */ - uint64_t ngrows; - uint64_t nshrinks; - uint64_t nshrinkfails; - uint64_t ninserts; - uint64_t nrelocs; + uint64_t ngrows; + uint64_t nshrinks; + uint64_t nshrinkfails; + uint64_t ninserts; + uint64_t nrelocs; #endif /* Used for pseudo-random number generation. */ -#define CKH_A 1103515241 -#define CKH_C 12347 - uint32_t prng_state; + uint64_t prng_state; /* Total number of items. */ - size_t count; + size_t count; /* * Minimum and current number of hash table buckets. There are * 2^LG_CKH_BUCKET_CELLS cells per bucket. */ - unsigned lg_minbuckets; - unsigned lg_curbuckets; + unsigned lg_minbuckets; + unsigned lg_curbuckets; /* Hash and comparison functions. */ - ckh_hash_t *hash; - ckh_keycomp_t *keycomp; + ckh_hash_t *hash; + ckh_keycomp_t *keycomp; /* Hash table with 2^lg_curbuckets buckets. */ - ckhc_t *tab; -}; + ckhc_t *tab; +} ckh_t; -#endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS +/* BEGIN PUBLIC API */ +/******************************************************************************/ -bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +/* Lifetime management. Minitems is the initial capacity. */ +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp); -void ckh_delete(tsd_t *tsd, ckh_t *ckh); -size_t ckh_count(ckh_t *ckh); -bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, +void ckh_delete(tsd_t *tsd, ckh_t *ckh); + +/* Get the number of elements in the set. */ +size_t ckh_count(ckh_t *ckh); + +/* + * To iterate over the elements in the table, initialize *tabind to 0 and call + * this function until it returns true. Each call that returns false will + * update *key and *data to the next element in the table, assuming the pointers + * are non-NULL. + */ +bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); + +/* + * Basic hash table operations -- insert, removal, lookup. For ckh_remove and + * ckh_search, key or data can be NULL. The hash-table only stores pointers to + * the key and value, and doesn't do any lifetime management. + */ +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); -bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); -void ckh_string_hash(const void *key, size_t r_hash[2]); -bool ckh_string_keycomp(const void *k1, const void *k2); -void ckh_pointer_hash(const void *key, size_t r_hash[2]); -bool ckh_pointer_keycomp(const void *k1, const void *k2); +bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +/* Some useful hash and comparison functions for strings and pointers. */ +void ckh_string_hash(const void *key, size_t r_hash[2]); +bool ckh_string_keycomp(const void *k1, const void *k2); +void ckh_pointer_hash(const void *key, size_t r_hash[2]); +bool ckh_pointer_keycomp(const void *k1, const void *k2); -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_CKH_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/ctl.h b/deps/jemalloc/include/jemalloc/internal/ctl.h index 751c14b5b..d927d9480 100644 --- a/deps/jemalloc/include/jemalloc/internal/ctl.h +++ b/deps/jemalloc/include/jemalloc/internal/ctl.h @@ -1,81 +1,107 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ctl_node_s ctl_node_t; -typedef struct ctl_named_node_s ctl_named_node_t; -typedef struct ctl_indexed_node_s ctl_indexed_node_t; -typedef struct ctl_arena_stats_s ctl_arena_stats_t; -typedef struct ctl_stats_s ctl_stats_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct ctl_node_s { - bool named; -}; - -struct ctl_named_node_s { - struct ctl_node_s node; - const char *name; +#ifndef JEMALLOC_INTERNAL_CTL_H +#define JEMALLOC_INTERNAL_CTL_H + +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" + +/* Maximum ctl tree depth. */ +#define CTL_MAX_DEPTH 7 + +typedef struct ctl_node_s { + bool named; +} ctl_node_t; + +typedef struct ctl_named_node_s { + ctl_node_t node; + const char *name; /* If (nchildren == 0), this is a terminal node. */ - unsigned nchildren; - const ctl_node_t *children; - int (*ctl)(const size_t *, size_t, void *, size_t *, - void *, size_t); -}; + size_t nchildren; + const ctl_node_t *children; + int (*ctl)(tsd_t *, const size_t *, size_t, void *, size_t *, void *, + size_t); +} ctl_named_node_t; -struct ctl_indexed_node_s { - struct ctl_node_s node; - const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); -}; +typedef struct ctl_indexed_node_s { + struct ctl_node_s node; + const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, + size_t); +} ctl_indexed_node_t; -struct ctl_arena_stats_s { - bool initialized; - unsigned nthreads; - const char *dss; - ssize_t lg_dirty_mult; - size_t pactive; - size_t pdirty; - arena_stats_t astats; +typedef struct ctl_arena_stats_s { + arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. */ - size_t allocated_small; - uint64_t nmalloc_small; - uint64_t ndalloc_small; - uint64_t nrequests_small; - - malloc_bin_stats_t bstats[NBINS]; - malloc_large_stats_t *lstats; /* nlclasses elements. */ - malloc_huge_stats_t *hstats; /* nhclasses elements. */ + size_t allocated_small; + uint64_t nmalloc_small; + uint64_t ndalloc_small; + uint64_t nrequests_small; + + bin_stats_t bstats[NBINS]; + arena_stats_large_t lstats[NSIZES - NBINS]; +} ctl_arena_stats_t; + +typedef struct ctl_stats_s { + size_t allocated; + size_t active; + size_t metadata; + size_t metadata_thp; + size_t resident; + size_t mapped; + size_t retained; + + background_thread_stats_t background_thread; + mutex_prof_data_t mutex_prof_data[mutex_prof_num_global_mutexes]; +} ctl_stats_t; + +typedef struct ctl_arena_s ctl_arena_t; +struct ctl_arena_s { + unsigned arena_ind; + bool initialized; + ql_elm(ctl_arena_t) destroyed_link; + + /* Basic stats, supported even if !config_stats. */ + unsigned nthreads; + const char *dss; + ssize_t dirty_decay_ms; + ssize_t muzzy_decay_ms; + size_t pactive; + size_t pdirty; + size_t pmuzzy; + + /* NULL if !config_stats. */ + ctl_arena_stats_t *astats; }; -struct ctl_stats_s { - size_t allocated; - size_t active; - size_t metadata; - size_t resident; - size_t mapped; - unsigned narenas; - ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen); -int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp); - -int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, +typedef struct ctl_arenas_s { + uint64_t epoch; + unsigned narenas; + ql_head(ctl_arena_t) destroyed; + + /* + * Element 0 corresponds to merged stats for extant arenas (accessed via + * MALLCTL_ARENAS_ALL), element 1 corresponds to merged stats for + * destroyed arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the + * remaining MALLOCX_ARENA_LIMIT elements correspond to arenas. + */ + ctl_arena_t *arenas[2 + MALLOCX_ARENA_LIMIT]; +} ctl_arenas_t; + +int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -bool ctl_boot(void); -void ctl_prefork(void); -void ctl_postfork_parent(void); -void ctl_postfork_child(void); +int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp); -#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ +int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); +bool ctl_boot(void); +void ctl_prefork(tsdn_t *tsdn); +void ctl_postfork_parent(tsdn_t *tsdn); +void ctl_postfork_child(tsdn_t *tsdn); + +#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ != 0) { \ malloc_printf( \ @@ -85,7 +111,7 @@ void ctl_postfork_child(void); } \ } while (0) -#define xmallctlnametomib(name, mibp, miblenp) do { \ +#define xmallctlnametomib(name, mibp, miblenp) do { \ if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ malloc_printf("<jemalloc>: Failure in " \ "xmallctlnametomib(\"%s\", ...)\n", name); \ @@ -93,7 +119,7 @@ void ctl_postfork_child(void); } \ } while (0) -#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ +#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ newlen) != 0) { \ malloc_write( \ @@ -102,10 +128,4 @@ void ctl_postfork_child(void); } \ } while (0) -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - +#endif /* JEMALLOC_INTERNAL_CTL_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/div.h b/deps/jemalloc/include/jemalloc/internal/div.h new file mode 100644 index 000000000..aebae9398 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/div.h @@ -0,0 +1,41 @@ +#ifndef JEMALLOC_INTERNAL_DIV_H +#define JEMALLOC_INTERNAL_DIV_H + +#include "jemalloc/internal/assert.h" + +/* + * This module does the division that computes the index of a region in a slab, + * given its offset relative to the base. + * That is, given a divisor d, an n = i * d (all integers), we'll return i. + * We do some pre-computation to do this more quickly than a CPU division + * instruction. + * We bound n < 2^32, and don't support dividing by one. + */ + +typedef struct div_info_s div_info_t; +struct div_info_s { + uint32_t magic; +#ifdef JEMALLOC_DEBUG + size_t d; +#endif +}; + +void div_init(div_info_t *div_info, size_t divisor); + +static inline size_t +div_compute(div_info_t *div_info, size_t n) { + assert(n <= (uint32_t)-1); + /* + * This generates, e.g. mov; imul; shr on x86-64. On a 32-bit machine, + * the compilers I tried were all smart enough to turn this into the + * appropriate "get the high 32 bits of the result of a multiply" (e.g. + * mul; mov edx eax; on x86, umull on arm, etc.). + */ + size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32; +#ifdef JEMALLOC_DEBUG + assert(i * div_info->d == n); +#endif + return i; +} + +#endif /* JEMALLOC_INTERNAL_DIV_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/emitter.h b/deps/jemalloc/include/jemalloc/internal/emitter.h new file mode 100644 index 000000000..3a2b2f7f2 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/emitter.h @@ -0,0 +1,435 @@ +#ifndef JEMALLOC_INTERNAL_EMITTER_H +#define JEMALLOC_INTERNAL_EMITTER_H + +#include "jemalloc/internal/ql.h" + +typedef enum emitter_output_e emitter_output_t; +enum emitter_output_e { + emitter_output_json, + emitter_output_table +}; + +typedef enum emitter_justify_e emitter_justify_t; +enum emitter_justify_e { + emitter_justify_left, + emitter_justify_right, + /* Not for users; just to pass to internal functions. */ + emitter_justify_none +}; + +typedef enum emitter_type_e emitter_type_t; +enum emitter_type_e { + emitter_type_bool, + emitter_type_int, + emitter_type_unsigned, + emitter_type_uint32, + emitter_type_uint64, + emitter_type_size, + emitter_type_ssize, + emitter_type_string, + /* + * A title is a column title in a table; it's just a string, but it's + * not quoted. + */ + emitter_type_title, +}; + +typedef struct emitter_col_s emitter_col_t; +struct emitter_col_s { + /* Filled in by the user. */ + emitter_justify_t justify; + int width; + emitter_type_t type; + union { + bool bool_val; + int int_val; + unsigned unsigned_val; + uint32_t uint32_val; + uint64_t uint64_val; + size_t size_val; + ssize_t ssize_val; + const char *str_val; + }; + + /* Filled in by initialization. */ + ql_elm(emitter_col_t) link; +}; + +typedef struct emitter_row_s emitter_row_t; +struct emitter_row_s { + ql_head(emitter_col_t) cols; +}; + +static inline void +emitter_row_init(emitter_row_t *row) { + ql_new(&row->cols); +} + +static inline void +emitter_col_init(emitter_col_t *col, emitter_row_t *row) { + ql_elm_new(col, link); + ql_tail_insert(&row->cols, col, link); +} + +typedef struct emitter_s emitter_t; +struct emitter_s { + emitter_output_t output; + /* The output information. */ + void (*write_cb)(void *, const char *); + void *cbopaque; + int nesting_depth; + /* True if we've already emitted a value at the given depth. */ + bool item_at_depth; +}; + +static inline void +emitter_init(emitter_t *emitter, emitter_output_t emitter_output, + void (*write_cb)(void *, const char *), void *cbopaque) { + emitter->output = emitter_output; + emitter->write_cb = write_cb; + emitter->cbopaque = cbopaque; + emitter->item_at_depth = false; + emitter->nesting_depth = 0; +} + +/* Internal convenience function. Write to the emitter the given string. */ +JEMALLOC_FORMAT_PRINTF(2, 3) +static inline void +emitter_printf(emitter_t *emitter, const char *format, ...) { + va_list ap; + + va_start(ap, format); + malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap); + va_end(ap); +} + +/* Write to the emitter the given string, but only in table mode. */ +JEMALLOC_FORMAT_PRINTF(2, 3) +static inline void +emitter_table_printf(emitter_t *emitter, const char *format, ...) { + if (emitter->output == emitter_output_table) { + va_list ap; + va_start(ap, format); + malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap); + va_end(ap); + } +} + +static inline void +emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, + emitter_justify_t justify, int width) { + size_t written; + if (justify == emitter_justify_none) { + written = malloc_snprintf(out_fmt, out_size, + "%%%s", fmt_specifier); + } else if (justify == emitter_justify_left) { + written = malloc_snprintf(out_fmt, out_size, + "%%-%d%s", width, fmt_specifier); + } else { + written = malloc_snprintf(out_fmt, out_size, + "%%%d%s", width, fmt_specifier); + } + /* Only happens in case of bad format string, which *we* choose. */ + assert(written < out_size); +} + +/* + * Internal. Emit the given value type in the relevant encoding (so that the + * bool true gets mapped to json "true", but the string "true" gets mapped to + * json "\"true\"", for instance. + * + * Width is ignored if justify is emitter_justify_none. + */ +static inline void +emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, + emitter_type_t value_type, const void *value) { + size_t str_written; +#define BUF_SIZE 256 +#define FMT_SIZE 10 + /* + * We dynamically generate a format string to emit, to let us use the + * snprintf machinery. This is kinda hacky, but gets the job done + * quickly without having to think about the various snprintf edge + * cases. + */ + char fmt[FMT_SIZE]; + char buf[BUF_SIZE]; + +#define EMIT_SIMPLE(type, format) \ + emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width); \ + emitter_printf(emitter, fmt, *(const type *)value); \ + + switch (value_type) { + case emitter_type_bool: + emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width); + emitter_printf(emitter, fmt, *(const bool *)value ? + "true" : "false"); + break; + case emitter_type_int: + EMIT_SIMPLE(int, "d") + break; + case emitter_type_unsigned: + EMIT_SIMPLE(unsigned, "u") + break; + case emitter_type_ssize: + EMIT_SIMPLE(ssize_t, "zd") + break; + case emitter_type_size: + EMIT_SIMPLE(size_t, "zu") + break; + case emitter_type_string: + str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", + *(const char *const *)value); + /* + * We control the strings we output; we shouldn't get anything + * anywhere near the fmt size. + */ + assert(str_written < BUF_SIZE); + emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width); + emitter_printf(emitter, fmt, buf); + break; + case emitter_type_uint32: + EMIT_SIMPLE(uint32_t, FMTu32) + break; + case emitter_type_uint64: + EMIT_SIMPLE(uint64_t, FMTu64) + break; + case emitter_type_title: + EMIT_SIMPLE(char *const, "s"); + break; + default: + unreachable(); + } +#undef BUF_SIZE +#undef FMT_SIZE +} + + +/* Internal functions. In json mode, tracks nesting state. */ +static inline void +emitter_nest_inc(emitter_t *emitter) { + emitter->nesting_depth++; + emitter->item_at_depth = false; +} + +static inline void +emitter_nest_dec(emitter_t *emitter) { + emitter->nesting_depth--; + emitter->item_at_depth = true; +} + +static inline void +emitter_indent(emitter_t *emitter) { + int amount = emitter->nesting_depth; + const char *indent_str; + if (emitter->output == emitter_output_json) { + indent_str = "\t"; + } else { + amount *= 2; + indent_str = " "; + } + for (int i = 0; i < amount; i++) { + emitter_printf(emitter, "%s", indent_str); + } +} + +static inline void +emitter_json_key_prefix(emitter_t *emitter) { + emitter_printf(emitter, "%s\n", emitter->item_at_depth ? "," : ""); + emitter_indent(emitter); +} + +static inline void +emitter_begin(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth == 0); + emitter_printf(emitter, "{"); + emitter_nest_inc(emitter); + } else { + // tabular init + emitter_printf(emitter, "%s", ""); + } +} + +static inline void +emitter_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth == 1); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n}\n"); + } +} + +/* + * Note emits a different kv pair as well, but only in table mode. Omits the + * note if table_note_key is NULL. + */ +static inline void +emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key, + emitter_type_t value_type, const void *value, + const char *table_note_key, emitter_type_t table_note_value_type, + const void *table_note_value) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "\"%s\": ", json_key); + emitter_print_value(emitter, emitter_justify_none, -1, + value_type, value); + } else { + emitter_indent(emitter); + emitter_printf(emitter, "%s: ", table_key); + emitter_print_value(emitter, emitter_justify_none, -1, + value_type, value); + if (table_note_key != NULL) { + emitter_printf(emitter, " (%s: ", table_note_key); + emitter_print_value(emitter, emitter_justify_none, -1, + table_note_value_type, table_note_value); + emitter_printf(emitter, ")"); + } + emitter_printf(emitter, "\n"); + } + emitter->item_at_depth = true; +} + +static inline void +emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key, + emitter_type_t value_type, const void *value) { + emitter_kv_note(emitter, json_key, table_key, value_type, value, NULL, + emitter_type_bool, NULL); +} + +static inline void +emitter_json_kv(emitter_t *emitter, const char *json_key, + emitter_type_t value_type, const void *value) { + if (emitter->output == emitter_output_json) { + emitter_kv(emitter, json_key, NULL, value_type, value); + } +} + +static inline void +emitter_table_kv(emitter_t *emitter, const char *table_key, + emitter_type_t value_type, const void *value) { + if (emitter->output == emitter_output_table) { + emitter_kv(emitter, NULL, table_key, value_type, value); + } +} + +static inline void +emitter_dict_begin(emitter_t *emitter, const char *json_key, + const char *table_header) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "\"%s\": {", json_key); + emitter_nest_inc(emitter); + } else { + emitter_indent(emitter); + emitter_printf(emitter, "%s\n", table_header); + emitter_nest_inc(emitter); + } +} + +static inline void +emitter_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "}"); + } else { + emitter_nest_dec(emitter); + } +} + +static inline void +emitter_json_dict_begin(emitter_t *emitter, const char *json_key) { + if (emitter->output == emitter_output_json) { + emitter_dict_begin(emitter, json_key, NULL); + } +} + +static inline void +emitter_json_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + emitter_dict_end(emitter); + } +} + +static inline void +emitter_table_dict_begin(emitter_t *emitter, const char *table_key) { + if (emitter->output == emitter_output_table) { + emitter_dict_begin(emitter, NULL, table_key); + } +} + +static inline void +emitter_table_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_table) { + emitter_dict_end(emitter); + } +} + +static inline void +emitter_json_arr_begin(emitter_t *emitter, const char *json_key) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "\"%s\": [", json_key); + emitter_nest_inc(emitter); + } +} + +static inline void +emitter_json_arr_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "]"); + } +} + +static inline void +emitter_json_arr_obj_begin(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "{"); + emitter_nest_inc(emitter); + } +} + +static inline void +emitter_json_arr_obj_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "}"); + } +} + +static inline void +emitter_json_arr_value(emitter_t *emitter, emitter_type_t value_type, + const void *value) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_print_value(emitter, emitter_justify_none, -1, + value_type, value); + } +} + +static inline void +emitter_table_row(emitter_t *emitter, emitter_row_t *row) { + if (emitter->output != emitter_output_table) { + return; + } + emitter_col_t *col; + ql_foreach(col, &row->cols, link) { + emitter_print_value(emitter, col->justify, col->width, + col->type, (const void *)&col->bool_val); + } + emitter_table_printf(emitter, "\n"); +} + +#endif /* JEMALLOC_INTERNAL_EMITTER_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/extent.h b/deps/jemalloc/include/jemalloc/internal/extent.h deleted file mode 100644 index 386d50ef4..000000000 --- a/deps/jemalloc/include/jemalloc/internal/extent.h +++ /dev/null @@ -1,239 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct extent_node_s extent_node_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Tree of extents. Use accessor functions for en_* fields. */ -struct extent_node_s { - /* Arena from which this extent came, if any. */ - arena_t *en_arena; - - /* Pointer to the extent that this tree node is responsible for. */ - void *en_addr; - - /* Total region size. */ - size_t en_size; - - /* - * The zeroed flag is used by chunk recycling code to track whether - * memory is zero-filled. - */ - bool en_zeroed; - - /* - * True if physical memory is committed to the extent, whether - * explicitly or implicitly as on a system that overcommits and - * satisfies physical memory needs on demand via soft page faults. - */ - bool en_committed; - - /* - * The achunk flag is used to validate that huge allocation lookups - * don't return arena chunks. - */ - bool en_achunk; - - /* Profile counters, used for huge objects. */ - prof_tctx_t *en_prof_tctx; - - /* Linkage for arena's runs_dirty and chunks_cache rings. */ - arena_runs_dirty_link_t rd; - qr(extent_node_t) cc_link; - - union { - /* Linkage for the size/address-ordered tree. */ - rb_node(extent_node_t) szad_link; - - /* Linkage for arena's huge and node_cache lists. */ - ql_elm(extent_node_t) ql_link; - }; - - /* Linkage for the address-ordered tree. */ - rb_node(extent_node_t) ad_link; -}; -typedef rb_tree(extent_node_t) extent_tree_t; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) - -rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -arena_t *extent_node_arena_get(const extent_node_t *node); -void *extent_node_addr_get(const extent_node_t *node); -size_t extent_node_size_get(const extent_node_t *node); -bool extent_node_zeroed_get(const extent_node_t *node); -bool extent_node_committed_get(const extent_node_t *node); -bool extent_node_achunk_get(const extent_node_t *node); -prof_tctx_t *extent_node_prof_tctx_get(const extent_node_t *node); -void extent_node_arena_set(extent_node_t *node, arena_t *arena); -void extent_node_addr_set(extent_node_t *node, void *addr); -void extent_node_size_set(extent_node_t *node, size_t size); -void extent_node_zeroed_set(extent_node_t *node, bool zeroed); -void extent_node_committed_set(extent_node_t *node, bool committed); -void extent_node_achunk_set(extent_node_t *node, bool achunk); -void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); -void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, - size_t size, bool zeroed, bool committed); -void extent_node_dirty_linkage_init(extent_node_t *node); -void extent_node_dirty_insert(extent_node_t *node, - arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty); -void extent_node_dirty_remove(extent_node_t *node); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) -JEMALLOC_INLINE arena_t * -extent_node_arena_get(const extent_node_t *node) -{ - - return (node->en_arena); -} - -JEMALLOC_INLINE void * -extent_node_addr_get(const extent_node_t *node) -{ - - return (node->en_addr); -} - -JEMALLOC_INLINE size_t -extent_node_size_get(const extent_node_t *node) -{ - - return (node->en_size); -} - -JEMALLOC_INLINE bool -extent_node_zeroed_get(const extent_node_t *node) -{ - - return (node->en_zeroed); -} - -JEMALLOC_INLINE bool -extent_node_committed_get(const extent_node_t *node) -{ - - assert(!node->en_achunk); - return (node->en_committed); -} - -JEMALLOC_INLINE bool -extent_node_achunk_get(const extent_node_t *node) -{ - - return (node->en_achunk); -} - -JEMALLOC_INLINE prof_tctx_t * -extent_node_prof_tctx_get(const extent_node_t *node) -{ - - return (node->en_prof_tctx); -} - -JEMALLOC_INLINE void -extent_node_arena_set(extent_node_t *node, arena_t *arena) -{ - - node->en_arena = arena; -} - -JEMALLOC_INLINE void -extent_node_addr_set(extent_node_t *node, void *addr) -{ - - node->en_addr = addr; -} - -JEMALLOC_INLINE void -extent_node_size_set(extent_node_t *node, size_t size) -{ - - node->en_size = size; -} - -JEMALLOC_INLINE void -extent_node_zeroed_set(extent_node_t *node, bool zeroed) -{ - - node->en_zeroed = zeroed; -} - -JEMALLOC_INLINE void -extent_node_committed_set(extent_node_t *node, bool committed) -{ - - node->en_committed = committed; -} - -JEMALLOC_INLINE void -extent_node_achunk_set(extent_node_t *node, bool achunk) -{ - - node->en_achunk = achunk; -} - -JEMALLOC_INLINE void -extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx) -{ - - node->en_prof_tctx = tctx; -} - -JEMALLOC_INLINE void -extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, - bool zeroed, bool committed) -{ - - extent_node_arena_set(node, arena); - extent_node_addr_set(node, addr); - extent_node_size_set(node, size); - extent_node_zeroed_set(node, zeroed); - extent_node_committed_set(node, committed); - extent_node_achunk_set(node, false); - if (config_prof) - extent_node_prof_tctx_set(node, NULL); -} - -JEMALLOC_INLINE void -extent_node_dirty_linkage_init(extent_node_t *node) -{ - - qr_new(&node->rd, rd_link); - qr_new(node, cc_link); -} - -JEMALLOC_INLINE void -extent_node_dirty_insert(extent_node_t *node, - arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty) -{ - - qr_meld(runs_dirty, &node->rd, rd_link); - qr_meld(chunks_dirty, node, cc_link); -} - -JEMALLOC_INLINE void -extent_node_dirty_remove(extent_node_t *node) -{ - - qr_remove(&node->rd, rd_link); - qr_remove(node, cc_link); -} - -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/deps/jemalloc/include/jemalloc/internal/extent_dss.h b/deps/jemalloc/include/jemalloc/internal/extent_dss.h new file mode 100644 index 000000000..e8f02ce2a --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/extent_dss.h @@ -0,0 +1,26 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_H +#define JEMALLOC_INTERNAL_EXTENT_DSS_H + +typedef enum { + dss_prec_disabled = 0, + dss_prec_primary = 1, + dss_prec_secondary = 2, + + dss_prec_limit = 3 +} dss_prec_t; +#define DSS_PREC_DEFAULT dss_prec_secondary +#define DSS_DEFAULT "secondary" + +extern const char *dss_prec_names[]; + +extern const char *opt_dss; + +dss_prec_t extent_dss_prec_get(void); +bool extent_dss_prec_set(dss_prec_t dss_prec); +void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit); +bool extent_in_dss(void *addr); +bool extent_dss_mergeable(void *addr_a, void *addr_b); +void extent_dss_boot(void); + +#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/extent_externs.h b/deps/jemalloc/include/jemalloc/internal/extent_externs.h new file mode 100644 index 000000000..b8a4d026c --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/extent_externs.h @@ -0,0 +1,73 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H + +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_pool.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/rtree.h" + +extern size_t opt_lg_extent_max_active_fit; + +extern rtree_t extents_rtree; +extern const extent_hooks_t extent_hooks_default; +extern mutex_pool_t extent_mutex_pool; + +extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); +void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); + +extent_hooks_t *extent_hooks_get(arena_t *arena); +extent_hooks_t *extent_hooks_set(tsd_t *tsd, arena_t *arena, + extent_hooks_t *extent_hooks); + +#ifdef JEMALLOC_JET +size_t extent_size_quantize_floor(size_t size); +size_t extent_size_quantize_ceil(size_t size); +#endif + +rb_proto(, extent_avail_, extent_tree_t, extent_t) +ph_proto(, extent_heap_, extent_heap_t, extent_t) + +bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, + bool delay_coalesce); +extent_state_t extents_state_get(const extents_t *extents); +size_t extents_npages_get(extents_t *extents); +extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, + size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit); +void extents_dalloc(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); +extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min); +void extents_prefork(tsdn_t *tsdn, extents_t *extents); +void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents); +void extents_postfork_child(tsdn_t *tsdn, extents_t *extents); +extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); +void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); +bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b); + +bool extent_boot(void); + +#endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/extent_inlines.h b/deps/jemalloc/include/jemalloc/internal/extent_inlines.h new file mode 100644 index 000000000..77181df8d --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/extent_inlines.h @@ -0,0 +1,433 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H +#define JEMALLOC_INTERNAL_EXTENT_INLINES_H + +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_pool.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sz.h" + +static inline void +extent_lock(tsdn_t *tsdn, extent_t *extent) { + assert(extent != NULL); + mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +} + +static inline void +extent_unlock(tsdn_t *tsdn, extent_t *extent) { + assert(extent != NULL); + mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +} + +static inline void +extent_lock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { + assert(extent1 != NULL && extent2 != NULL); + mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, + (uintptr_t)extent2); +} + +static inline void +extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { + assert(extent1 != NULL && extent2 != NULL); + mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, + (uintptr_t)extent2); +} + +static inline arena_t * +extent_arena_get(const extent_t *extent) { + unsigned arena_ind = (unsigned)((extent->e_bits & + EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); + /* + * The following check is omitted because we should never actually read + * a NULL arena pointer. + */ + if (false && arena_ind >= MALLOCX_ARENA_LIMIT) { + return NULL; + } + assert(arena_ind < MALLOCX_ARENA_LIMIT); + return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE); +} + +static inline szind_t +extent_szind_get_maybe_invalid(const extent_t *extent) { + szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> + EXTENT_BITS_SZIND_SHIFT); + assert(szind <= NSIZES); + return szind; +} + +static inline szind_t +extent_szind_get(const extent_t *extent) { + szind_t szind = extent_szind_get_maybe_invalid(extent); + assert(szind < NSIZES); /* Never call when "invalid". */ + return szind; +} + +static inline size_t +extent_usize_get(const extent_t *extent) { + return sz_index2size(extent_szind_get(extent)); +} + +static inline size_t +extent_sn_get(const extent_t *extent) { + return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> + EXTENT_BITS_SN_SHIFT); +} + +static inline extent_state_t +extent_state_get(const extent_t *extent) { + return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >> + EXTENT_BITS_STATE_SHIFT); +} + +static inline bool +extent_zeroed_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >> + EXTENT_BITS_ZEROED_SHIFT); +} + +static inline bool +extent_committed_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >> + EXTENT_BITS_COMMITTED_SHIFT); +} + +static inline bool +extent_dumpable_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >> + EXTENT_BITS_DUMPABLE_SHIFT); +} + +static inline bool +extent_slab_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> + EXTENT_BITS_SLAB_SHIFT); +} + +static inline unsigned +extent_nfree_get(const extent_t *extent) { + assert(extent_slab_get(extent)); + return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >> + EXTENT_BITS_NFREE_SHIFT); +} + +static inline void * +extent_base_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent_slab_get(extent)); + return PAGE_ADDR2BASE(extent->e_addr); +} + +static inline void * +extent_addr_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent_slab_get(extent)); + return extent->e_addr; +} + +static inline size_t +extent_size_get(const extent_t *extent) { + return (extent->e_size_esn & EXTENT_SIZE_MASK); +} + +static inline size_t +extent_esn_get(const extent_t *extent) { + return (extent->e_size_esn & EXTENT_ESN_MASK); +} + +static inline size_t +extent_bsize_get(const extent_t *extent) { + return extent->e_bsize; +} + +static inline void * +extent_before_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) - PAGE); +} + +static inline void * +extent_last_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent) - PAGE); +} + +static inline void * +extent_past_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent)); +} + +static inline arena_slab_data_t * +extent_slab_data_get(extent_t *extent) { + assert(extent_slab_get(extent)); + return &extent->e_slab_data; +} + +static inline const arena_slab_data_t * +extent_slab_data_get_const(const extent_t *extent) { + assert(extent_slab_get(extent)); + return &extent->e_slab_data; +} + +static inline prof_tctx_t * +extent_prof_tctx_get(const extent_t *extent) { + return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx, + ATOMIC_ACQUIRE); +} + +static inline void +extent_arena_set(extent_t *extent, arena_t *arena) { + unsigned arena_ind = (arena != NULL) ? arena_ind_get(arena) : ((1U << + MALLOCX_ARENA_BITS) - 1); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | + ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); +} + +static inline void +extent_addr_set(extent_t *extent, void *addr) { + extent->e_addr = addr; +} + +static inline void +extent_addr_randomize(UNUSED tsdn_t *tsdn, extent_t *extent, size_t alignment) { + assert(extent_base_get(extent) == extent_addr_get(extent)); + + if (alignment < PAGE) { + unsigned lg_range = LG_PAGE - + lg_floor(CACHELINE_CEILING(alignment)); + size_t r; + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_offset_statep_get(tsd), lg_range); + } else { + r = prng_lg_range_zu( + &extent_arena_get(extent)->offset_state, + lg_range, true); + } + uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - + lg_range); + extent->e_addr = (void *)((uintptr_t)extent->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == + extent->e_addr); + } +} + +static inline void +extent_size_set(extent_t *extent, size_t size) { + assert((size & ~EXTENT_SIZE_MASK) == 0); + extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); +} + +static inline void +extent_esn_set(extent_t *extent, size_t esn) { + extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn & + EXTENT_ESN_MASK); +} + +static inline void +extent_bsize_set(extent_t *extent, size_t bsize) { + extent->e_bsize = bsize; +} + +static inline void +extent_szind_set(extent_t *extent, szind_t szind) { + assert(szind <= NSIZES); /* NSIZES means "invalid". */ + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | + ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); +} + +static inline void +extent_nfree_set(extent_t *extent, unsigned nfree) { + assert(extent_slab_get(extent)); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) | + ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_inc(extent_t *extent) { + assert(extent_slab_get(extent)); + extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_dec(extent_t *extent) { + assert(extent_slab_get(extent)); + extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_sn_set(extent_t *extent, size_t sn) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | + ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); +} + +static inline void +extent_state_set(extent_t *extent, extent_state_t state) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) | + ((uint64_t)state << EXTENT_BITS_STATE_SHIFT); +} + +static inline void +extent_zeroed_set(extent_t *extent, bool zeroed) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) | + ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT); +} + +static inline void +extent_committed_set(extent_t *extent, bool committed) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) | + ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); +} + +static inline void +extent_dumpable_set(extent_t *extent, bool dumpable) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | + ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); +} + +static inline void +extent_slab_set(extent_t *extent, bool slab) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | + ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); +} + +static inline void +extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { + atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); +} + +static inline void +extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, + bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, + bool committed, bool dumpable) { + assert(addr == PAGE_ADDR2BASE(addr) || !slab); + + extent_arena_set(extent, arena); + extent_addr_set(extent, addr); + extent_size_set(extent, size); + extent_slab_set(extent, slab); + extent_szind_set(extent, szind); + extent_sn_set(extent, sn); + extent_state_set(extent, state); + extent_zeroed_set(extent, zeroed); + extent_committed_set(extent, committed); + extent_dumpable_set(extent, dumpable); + ql_elm_new(extent, ql_link); + if (config_prof) { + extent_prof_tctx_set(extent, NULL); + } +} + +static inline void +extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { + extent_arena_set(extent, NULL); + extent_addr_set(extent, addr); + extent_bsize_set(extent, bsize); + extent_slab_set(extent, false); + extent_szind_set(extent, NSIZES); + extent_sn_set(extent, sn); + extent_state_set(extent, extent_state_active); + extent_zeroed_set(extent, true); + extent_committed_set(extent, true); + extent_dumpable_set(extent, true); +} + +static inline void +extent_list_init(extent_list_t *list) { + ql_new(list); +} + +static inline extent_t * +extent_list_first(const extent_list_t *list) { + return ql_first(list); +} + +static inline extent_t * +extent_list_last(const extent_list_t *list) { + return ql_last(list, ql_link); +} + +static inline void +extent_list_append(extent_list_t *list, extent_t *extent) { + ql_tail_insert(list, extent, ql_link); +} + +static inline void +extent_list_prepend(extent_list_t *list, extent_t *extent) { + ql_head_insert(list, extent, ql_link); +} + +static inline void +extent_list_replace(extent_list_t *list, extent_t *to_remove, + extent_t *to_insert) { + ql_after_insert(to_remove, to_insert, ql_link); + ql_remove(list, to_remove, ql_link); +} + +static inline void +extent_list_remove(extent_list_t *list, extent_t *extent) { + ql_remove(list, extent, ql_link); +} + +static inline int +extent_sn_comp(const extent_t *a, const extent_t *b) { + size_t a_sn = extent_sn_get(a); + size_t b_sn = extent_sn_get(b); + + return (a_sn > b_sn) - (a_sn < b_sn); +} + +static inline int +extent_esn_comp(const extent_t *a, const extent_t *b) { + size_t a_esn = extent_esn_get(a); + size_t b_esn = extent_esn_get(b); + + return (a_esn > b_esn) - (a_esn < b_esn); +} + +static inline int +extent_ad_comp(const extent_t *a, const extent_t *b) { + uintptr_t a_addr = (uintptr_t)extent_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_addr_get(b); + + return (a_addr > b_addr) - (a_addr < b_addr); +} + +static inline int +extent_ead_comp(const extent_t *a, const extent_t *b) { + uintptr_t a_eaddr = (uintptr_t)a; + uintptr_t b_eaddr = (uintptr_t)b; + + return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr); +} + +static inline int +extent_snad_comp(const extent_t *a, const extent_t *b) { + int ret; + + ret = extent_sn_comp(a, b); + if (ret != 0) { + return ret; + } + + ret = extent_ad_comp(a, b); + return ret; +} + +static inline int +extent_esnead_comp(const extent_t *a, const extent_t *b) { + int ret; + + ret = extent_esn_comp(a, b); + if (ret != 0) { + return ret; + } + + ret = extent_ead_comp(a, b); + return ret; +} + +#endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/extent_mmap.h b/deps/jemalloc/include/jemalloc/internal/extent_mmap.h new file mode 100644 index 000000000..55f17ee48 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/extent_mmap.h @@ -0,0 +1,10 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H + +extern bool opt_retain; + +void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit); +bool extent_dalloc_mmap(void *addr, size_t size); + +#endif /* JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/extent_structs.h b/deps/jemalloc/include/jemalloc/internal/extent_structs.h new file mode 100644 index 000000000..4873b9e9e --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/extent_structs.h @@ -0,0 +1,219 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_STRUCTS_H +#define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/size_classes.h" + +typedef enum { + extent_state_active = 0, + extent_state_dirty = 1, + extent_state_muzzy = 2, + extent_state_retained = 3 +} extent_state_t; + +/* Extent (span of pages). Use accessor functions for e_* fields. */ +struct extent_s { + /* + * Bitfield containing several fields: + * + * a: arena_ind + * b: slab + * c: committed + * d: dumpable + * z: zeroed + * t: state + * i: szind + * f: nfree + * n: sn + * + * nnnnnnnn ... nnnnffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa + * + * arena_ind: Arena from which this extent came, or all 1 bits if + * unassociated. + * + * slab: The slab flag indicates whether the extent is used for a slab + * of small regions. This helps differentiate small size classes, + * and it indicates whether interior pointers can be looked up via + * iealloc(). + * + * committed: The committed flag indicates whether physical memory is + * committed to the extent, whether explicitly or implicitly + * as on a system that overcommits and satisfies physical + * memory needs on demand via soft page faults. + * + * dumpable: The dumpable flag indicates whether or not we've set the + * memory in question to be dumpable. Note that this + * interacts somewhat subtly with user-specified extent hooks, + * since we don't know if *they* are fiddling with + * dumpability (in which case, we don't want to undo whatever + * they're doing). To deal with this scenario, we: + * - Make dumpable false only for memory allocated with the + * default hooks. + * - Only allow memory to go from non-dumpable to dumpable, + * and only once. + * - Never make the OS call to allow dumping when the + * dumpable bit is already set. + * These three constraints mean that we will never + * accidentally dump user memory that the user meant to set + * nondumpable with their extent hooks. + * + * + * zeroed: The zeroed flag is used by extent recycling code to track + * whether memory is zero-filled. + * + * state: The state flag is an extent_state_t. + * + * szind: The szind flag indicates usable size class index for + * allocations residing in this extent, regardless of whether the + * extent is a slab. Extent size and usable size often differ + * even for non-slabs, either due to sz_large_pad or promotion of + * sampled small regions. + * + * nfree: Number of free regions in slab. + * + * sn: Serial number (potentially non-unique). + * + * Serial numbers may wrap around if !opt_retain, but as long as + * comparison functions fall back on address comparison for equal + * serial numbers, stable (if imperfect) ordering is maintained. + * + * Serial numbers may not be unique even in the absence of + * wrap-around, e.g. when splitting an extent and assigning the same + * serial number to both resulting adjacent extents. + */ + uint64_t e_bits; +#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) + +#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS +#define EXTENT_BITS_ARENA_SHIFT 0 +#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) + +#define EXTENT_BITS_SLAB_WIDTH 1 +#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) +#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) + +#define EXTENT_BITS_COMMITTED_WIDTH 1 +#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) +#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) + +#define EXTENT_BITS_DUMPABLE_WIDTH 1 +#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) +#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) + +#define EXTENT_BITS_ZEROED_WIDTH 1 +#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) +#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) + +#define EXTENT_BITS_STATE_WIDTH 2 +#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) +#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) + +#define EXTENT_BITS_SZIND_WIDTH LG_CEIL_NSIZES +#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) +#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) + +#define EXTENT_BITS_NFREE_WIDTH (LG_SLAB_MAXREGS + 1) +#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) +#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) + +#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) + + /* Pointer to the extent that this structure is responsible for. */ + void *e_addr; + + union { + /* + * Extent size and serial number associated with the extent + * structure (different than the serial number for the extent at + * e_addr). + * + * ssssssss [...] ssssssss ssssnnnn nnnnnnnn + */ + size_t e_size_esn; + #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1)) + #define EXTENT_ESN_MASK ((size_t)PAGE-1) + /* Base extent size, which may not be a multiple of PAGE. */ + size_t e_bsize; + }; + + /* + * List linkage, used by a variety of lists: + * - bin_t's slabs_full + * - extents_t's LRU + * - stashed dirty extents + * - arena's large allocations + */ + ql_elm(extent_t) ql_link; + + /* + * Linkage for per size class sn/address-ordered heaps, and + * for extent_avail + */ + phn(extent_t) ph_link; + + union { + /* Small region slab metadata. */ + arena_slab_data_t e_slab_data; + + /* + * Profile counters, used for large objects. Points to a + * prof_tctx_t. + */ + atomic_p_t e_prof_tctx; + }; +}; +typedef ql_head(extent_t) extent_list_t; +typedef ph(extent_t) extent_tree_t; +typedef ph(extent_t) extent_heap_t; + +/* Quantized collection of extents, with built-in LRU queue. */ +struct extents_s { + malloc_mutex_t mtx; + + /* + * Quantized per size class heaps of extents. + * + * Synchronization: mtx. + */ + extent_heap_t heaps[NPSIZES+1]; + + /* + * Bitmap for which set bits correspond to non-empty heaps. + * + * Synchronization: mtx. + */ + bitmap_t bitmap[BITMAP_GROUPS(NPSIZES+1)]; + + /* + * LRU of all extents in heaps. + * + * Synchronization: mtx. + */ + extent_list_t lru; + + /* + * Page sum for all extents in heaps. + * + * The synchronization here is a little tricky. Modifications to npages + * must hold mtx, but reads need not (though, a reader who sees npages + * without holding the mutex can't assume anything about the rest of the + * state of the extents_t). + */ + atomic_zu_t npages; + + /* All stored extents must be in the same state. */ + extent_state_t state; + + /* + * If true, delay coalescing until eviction; otherwise coalesce during + * deallocation. + */ + bool delay_coalesce; +}; + +#endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/extent_types.h b/deps/jemalloc/include/jemalloc/internal/extent_types.h new file mode 100644 index 000000000..c0561d99f --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/extent_types.h @@ -0,0 +1,17 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H +#define JEMALLOC_INTERNAL_EXTENT_TYPES_H + +typedef struct extent_s extent_t; +typedef struct extents_s extents_t; + +#define EXTENT_HOOKS_INITIALIZER NULL + +#define EXTENT_GROW_MAX_PIND (NPSIZES - 1) + +/* + * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) + * is the max ratio between the size of the active extent and the new extent. + */ +#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 + +#endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/hash.h b/deps/jemalloc/include/jemalloc/internal/hash.h index bcead337a..dcfc992df 100644 --- a/deps/jemalloc/include/jemalloc/internal/hash.h +++ b/deps/jemalloc/include/jemalloc/internal/hash.h @@ -1,93 +1,76 @@ +#ifndef JEMALLOC_INTERNAL_HASH_H +#define JEMALLOC_INTERNAL_HASH_H + +#include "jemalloc/internal/assert.h" + /* * The following hash function is based on MurmurHash3, placed into the public - * domain by Austin Appleby. See http://code.google.com/p/smhasher/ for + * domain by Austin Appleby. See https://github.com/aappleby/smhasher for * details. */ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -uint32_t hash_x86_32(const void *key, int len, uint32_t seed); -void hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]); -void hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]); -void hash(const void *key, size_t len, const uint32_t seed, - size_t r_hash[2]); -#endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) /******************************************************************************/ /* Internal implementation. */ -JEMALLOC_INLINE uint32_t -hash_rotl_32(uint32_t x, int8_t r) -{ - +static inline uint32_t +hash_rotl_32(uint32_t x, int8_t r) { return ((x << r) | (x >> (32 - r))); } -JEMALLOC_INLINE uint64_t -hash_rotl_64(uint64_t x, int8_t r) -{ - +static inline uint64_t +hash_rotl_64(uint64_t x, int8_t r) { return ((x << r) | (x >> (64 - r))); } -JEMALLOC_INLINE uint32_t -hash_get_block_32(const uint32_t *p, int i) -{ +static inline uint32_t +hash_get_block_32(const uint32_t *p, int i) { + /* Handle unaligned read. */ + if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { + uint32_t ret; - return (p[i]); + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); + return ret; + } + + return p[i]; } -JEMALLOC_INLINE uint64_t -hash_get_block_64(const uint64_t *p, int i) -{ +static inline uint64_t +hash_get_block_64(const uint64_t *p, int i) { + /* Handle unaligned read. */ + if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { + uint64_t ret; - return (p[i]); -} + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); + return ret; + } -JEMALLOC_INLINE uint32_t -hash_fmix_32(uint32_t h) -{ + return p[i]; +} +static inline uint32_t +hash_fmix_32(uint32_t h) { h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; h ^= h >> 16; - return (h); + return h; } -JEMALLOC_INLINE uint64_t -hash_fmix_64(uint64_t k) -{ - +static inline uint64_t +hash_fmix_64(uint64_t k) { k ^= k >> 33; k *= KQU(0xff51afd7ed558ccd); k ^= k >> 33; k *= KQU(0xc4ceb9fe1a85ec53); k ^= k >> 33; - return (k); + return k; } -JEMALLOC_INLINE uint32_t -hash_x86_32(const void *key, int len, uint32_t seed) -{ +static inline uint32_t +hash_x86_32(const void *key, int len, uint32_t seed) { const uint8_t *data = (const uint8_t *) key; const int nblocks = len / 4; @@ -133,13 +116,12 @@ hash_x86_32(const void *key, int len, uint32_t seed) h1 = hash_fmix_32(h1); - return (h1); + return h1; } -UNUSED JEMALLOC_INLINE void +UNUSED static inline void hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]) -{ + uint64_t r_out[2]) { const uint8_t * data = (const uint8_t *) key; const int nblocks = len / 16; @@ -238,10 +220,9 @@ hash_x86_128(const void *key, const int len, uint32_t seed, r_out[1] = (((uint64_t) h4) << 32) | h3; } -UNUSED JEMALLOC_INLINE void +UNUSED static inline void hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]) -{ + uint64_t r_out[2]) { const uint8_t *data = (const uint8_t *) key; const int nblocks = len / 16; @@ -279,22 +260,22 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t k2 = 0; switch (len & 15) { - case 15: k2 ^= ((uint64_t)(tail[14])) << 48; - case 14: k2 ^= ((uint64_t)(tail[13])) << 40; - case 13: k2 ^= ((uint64_t)(tail[12])) << 32; - case 12: k2 ^= ((uint64_t)(tail[11])) << 24; - case 11: k2 ^= ((uint64_t)(tail[10])) << 16; - case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; + case 15: k2 ^= ((uint64_t)(tail[14])) << 48; /* falls through */ + case 14: k2 ^= ((uint64_t)(tail[13])) << 40; /* falls through */ + case 13: k2 ^= ((uint64_t)(tail[12])) << 32; /* falls through */ + case 12: k2 ^= ((uint64_t)(tail[11])) << 24; /* falls through */ + case 11: k2 ^= ((uint64_t)(tail[10])) << 16; /* falls through */ + case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; /* falls through */ case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; - - case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; - case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; - case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; - case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; - case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; - case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; - case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; + /* falls through */ + case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; /* falls through */ + case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; /* falls through */ + case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; /* falls through */ + case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; /* falls through */ + case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; /* falls through */ + case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; /* falls through */ + case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; /* falls through */ case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; } @@ -318,19 +299,20 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, /******************************************************************************/ /* API. */ -JEMALLOC_INLINE void -hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) -{ +static inline void +hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { + assert(len <= INT_MAX); /* Unfortunate implementation limitation. */ + #if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) - hash_x64_128(key, len, seed, (uint64_t *)r_hash); + hash_x64_128(key, (int)len, seed, (uint64_t *)r_hash); #else - uint64_t hashes[2]; - hash_x86_128(key, len, seed, hashes); - r_hash[0] = (size_t)hashes[0]; - r_hash[1] = (size_t)hashes[1]; + { + uint64_t hashes[2]; + hash_x86_128(key, (int)len, seed, hashes); + r_hash[0] = (size_t)hashes[0]; + r_hash[1] = (size_t)hashes[1]; + } #endif } -#endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_HASH_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/hooks.h b/deps/jemalloc/include/jemalloc/internal/hooks.h new file mode 100644 index 000000000..cd49afcb0 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/hooks.h @@ -0,0 +1,19 @@ +#ifndef JEMALLOC_INTERNAL_HOOKS_H +#define JEMALLOC_INTERNAL_HOOKS_H + +extern JEMALLOC_EXPORT void (*hooks_arena_new_hook)(); +extern JEMALLOC_EXPORT void (*hooks_libc_hook)(); + +#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) + +#define open JEMALLOC_HOOK(open, hooks_libc_hook) +#define read JEMALLOC_HOOK(read, hooks_libc_hook) +#define write JEMALLOC_HOOK(write, hooks_libc_hook) +#define readlink JEMALLOC_HOOK(readlink, hooks_libc_hook) +#define close JEMALLOC_HOOK(close, hooks_libc_hook) +#define creat JEMALLOC_HOOK(creat, hooks_libc_hook) +#define secure_getenv JEMALLOC_HOOK(secure_getenv, hooks_libc_hook) +/* Note that this is undef'd and re-define'd in src/prof.c. */ +#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook) + +#endif /* JEMALLOC_INTERNAL_HOOKS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/huge.h b/deps/jemalloc/include/jemalloc/internal/huge.h deleted file mode 100644 index ece7af980..000000000 --- a/deps/jemalloc/include/jemalloc/internal/huge.h +++ /dev/null @@ -1,36 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache); -void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, - bool zero, tcache_t *tcache); -bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, - size_t usize_max, bool zero); -void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, - size_t usize, size_t alignment, bool zero, tcache_t *tcache); -#ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(void *, size_t); -extern huge_dalloc_junk_t *huge_dalloc_junk; -#endif -void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); -arena_t *huge_aalloc(const void *ptr); -size_t huge_salloc(const void *ptr); -prof_tctx_t *huge_prof_tctx_get(const void *ptr); -void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); -void huge_prof_tctx_reset(const void *ptr); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in deleted file mode 100644 index 8536a3eda..000000000 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in +++ /dev/null @@ -1,1134 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_H -#define JEMALLOC_INTERNAL_H - -#include "jemalloc_internal_defs.h" -#include "jemalloc/internal/jemalloc_internal_decls.h" - -#ifdef JEMALLOC_UTRACE -#include <sys/ktrace.h> -#endif - -#define JEMALLOC_NO_DEMANGLE -#ifdef JEMALLOC_JET -# define JEMALLOC_N(n) jet_##n -# include "jemalloc/internal/public_namespace.h" -# define JEMALLOC_NO_RENAME -# include "../jemalloc@install_suffix@.h" -# undef JEMALLOC_NO_RENAME -#else -# define JEMALLOC_N(n) @private_namespace@##n -# include "../jemalloc@install_suffix@.h" -#endif -#include "jemalloc/internal/private_namespace.h" - -static const bool config_debug = -#ifdef JEMALLOC_DEBUG - true -#else - false -#endif - ; -static const bool have_dss = -#ifdef JEMALLOC_DSS - true -#else - false -#endif - ; -static const bool config_fill = -#ifdef JEMALLOC_FILL - true -#else - false -#endif - ; -static const bool config_lazy_lock = -#ifdef JEMALLOC_LAZY_LOCK - true -#else - false -#endif - ; -static const bool config_prof = -#ifdef JEMALLOC_PROF - true -#else - false -#endif - ; -static const bool config_prof_libgcc = -#ifdef JEMALLOC_PROF_LIBGCC - true -#else - false -#endif - ; -static const bool config_prof_libunwind = -#ifdef JEMALLOC_PROF_LIBUNWIND - true -#else - false -#endif - ; -static const bool maps_coalesce = -#ifdef JEMALLOC_MAPS_COALESCE - true -#else - false -#endif - ; -static const bool config_munmap = -#ifdef JEMALLOC_MUNMAP - true -#else - false -#endif - ; -static const bool config_stats = -#ifdef JEMALLOC_STATS - true -#else - false -#endif - ; -static const bool config_tcache = -#ifdef JEMALLOC_TCACHE - true -#else - false -#endif - ; -static const bool config_tls = -#ifdef JEMALLOC_TLS - true -#else - false -#endif - ; -static const bool config_utrace = -#ifdef JEMALLOC_UTRACE - true -#else - false -#endif - ; -static const bool config_valgrind = -#ifdef JEMALLOC_VALGRIND - true -#else - false -#endif - ; -static const bool config_xmalloc = -#ifdef JEMALLOC_XMALLOC - true -#else - false -#endif - ; -static const bool config_ivsalloc = -#ifdef JEMALLOC_IVSALLOC - true -#else - false -#endif - ; -static const bool config_cache_oblivious = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - true -#else - false -#endif - ; - -#ifdef JEMALLOC_C11ATOMICS -#include <stdatomic.h> -#endif - -#ifdef JEMALLOC_ATOMIC9 -#include <machine/atomic.h> -#endif - -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) -#include <libkern/OSAtomic.h> -#endif - -#ifdef JEMALLOC_ZONE -#include <mach/mach_error.h> -#include <mach/mach_init.h> -#include <mach/vm_map.h> -#include <malloc/malloc.h> -#endif - -#define RB_COMPACT -#include "jemalloc/internal/rb.h" -#include "jemalloc/internal/qr.h" -#include "jemalloc/internal/ql.h" - -/* - * jemalloc can conceptually be broken into components (arena, tcache, etc.), - * but there are circular dependencies that cannot be broken without - * substantial performance degradation. In order to reduce the effect on - * visual code flow, read the header files in multiple passes, with one of the - * following cpp variables defined during each pass: - * - * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data - * types. - * JEMALLOC_H_STRUCTS : Data structures. - * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. - * JEMALLOC_H_INLINES : Inline functions. - */ -/******************************************************************************/ -#define JEMALLOC_H_TYPES - -#include "jemalloc/internal/jemalloc_internal_macros.h" - -/* Size class index type. */ -typedef unsigned szind_t; - -/* - * Flags bits: - * - * a: arena - * t: tcache - * 0: unused - * z: zero - * n: alignment - * - * aaaaaaaa aaaatttt tttttttt 0znnnnnn - */ -#define MALLOCX_ARENA_MASK ((int)~0xfffff) -#define MALLOCX_ARENA_MAX 0xffe -#define MALLOCX_TCACHE_MASK ((int)~0xfff000ffU) -#define MALLOCX_TCACHE_MAX 0xffd -#define MALLOCX_LG_ALIGN_MASK ((int)0x3f) -/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ -#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ - (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) -#define MALLOCX_ALIGN_GET(flags) \ - (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) -#define MALLOCX_ZERO_GET(flags) \ - ((bool)(flags & MALLOCX_ZERO)) - -#define MALLOCX_TCACHE_GET(flags) \ - (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> 8)) - 2) -#define MALLOCX_ARENA_GET(flags) \ - (((unsigned)(((unsigned)flags) >> 20)) - 1) - -/* Smallest size class to support. */ -#define TINY_MIN (1U << LG_TINY_MIN) - -/* - * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size - * classes). - */ -#ifndef LG_QUANTUM -# if (defined(__i386__) || defined(_M_IX86)) -# define LG_QUANTUM 4 -# endif -# ifdef __ia64__ -# define LG_QUANTUM 4 -# endif -# ifdef __alpha__ -# define LG_QUANTUM 4 -# endif -# if (defined(__sparc64__) || defined(__sparcv9)) -# define LG_QUANTUM 4 -# endif -# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) -# define LG_QUANTUM 4 -# endif -# ifdef __arm__ -# define LG_QUANTUM 3 -# endif -# ifdef __aarch64__ -# define LG_QUANTUM 4 -# endif -# ifdef __hppa__ -# define LG_QUANTUM 4 -# endif -# ifdef __mips__ -# define LG_QUANTUM 3 -# endif -# ifdef __or1k__ -# define LG_QUANTUM 3 -# endif -# ifdef __powerpc__ -# define LG_QUANTUM 4 -# endif -# ifdef __s390__ -# define LG_QUANTUM 4 -# endif -# ifdef __SH4__ -# define LG_QUANTUM 4 -# endif -# ifdef __tile__ -# define LG_QUANTUM 4 -# endif -# ifdef __le32__ -# define LG_QUANTUM 4 -# endif -# ifndef LG_QUANTUM -# error "Unknown minimum alignment for architecture; specify via " - "--with-lg-quantum" -# endif -#endif - -#define QUANTUM ((size_t)(1U << LG_QUANTUM)) -#define QUANTUM_MASK (QUANTUM - 1) - -/* Return the smallest quantum multiple that is >= a. */ -#define QUANTUM_CEILING(a) \ - (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) - -#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) -#define LONG_MASK (LONG - 1) - -/* Return the smallest long multiple that is >= a. */ -#define LONG_CEILING(a) \ - (((a) + LONG_MASK) & ~LONG_MASK) - -#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) -#define PTR_MASK (SIZEOF_PTR - 1) - -/* Return the smallest (void *) multiple that is >= a. */ -#define PTR_CEILING(a) \ - (((a) + PTR_MASK) & ~PTR_MASK) - -/* - * Maximum size of L1 cache line. This is used to avoid cache line aliasing. - * In addition, this controls the spacing of cacheline-spaced size classes. - * - * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can - * only handle raw constants. - */ -#define LG_CACHELINE 6 -#define CACHELINE 64 -#define CACHELINE_MASK (CACHELINE - 1) - -/* Return the smallest cacheline multiple that is >= s. */ -#define CACHELINE_CEILING(s) \ - (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) - -/* Page size. LG_PAGE is determined by the configure script. */ -#ifdef PAGE_MASK -# undef PAGE_MASK -#endif -#define PAGE ((size_t)(1U << LG_PAGE)) -#define PAGE_MASK ((size_t)(PAGE - 1)) - -/* Return the smallest pagesize multiple that is >= s. */ -#define PAGE_CEILING(s) \ - (((s) + PAGE_MASK) & ~PAGE_MASK) - -/* Return the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2BASE(a, alignment) \ - ((void *)((uintptr_t)(a) & (-(alignment)))) - -/* Return the offset between a and the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ - ((size_t)((uintptr_t)(a) & (alignment - 1))) - -/* Return the smallest alignment multiple that is >= s. */ -#define ALIGNMENT_CEILING(s, alignment) \ - (((s) + (alignment - 1)) & (-(alignment))) - -/* Declare a variable-length array. */ -#if __STDC_VERSION__ < 199901L -# ifdef _MSC_VER -# include <malloc.h> -# define alloca _alloca -# else -# ifdef JEMALLOC_HAS_ALLOCA_H -# include <alloca.h> -# else -# include <stdlib.h> -# endif -# endif -# define VARIABLE_ARRAY(type, name, count) \ - type *name = alloca(sizeof(type) * (count)) -#else -# define VARIABLE_ARRAY(type, name, count) type name[(count)] -#endif - -#include "jemalloc/internal/valgrind.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_TYPES -/******************************************************************************/ -#define JEMALLOC_H_STRUCTS - -#include "jemalloc/internal/valgrind.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/bitmap.h" -#define JEMALLOC_ARENA_STRUCTS_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_STRUCTS_A -#include "jemalloc/internal/extent.h" -#define JEMALLOC_ARENA_STRUCTS_B -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_STRUCTS_B -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" -#include "jemalloc/internal/prof.h" - -#include "jemalloc/internal/tsd.h" - -#undef JEMALLOC_H_STRUCTS -/******************************************************************************/ -#define JEMALLOC_H_EXTERNS - -extern bool opt_abort; -extern const char *opt_junk; -extern bool opt_junk_alloc; -extern bool opt_junk_free; -extern size_t opt_quarantine; -extern bool opt_redzone; -extern bool opt_utrace; -extern bool opt_xmalloc; -extern bool opt_zero; -extern size_t opt_narenas; - -extern bool in_valgrind; - -/* Number of CPUs. */ -extern unsigned ncpus; - -/* - * index2size_tab encodes the same information as could be computed (at - * unacceptable cost in some code paths) by index2size_compute(). - */ -extern size_t const index2size_tab[NSIZES]; -/* - * size2index_tab is a compact lookup table that rounds request sizes up to - * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via size2index(). - */ -extern uint8_t const size2index_tab[]; - -arena_t *a0get(void); -void *a0malloc(size_t size); -void a0dalloc(void *ptr); -void *bootstrap_malloc(size_t size); -void *bootstrap_calloc(size_t num, size_t size); -void bootstrap_free(void *ptr); -arena_t *arenas_extend(unsigned ind); -arena_t *arena_init(unsigned ind); -unsigned narenas_total_get(void); -arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing); -arena_t *arena_choose_hard(tsd_t *tsd); -void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); -unsigned arena_nbound(unsigned ind); -void thread_allocated_cleanup(tsd_t *tsd); -void thread_deallocated_cleanup(tsd_t *tsd); -void arena_cleanup(tsd_t *tsd); -void arenas_cache_cleanup(tsd_t *tsd); -void narenas_cache_cleanup(tsd_t *tsd); -void arenas_cache_bypass_cleanup(tsd_t *tsd); -void jemalloc_prefork(void); -void jemalloc_postfork_parent(void); -void jemalloc_postfork_child(void); - -#include "jemalloc/internal/valgrind.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" -#include "jemalloc/internal/prof.h" -#include "jemalloc/internal/tsd.h" - -#undef JEMALLOC_H_EXTERNS -/******************************************************************************/ -#define JEMALLOC_H_INLINES - -#include "jemalloc/internal/valgrind.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" - -#ifndef JEMALLOC_ENABLE_INLINE -szind_t size2index_compute(size_t size); -szind_t size2index_lookup(size_t size); -szind_t size2index(size_t size); -size_t index2size_compute(szind_t index); -size_t index2size_lookup(szind_t index); -size_t index2size(szind_t index); -size_t s2u_compute(size_t size); -size_t s2u_lookup(size_t size); -size_t s2u(size_t size); -size_t sa2u(size_t size, size_t alignment); -arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, - bool refresh_if_missing); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_INLINE szind_t -size2index_compute(size_t size) -{ - -#if (NTBINS != 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); - return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); - } -#endif - { - size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); - size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : - x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - size_t grp = shift << LG_SIZE_CLASS_GROUP; - - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - - size_t delta_inverse_mask = ZI(-1) << lg_delta; - size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - size_t index = NTBINS + grp + mod; - return (index); - } -} - -JEMALLOC_ALWAYS_INLINE szind_t -size2index_lookup(size_t size) -{ - - assert(size <= LOOKUP_MAXCLASS); - { - size_t ret = ((size_t)(size2index_tab[(size-1) >> - LG_TINY_MIN])); - assert(ret == size2index_compute(size)); - return (ret); - } -} - -JEMALLOC_ALWAYS_INLINE szind_t -size2index(size_t size) -{ - - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) - return (size2index_lookup(size)); - return (size2index_compute(size)); -} - -JEMALLOC_INLINE size_t -index2size_compute(szind_t index) -{ - -#if (NTBINS > 0) - if (index < NTBINS) - return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); -#endif - { - size_t reduced_index = index - NTBINS; - size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; - size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - - 1); - - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_QUANTUM + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; - - size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_QUANTUM-1); - size_t mod_size = (mod+1) << lg_delta; - - size_t usize = grp_size + mod_size; - return (usize); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -index2size_lookup(szind_t index) -{ - size_t ret = (size_t)index2size_tab[index]; - assert(ret == index2size_compute(index)); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE size_t -index2size(szind_t index) -{ - - assert(index < NSIZES); - return (index2size_lookup(index)); -} - -JEMALLOC_ALWAYS_INLINE size_t -s2u_compute(size_t size) -{ - -#if (NTBINS > 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); - return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : - (ZU(1) << lg_ceil)); - } -#endif - { - size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (size + delta_mask) & ~delta_mask; - return (usize); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -s2u_lookup(size_t size) -{ - size_t ret = index2size_lookup(size2index_lookup(size)); - - assert(ret == s2u_compute(size)); - return (ret); -} - -/* - * Compute usable size that would result from allocating an object with the - * specified size. - */ -JEMALLOC_ALWAYS_INLINE size_t -s2u(size_t size) -{ - - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) - return (s2u_lookup(size)); - return (s2u_compute(size)); -} - -/* - * Compute usable size that would result from allocating an object with the - * specified size and alignment. - */ -JEMALLOC_ALWAYS_INLINE size_t -sa2u(size_t size, size_t alignment) -{ - size_t usize; - - assert(alignment != 0 && ((alignment - 1) & alignment) == 0); - - /* Try for a small size class. */ - if (size <= SMALL_MAXCLASS && alignment < PAGE) { - /* - * Round size up to the nearest multiple of alignment. - * - * This done, we can take advantage of the fact that for each - * small size class, every object is aligned at the smallest - * power of two that is non-zero in the base two representation - * of the size. For example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - */ - usize = s2u(ALIGNMENT_CEILING(size, alignment)); - if (usize < LARGE_MINCLASS) - return (usize); - } - - /* Try for a large size class. */ - if (likely(size <= large_maxclass) && likely(alignment < chunksize)) { - /* - * We can't achieve subpage alignment, so round up alignment - * to the minimum that can actually be supported. - */ - alignment = PAGE_CEILING(alignment); - - /* Make sure result is a large size class. */ - usize = (size <= LARGE_MINCLASS) ? LARGE_MINCLASS : s2u(size); - - /* - * Calculate the size of the over-size run that arena_palloc() - * would need to allocate in order to guarantee the alignment. - */ - if (usize + large_pad + alignment - PAGE <= arena_maxrun) - return (usize); - } - - /* Huge size class. Beware of size_t overflow. */ - - /* - * We can't achieve subchunk alignment, so round up alignment to the - * minimum that can actually be supported. - */ - alignment = CHUNK_CEILING(alignment); - if (alignment == 0) { - /* size_t overflow. */ - return (0); - } - - /* Make sure result is a huge size class. */ - if (size <= chunksize) - usize = chunksize; - else { - usize = s2u(size); - if (usize < size) { - /* size_t overflow. */ - return (0); - } - } - - /* - * Calculate the multi-chunk mapping that huge_palloc() would need in - * order to guarantee the alignment. - */ - if (usize + alignment - PAGE < usize) { - /* size_t overflow. */ - return (0); - } - return (usize); -} - -/* Choose an arena based on a per-thread value. */ -JEMALLOC_INLINE arena_t * -arena_choose(tsd_t *tsd, arena_t *arena) -{ - arena_t *ret; - - if (arena != NULL) - return (arena); - - if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) - ret = arena_choose_hard(tsd); - - return (ret); -} - -JEMALLOC_INLINE arena_t * -arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, - bool refresh_if_missing) -{ - arena_t *arena; - arena_t **arenas_cache = tsd_arenas_cache_get(tsd); - - /* init_if_missing requires refresh_if_missing. */ - assert(!init_if_missing || refresh_if_missing); - - if (unlikely(arenas_cache == NULL)) { - /* arenas_cache hasn't been initialized yet. */ - return (arena_get_hard(tsd, ind, init_if_missing)); - } - if (unlikely(ind >= tsd_narenas_cache_get(tsd))) { - /* - * ind is invalid, cache is old (too small), or arena to be - * initialized. - */ - return (refresh_if_missing ? arena_get_hard(tsd, ind, - init_if_missing) : NULL); - } - arena = arenas_cache[ind]; - if (likely(arena != NULL) || !refresh_if_missing) - return (arena); - return (arena_get_hard(tsd, ind, init_if_missing)); -} -#endif - -#include "jemalloc/internal/bitmap.h" -/* - * Include portions of arena.h interleaved with tcache.h in order to resolve - * circular dependencies. - */ -#define JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/tcache.h" -#define JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" - -#ifndef JEMALLOC_ENABLE_INLINE -arena_t *iaalloc(const void *ptr); -size_t isalloc(const void *ptr, bool demote); -void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, - bool is_metadata, arena_t *arena); -void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); -void *imalloc(tsd_t *tsd, size_t size); -void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); -void *icalloc(tsd_t *tsd, size_t size); -void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_metadata, arena_t *arena); -void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena); -void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t ivsalloc(const void *ptr, bool demote); -size_t u2rz(size_t usize); -size_t p2rz(const void *ptr); -void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata); -void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); -void idalloc(tsd_t *tsd, void *ptr); -void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); -void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); -void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); -void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache, - arena_t *arena); -void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); -void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero); -bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_ALWAYS_INLINE arena_t * -iaalloc(const void *ptr) -{ - - assert(ptr != NULL); - - return (arena_aalloc(ptr)); -} - -/* - * Typical usage: - * void *ptr = [...] - * size_t sz = isalloc(ptr, config_prof); - */ -JEMALLOC_ALWAYS_INLINE size_t -isalloc(const void *ptr, bool demote) -{ - - assert(ptr != NULL); - /* Demotion only makes sense if config_prof is true. */ - assert(config_prof || !demote); - - return (arena_salloc(ptr, demote)); -} - -JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata, - arena_t *arena) -{ - void *ret; - - assert(size != 0); - - ret = arena_malloc(tsd, arena, size, zero, tcache); - if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, - config_prof)); - } - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) -{ - - return (iallocztm(tsd, size, false, tcache, false, arena)); -} - -JEMALLOC_ALWAYS_INLINE void * -imalloc(tsd_t *tsd, size_t size) -{ - - return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL)); -} - -JEMALLOC_ALWAYS_INLINE void * -icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) -{ - - return (iallocztm(tsd, size, true, tcache, false, arena)); -} - -JEMALLOC_ALWAYS_INLINE void * -icalloc(tsd_t *tsd, size_t size) -{ - - return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL)); -} - -JEMALLOC_ALWAYS_INLINE void * -ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_metadata, arena_t *arena) -{ - void *ret; - - assert(usize != 0); - assert(usize == sa2u(usize, alignment)); - - ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); - assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); - if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, - config_prof)); - } - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena) -{ - - return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena)); -} - -JEMALLOC_ALWAYS_INLINE void * -ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) -{ - - return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, - NULL), false, NULL)); -} - -JEMALLOC_ALWAYS_INLINE size_t -ivsalloc(const void *ptr, bool demote) -{ - extent_node_t *node; - - /* Return 0 if ptr is not within a chunk managed by jemalloc. */ - node = chunk_lookup(ptr, false); - if (node == NULL) - return (0); - /* Only arena chunks should be looked up via interior pointers. */ - assert(extent_node_addr_get(node) == ptr || - extent_node_achunk_get(node)); - - return (isalloc(ptr, demote)); -} - -JEMALLOC_INLINE size_t -u2rz(size_t usize) -{ - size_t ret; - - if (usize <= SMALL_MAXCLASS) { - szind_t binind = size2index(usize); - ret = arena_bin_info[binind].redzone_size; - } else - ret = 0; - - return (ret); -} - -JEMALLOC_INLINE size_t -p2rz(const void *ptr) -{ - size_t usize = isalloc(ptr, false); - - return (u2rz(usize)); -} - -JEMALLOC_ALWAYS_INLINE void -idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) -{ - - assert(ptr != NULL); - if (config_stats && is_metadata) { - arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr, - config_prof)); - } - - arena_dalloc(tsd, ptr, tcache); -} - -JEMALLOC_ALWAYS_INLINE void -idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache) -{ - - idalloctm(tsd, ptr, tcache, false); -} - -JEMALLOC_ALWAYS_INLINE void -idalloc(tsd_t *tsd, void *ptr) -{ - - idalloctm(tsd, ptr, tcache_get(tsd, false), false); -} - -JEMALLOC_ALWAYS_INLINE void -iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) -{ - - if (config_fill && unlikely(opt_quarantine)) - quarantine(tsd, ptr); - else - idalloctm(tsd, ptr, tcache, false); -} - -JEMALLOC_ALWAYS_INLINE void -isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) -{ - - arena_sdalloc(tsd, ptr, size, tcache); -} - -JEMALLOC_ALWAYS_INLINE void -isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) -{ - - if (config_fill && unlikely(opt_quarantine)) - quarantine(tsd, ptr); - else - isdalloct(tsd, ptr, size, tcache); -} - -JEMALLOC_ALWAYS_INLINE void * -iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) -{ - void *p; - size_t usize, copysize; - - usize = sa2u(size + extra, alignment); - if (usize == 0) - return (NULL); - p = ipalloct(tsd, usize, alignment, zero, tcache, arena); - if (p == NULL) { - if (extra == 0) - return (NULL); - /* Try again, without extra this time. */ - usize = sa2u(size, alignment); - if (usize == 0) - return (NULL); - p = ipalloct(tsd, usize, alignment, zero, tcache, arena); - if (p == NULL) - return (NULL); - } - /* - * Copy at most size bytes (not size+extra), since the caller has no - * expectation that the extra bytes will be reliably preserved. - */ - copysize = (size < oldsize) ? size : oldsize; - memcpy(p, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); - return (p); -} - -JEMALLOC_ALWAYS_INLINE void * -iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, tcache_t *tcache, arena_t *arena) -{ - - assert(ptr != NULL); - assert(size != 0); - - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) - != 0) { - /* - * Existing object alignment is inadequate; allocate new space - * and copy. - */ - return (iralloct_realign(tsd, ptr, oldsize, size, 0, alignment, - zero, tcache, arena)); - } - - return (arena_ralloc(tsd, arena, ptr, oldsize, size, alignment, zero, - tcache)); -} - -JEMALLOC_ALWAYS_INLINE void * -iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero) -{ - - return (iralloct(tsd, ptr, oldsize, size, alignment, zero, - tcache_get(tsd, true), NULL)); -} - -JEMALLOC_ALWAYS_INLINE bool -ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, - bool zero) -{ - - assert(ptr != NULL); - assert(size != 0); - - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) - != 0) { - /* Existing object alignment is inadequate. */ - return (true); - } - - return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); -} -#endif - -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_INLINES -/******************************************************************************/ -#endif /* JEMALLOC_INTERNAL_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h index a601d6ebb..be70df510 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h @@ -1,11 +1,20 @@ #ifndef JEMALLOC_INTERNAL_DECLS_H -#define JEMALLOC_INTERNAL_DECLS_H +#define JEMALLOC_INTERNAL_DECLS_H #include <math.h> #ifdef _WIN32 # include <windows.h> # include "msvc_compat/windows_extra.h" - +# ifdef _WIN64 +# if LG_VADDR <= 32 +# error Generate the headers using x64 vcargs +# endif +# else +# if LG_VADDR > 32 +# undef LG_VADDR +# define LG_VADDR 32 +# endif +# endif #else # include <sys/param.h> # include <sys/mman.h> @@ -14,10 +23,27 @@ # if !defined(SYS_write) && defined(__NR_write) # define SYS_write __NR_write # endif +# if defined(SYS_open) && defined(__aarch64__) + /* Android headers may define SYS_open to __NR_open even though + * __NR_open may not exist on AArch64 (superseded by __NR_openat). */ +# undef SYS_open +# endif # include <sys/uio.h> # endif # include <pthread.h> +# include <signal.h> +# ifdef JEMALLOC_OS_UNFAIR_LOCK +# include <os/lock.h> +# endif +# ifdef JEMALLOC_GLIBC_MALLOC_HOOK +# include <sched.h> +# endif # include <errno.h> +# include <sys/time.h> +# include <time.h> +# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# include <mach/mach_time.h> +# endif #endif #include <sys/types.h> @@ -25,6 +51,9 @@ #ifndef SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX #endif +#ifndef SSIZE_MAX +# define SSIZE_MAX ((ssize_t)(SIZE_T_MAX >> 1)) +#endif #include <stdarg.h> #include <stdbool.h> #include <stdio.h> @@ -50,9 +79,7 @@ typedef intptr_t ssize_t; # pragma warning(disable: 4996) #if _MSC_VER < 1800 static int -isblank(int c) -{ - +isblank(int c) { return (c == '\t' || c == ' '); } #endif diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in index b0f8caaf8..8dad9a1db 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -1,5 +1,5 @@ #ifndef JEMALLOC_INTERNAL_DEFS_H_ -#define JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ /* * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all * public APIs to be prefixed. This makes it possible, with some care, to use @@ -9,6 +9,18 @@ #undef JEMALLOC_CPREFIX /* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +#undef JEMALLOC_OVERRIDE___LIBC_CALLOC +#undef JEMALLOC_OVERRIDE___LIBC_FREE +#undef JEMALLOC_OVERRIDE___LIBC_MALLOC +#undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN +#undef JEMALLOC_OVERRIDE___LIBC_REALLOC +#undef JEMALLOC_OVERRIDE___LIBC_VALLOC +#undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN + +/* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. * For shared libraries, symbol visibility mechanisms prevent these symbols * from being exported, but for static libraries, naming collisions are a real @@ -21,18 +33,24 @@ * order to yield to another virtual CPU. */ #undef CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#undef HAVE_CPU_SPINWAIT + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#undef LG_VADDR /* Defined if C11 atomics are available. */ -#undef JEMALLOC_C11ATOMICS +#undef JEMALLOC_C11_ATOMICS -/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */ -#undef JEMALLOC_ATOMIC9 +/* Defined if GCC __atomic atomics are available. */ +#undef JEMALLOC_GCC_ATOMIC_ATOMICS -/* - * Defined if OSAtomic*() functions are available, as provided by Darwin, and - * documented in the atomic(3) manual page. - */ -#undef JEMALLOC_OSATOMIC +/* Defined if GCC __sync atomics are available. */ +#undef JEMALLOC_GCC_SYNC_ATOMICS /* * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and @@ -56,9 +74,9 @@ #undef JEMALLOC_HAVE_BUILTIN_CLZ /* - * Defined if madvise(2) is available. + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. */ -#undef JEMALLOC_HAVE_MADVISE +#undef JEMALLOC_OS_UNFAIR_LOCK /* * Defined if OSSpin*() functions are available, as provided by Darwin, and @@ -66,6 +84,9 @@ */ #undef JEMALLOC_OSSPIN +/* Defined if syscall(2) is usable. */ +#undef JEMALLOC_USE_SYSCALL + /* * Defined if secure_getenv(3) is available. */ @@ -76,6 +97,27 @@ */ #undef JEMALLOC_HAVE_ISSETUGID +/* Defined if pthread_atfork(3) is available. */ +#undef JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Defined if pthread_setname_np(3) is available. */ +#undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +#undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME + /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc @@ -102,12 +144,6 @@ /* Non-empty if the tls_model attribute is supported. */ #undef JEMALLOC_TLS_MODEL -/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ -#undef JEMALLOC_CC_SILENCE - -/* JEMALLOC_CODE_COVERAGE enables test code coverage analysis. */ -#undef JEMALLOC_CODE_COVERAGE - /* * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables * inline functions. @@ -130,36 +166,23 @@ #undef JEMALLOC_PROF_GCC /* - * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. - * This makes it possible to allocate/deallocate objects without any locking - * when the cache is in the steady state. - */ -#undef JEMALLOC_TCACHE - -/* - * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage * segment (DSS). */ #undef JEMALLOC_DSS -/* Support memory filling (junk/zero/quarantine/redzone). */ +/* Support memory filling (junk/zero). */ #undef JEMALLOC_FILL /* Support utrace(2)-based tracing. */ #undef JEMALLOC_UTRACE -/* Support Valgrind. */ -#undef JEMALLOC_VALGRIND - /* Support optional abort() on OOM. */ #undef JEMALLOC_XMALLOC /* Support lazy locking (avoid locking unless a second thread is launched). */ #undef JEMALLOC_LAZY_LOCK -/* Minimum size class to support is 2^LG_TINY_MIN bytes. */ -#undef LG_TINY_MIN - /* * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size * classes). @@ -170,6 +193,13 @@ #undef LG_PAGE /* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#undef LG_HUGEPAGE + +/* * If defined, adjacent virtual memory mappings with identical attributes * automatically coalesce, and they fragment when changes are made to subranges. * This is the normal order of things for mmap()/munmap(), but on Windows @@ -179,27 +209,29 @@ #undef JEMALLOC_MAPS_COALESCE /* - * If defined, use munmap() to unmap freed chunks, rather than storing them for - * later reuse. This is disabled by default on Linux because common sequences - * of mmap()/munmap() calls will cause virtual memory map holes. + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. */ -#undef JEMALLOC_MUNMAP +#undef JEMALLOC_RETAIN /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS /* - * ffs()/ffsl() functions to use for bitmapping. Don't use these directly; - * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h. + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h */ -#undef JEMALLOC_INTERNAL_FFSL -#undef JEMALLOC_INTERNAL_FFS +#undef JEMALLOC_INTERNAL_UNREACHABLE /* - * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside - * within jemalloc-owned chunks before dereferencing them. + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. */ -#undef JEMALLOC_IVSALLOC +#undef JEMALLOC_INTERNAL_FFSLL +#undef JEMALLOC_INTERNAL_FFSL +#undef JEMALLOC_INTERNAL_FFS /* * If defined, explicitly attempt to more uniformly distribute large allocation @@ -208,23 +240,64 @@ #undef JEMALLOC_CACHE_OBLIVIOUS /* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +#undef JEMALLOC_LOG + +/* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ #undef JEMALLOC_ZONE -#undef JEMALLOC_ZONE_VERSION + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +#undef JEMALLOC_SYSCTL_VM_OVERCOMMIT +#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + +/* Defined if madvise(2) is available. */ +#undef JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#undef JEMALLOC_HAVE_MADVISE_HUGE /* * Methods for purging unused pages differ between operating systems. * - * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, * such that new pages will be demand-zeroed if - * the address region is later touched. - * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being - * unused, such that they will be discarded rather - * than swapped out. + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. */ -#undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_FREE +#undef JEMALLOC_PURGE_MADVISE_DONTNEED +#undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +#undef JEMALLOC_DEFINE_MADVISE_FREE + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#undef JEMALLOC_MADVISE_DONTDUMP + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +#undef JEMALLOC_THP /* Define if operating system has alloca.h header. */ #undef JEMALLOC_HAS_ALLOCA_H @@ -241,6 +314,9 @@ /* sizeof(long) == 2^LG_SIZEOF_LONG. */ #undef LG_SIZEOF_LONG +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#undef LG_SIZEOF_LONG_LONG + /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ #undef LG_SIZEOF_INTMAX_T @@ -250,13 +326,41 @@ /* glibc memalign hook. */ #undef JEMALLOC_GLIBC_MEMALIGN_HOOK +/* pthread support */ +#undef JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#undef JEMALLOC_HAVE_DLSYM + /* Adaptive mutex support in pthreads. */ #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP +/* GNU specific sched_getcpu support */ +#undef JEMALLOC_HAVE_SCHED_GETCPU + +/* GNU specific sched_setaffinity support */ +#undef JEMALLOC_HAVE_SCHED_SETAFFINITY + +/* + * If defined, all the features necessary for background threads are present. + */ +#undef JEMALLOC_BACKGROUND_THREAD + /* * If defined, jemalloc symbols are not exported (doesn't work when * JEMALLOC_PREFIX is not defined). */ #undef JEMALLOC_EXPORT +/* config.malloc_conf options string. */ +#undef JEMALLOC_CONFIG_MALLOC_CONF + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#undef JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +#undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h new file mode 100644 index 000000000..e10fb275d --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h @@ -0,0 +1,53 @@ +#ifndef JEMALLOC_INTERNAL_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTERNS_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/tsd_types.h" + +/* TSD checks this to set thread local slow state accordingly. */ +extern bool malloc_slow; + +/* Run-time options. */ +extern bool opt_abort; +extern bool opt_abort_conf; +extern const char *opt_junk; +extern bool opt_junk_alloc; +extern bool opt_junk_free; +extern bool opt_utrace; +extern bool opt_xmalloc; +extern bool opt_zero; +extern unsigned opt_narenas; + +/* Number of CPUs. */ +extern unsigned ncpus; + +/* Number of arenas used for automatic multiplexing of threads and arenas. */ +extern unsigned narenas_auto; + +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + */ +extern atomic_p_t arenas[]; + +void *a0malloc(size_t size); +void a0dalloc(void *ptr); +void *bootstrap_malloc(size_t size); +void *bootstrap_calloc(size_t num, size_t size); +void bootstrap_free(void *ptr); +void arena_set(unsigned ind, arena_t *arena); +unsigned narenas_total_get(void); +arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); +arena_t *arena_choose_hard(tsd_t *tsd, bool internal); +void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); +void iarena_cleanup(tsd_t *tsd); +void arena_cleanup(tsd_t *tsd); +void arenas_tdata_cleanup(tsd_t *tsd); +void jemalloc_prefork(void); +void jemalloc_postfork_parent(void); +void jemalloc_postfork_child(void); +bool malloc_initialized(void); + +#endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_includes.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_includes.h new file mode 100644 index 000000000..437eaa407 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_includes.h @@ -0,0 +1,94 @@ +#ifndef JEMALLOC_INTERNAL_INCLUDES_H +#define JEMALLOC_INTERNAL_INCLUDES_H + +/* + * jemalloc can conceptually be broken into components (arena, tcache, etc.), + * but there are circular dependencies that cannot be broken without + * substantial performance degradation. + * + * Historically, we dealt with this by each header into four sections (types, + * structs, externs, and inlines), and included each header file multiple times + * in this file, picking out the portion we want on each pass using the + * following #defines: + * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data + * types. + * JEMALLOC_H_STRUCTS : Data structures. + * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. + * JEMALLOC_H_INLINES : Inline functions. + * + * We're moving toward a world in which the dependencies are explicit; each file + * will #include the headers it depends on (rather than relying on them being + * implicitly available via this file including every header file in the + * project). + * + * We're now in an intermediate state: we've broken up the header files to avoid + * having to include each one multiple times, but have not yet moved the + * dependency information into the header files (i.e. we still rely on the + * ordering in this file to ensure all a header's dependencies are available in + * its translation unit). Each component is now broken up into multiple header + * files, corresponding to the sections above (e.g. instead of "foo.h", we now + * have "foo_types.h", "foo_structs.h", "foo_externs.h", "foo_inlines.h"). + * + * Those files which have been converted to explicitly include their + * inter-component dependencies are now in the initial HERMETIC HEADERS + * section. All headers may still rely on jemalloc_preamble.h (which, by fiat, + * must be included first in every translation unit) for system headers and + * global jemalloc definitions, however. + */ + +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + +#include "jemalloc/internal/extent_types.h" +#include "jemalloc/internal/base_types.h" +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/prof_types.h" + +/******************************************************************************/ +/* STRUCTS */ +/******************************************************************************/ + +#include "jemalloc/internal/arena_structs_a.h" +#include "jemalloc/internal/extent_structs.h" +#include "jemalloc/internal/base_structs.h" +#include "jemalloc/internal/prof_structs.h" +#include "jemalloc/internal/arena_structs_b.h" +#include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/background_thread_structs.h" + +/******************************************************************************/ +/* EXTERNS */ +/******************************************************************************/ + +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/extent_externs.h" +#include "jemalloc/internal/base_externs.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/large_externs.h" +#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/background_thread_externs.h" + +/******************************************************************************/ +/* INLINES */ +/******************************************************************************/ + +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/base_inlines.h" +/* + * Include portions of arena code interleaved with tcache code in order to + * resolve circular dependencies. + */ +#include "jemalloc/internal/prof_inlines_a.h" +#include "jemalloc/internal/arena_inlines_a.h" +#include "jemalloc/internal/extent_inlines.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/tcache_inlines.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/prof_inlines_b.h" +#include "jemalloc/internal/background_thread_inlines.h" + +#endif /* JEMALLOC_INTERNAL_INCLUDES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h new file mode 100644 index 000000000..c6a1f7eb2 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -0,0 +1,172 @@ +#ifndef JEMALLOC_INTERNAL_INLINES_A_H +#define JEMALLOC_INTERNAL_INLINES_A_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/ticker.h" + +JEMALLOC_ALWAYS_INLINE malloc_cpuid_t +malloc_getcpu(void) { + assert(have_percpu_arena); +#if defined(JEMALLOC_HAVE_SCHED_GETCPU) + return (malloc_cpuid_t)sched_getcpu(); +#else + not_reached(); + return -1; +#endif +} + +/* Return the chosen arena index based on current cpu. */ +JEMALLOC_ALWAYS_INLINE unsigned +percpu_arena_choose(void) { + assert(have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)); + + malloc_cpuid_t cpuid = malloc_getcpu(); + assert(cpuid >= 0); + + unsigned arena_ind; + if ((opt_percpu_arena == percpu_arena) || ((unsigned)cpuid < ncpus / + 2)) { + arena_ind = cpuid; + } else { + assert(opt_percpu_arena == per_phycpu_arena); + /* Hyper threads on the same physical CPU share arena. */ + arena_ind = cpuid - ncpus / 2; + } + + return arena_ind; +} + +/* Return the limit of percpu auto arena range, i.e. arenas[0...ind_limit). */ +JEMALLOC_ALWAYS_INLINE unsigned +percpu_arena_ind_limit(percpu_arena_mode_t mode) { + assert(have_percpu_arena && PERCPU_ARENA_ENABLED(mode)); + if (mode == per_phycpu_arena && ncpus > 1) { + if (ncpus % 2) { + /* This likely means a misconfig. */ + return ncpus / 2 + 1; + } + return ncpus / 2; + } else { + return ncpus; + } +} + +static inline arena_tdata_t * +arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { + arena_tdata_t *tdata; + arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); + + if (unlikely(arenas_tdata == NULL)) { + /* arenas_tdata hasn't been initialized yet. */ + return arena_tdata_get_hard(tsd, ind); + } + if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { + /* + * ind is invalid, cache is old (too small), or tdata to be + * initialized. + */ + return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) : + NULL); + } + + tdata = &arenas_tdata[ind]; + if (likely(tdata != NULL) || !refresh_if_missing) { + return tdata; + } + return arena_tdata_get_hard(tsd, ind); +} + +static inline arena_t * +arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { + arena_t *ret; + + assert(ind < MALLOCX_ARENA_LIMIT); + + ret = (arena_t *)atomic_load_p(&arenas[ind], ATOMIC_ACQUIRE); + if (unlikely(ret == NULL)) { + if (init_if_missing) { + ret = arena_init(tsdn, ind, + (extent_hooks_t *)&extent_hooks_default); + } + } + return ret; +} + +static inline ticker_t * +decay_ticker_get(tsd_t *tsd, unsigned ind) { + arena_tdata_t *tdata; + + tdata = arena_tdata_get(tsd, ind, true); + if (unlikely(tdata == NULL)) { + return NULL; + } + return &tdata->decay_ticker; +} + +JEMALLOC_ALWAYS_INLINE cache_bin_t * +tcache_small_bin_get(tcache_t *tcache, szind_t binind) { + assert(binind < NBINS); + return &tcache->bins_small[binind]; +} + +JEMALLOC_ALWAYS_INLINE cache_bin_t * +tcache_large_bin_get(tcache_t *tcache, szind_t binind) { + assert(binind >= NBINS &&binind < nhbins); + return &tcache->bins_large[binind - NBINS]; +} + +JEMALLOC_ALWAYS_INLINE bool +tcache_available(tsd_t *tsd) { + /* + * Thread specific auto tcache might be unavailable if: 1) during tcache + * initialization, or 2) disabled through thread.tcache.enabled mallctl + * or config options. This check covers all cases. + */ + if (likely(tsd_tcache_enabled_get(tsd))) { + /* Associated arena == NULL implies tcache init in progress. */ + assert(tsd_tcachep_get(tsd)->arena == NULL || + tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->avail != + NULL); + return true; + } + + return false; +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcache_get(tsd_t *tsd) { + if (!tcache_available(tsd)) { + return NULL; + } + + return tsd_tcachep_get(tsd); +} + +static inline void +pre_reentrancy(tsd_t *tsd, arena_t *arena) { + /* arena is the current context. Reentry from a0 is not allowed. */ + assert(arena != arena_get(tsd_tsdn(tsd), 0, false)); + + bool fast = tsd_fast(tsd); + assert(tsd_reentrancy_level_get(tsd) < INT8_MAX); + ++*tsd_reentrancy_levelp_get(tsd); + if (fast) { + /* Prepare slow path for reentrancy. */ + tsd_slow_update(tsd); + assert(tsd->state == tsd_state_nominal_slow); + } +} + +static inline void +post_reentrancy(tsd_t *tsd) { + int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd); + assert(*reentrancy_level > 0); + if (--*reentrancy_level == 0) { + tsd_slow_update(tsd); + } +} + +#endif /* JEMALLOC_INTERNAL_INLINES_A_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h new file mode 100644 index 000000000..2e76e5d8f --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -0,0 +1,86 @@ +#ifndef JEMALLOC_INTERNAL_INLINES_B_H +#define JEMALLOC_INTERNAL_INLINES_B_H + +#include "jemalloc/internal/rtree.h" + +/* Choose an arena based on a per-thread value. */ +static inline arena_t * +arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { + arena_t *ret; + + if (arena != NULL) { + return arena; + } + + /* During reentrancy, arena 0 is the safest bet. */ + if (unlikely(tsd_reentrancy_level_get(tsd) > 0)) { + return arena_get(tsd_tsdn(tsd), 0, true); + } + + ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); + if (unlikely(ret == NULL)) { + ret = arena_choose_hard(tsd, internal); + assert(ret); + if (tcache_available(tsd)) { + tcache_t *tcache = tcache_get(tsd); + if (tcache->arena != NULL) { + /* See comments in tcache_data_init().*/ + assert(tcache->arena == + arena_get(tsd_tsdn(tsd), 0, false)); + if (tcache->arena != ret) { + tcache_arena_reassociate(tsd_tsdn(tsd), + tcache, ret); + } + } else { + tcache_arena_associate(tsd_tsdn(tsd), tcache, + ret); + } + } + } + + /* + * Note that for percpu arena, if the current arena is outside of the + * auto percpu arena range, (i.e. thread is assigned to a manually + * managed arena), then percpu arena is skipped. + */ + if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) && + !internal && (arena_ind_get(ret) < + percpu_arena_ind_limit(opt_percpu_arena)) && (ret->last_thd != + tsd_tsdn(tsd))) { + unsigned ind = percpu_arena_choose(); + if (arena_ind_get(ret) != ind) { + percpu_arena_update(tsd, ind); + ret = tsd_arena_get(tsd); + } + ret->last_thd = tsd_tsdn(tsd); + } + + return ret; +} + +static inline arena_t * +arena_choose(tsd_t *tsd, arena_t *arena) { + return arena_choose_impl(tsd, arena, false); +} + +static inline arena_t * +arena_ichoose(tsd_t *tsd, arena_t *arena) { + return arena_choose_impl(tsd, arena, true); +} + +static inline bool +arena_is_auto(arena_t *arena) { + assert(narenas_auto > 0); + return (arena_ind_get(arena) < narenas_auto); +} + +JEMALLOC_ALWAYS_INLINE extent_t * +iealloc(tsdn_t *tsdn, const void *ptr) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + return rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true); +} + +#endif /* JEMALLOC_INTERNAL_INLINES_B_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h new file mode 100644 index 000000000..c829ac60c --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -0,0 +1,218 @@ +#ifndef JEMALLOC_INTERNAL_INLINES_C_H +#define JEMALLOC_INTERNAL_INLINES_C_H + +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/witness.h" + +/* + * Translating the names of the 'i' functions: + * Abbreviations used in the first part of the function name (before + * alloc/dalloc) describe what that function accomplishes: + * a: arena (query) + * s: size (query, or sized deallocation) + * e: extent (query) + * p: aligned (allocates) + * vs: size (query, without knowing that the pointer is into the heap) + * r: rallocx implementation + * x: xallocx implementation + * Abbreviations used in the second part of the function name (after + * alloc/dalloc) describe the arguments it takes + * z: whether to return zeroed memory + * t: accepts a tcache_t * parameter + * m: accepts an arena_t * parameter + */ + +JEMALLOC_ALWAYS_INLINE arena_t * +iaalloc(tsdn_t *tsdn, const void *ptr) { + assert(ptr != NULL); + + return arena_aalloc(tsdn, ptr); +} + +JEMALLOC_ALWAYS_INLINE size_t +isalloc(tsdn_t *tsdn, const void *ptr) { + assert(ptr != NULL); + + return arena_salloc(tsdn, ptr); +} + +JEMALLOC_ALWAYS_INLINE void * +iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, + bool is_internal, arena_t *arena, bool slow_path) { + void *ret; + + assert(size != 0); + assert(!is_internal || tcache == NULL); + assert(!is_internal || arena == NULL || arena_is_auto(arena)); + if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + } + + ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); + if (config_stats && is_internal && likely(ret != NULL)) { + arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); + } + return ret; +} + +JEMALLOC_ALWAYS_INLINE void * +ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { + return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd), false, + NULL, slow_path); +} + +JEMALLOC_ALWAYS_INLINE void * +ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_internal, arena_t *arena) { + void *ret; + + assert(usize != 0); + assert(usize == sz_sa2u(usize, alignment)); + assert(!is_internal || tcache == NULL); + assert(!is_internal || arena == NULL || arena_is_auto(arena)); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); + assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); + if (config_stats && is_internal && likely(ret != NULL)) { + arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); + } + return ret; +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) { + return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena); +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { + return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, + tcache_get(tsd), false, NULL); +} + +JEMALLOC_ALWAYS_INLINE size_t +ivsalloc(tsdn_t *tsdn, const void *ptr) { + return arena_vsalloc(tsdn, ptr); +} + +JEMALLOC_ALWAYS_INLINE void +idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx, + bool is_internal, bool slow_path) { + assert(ptr != NULL); + assert(!is_internal || tcache == NULL); + assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr))); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + if (config_stats && is_internal) { + arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); + } + if (!is_internal && !tsdn_null(tsdn) && + tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { + assert(tcache == NULL); + } + arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path); +} + +JEMALLOC_ALWAYS_INLINE void +idalloc(tsd_t *tsd, void *ptr) { + idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd), NULL, false, true); +} + +JEMALLOC_ALWAYS_INLINE void +isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + alloc_ctx_t *alloc_ctx, bool slow_path) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, + arena_t *arena) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + void *p; + size_t usize, copysize; + + usize = sz_sa2u(size + extra, alignment); + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + return NULL; + } + p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); + if (p == NULL) { + if (extra == 0) { + return NULL; + } + /* Try again, without extra this time. */ + usize = sz_sa2u(size, alignment); + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + return NULL; + } + p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); + if (p == NULL) { + return NULL; + } + } + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? size : oldsize; + memcpy(p, ptr, copysize); + isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); + return p; +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero, tcache_t *tcache, arena_t *arena) { + assert(ptr != NULL); + assert(size != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + /* + * Existing object alignment is inadequate; allocate new space + * and copy. + */ + return iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment, + zero, tcache, arena); + } + + return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, + tcache); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero) { + return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, + tcache_get(tsd), NULL); +} + +JEMALLOC_ALWAYS_INLINE bool +ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) { + assert(ptr != NULL); + assert(size != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + /* Existing object alignment is inadequate. */ + return true; + } + + return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero); +} + +#endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h index a08ba772e..ed75d3768 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h @@ -1,57 +1,43 @@ -/* - * JEMALLOC_ALWAYS_INLINE and JEMALLOC_INLINE are used within header files for - * functions that are static inline functions if inlining is enabled, and - * single-definition library-private functions if inlining is disabled. - * - * JEMALLOC_ALWAYS_INLINE_C and JEMALLOC_INLINE_C are for use in .c files, in - * which case the denoted functions are always static, regardless of whether - * inlining is enabled. - */ -#if defined(JEMALLOC_DEBUG) || defined(JEMALLOC_CODE_COVERAGE) - /* Disable inlining to make debugging/profiling easier. */ -# define JEMALLOC_ALWAYS_INLINE -# define JEMALLOC_ALWAYS_INLINE_C static -# define JEMALLOC_INLINE -# define JEMALLOC_INLINE_C static -# define inline -#else -# define JEMALLOC_ENABLE_INLINE -# ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ALWAYS_INLINE \ - static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) -# define JEMALLOC_ALWAYS_INLINE_C \ - static inline JEMALLOC_ATTR(always_inline) -# else -# define JEMALLOC_ALWAYS_INLINE static inline -# define JEMALLOC_ALWAYS_INLINE_C static inline -# endif -# define JEMALLOC_INLINE static inline -# define JEMALLOC_INLINE_C static inline -# ifdef _MSC_VER -# define inline _inline -# endif -#endif +#ifndef JEMALLOC_INTERNAL_MACROS_H +#define JEMALLOC_INTERNAL_MACROS_H -#ifdef JEMALLOC_CC_SILENCE -# define UNUSED JEMALLOC_ATTR(unused) +#ifdef JEMALLOC_DEBUG +# define JEMALLOC_ALWAYS_INLINE static inline #else -# define UNUSED +# define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline +#endif +#ifdef _MSC_VER +# define inline _inline #endif -#define ZU(z) ((size_t)z) -#define ZI(z) ((ssize_t)z) -#define QU(q) ((uint64_t)q) -#define QI(q) ((int64_t)q) +#define UNUSED JEMALLOC_ATTR(unused) -#define KZU(z) ZU(z##ULL) -#define KZI(z) ZI(z##LL) -#define KQU(q) QU(q##ULL) -#define KQI(q) QI(q##LL) +#define ZU(z) ((size_t)z) +#define ZD(z) ((ssize_t)z) +#define QU(q) ((uint64_t)q) +#define QD(q) ((int64_t)q) + +#define KZU(z) ZU(z##ULL) +#define KZD(z) ZD(z##LL) +#define KQU(q) QU(q##ULL) +#define KQD(q) QI(q##LL) #ifndef __DECONST # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) #endif -#ifndef JEMALLOC_HAS_RESTRICT +#if !defined(JEMALLOC_HAS_RESTRICT) || defined(__cplusplus) # define restrict #endif + +/* Various function pointers are statick and immutable except during testing. */ +#ifdef JEMALLOC_JET +# define JET_MUTABLE +#else +# define JET_MUTABLE const +#endif + +#define JEMALLOC_VA_ARGS_HEAD(head, ...) head +#define JEMALLOC_VA_ARGS_TAIL(head, ...) __VA_ARGS__ + +#endif /* JEMALLOC_INTERNAL_MACROS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h new file mode 100644 index 000000000..1b750b122 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h @@ -0,0 +1,185 @@ +#ifndef JEMALLOC_INTERNAL_TYPES_H +#define JEMALLOC_INTERNAL_TYPES_H + +/* Page size index type. */ +typedef unsigned pszind_t; + +/* Size class index type. */ +typedef unsigned szind_t; + +/* Processor / core id type. */ +typedef int malloc_cpuid_t; + +/* + * Flags bits: + * + * a: arena + * t: tcache + * 0: unused + * z: zero + * n: alignment + * + * aaaaaaaa aaaatttt tttttttt 0znnnnnn + */ +#define MALLOCX_ARENA_BITS 12 +#define MALLOCX_TCACHE_BITS 12 +#define MALLOCX_LG_ALIGN_BITS 6 +#define MALLOCX_ARENA_SHIFT 20 +#define MALLOCX_TCACHE_SHIFT 8 +#define MALLOCX_ARENA_MASK \ + (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT) +/* NB: Arena index bias decreases the maximum number of arenas by 1. */ +#define MALLOCX_ARENA_LIMIT ((1 << MALLOCX_ARENA_BITS) - 1) +#define MALLOCX_TCACHE_MASK \ + (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT) +#define MALLOCX_TCACHE_MAX ((1 << MALLOCX_TCACHE_BITS) - 3) +#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) +/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ +#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ + (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) +#define MALLOCX_ALIGN_GET(flags) \ + (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) +#define MALLOCX_ZERO_GET(flags) \ + ((bool)(flags & MALLOCX_ZERO)) + +#define MALLOCX_TCACHE_GET(flags) \ + (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) - 2) +#define MALLOCX_ARENA_GET(flags) \ + (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1) + +/* Smallest size class to support. */ +#define TINY_MIN (1U << LG_TINY_MIN) + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#ifndef LG_QUANTUM +# if (defined(__i386__) || defined(_M_IX86)) +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) +# define LG_QUANTUM 4 +# endif +# ifdef __arm__ +# define LG_QUANTUM 3 +# endif +# ifdef __aarch64__ +# define LG_QUANTUM 4 +# endif +# ifdef __hppa__ +# define LG_QUANTUM 4 +# endif +# ifdef __m68k__ +# define LG_QUANTUM 3 +# endif +# ifdef __mips__ +# define LG_QUANTUM 3 +# endif +# ifdef __nios2__ +# define LG_QUANTUM 3 +# endif +# ifdef __or1k__ +# define LG_QUANTUM 3 +# endif +# ifdef __powerpc__ +# define LG_QUANTUM 4 +# endif +# if defined(__riscv) || defined(__riscv__) +# define LG_QUANTUM 4 +# endif +# ifdef __s390__ +# define LG_QUANTUM 4 +# endif +# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \ + defined(__SH4_SINGLE_ONLY__)) +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifdef __le32__ +# define LG_QUANTUM 4 +# endif +# ifndef LG_QUANTUM +# error "Unknown minimum alignment for architecture; specify via " + "--with-lg-quantum" +# endif +#endif + +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) + +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) + +#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) +#define LONG_MASK (LONG - 1) + +/* Return the smallest long multiple that is >= a. */ +#define LONG_CEILING(a) \ + (((a) + LONG_MASK) & ~LONG_MASK) + +#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) +#define PTR_MASK (SIZEOF_PTR - 1) + +/* Return the smallest (void *) multiple that is >= a. */ +#define PTR_CEILING(a) \ + (((a) + PTR_MASK) & ~PTR_MASK) + +/* + * Maximum size of L1 cache line. This is used to avoid cache line aliasing. + * In addition, this controls the spacing of cacheline-spaced size classes. + * + * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can + * only handle raw constants. + */ +#define LG_CACHELINE 6 +#define CACHELINE 64 +#define CACHELINE_MASK (CACHELINE - 1) + +/* Return the smallest cacheline multiple that is >= s. */ +#define CACHELINE_CEILING(s) \ + (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) + +/* Return the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2BASE(a, alignment) \ + ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) + +/* Return the offset between a and the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ + ((size_t)((uintptr_t)(a) & (alignment - 1))) + +/* Return the smallest alignment multiple that is >= s. */ +#define ALIGNMENT_CEILING(s, alignment) \ + (((s) + (alignment - 1)) & ((~(alignment)) + 1)) + +/* Declare a variable-length array. */ +#if __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# include <malloc.h> +# define alloca _alloca +# else +# ifdef JEMALLOC_HAS_ALLOCA_H +# include <alloca.h> +# else +# include <stdlib.h> +# endif +# endif +# define VARIABLE_ARRAY(type, name, count) \ + type *name = alloca(sizeof(type) * (count)) +#else +# define VARIABLE_ARRAY(type, name, count) type name[(count)] +#endif + +#endif /* JEMALLOC_INTERNAL_TYPES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in new file mode 100644 index 000000000..e621fbc85 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in @@ -0,0 +1,194 @@ +#ifndef JEMALLOC_PREAMBLE_H +#define JEMALLOC_PREAMBLE_H + +#include "jemalloc_internal_defs.h" +#include "jemalloc/internal/jemalloc_internal_decls.h" + +#ifdef JEMALLOC_UTRACE +#include <sys/ktrace.h> +#endif + +#define JEMALLOC_NO_DEMANGLE +#ifdef JEMALLOC_JET +# undef JEMALLOC_IS_MALLOC +# define JEMALLOC_N(n) jet_##n +# include "jemalloc/internal/public_namespace.h" +# define JEMALLOC_NO_RENAME +# include "../jemalloc@install_suffix@.h" +# undef JEMALLOC_NO_RENAME +#else +# define JEMALLOC_N(n) @private_namespace@##n +# include "../jemalloc@install_suffix@.h" +#endif + +#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#include <libkern/OSAtomic.h> +#endif + +#ifdef JEMALLOC_ZONE +#include <mach/mach_error.h> +#include <mach/mach_init.h> +#include <mach/vm_map.h> +#endif + +#include "jemalloc/internal/jemalloc_internal_macros.h" + +/* + * Note that the ordering matters here; the hook itself is name-mangled. We + * want the inclusion of hooks to happen early, so that we hook as much as + * possible. + */ +#ifndef JEMALLOC_NO_PRIVATE_NAMESPACE +# ifndef JEMALLOC_JET +# include "jemalloc/internal/private_namespace.h" +# else +# include "jemalloc/internal/private_namespace_jet.h" +# endif +#endif +#include "jemalloc/internal/hooks.h" + +#ifdef JEMALLOC_DEFINE_MADVISE_FREE +# define JEMALLOC_MADV_FREE 8 +#endif + +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +static const bool have_dss = +#ifdef JEMALLOC_DSS + true +#else + false +#endif + ; +static const bool have_madvise_huge = +#ifdef JEMALLOC_HAVE_MADVISE_HUGE + true +#else + false +#endif + ; +static const bool config_fill = +#ifdef JEMALLOC_FILL + true +#else + false +#endif + ; +static const bool config_lazy_lock = +#ifdef JEMALLOC_LAZY_LOCK + true +#else + false +#endif + ; +static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; +static const bool config_prof = +#ifdef JEMALLOC_PROF + true +#else + false +#endif + ; +static const bool config_prof_libgcc = +#ifdef JEMALLOC_PROF_LIBGCC + true +#else + false +#endif + ; +static const bool config_prof_libunwind = +#ifdef JEMALLOC_PROF_LIBUNWIND + true +#else + false +#endif + ; +static const bool maps_coalesce = +#ifdef JEMALLOC_MAPS_COALESCE + true +#else + false +#endif + ; +static const bool config_stats = +#ifdef JEMALLOC_STATS + true +#else + false +#endif + ; +static const bool config_tls = +#ifdef JEMALLOC_TLS + true +#else + false +#endif + ; +static const bool config_utrace = +#ifdef JEMALLOC_UTRACE + true +#else + false +#endif + ; +static const bool config_xmalloc = +#ifdef JEMALLOC_XMALLOC + true +#else + false +#endif + ; +static const bool config_cache_oblivious = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + true +#else + false +#endif + ; +/* + * Undocumented, for jemalloc development use only at the moment. See the note + * in jemalloc/internal/log.h. + */ +static const bool config_log = +#ifdef JEMALLOC_LOG + true +#else + false +#endif + ; +#ifdef JEMALLOC_HAVE_SCHED_GETCPU +/* Currently percpu_arena depends on sched_getcpu. */ +#define JEMALLOC_PERCPU_ARENA +#endif +static const bool have_percpu_arena = +#ifdef JEMALLOC_PERCPU_ARENA + true +#else + false +#endif + ; +/* + * Undocumented, and not recommended; the application should take full + * responsibility for tracking provenance. + */ +static const bool force_ivsalloc = +#ifdef JEMALLOC_FORCE_IVSALLOC + true +#else + false +#endif + ; +static const bool have_background_thread = +#ifdef JEMALLOC_BACKGROUND_THREAD + true +#else + false +#endif + ; + +#endif /* JEMALLOC_PREAMBLE_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/large_externs.h b/deps/jemalloc/include/jemalloc/internal/large_externs.h new file mode 100644 index 000000000..3f36282cd --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/large_externs.h @@ -0,0 +1,26 @@ +#ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H +#define JEMALLOC_INTERNAL_LARGE_EXTERNS_H + +void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); +void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, + bool zero); +bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, + size_t usize_max, bool zero); +void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, + size_t alignment, bool zero, tcache_t *tcache); + +typedef void (large_dalloc_junk_t)(void *, size_t); +extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk; + +typedef void (large_dalloc_maybe_junk_t)(void *, size_t); +extern large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk; + +void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); +void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); +void large_dalloc(tsdn_t *tsdn, extent_t *extent); +size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); +prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); +void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); +void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); + +#endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/log.h b/deps/jemalloc/include/jemalloc/internal/log.h new file mode 100644 index 000000000..642085863 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/log.h @@ -0,0 +1,115 @@ +#ifndef JEMALLOC_INTERNAL_LOG_H +#define JEMALLOC_INTERNAL_LOG_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" + +#ifdef JEMALLOC_LOG +# define JEMALLOC_LOG_VAR_BUFSIZE 1000 +#else +# define JEMALLOC_LOG_VAR_BUFSIZE 1 +#endif + +#define JEMALLOC_LOG_BUFSIZE 4096 + +/* + * The log malloc_conf option is a '|'-delimited list of log_var name segments + * which should be logged. The names are themselves hierarchical, with '.' as + * the delimiter (a "segment" is just a prefix in the log namespace). So, if + * you have: + * + * log("arena", "log msg for arena"); // 1 + * log("arena.a", "log msg for arena.a"); // 2 + * log("arena.b", "log msg for arena.b"); // 3 + * log("arena.a.a", "log msg for arena.a.a"); // 4 + * log("extent.a", "log msg for extent.a"); // 5 + * log("extent.b", "log msg for extent.b"); // 6 + * + * And your malloc_conf option is "log=arena.a|extent", then lines 2, 4, 5, and + * 6 will print at runtime. You can enable logging from all log vars by + * writing "log=.". + * + * None of this should be regarded as a stable API for right now. It's intended + * as a debugging interface, to let us keep around some of our printf-debugging + * statements. + */ + +extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; +extern atomic_b_t log_init_done; + +typedef struct log_var_s log_var_t; +struct log_var_s { + /* + * Lowest bit is "inited", second lowest is "enabled". Putting them in + * a single word lets us avoid any fences on weak architectures. + */ + atomic_u_t state; + const char *name; +}; + +#define LOG_NOT_INITIALIZED 0U +#define LOG_INITIALIZED_NOT_ENABLED 1U +#define LOG_ENABLED 2U + +#define LOG_VAR_INIT(name_str) {ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str} + +/* + * Returns the value we should assume for state (which is not necessarily + * accurate; if logging is done before logging has finished initializing, then + * we default to doing the safe thing by logging everything). + */ +unsigned log_var_update_state(log_var_t *log_var); + +/* We factor out the metadata management to allow us to test more easily. */ +#define log_do_begin(log_var) \ +if (config_log) { \ + unsigned log_state = atomic_load_u(&(log_var).state, \ + ATOMIC_RELAXED); \ + if (unlikely(log_state == LOG_NOT_INITIALIZED)) { \ + log_state = log_var_update_state(&(log_var)); \ + assert(log_state != LOG_NOT_INITIALIZED); \ + } \ + if (log_state == LOG_ENABLED) { \ + { + /* User code executes here. */ +#define log_do_end(log_var) \ + } \ + } \ +} + +/* + * MSVC has some preprocessor bugs in its expansion of __VA_ARGS__ during + * preprocessing. To work around this, we take all potential extra arguments in + * a var-args functions. Since a varargs macro needs at least one argument in + * the "...", we accept the format string there, and require that the first + * argument in this "..." is a const char *. + */ +static inline void +log_impl_varargs(const char *name, ...) { + char buf[JEMALLOC_LOG_BUFSIZE]; + va_list ap; + + va_start(ap, name); + const char *format = va_arg(ap, const char *); + size_t dst_offset = 0; + dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name); + dst_offset += malloc_vsnprintf(buf + dst_offset, + JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap); + dst_offset += malloc_snprintf(buf + dst_offset, + JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); + va_end(ap); + + malloc_write(buf); +} + +/* Call as log("log.var.str", "format_string %d", arg_for_format_string); */ +#define LOG(log_var_str, ...) \ +do { \ + static log_var_t log_var = LOG_VAR_INIT(log_var_str); \ + log_do_begin(log_var) \ + log_impl_varargs((log_var).name, __VA_ARGS__); \ + log_do_end(log_var) \ +} while (0) + +#endif /* JEMALLOC_INTERNAL_LOG_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/malloc_io.h b/deps/jemalloc/include/jemalloc/internal/malloc_io.h new file mode 100644 index 000000000..bfe556b52 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/malloc_io.h @@ -0,0 +1,102 @@ +#ifndef JEMALLOC_INTERNAL_MALLOC_IO_H +#define JEMALLOC_INTERNAL_MALLOC_IO_H + +#ifdef _WIN32 +# ifdef _WIN64 +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "ll" +# else +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "" +# endif +# define FMTd32 "d" +# define FMTu32 "u" +# define FMTx32 "x" +# define FMTd64 FMT64_PREFIX "d" +# define FMTu64 FMT64_PREFIX "u" +# define FMTx64 FMT64_PREFIX "x" +# define FMTdPTR FMTPTR_PREFIX "d" +# define FMTuPTR FMTPTR_PREFIX "u" +# define FMTxPTR FMTPTR_PREFIX "x" +#else +# include <inttypes.h> +# define FMTd32 PRId32 +# define FMTu32 PRIu32 +# define FMTx32 PRIx32 +# define FMTd64 PRId64 +# define FMTu64 PRIu64 +# define FMTx64 PRIx64 +# define FMTdPTR PRIdPTR +# define FMTuPTR PRIuPTR +# define FMTxPTR PRIxPTR +#endif + +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 + +/* + * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be + * large enough for all possible uses within jemalloc. + */ +#define MALLOC_PRINTF_BUFSIZE 4096 + +int buferror(int err, char *buf, size_t buflen); +uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, + int base); +void malloc_write(const char *s); + +/* + * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating + * point math. + */ +size_t malloc_vsnprintf(char *str, size_t size, const char *format, + va_list ap); +size_t malloc_snprintf(char *str, size_t size, const char *format, ...) + JEMALLOC_FORMAT_PRINTF(3, 4); +/* + * The caller can set write_cb and cbopaque to null to choose to print with the + * je_malloc_message hook. + */ +void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap); +void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); + +static inline ssize_t +malloc_write_fd(int fd, const void *buf, size_t count) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) + /* + * Use syscall(2) rather than write(2) when possible in order to avoid + * the possibility of memory allocation within libc. This is necessary + * on FreeBSD; most operating systems do not have this problem though. + * + * syscall() returns long or int, depending on platform, so capture the + * result in the widest plausible type to avoid compiler warnings. + */ + long result = syscall(SYS_write, fd, buf, count); +#else + ssize_t result = (ssize_t)write(fd, buf, +#ifdef _WIN32 + (unsigned int) +#endif + count); +#endif + return (ssize_t)result; +} + +static inline ssize_t +malloc_read_fd(int fd, void *buf, size_t count) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) + long result = syscall(SYS_read, fd, buf, count); +#else + ssize_t result = read(fd, buf, +#ifdef _WIN32 + (unsigned int) +#endif + count); +#endif + return (ssize_t)result; +} + +#endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/mb.h b/deps/jemalloc/include/jemalloc/internal/mb.h deleted file mode 100644 index 3cfa78729..000000000 --- a/deps/jemalloc/include/jemalloc/internal/mb.h +++ /dev/null @@ -1,115 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void mb_write(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_)) -#ifdef __i386__ -/* - * According to the Intel Architecture Software Developer's Manual, current - * processors execute instructions in order from the perspective of other - * processors in a multiprocessor system, but 1) Intel reserves the right to - * change that, and 2) the compiler's optimizer could re-order instructions if - * there weren't some form of barrier. Therefore, even if running on an - * architecture that does not need memory barriers (everything through at least - * i686), an "optimizer barrier" is necessary. - */ -JEMALLOC_INLINE void -mb_write(void) -{ - -# if 0 - /* This is a true memory barrier. */ - asm volatile ("pusha;" - "xor %%eax,%%eax;" - "cpuid;" - "popa;" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -#else - /* - * This is hopefully enough to keep the compiler from reordering - * instructions around this one. - */ - asm volatile ("nop;" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -#endif -} -#elif (defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("sfence" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__powerpc__) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("eieio" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__sparc64__) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("membar #StoreStore" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__tile__) -JEMALLOC_INLINE void -mb_write(void) -{ - - __sync_synchronize(); -} -#else -/* - * This is much slower than a simple memory barrier, but the semantics of mutex - * unlock make this work. - */ -JEMALLOC_INLINE void -mb_write(void) -{ - malloc_mutex_t mtx; - - malloc_mutex_init(&mtx); - malloc_mutex_lock(&mtx); - malloc_mutex_unlock(&mtx); -} -#endif -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/mutex.h b/deps/jemalloc/include/jemalloc/internal/mutex.h index f051f2917..6520c2512 100644 --- a/deps/jemalloc/include/jemalloc/internal/mutex.h +++ b/deps/jemalloc/include/jemalloc/internal/mutex.h @@ -1,49 +1,123 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_MUTEX_H +#define JEMALLOC_INTERNAL_MUTEX_H -typedef struct malloc_mutex_s malloc_mutex_t; +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/witness.h" + +typedef enum { + /* Can only acquire one mutex of a given witness rank at a time. */ + malloc_mutex_rank_exclusive, + /* + * Can acquire multiple mutexes of the same witness rank, but in + * address-ascending order only. + */ + malloc_mutex_address_ordered +} malloc_mutex_lock_order_t; +typedef struct malloc_mutex_s malloc_mutex_t; +struct malloc_mutex_s { + union { + struct { + /* + * prof_data is defined first to reduce cacheline + * bouncing: the data is not touched by the mutex holder + * during unlocking, while might be modified by + * contenders. Having it before the mutex itself could + * avoid prefetching a modified cacheline (for the + * unlocking thread). + */ + mutex_prof_data_t prof_data; #ifdef _WIN32 -# define MALLOC_MUTEX_INITIALIZER +# if _WIN32_WINNT >= 0x0600 + SRWLOCK lock; +# else + CRITICAL_SECTION lock; +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER {0} + OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; #else -# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ - defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} -# else -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER} -# endif + pthread_mutex_t lock; +#endif + }; + /* + * We only touch witness when configured w/ debug. However we + * keep the field in a union when !debug so that we don't have + * to pollute the code base with #ifdefs, while avoid paying the + * memory cost. + */ +#if !defined(JEMALLOC_DEBUG) + witness_t witness; + malloc_mutex_lock_order_t lock_order; #endif + }; -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +#if defined(JEMALLOC_DEBUG) + witness_t witness; + malloc_mutex_lock_order_t lock_order; +#endif +}; + +/* + * Based on benchmark results, a fixed spin with this amount of retries works + * well for our critical sections. + */ +#define MALLOC_MUTEX_MAX_SPIN 250 -struct malloc_mutex_s { #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 - SRWLOCK lock; +# define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) ReleaseSRWLockExclusive(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock)) # else - CRITICAL_SECTION lock; +# define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock)) # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock)) #elif (defined(JEMALLOC_OSSPIN)) - OSSpinLock lock; -#elif (defined(JEMALLOC_MUTEX_INIT_CB)) - pthread_mutex_t lock; - malloc_mutex_t *postponed_next; +# define MALLOC_MUTEX_LOCK(m) OSSpinLockLock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) OSSpinLockUnlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock)) #else - pthread_mutex_t lock; +# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) #endif -}; -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS +#define LOCK_PROF_DATA_INITIALIZER \ + {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, \ + ATOMIC_INIT(0), 0, NULL, 0} + +#ifdef _WIN32 +# define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#else +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#endif #ifdef JEMALLOC_LAZY_LOCK extern bool isthreaded; @@ -52,60 +126,123 @@ extern bool isthreaded; # define isthreaded true #endif -bool malloc_mutex_init(malloc_mutex_t *mutex); -void malloc_mutex_prefork(malloc_mutex_t *mutex); -void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(malloc_mutex_t *mutex); -bool mutex_boot(void); +bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, + witness_rank_t rank, malloc_mutex_lock_order_t lock_order); +void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); +bool malloc_mutex_boot(void); +void malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex); -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +void malloc_mutex_lock_slow(malloc_mutex_t *mutex); -#ifndef JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(malloc_mutex_t *mutex); -void malloc_mutex_unlock(malloc_mutex_t *mutex); -#endif +static inline void +malloc_mutex_lock_final(malloc_mutex_t *mutex) { + MALLOC_MUTEX_LOCK(mutex); +} -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) -JEMALLOC_INLINE void -malloc_mutex_lock(malloc_mutex_t *mutex) -{ +static inline bool +malloc_mutex_trylock_final(malloc_mutex_t *mutex) { + return MALLOC_MUTEX_TRYLOCK(mutex); +} + +static inline void +mutex_owner_stats_update(tsdn_t *tsdn, malloc_mutex_t *mutex) { + if (config_stats) { + mutex_prof_data_t *data = &mutex->prof_data; + data->n_lock_ops++; + if (data->prev_owner != tsdn) { + data->prev_owner = tsdn; + data->n_owner_switches++; + } + } +} +/* Trylock: return false if the lock is successfully acquired. */ +static inline bool +malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - AcquireSRWLockExclusive(&mutex->lock); -# else - EnterCriticalSection(&mutex->lock); -# endif -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockLock(&mutex->lock); -#else - pthread_mutex_lock(&mutex->lock); -#endif + if (malloc_mutex_trylock_final(mutex)) { + return true; + } + mutex_owner_stats_update(tsdn, mutex); } + witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); + + return false; } -JEMALLOC_INLINE void -malloc_mutex_unlock(malloc_mutex_t *mutex) -{ +/* Aggregate lock prof data. */ +static inline void +malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { + nstime_add(&sum->tot_wait_time, &data->tot_wait_time); + if (nstime_compare(&sum->max_wait_time, &data->max_wait_time) < 0) { + nstime_copy(&sum->max_wait_time, &data->max_wait_time); + } + + sum->n_wait_times += data->n_wait_times; + sum->n_spin_acquired += data->n_spin_acquired; + + if (sum->max_n_thds < data->max_n_thds) { + sum->max_n_thds = data->max_n_thds; + } + uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds, + ATOMIC_RELAXED); + uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32( + &data->n_waiting_thds, ATOMIC_RELAXED); + atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds, + ATOMIC_RELAXED); + sum->n_owner_switches += data->n_owner_switches; + sum->n_lock_ops += data->n_lock_ops; +} +static inline void +malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - ReleaseSRWLockExclusive(&mutex->lock); -# else - LeaveCriticalSection(&mutex->lock); -# endif -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockUnlock(&mutex->lock); -#else - pthread_mutex_unlock(&mutex->lock); -#endif + if (malloc_mutex_trylock_final(mutex)) { + malloc_mutex_lock_slow(mutex); + } + mutex_owner_stats_update(tsdn, mutex); } + witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); +} + +static inline void +malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); + if (isthreaded) { + MALLOC_MUTEX_UNLOCK(mutex); + } +} + +static inline void +malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); +} + +static inline void +malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); +} + +/* Copy the prof data from mutex for processing. */ +static inline void +malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, + malloc_mutex_t *mutex) { + mutex_prof_data_t *source = &mutex->prof_data; + /* Can only read holding the mutex. */ + malloc_mutex_assert_owner(tsdn, mutex); + + /* + * Not *really* allowed (we shouldn't be doing non-atomic loads of + * atomic data), but the mutex protection makes this safe, and writing + * a member-for-member copy is tedious for this situation. + */ + *data = *source; + /* n_wait_thds is not reported (modified w/o locking). */ + atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); } -#endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_MUTEX_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/mutex_pool.h b/deps/jemalloc/include/jemalloc/internal/mutex_pool.h new file mode 100644 index 000000000..726cece90 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/mutex_pool.h @@ -0,0 +1,94 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_POOL_H +#define JEMALLOC_INTERNAL_MUTEX_POOL_H + +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/witness.h" + +/* We do mod reductions by this value, so it should be kept a power of 2. */ +#define MUTEX_POOL_SIZE 256 + +typedef struct mutex_pool_s mutex_pool_t; +struct mutex_pool_s { + malloc_mutex_t mutexes[MUTEX_POOL_SIZE]; +}; + +bool mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank); + +/* Internal helper - not meant to be called outside this module. */ +static inline malloc_mutex_t * +mutex_pool_mutex(mutex_pool_t *pool, uintptr_t key) { + size_t hash_result[2]; + hash(&key, sizeof(key), 0xd50dcc1b, hash_result); + return &pool->mutexes[hash_result[0] % MUTEX_POOL_SIZE]; +} + +static inline void +mutex_pool_assert_not_held(tsdn_t *tsdn, mutex_pool_t *pool) { + for (int i = 0; i < MUTEX_POOL_SIZE; i++) { + malloc_mutex_assert_not_owner(tsdn, &pool->mutexes[i]); + } +} + +/* + * Note that a mutex pool doesn't work exactly the way an embdedded mutex would. + * You're not allowed to acquire mutexes in the pool one at a time. You have to + * acquire all the mutexes you'll need in a single function call, and then + * release them all in a single function call. + */ + +static inline void +mutex_pool_lock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { + mutex_pool_assert_not_held(tsdn, pool); + + malloc_mutex_t *mutex = mutex_pool_mutex(pool, key); + malloc_mutex_lock(tsdn, mutex); +} + +static inline void +mutex_pool_unlock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { + malloc_mutex_t *mutex = mutex_pool_mutex(pool, key); + malloc_mutex_unlock(tsdn, mutex); + + mutex_pool_assert_not_held(tsdn, pool); +} + +static inline void +mutex_pool_lock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1, + uintptr_t key2) { + mutex_pool_assert_not_held(tsdn, pool); + + malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1); + malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2); + if ((uintptr_t)mutex1 < (uintptr_t)mutex2) { + malloc_mutex_lock(tsdn, mutex1); + malloc_mutex_lock(tsdn, mutex2); + } else if ((uintptr_t)mutex1 == (uintptr_t)mutex2) { + malloc_mutex_lock(tsdn, mutex1); + } else { + malloc_mutex_lock(tsdn, mutex2); + malloc_mutex_lock(tsdn, mutex1); + } +} + +static inline void +mutex_pool_unlock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1, + uintptr_t key2) { + malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1); + malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2); + if (mutex1 == mutex2) { + malloc_mutex_unlock(tsdn, mutex1); + } else { + malloc_mutex_unlock(tsdn, mutex1); + malloc_mutex_unlock(tsdn, mutex2); + } + + mutex_pool_assert_not_held(tsdn, pool); +} + +static inline void +mutex_pool_assert_owner(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { + malloc_mutex_assert_owner(tsdn, mutex_pool_mutex(pool, key)); +} + +#endif /* JEMALLOC_INTERNAL_MUTEX_POOL_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/mutex_prof.h b/deps/jemalloc/include/jemalloc/internal/mutex_prof.h new file mode 100644 index 000000000..ce183d335 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/mutex_prof.h @@ -0,0 +1,99 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_PROF_H +#define JEMALLOC_INTERNAL_MUTEX_PROF_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/tsd_types.h" + +#define MUTEX_PROF_GLOBAL_MUTEXES \ + OP(background_thread) \ + OP(ctl) \ + OP(prof) + +typedef enum { +#define OP(mtx) global_prof_mutex_##mtx, + MUTEX_PROF_GLOBAL_MUTEXES +#undef OP + mutex_prof_num_global_mutexes +} mutex_prof_global_ind_t; + +#define MUTEX_PROF_ARENA_MUTEXES \ + OP(large) \ + OP(extent_avail) \ + OP(extents_dirty) \ + OP(extents_muzzy) \ + OP(extents_retained) \ + OP(decay_dirty) \ + OP(decay_muzzy) \ + OP(base) \ + OP(tcache_list) + +typedef enum { +#define OP(mtx) arena_prof_mutex_##mtx, + MUTEX_PROF_ARENA_MUTEXES +#undef OP + mutex_prof_num_arena_mutexes +} mutex_prof_arena_ind_t; + +#define MUTEX_PROF_UINT64_COUNTERS \ + OP(num_ops, uint64_t, "n_lock_ops") \ + OP(num_wait, uint64_t, "n_waiting") \ + OP(num_spin_acq, uint64_t, "n_spin_acq") \ + OP(num_owner_switch, uint64_t, "n_owner_switch") \ + OP(total_wait_time, uint64_t, "total_wait_ns") \ + OP(max_wait_time, uint64_t, "max_wait_ns") + +#define MUTEX_PROF_UINT32_COUNTERS \ + OP(max_num_thds, uint32_t, "max_n_thds") + +#define MUTEX_PROF_COUNTERS \ + MUTEX_PROF_UINT64_COUNTERS \ + MUTEX_PROF_UINT32_COUNTERS + +#define OP(counter, type, human) mutex_counter_##counter, + +#define COUNTER_ENUM(counter_list, t) \ + typedef enum { \ + counter_list \ + mutex_prof_num_##t##_counters \ + } mutex_prof_##t##_counter_ind_t; + +COUNTER_ENUM(MUTEX_PROF_UINT64_COUNTERS, uint64_t) +COUNTER_ENUM(MUTEX_PROF_UINT32_COUNTERS, uint32_t) + +#undef COUNTER_ENUM +#undef OP + +typedef struct { + /* + * Counters touched on the slow path, i.e. when there is lock + * contention. We update them once we have the lock. + */ + /* Total time (in nano seconds) spent waiting on this mutex. */ + nstime_t tot_wait_time; + /* Max time (in nano seconds) spent on a single lock operation. */ + nstime_t max_wait_time; + /* # of times have to wait for this mutex (after spinning). */ + uint64_t n_wait_times; + /* # of times acquired the mutex through local spinning. */ + uint64_t n_spin_acquired; + /* Max # of threads waiting for the mutex at the same time. */ + uint32_t max_n_thds; + /* Current # of threads waiting on the lock. Atomic synced. */ + atomic_u32_t n_waiting_thds; + + /* + * Data touched on the fast path. These are modified right after we + * grab the lock, so it's placed closest to the end (i.e. right before + * the lock) so that we have a higher chance of them being on the same + * cacheline. + */ + /* # of times the mutex holder is different than the previous one. */ + uint64_t n_owner_switches; + /* Previous mutex holder, to facilitate n_owner_switches. */ + tsdn_t *prev_owner; + /* # of lock() operations in total. */ + uint64_t n_lock_ops; +} mutex_prof_data_t; + +#endif /* JEMALLOC_INTERNAL_MUTEX_PROF_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/nstime.h b/deps/jemalloc/include/jemalloc/internal/nstime.h new file mode 100644 index 000000000..17c177c7f --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/nstime.h @@ -0,0 +1,34 @@ +#ifndef JEMALLOC_INTERNAL_NSTIME_H +#define JEMALLOC_INTERNAL_NSTIME_H + +/* Maximum supported number of seconds (~584 years). */ +#define NSTIME_SEC_MAX KQU(18446744072) +#define NSTIME_ZERO_INITIALIZER {0} + +typedef struct { + uint64_t ns; +} nstime_t; + +void nstime_init(nstime_t *time, uint64_t ns); +void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); +uint64_t nstime_ns(const nstime_t *time); +uint64_t nstime_sec(const nstime_t *time); +uint64_t nstime_msec(const nstime_t *time); +uint64_t nstime_nsec(const nstime_t *time); +void nstime_copy(nstime_t *time, const nstime_t *source); +int nstime_compare(const nstime_t *a, const nstime_t *b); +void nstime_add(nstime_t *time, const nstime_t *addend); +void nstime_iadd(nstime_t *time, uint64_t addend); +void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); +void nstime_isubtract(nstime_t *time, uint64_t subtrahend); +void nstime_imultiply(nstime_t *time, uint64_t multiplier); +void nstime_idivide(nstime_t *time, uint64_t divisor); +uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); + +typedef bool (nstime_monotonic_t)(void); +extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; + +typedef bool (nstime_update_t)(nstime_t *); +extern nstime_update_t *JET_MUTABLE nstime_update; + +#endif /* JEMALLOC_INTERNAL_NSTIME_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/pages.h b/deps/jemalloc/include/jemalloc/internal/pages.h index da7eb9686..7dae633af 100644 --- a/deps/jemalloc/include/jemalloc/internal/pages.h +++ b/deps/jemalloc/include/jemalloc/internal/pages.h @@ -1,26 +1,88 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H +#define JEMALLOC_INTERNAL_PAGES_EXTERNS_H -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +/* Page size. LG_PAGE is determined by the configure script. */ +#ifdef PAGE_MASK +# undef PAGE_MASK +#endif +#define PAGE ((size_t)(1U << LG_PAGE)) +#define PAGE_MASK ((size_t)(PAGE - 1)) +/* Return the page base address for the page containing address a. */ +#define PAGE_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~PAGE_MASK)) +/* Return the smallest pagesize multiple that is >= s. */ +#define PAGE_CEILING(s) \ + (((s) + PAGE_MASK) & ~PAGE_MASK) -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS +/* Huge page size. LG_HUGEPAGE is determined by the configure script. */ +#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) +#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) +/* Return the huge page base address for the huge page containing address a. */ +#define HUGEPAGE_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK)) +/* Return the smallest pagesize multiple that is >= s. */ +#define HUGEPAGE_CEILING(s) \ + (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) -void *pages_map(void *addr, size_t size); -void pages_unmap(void *addr, size_t size); -void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, - size_t size); -bool pages_commit(void *addr, size_t size); -bool pages_decommit(void *addr, size_t size); -bool pages_purge(void *addr, size_t size); +/* PAGES_CAN_PURGE_LAZY is defined if lazy purging is supported. */ +#if defined(_WIN32) || defined(JEMALLOC_PURGE_MADVISE_FREE) +# define PAGES_CAN_PURGE_LAZY +#endif +/* + * PAGES_CAN_PURGE_FORCED is defined if forced purging is supported. + * + * The only supported way to hard-purge on Windows is to decommit and then + * re-commit, but doing so is racy, and if re-commit fails it's a pain to + * propagate the "poisoned" memory state. Since we typically decommit as the + * next step after purging on Windows anyway, there's no point in adding such + * complexity. + */ +#if !defined(_WIN32) && ((defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ + defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)) || \ + defined(JEMALLOC_MAPS_COALESCE)) +# define PAGES_CAN_PURGE_FORCED +#endif -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +static const bool pages_can_purge_lazy = +#ifdef PAGES_CAN_PURGE_LAZY + true +#else + false +#endif + ; +static const bool pages_can_purge_forced = +#ifdef PAGES_CAN_PURGE_FORCED + true +#else + false +#endif + ; -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +typedef enum { + thp_mode_default = 0, /* Do not change hugepage settings. */ + thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ + thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */ + thp_mode_names_limit = 3, /* Used for option processing. */ + thp_mode_not_supported = 3 /* No THP support detected. */ +} thp_mode_t; + +#define THP_MODE_DEFAULT thp_mode_default +extern thp_mode_t opt_thp; +extern thp_mode_t init_system_thp_mode; /* Initial system wide state. */ +extern const char *thp_mode_names[]; + +void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); +void pages_unmap(void *addr, size_t size); +bool pages_commit(void *addr, size_t size); +bool pages_decommit(void *addr, size_t size); +bool pages_purge_lazy(void *addr, size_t size); +bool pages_purge_forced(void *addr, size_t size); +bool pages_huge(void *addr, size_t size); +bool pages_nohuge(void *addr, size_t size); +bool pages_dontdump(void *addr, size_t size); +bool pages_dodump(void *addr, size_t size); +bool pages_boot(void); +void pages_set_thp_state (void *ptr, size_t size); + +#endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/ph.h b/deps/jemalloc/include/jemalloc/internal/ph.h new file mode 100644 index 000000000..84d6778a9 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/ph.h @@ -0,0 +1,391 @@ +/* + * A Pairing Heap implementation. + * + * "The Pairing Heap: A New Form of Self-Adjusting Heap" + * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf + * + * With auxiliary twopass list, described in a follow on paper. + * + * "Pairing Heaps: Experiments and Analysis" + * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf + * + ******************************************************************************* + */ + +#ifndef PH_H_ +#define PH_H_ + +/* Node structure. */ +#define phn(a_type) \ +struct { \ + a_type *phn_prev; \ + a_type *phn_next; \ + a_type *phn_lchild; \ +} + +/* Root structure. */ +#define ph(a_type) \ +struct { \ + a_type *ph_root; \ +} + +/* Internal utility macros. */ +#define phn_lchild_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_lchild) +#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \ + a_phn->a_field.phn_lchild = a_lchild; \ +} while (0) + +#define phn_next_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_next) +#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \ + a_phn->a_field.phn_prev = a_prev; \ +} while (0) + +#define phn_prev_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_prev) +#define phn_next_set(a_type, a_field, a_phn, a_next) do { \ + a_phn->a_field.phn_next = a_next; \ +} while (0) + +#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \ + a_type *phn0child; \ + \ + assert(a_phn0 != NULL); \ + assert(a_phn1 != NULL); \ + assert(a_cmp(a_phn0, a_phn1) <= 0); \ + \ + phn_prev_set(a_type, a_field, a_phn1, a_phn0); \ + phn0child = phn_lchild_get(a_type, a_field, a_phn0); \ + phn_next_set(a_type, a_field, a_phn1, phn0child); \ + if (phn0child != NULL) { \ + phn_prev_set(a_type, a_field, phn0child, a_phn1); \ + } \ + phn_lchild_set(a_type, a_field, a_phn0, a_phn1); \ +} while (0) + +#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ + if (a_phn0 == NULL) { \ + r_phn = a_phn1; \ + } else if (a_phn1 == NULL) { \ + r_phn = a_phn0; \ + } else if (a_cmp(a_phn0, a_phn1) < 0) { \ + phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, \ + a_cmp); \ + r_phn = a_phn0; \ + } else { \ + phn_merge_ordered(a_type, a_field, a_phn1, a_phn0, \ + a_cmp); \ + r_phn = a_phn1; \ + } \ +} while (0) + +#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \ + a_type *head = NULL; \ + a_type *tail = NULL; \ + a_type *phn0 = a_phn; \ + a_type *phn1 = phn_next_get(a_type, a_field, phn0); \ + \ + /* \ + * Multipass merge, wherein the first two elements of a FIFO \ + * are repeatedly merged, and each result is appended to the \ + * singly linked FIFO, until the FIFO contains only a single \ + * element. We start with a sibling list but no reference to \ + * its tail, so we do a single pass over the sibling list to \ + * populate the FIFO. \ + */ \ + if (phn1 != NULL) { \ + a_type *phnrest = phn_next_get(a_type, a_field, phn1); \ + if (phnrest != NULL) { \ + phn_prev_set(a_type, a_field, phnrest, NULL); \ + } \ + phn_prev_set(a_type, a_field, phn0, NULL); \ + phn_next_set(a_type, a_field, phn0, NULL); \ + phn_prev_set(a_type, a_field, phn1, NULL); \ + phn_next_set(a_type, a_field, phn1, NULL); \ + phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0); \ + head = tail = phn0; \ + phn0 = phnrest; \ + while (phn0 != NULL) { \ + phn1 = phn_next_get(a_type, a_field, phn0); \ + if (phn1 != NULL) { \ + phnrest = phn_next_get(a_type, a_field, \ + phn1); \ + if (phnrest != NULL) { \ + phn_prev_set(a_type, a_field, \ + phnrest, NULL); \ + } \ + phn_prev_set(a_type, a_field, phn0, \ + NULL); \ + phn_next_set(a_type, a_field, phn0, \ + NULL); \ + phn_prev_set(a_type, a_field, phn1, \ + NULL); \ + phn_next_set(a_type, a_field, phn1, \ + NULL); \ + phn_merge(a_type, a_field, phn0, phn1, \ + a_cmp, phn0); \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = phnrest; \ + } else { \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = NULL; \ + } \ + } \ + phn0 = head; \ + phn1 = phn_next_get(a_type, a_field, phn0); \ + if (phn1 != NULL) { \ + while (true) { \ + head = phn_next_get(a_type, a_field, \ + phn1); \ + assert(phn_prev_get(a_type, a_field, \ + phn0) == NULL); \ + phn_next_set(a_type, a_field, phn0, \ + NULL); \ + assert(phn_prev_get(a_type, a_field, \ + phn1) == NULL); \ + phn_next_set(a_type, a_field, phn1, \ + NULL); \ + phn_merge(a_type, a_field, phn0, phn1, \ + a_cmp, phn0); \ + if (head == NULL) { \ + break; \ + } \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = head; \ + phn1 = phn_next_get(a_type, a_field, \ + phn0); \ + } \ + } \ + } \ + r_phn = phn0; \ +} while (0) + +#define ph_merge_aux(a_type, a_field, a_ph, a_cmp) do { \ + a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root); \ + if (phn != NULL) { \ + phn_prev_set(a_type, a_field, a_ph->ph_root, NULL); \ + phn_next_set(a_type, a_field, a_ph->ph_root, NULL); \ + phn_prev_set(a_type, a_field, phn, NULL); \ + ph_merge_siblings(a_type, a_field, phn, a_cmp, phn); \ + assert(phn_next_get(a_type, a_field, phn) == NULL); \ + phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp, \ + a_ph->ph_root); \ + } \ +} while (0) + +#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ + a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \ + if (lchild == NULL) { \ + r_phn = NULL; \ + } else { \ + ph_merge_siblings(a_type, a_field, lchild, a_cmp, \ + r_phn); \ + } \ +} while (0) + +/* + * The ph_proto() macro generates function prototypes that correspond to the + * functions generated by an equivalently parameterized call to ph_gen(). + */ +#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \ +a_attr void a_prefix##new(a_ph_type *ph); \ +a_attr bool a_prefix##empty(a_ph_type *ph); \ +a_attr a_type *a_prefix##first(a_ph_type *ph); \ +a_attr a_type *a_prefix##any(a_ph_type *ph); \ +a_attr void a_prefix##insert(a_ph_type *ph, a_type *phn); \ +a_attr a_type *a_prefix##remove_first(a_ph_type *ph); \ +a_attr a_type *a_prefix##remove_any(a_ph_type *ph); \ +a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); + +/* + * The ph_gen() macro generates a type-specific pairing heap implementation, + * based on the above cpp macros. + */ +#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ +a_attr void \ +a_prefix##new(a_ph_type *ph) { \ + memset(ph, 0, sizeof(ph(a_type))); \ +} \ +a_attr bool \ +a_prefix##empty(a_ph_type *ph) { \ + return (ph->ph_root == NULL); \ +} \ +a_attr a_type * \ +a_prefix##first(a_ph_type *ph) { \ + if (ph->ph_root == NULL) { \ + return NULL; \ + } \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + return ph->ph_root; \ +} \ +a_attr a_type * \ +a_prefix##any(a_ph_type *ph) { \ + if (ph->ph_root == NULL) { \ + return NULL; \ + } \ + a_type *aux = phn_next_get(a_type, a_field, ph->ph_root); \ + if (aux != NULL) { \ + return aux; \ + } \ + return ph->ph_root; \ +} \ +a_attr void \ +a_prefix##insert(a_ph_type *ph, a_type *phn) { \ + memset(&phn->a_field, 0, sizeof(phn(a_type))); \ + \ + /* \ + * Treat the root as an aux list during insertion, and lazily \ + * merge during a_prefix##remove_first(). For elements that \ + * are inserted, then removed via a_prefix##remove() before the \ + * aux list is ever processed, this makes insert/remove \ + * constant-time, whereas eager merging would make insert \ + * O(log n). \ + */ \ + if (ph->ph_root == NULL) { \ + ph->ph_root = phn; \ + } else { \ + phn_next_set(a_type, a_field, phn, phn_next_get(a_type, \ + a_field, ph->ph_root)); \ + if (phn_next_get(a_type, a_field, ph->ph_root) != \ + NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, ph->ph_root), \ + phn); \ + } \ + phn_prev_set(a_type, a_field, phn, ph->ph_root); \ + phn_next_set(a_type, a_field, ph->ph_root, phn); \ + } \ +} \ +a_attr a_type * \ +a_prefix##remove_first(a_ph_type *ph) { \ + a_type *ret; \ + \ + if (ph->ph_root == NULL) { \ + return NULL; \ + } \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + \ + ret = ph->ph_root; \ + \ + ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ + ph->ph_root); \ + \ + return ret; \ +} \ +a_attr a_type * \ +a_prefix##remove_any(a_ph_type *ph) { \ + /* \ + * Remove the most recently inserted aux list element, or the \ + * root if the aux list is empty. This has the effect of \ + * behaving as a LIFO (and insertion/removal is therefore \ + * constant-time) if a_prefix##[remove_]first() are never \ + * called. \ + */ \ + if (ph->ph_root == NULL) { \ + return NULL; \ + } \ + a_type *ret = phn_next_get(a_type, a_field, ph->ph_root); \ + if (ret != NULL) { \ + a_type *aux = phn_next_get(a_type, a_field, ret); \ + phn_next_set(a_type, a_field, ph->ph_root, aux); \ + if (aux != NULL) { \ + phn_prev_set(a_type, a_field, aux, \ + ph->ph_root); \ + } \ + return ret; \ + } \ + ret = ph->ph_root; \ + ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ + ph->ph_root); \ + return ret; \ +} \ +a_attr void \ +a_prefix##remove(a_ph_type *ph, a_type *phn) { \ + a_type *replace, *parent; \ + \ + if (ph->ph_root == phn) { \ + /* \ + * We can delete from aux list without merging it, but \ + * we need to merge if we are dealing with the root \ + * node and it has children. \ + */ \ + if (phn_lchild_get(a_type, a_field, phn) == NULL) { \ + ph->ph_root = phn_next_get(a_type, a_field, \ + phn); \ + if (ph->ph_root != NULL) { \ + phn_prev_set(a_type, a_field, \ + ph->ph_root, NULL); \ + } \ + return; \ + } \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + if (ph->ph_root == phn) { \ + ph_merge_children(a_type, a_field, ph->ph_root, \ + a_cmp, ph->ph_root); \ + return; \ + } \ + } \ + \ + /* Get parent (if phn is leftmost child) before mutating. */ \ + if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) { \ + if (phn_lchild_get(a_type, a_field, parent) != phn) { \ + parent = NULL; \ + } \ + } \ + /* Find a possible replacement node, and link to parent. */ \ + ph_merge_children(a_type, a_field, phn, a_cmp, replace); \ + /* Set next/prev for sibling linked list. */ \ + if (replace != NULL) { \ + if (parent != NULL) { \ + phn_prev_set(a_type, a_field, replace, parent); \ + phn_lchild_set(a_type, a_field, parent, \ + replace); \ + } else { \ + phn_prev_set(a_type, a_field, replace, \ + phn_prev_get(a_type, a_field, phn)); \ + if (phn_prev_get(a_type, a_field, phn) != \ + NULL) { \ + phn_next_set(a_type, a_field, \ + phn_prev_get(a_type, a_field, phn), \ + replace); \ + } \ + } \ + phn_next_set(a_type, a_field, replace, \ + phn_next_get(a_type, a_field, phn)); \ + if (phn_next_get(a_type, a_field, phn) != NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, phn), \ + replace); \ + } \ + } else { \ + if (parent != NULL) { \ + a_type *next = phn_next_get(a_type, a_field, \ + phn); \ + phn_lchild_set(a_type, a_field, parent, next); \ + if (next != NULL) { \ + phn_prev_set(a_type, a_field, next, \ + parent); \ + } \ + } else { \ + assert(phn_prev_get(a_type, a_field, phn) != \ + NULL); \ + phn_next_set(a_type, a_field, \ + phn_prev_get(a_type, a_field, phn), \ + phn_next_get(a_type, a_field, phn)); \ + } \ + if (phn_next_get(a_type, a_field, phn) != NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, phn), \ + phn_prev_get(a_type, a_field, phn)); \ + } \ + } \ +} + +#endif /* PH_H_ */ diff --git a/deps/jemalloc/include/jemalloc/internal/private_namespace.sh b/deps/jemalloc/include/jemalloc/internal/private_namespace.sh index cd25eb306..6ef1346a3 100755 --- a/deps/jemalloc/include/jemalloc/internal/private_namespace.sh +++ b/deps/jemalloc/include/jemalloc/internal/private_namespace.sh @@ -1,5 +1,5 @@ #!/bin/sh -for symbol in `cat $1` ; do - echo "#define ${symbol} JEMALLOC_N(${symbol})" +for symbol in `cat "$@"` ; do + echo "#define ${symbol} JEMALLOC_N(${symbol})" done diff --git a/deps/jemalloc/include/jemalloc/internal/private_symbols.sh b/deps/jemalloc/include/jemalloc/internal/private_symbols.sh new file mode 100755 index 000000000..442a259fd --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/private_symbols.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# +# Generate private_symbols[_jet].awk. +# +# Usage: private_symbols.sh <sym_prefix> <sym>* +# +# <sym_prefix> is typically "" or "_". + +sym_prefix=$1 +shift + +cat <<EOF +#!/usr/bin/env awk -f + +BEGIN { + sym_prefix = "${sym_prefix}" + split("\\ +EOF + +for public_sym in "$@" ; do + cat <<EOF + ${sym_prefix}${public_sym} \\ +EOF +done + +cat <<"EOF" + ", exported_symbol_names) + # Store exported symbol names as keys in exported_symbols. + for (i in exported_symbol_names) { + exported_symbols[exported_symbol_names[i]] = 1 + } +} + +# Process 'nm -a <c_source.o>' output. +# +# Handle lines like: +# 0000000000000008 D opt_junk +# 0000000000007574 T malloc_initialized +(NF == 3 && $2 ~ /^[ABCDGRSTVW]$/ && !($3 in exported_symbols) && $3 ~ /^[A-Za-z0-9_]+$/) { + print substr($3, 1+length(sym_prefix), length($3)-length(sym_prefix)) +} + +# Process 'dumpbin /SYMBOLS <c_source.obj>' output. +# +# Handle lines like: +# 353 00008098 SECT4 notype External | opt_junk +# 3F1 00000000 SECT7 notype () External | malloc_initialized +($3 ~ /^SECT[0-9]+/ && $(NF-2) == "External" && !($NF in exported_symbols)) { + print $NF +} +EOF diff --git a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt b/deps/jemalloc/include/jemalloc/internal/private_symbols.txt deleted file mode 100644 index a90021aa6..000000000 --- a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt +++ /dev/null @@ -1,499 +0,0 @@ -a0dalloc -a0get -a0malloc -arena_aalloc -arena_alloc_junk_small -arena_bin_index -arena_bin_info -arena_bitselm_get -arena_boot -arena_choose -arena_choose_hard -arena_chunk_alloc_huge -arena_chunk_cache_maybe_insert -arena_chunk_cache_maybe_remove -arena_chunk_dalloc_huge -arena_chunk_ralloc_huge_expand -arena_chunk_ralloc_huge_shrink -arena_chunk_ralloc_huge_similar -arena_cleanup -arena_dalloc -arena_dalloc_bin -arena_dalloc_bin_junked_locked -arena_dalloc_junk_large -arena_dalloc_junk_small -arena_dalloc_large -arena_dalloc_large_junked_locked -arena_dalloc_small -arena_dss_prec_get -arena_dss_prec_set -arena_get -arena_get_hard -arena_init -arena_lg_dirty_mult_default_get -arena_lg_dirty_mult_default_set -arena_lg_dirty_mult_get -arena_lg_dirty_mult_set -arena_malloc -arena_malloc_large -arena_malloc_small -arena_mapbits_allocated_get -arena_mapbits_binind_get -arena_mapbits_decommitted_get -arena_mapbits_dirty_get -arena_mapbits_get -arena_mapbits_internal_set -arena_mapbits_large_binind_set -arena_mapbits_large_get -arena_mapbits_large_set -arena_mapbits_large_size_get -arena_mapbitsp_get -arena_mapbitsp_read -arena_mapbitsp_write -arena_mapbits_size_decode -arena_mapbits_size_encode -arena_mapbits_small_runind_get -arena_mapbits_small_set -arena_mapbits_unallocated_set -arena_mapbits_unallocated_size_get -arena_mapbits_unallocated_size_set -arena_mapbits_unzeroed_get -arena_maxrun -arena_maybe_purge -arena_metadata_allocated_add -arena_metadata_allocated_get -arena_metadata_allocated_sub -arena_migrate -arena_miscelm_get -arena_miscelm_to_pageind -arena_miscelm_to_rpages -arena_nbound -arena_new -arena_node_alloc -arena_node_dalloc -arena_palloc -arena_postfork_child -arena_postfork_parent -arena_prefork -arena_prof_accum -arena_prof_accum_impl -arena_prof_accum_locked -arena_prof_promoted -arena_prof_tctx_get -arena_prof_tctx_reset -arena_prof_tctx_set -arena_ptr_small_binind_get -arena_purge_all -arena_quarantine_junk_small -arena_ralloc -arena_ralloc_junk_large -arena_ralloc_no_move -arena_rd_to_miscelm -arena_redzone_corruption -arena_run_regind -arena_run_to_miscelm -arena_salloc -arenas_cache_bypass_cleanup -arenas_cache_cleanup -arena_sdalloc -arena_stats_merge -arena_tcache_fill_small -atomic_add_p -atomic_add_u -atomic_add_uint32 -atomic_add_uint64 -atomic_add_z -atomic_cas_p -atomic_cas_u -atomic_cas_uint32 -atomic_cas_uint64 -atomic_cas_z -atomic_sub_p -atomic_sub_u -atomic_sub_uint32 -atomic_sub_uint64 -atomic_sub_z -base_alloc -base_boot -base_postfork_child -base_postfork_parent -base_prefork -base_stats_get -bitmap_full -bitmap_get -bitmap_info_init -bitmap_info_ngroups -bitmap_init -bitmap_set -bitmap_sfu -bitmap_size -bitmap_unset -bootstrap_calloc -bootstrap_free -bootstrap_malloc -bt_init -buferror -chunk_alloc_base -chunk_alloc_cache -chunk_alloc_dss -chunk_alloc_mmap -chunk_alloc_wrapper -chunk_boot -chunk_dalloc_arena -chunk_dalloc_cache -chunk_dalloc_mmap -chunk_dalloc_wrapper -chunk_deregister -chunk_dss_boot -chunk_dss_postfork_child -chunk_dss_postfork_parent -chunk_dss_prec_get -chunk_dss_prec_set -chunk_dss_prefork -chunk_hooks_default -chunk_hooks_get -chunk_hooks_set -chunk_in_dss -chunk_lookup -chunk_npages -chunk_postfork_child -chunk_postfork_parent -chunk_prefork -chunk_purge_arena -chunk_purge_wrapper -chunk_register -chunksize -chunksize_mask -chunks_rtree -ckh_count -ckh_delete -ckh_insert -ckh_iter -ckh_new -ckh_pointer_hash -ckh_pointer_keycomp -ckh_remove -ckh_search -ckh_string_hash -ckh_string_keycomp -ctl_boot -ctl_bymib -ctl_byname -ctl_nametomib -ctl_postfork_child -ctl_postfork_parent -ctl_prefork -dss_prec_names -extent_node_achunk_get -extent_node_achunk_set -extent_node_addr_get -extent_node_addr_set -extent_node_arena_get -extent_node_arena_set -extent_node_dirty_insert -extent_node_dirty_linkage_init -extent_node_dirty_remove -extent_node_init -extent_node_prof_tctx_get -extent_node_prof_tctx_set -extent_node_size_get -extent_node_size_set -extent_node_zeroed_get -extent_node_zeroed_set -extent_tree_ad_empty -extent_tree_ad_first -extent_tree_ad_insert -extent_tree_ad_iter -extent_tree_ad_iter_recurse -extent_tree_ad_iter_start -extent_tree_ad_last -extent_tree_ad_new -extent_tree_ad_next -extent_tree_ad_nsearch -extent_tree_ad_prev -extent_tree_ad_psearch -extent_tree_ad_remove -extent_tree_ad_reverse_iter -extent_tree_ad_reverse_iter_recurse -extent_tree_ad_reverse_iter_start -extent_tree_ad_search -extent_tree_szad_empty -extent_tree_szad_first -extent_tree_szad_insert -extent_tree_szad_iter -extent_tree_szad_iter_recurse -extent_tree_szad_iter_start -extent_tree_szad_last -extent_tree_szad_new -extent_tree_szad_next -extent_tree_szad_nsearch -extent_tree_szad_prev -extent_tree_szad_psearch -extent_tree_szad_remove -extent_tree_szad_reverse_iter -extent_tree_szad_reverse_iter_recurse -extent_tree_szad_reverse_iter_start -extent_tree_szad_search -get_errno -hash -hash_fmix_32 -hash_fmix_64 -hash_get_block_32 -hash_get_block_64 -hash_rotl_32 -hash_rotl_64 -hash_x64_128 -hash_x86_128 -hash_x86_32 -huge_aalloc -huge_dalloc -huge_dalloc_junk -huge_malloc -huge_palloc -huge_prof_tctx_get -huge_prof_tctx_reset -huge_prof_tctx_set -huge_ralloc -huge_ralloc_no_move -huge_salloc -iaalloc -iallocztm -icalloc -icalloct -idalloc -idalloct -idalloctm -imalloc -imalloct -index2size -index2size_compute -index2size_lookup -index2size_tab -in_valgrind -ipalloc -ipalloct -ipallocztm -iqalloc -iralloc -iralloct -iralloct_realign -isalloc -isdalloct -isqalloc -isthreaded -ivsalloc -ixalloc -jemalloc_postfork_child -jemalloc_postfork_parent -jemalloc_prefork -large_maxclass -lg_floor -malloc_cprintf -malloc_mutex_init -malloc_mutex_lock -malloc_mutex_postfork_child -malloc_mutex_postfork_parent -malloc_mutex_prefork -malloc_mutex_unlock -malloc_printf -malloc_snprintf -malloc_strtoumax -malloc_tsd_boot0 -malloc_tsd_boot1 -malloc_tsd_cleanup_register -malloc_tsd_dalloc -malloc_tsd_malloc -malloc_tsd_no_cleanup -malloc_vcprintf -malloc_vsnprintf -malloc_write -map_bias -map_misc_offset -mb_write -mutex_boot -narenas_cache_cleanup -narenas_total_get -ncpus -nhbins -opt_abort -opt_dss -opt_junk -opt_junk_alloc -opt_junk_free -opt_lg_chunk -opt_lg_dirty_mult -opt_lg_prof_interval -opt_lg_prof_sample -opt_lg_tcache_max -opt_narenas -opt_prof -opt_prof_accum -opt_prof_active -opt_prof_final -opt_prof_gdump -opt_prof_leak -opt_prof_prefix -opt_prof_thread_active_init -opt_quarantine -opt_redzone -opt_stats_print -opt_tcache -opt_utrace -opt_xmalloc -opt_zero -p2rz -pages_commit -pages_decommit -pages_map -pages_purge -pages_trim -pages_unmap -pow2_ceil -prof_active_get -prof_active_get_unlocked -prof_active_set -prof_alloc_prep -prof_alloc_rollback -prof_backtrace -prof_boot0 -prof_boot1 -prof_boot2 -prof_dump_header -prof_dump_open -prof_free -prof_free_sampled_object -prof_gdump -prof_gdump_get -prof_gdump_get_unlocked -prof_gdump_set -prof_gdump_val -prof_idump -prof_interval -prof_lookup -prof_malloc -prof_malloc_sample_object -prof_mdump -prof_postfork_child -prof_postfork_parent -prof_prefork -prof_realloc -prof_reset -prof_sample_accum_update -prof_sample_threshold_update -prof_tctx_get -prof_tctx_reset -prof_tctx_set -prof_tdata_cleanup -prof_tdata_get -prof_tdata_init -prof_tdata_reinit -prof_thread_active_get -prof_thread_active_init_get -prof_thread_active_init_set -prof_thread_active_set -prof_thread_name_get -prof_thread_name_set -quarantine -quarantine_alloc_hook -quarantine_alloc_hook_work -quarantine_cleanup -register_zone -rtree_child_read -rtree_child_read_hard -rtree_child_tryread -rtree_delete -rtree_get -rtree_new -rtree_node_valid -rtree_set -rtree_start_level -rtree_subkey -rtree_subtree_read -rtree_subtree_read_hard -rtree_subtree_tryread -rtree_val_read -rtree_val_write -s2u -s2u_compute -s2u_lookup -sa2u -set_errno -size2index -size2index_compute -size2index_lookup -size2index_tab -stats_cactive -stats_cactive_add -stats_cactive_get -stats_cactive_sub -stats_print -tcache_alloc_easy -tcache_alloc_large -tcache_alloc_small -tcache_alloc_small_hard -tcache_arena_associate -tcache_arena_dissociate -tcache_arena_reassociate -tcache_bin_flush_large -tcache_bin_flush_small -tcache_bin_info -tcache_boot -tcache_cleanup -tcache_create -tcache_dalloc_large -tcache_dalloc_small -tcache_enabled_cleanup -tcache_enabled_get -tcache_enabled_set -tcache_event -tcache_event_hard -tcache_flush -tcache_get -tcache_get_hard -tcache_maxclass -tcaches -tcache_salloc -tcaches_create -tcaches_destroy -tcaches_flush -tcaches_get -tcache_stats_merge -thread_allocated_cleanup -thread_deallocated_cleanup -tsd_arena_get -tsd_arena_set -tsd_boot -tsd_boot0 -tsd_boot1 -tsd_booted -tsd_cleanup -tsd_cleanup_wrapper -tsd_fetch -tsd_get -tsd_wrapper_get -tsd_wrapper_set -tsd_initialized -tsd_init_check_recursion -tsd_init_finish -tsd_init_head -tsd_nominal -tsd_quarantine_get -tsd_quarantine_set -tsd_set -tsd_tcache_enabled_get -tsd_tcache_enabled_set -tsd_tcache_get -tsd_tcache_set -tsd_tls -tsd_tsd -tsd_prof_tdata_get -tsd_prof_tdata_set -tsd_thread_allocated_get -tsd_thread_allocated_set -tsd_thread_deallocated_get -tsd_thread_deallocated_set -u2rz -valgrind_freelike_block -valgrind_make_mem_defined -valgrind_make_mem_noaccess -valgrind_make_mem_undefined diff --git a/deps/jemalloc/include/jemalloc/internal/private_unnamespace.sh b/deps/jemalloc/include/jemalloc/internal/private_unnamespace.sh deleted file mode 100755 index 23fed8e80..000000000 --- a/deps/jemalloc/include/jemalloc/internal/private_unnamespace.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -for symbol in `cat $1` ; do - echo "#undef ${symbol}" -done diff --git a/deps/jemalloc/include/jemalloc/internal/prng.h b/deps/jemalloc/include/jemalloc/internal/prng.h index 216d0ef47..15cc2d18f 100644 --- a/deps/jemalloc/include/jemalloc/internal/prng.h +++ b/deps/jemalloc/include/jemalloc/internal/prng.h @@ -1,5 +1,8 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_PRNG_H +#define JEMALLOC_INTERNAL_PRNG_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bit_util.h" /* * Simple linear congruential pseudo-random number generator: @@ -18,43 +21,165 @@ * proportional to bit position. For example, the lowest bit has a cycle of 2, * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. - * - * Macro parameters: - * uint32_t r : Result. - * unsigned lg_range : (0..32], number of least significant bits to return. - * uint32_t state : Seed value. - * const uint32_t a, c : See above discussion. */ -#define prng32(r, lg_range, state, a, c) do { \ - assert((lg_range) > 0); \ - assert((lg_range) <= 32); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (32 - (lg_range)); \ -} while (false) - -/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ -#define prng64(r, lg_range, state, a, c) do { \ - assert((lg_range) > 0); \ - assert((lg_range) <= 64); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (64 - (lg_range)); \ -} while (false) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS -#endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ +/* INTERNAL DEFINITIONS -- IGNORE */ /******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) + +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_state_next_u32(uint32_t state) { + return (state * PRNG_A_32) + PRNG_C_32; +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_state_next_u64(uint64_t state) { + return (state * PRNG_A_64) + PRNG_C_64; +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_state_next_zu(size_t state) { +#if LG_SIZEOF_PTR == 2 + return (state * PRNG_A_32) + PRNG_C_32; +#elif LG_SIZEOF_PTR == 3 + return (state * PRNG_A_64) + PRNG_C_64; +#else +#error Unsupported pointer size +#endif +} -#endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ +/* BEGIN PUBLIC API */ +/******************************************************************************/ + +/* + * The prng_lg_range functions give a uniform int in the half-open range [0, + * 2**lg_range). If atomic is true, they do so safely from multiple threads. + * Multithreaded 64-bit prngs aren't supported. + */ + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, bool atomic) { + uint32_t ret, state0, state1; + + assert(lg_range > 0); + assert(lg_range <= 32); + + state0 = atomic_load_u32(state, ATOMIC_RELAXED); + + if (atomic) { + do { + state1 = prng_state_next_u32(state0); + } while (!atomic_compare_exchange_weak_u32(state, &state0, + state1, ATOMIC_RELAXED, ATOMIC_RELAXED)); + } else { + state1 = prng_state_next_u32(state0); + atomic_store_u32(state, state1, ATOMIC_RELAXED); + } + ret = state1 >> (32 - lg_range); + + return ret; +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_lg_range_u64(uint64_t *state, unsigned lg_range) { + uint64_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= 64); + + state1 = prng_state_next_u64(*state); + *state = state1; + ret = state1 >> (64 - lg_range); + + return ret; +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) { + size_t ret, state0, state1; + + assert(lg_range > 0); + assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); + + state0 = atomic_load_zu(state, ATOMIC_RELAXED); + + if (atomic) { + do { + state1 = prng_state_next_zu(state0); + } while (atomic_compare_exchange_weak_zu(state, &state0, + state1, ATOMIC_RELAXED, ATOMIC_RELAXED)); + } else { + state1 = prng_state_next_zu(state0); + atomic_store_zu(state, state1, ATOMIC_RELAXED); + } + ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); + + return ret; +} + +/* + * The prng_range functions behave like the prng_lg_range, but return a result + * in [0, range) instead of [0, 2**lg_range). + */ + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) { + uint32_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u32(pow2_ceil_u32(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_u32(state, lg_range, atomic); + } while (ret >= range); + + return ret; +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_range_u64(uint64_t *state, uint64_t range) { + uint64_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_u64(state, lg_range); + } while (ret >= range); + + return ret; +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) { + size_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_zu(state, lg_range, atomic); + } while (ret >= range); + + return ret; +} + +#endif /* JEMALLOC_INTERNAL_PRNG_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/prof.h b/deps/jemalloc/include/jemalloc/internal/prof.h deleted file mode 100644 index e5198c3e8..000000000 --- a/deps/jemalloc/include/jemalloc/internal/prof.h +++ /dev/null @@ -1,545 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct prof_bt_s prof_bt_t; -typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_tctx_s prof_tctx_t; -typedef struct prof_gctx_s prof_gctx_t; -typedef struct prof_tdata_s prof_tdata_t; - -/* Option defaults. */ -#ifdef JEMALLOC_PROF -# define PROF_PREFIX_DEFAULT "jeprof" -#else -# define PROF_PREFIX_DEFAULT "" -#endif -#define LG_PROF_SAMPLE_DEFAULT 19 -#define LG_PROF_INTERVAL_DEFAULT -1 - -/* - * Hard limit on stack backtrace depth. The version of prof_backtrace() that - * is based on __builtin_return_address() necessarily has a hard-coded number - * of backtrace frame handlers, and should be kept in sync with this setting. - */ -#define PROF_BT_MAX 128 - -/* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 - -/* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUFSIZE 65536 - -/* Size of stack-allocated buffer used by prof_printf(). */ -#define PROF_PRINTF_BUFSIZE 128 - -/* - * Number of mutexes shared among all gctx's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NCTX_LOCKS 1024 - -/* - * Number of mutexes shared among all tdata's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NTDATA_LOCKS 256 - -/* - * prof_tdata pointers close to NULL are used to encode state information that - * is used for cleaning up during thread shutdown. - */ -#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) -#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) -#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct prof_bt_s { - /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; -}; - -#ifdef JEMALLOC_PROF_LIBGCC -/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ -typedef struct { - prof_bt_t *bt; - unsigned max; -} prof_unwind_data_t; -#endif - -struct prof_cnt_s { - /* Profiling counters. */ - uint64_t curobjs; - uint64_t curbytes; - uint64_t accumobjs; - uint64_t accumbytes; -}; - -typedef enum { - prof_tctx_state_initializing, - prof_tctx_state_nominal, - prof_tctx_state_dumping, - prof_tctx_state_purgatory /* Dumper must finish destroying. */ -} prof_tctx_state_t; - -struct prof_tctx_s { - /* Thread data for thread that performed the allocation. */ - prof_tdata_t *tdata; - - /* - * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be - * defunct during teardown. - */ - uint64_t thr_uid; - uint64_t thr_discrim; - - /* Profiling counters, protected by tdata->lock. */ - prof_cnt_t cnts; - - /* Associated global context. */ - prof_gctx_t *gctx; - - /* - * UID that distinguishes multiple tctx's created by the same thread, - * but coexisting in gctx->tctxs. There are two ways that such - * coexistence can occur: - * - A dumper thread can cause a tctx to be retained in the purgatory - * state. - * - Although a single "producer" thread must create all tctx's which - * share the same thr_uid, multiple "consumers" can each concurrently - * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only - * gets called once each time cnts.cur{objs,bytes} drop to 0, but this - * threshold can be hit again before the first consumer finishes - * executing prof_tctx_destroy(). - */ - uint64_t tctx_uid; - - /* Linkage into gctx's tctxs. */ - rb_node(prof_tctx_t) tctx_link; - - /* - * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents - * sample vs destroy race. - */ - bool prepared; - - /* Current dump-related state, protected by gctx->lock. */ - prof_tctx_state_t state; - - /* - * Copy of cnts snapshotted during early dump phase, protected by - * dump_mtx. - */ - prof_cnt_t dump_cnts; -}; -typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; - -struct prof_gctx_s { - /* Protects nlimbo, cnt_summed, and tctxs. */ - malloc_mutex_t *lock; - - /* - * Number of threads that currently cause this gctx to be in a state of - * limbo due to one of: - * - Initializing this gctx. - * - Initializing per thread counters associated with this gctx. - * - Preparing to destroy this gctx. - * - Dumping a heap profile that includes this gctx. - * nlimbo must be 1 (single destroyer) in order to safely destroy the - * gctx. - */ - unsigned nlimbo; - - /* - * Tree of profile counters, one for each thread that has allocated in - * this context. - */ - prof_tctx_tree_t tctxs; - - /* Linkage for tree of contexts to be dumped. */ - rb_node(prof_gctx_t) dump_link; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* Associated backtrace. */ - prof_bt_t bt; - - /* Backtrace vector, variable size, referred to by bt. */ - void *vec[1]; -}; -typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; - -struct prof_tdata_s { - malloc_mutex_t *lock; - - /* Monotonically increasing unique thread identifier. */ - uint64_t thr_uid; - - /* - * Monotonically increasing discriminator among tdata structures - * associated with the same thr_uid. - */ - uint64_t thr_discrim; - - /* Included in heap profile dumps if non-NULL. */ - char *thread_name; - - bool attached; - bool expired; - - rb_node(prof_tdata_t) tdata_link; - - /* - * Counter used to initialize prof_tctx_t's tctx_uid. No locking is - * necessary when incrementing this field, because only one thread ever - * does so. - */ - uint64_t tctx_uid_next; - - /* - * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks - * backtraces for which it has non-zero allocation/deallocation counters - * associated with thread-specific prof_tctx_t objects. Other threads - * may write to prof_tctx_t contents when freeing associated objects. - */ - ckh_t bt2tctx; - - /* Sampling state. */ - uint64_t prng_state; - uint64_t bytes_until_sample; - - /* State used to avoid dumping while operating on prof internals. */ - bool enq; - bool enq_idump; - bool enq_gdump; - - /* - * Set to true during an early dump phase for tdata's which are - * currently being dumped. New threads' tdata's have this initialized - * to false so that they aren't accidentally included in later dump - * phases. - */ - bool dumping; - - /* - * True if profiling is active for this tdata's thread - * (thread.prof.active mallctl). - */ - bool active; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* Backtrace vector, used for calls to prof_backtrace(). */ - void *vec[PROF_BT_MAX]; -}; -typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_prof; -extern bool opt_prof_active; -extern bool opt_prof_thread_active_init; -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_final; /* Final profile dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern char opt_prof_prefix[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; - -/* Accessed via prof_active_[gs]et{_unlocked,}(). */ -extern bool prof_active; - -/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ -extern bool prof_gdump_val; - -/* - * Profile dump interval, measured in bytes allocated. Each arena triggers a - * profile dump when it reaches this threshold. The effect is that the - * interval between profile dumps averages prof_interval, though the actual - * interval between dumps will tend to be sporadic, and the interval will be a - * maximum of approximately (prof_interval * narenas). - */ -extern uint64_t prof_interval; - -/* - * Initialized as opt_lg_prof_sample, and potentially modified during profiling - * resets. - */ -extern size_t lg_prof_sample; - -void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(const void *ptr, size_t usize, - prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); -void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt); -prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); -#ifdef JEMALLOC_JET -size_t prof_tdata_count(void); -size_t prof_bt_count(void); -const prof_cnt_t *prof_cnt_all(void); -typedef int (prof_dump_open_t)(bool, const char *); -extern prof_dump_open_t *prof_dump_open; -typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *); -extern prof_dump_header_t *prof_dump_header; -#endif -void prof_idump(void); -bool prof_mdump(const char *filename); -void prof_gdump(void); -prof_tdata_t *prof_tdata_init(tsd_t *tsd); -prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsd_t *tsd, size_t lg_sample); -void prof_tdata_cleanup(tsd_t *tsd); -const char *prof_thread_name_get(void); -bool prof_active_get(void); -bool prof_active_set(bool active); -int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(void); -bool prof_thread_active_set(bool active); -bool prof_thread_active_init_get(void); -bool prof_thread_active_init_set(bool active_init); -bool prof_gdump_get(void); -bool prof_gdump_set(bool active); -void prof_boot0(void); -void prof_boot1(void); -bool prof_boot2(void); -void prof_prefork(void); -void prof_postfork_parent(void); -void prof_postfork_child(void); -void prof_sample_threshold_update(prof_tdata_t *tdata); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool prof_active_get_unlocked(void); -bool prof_gdump_get_unlocked(void); -prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); -bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, - prof_tdata_t **tdata_out); -prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, - bool update); -prof_tctx_t *prof_tctx_get(const void *ptr); -void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, - prof_tctx_t *tctx); -void prof_malloc_sample_object(const void *ptr, size_t usize, - prof_tctx_t *tctx); -void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, - size_t old_usize, prof_tctx_t *old_tctx); -void prof_free(tsd_t *tsd, const void *ptr, size_t usize); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -JEMALLOC_ALWAYS_INLINE bool -prof_active_get_unlocked(void) -{ - - /* - * Even if opt_prof is true, sampling can be temporarily disabled by - * setting prof_active to false. No locking is used when reading - * prof_active in the fast path, so there are no guarantees regarding - * how long it will take for all threads to notice state changes. - */ - return (prof_active); -} - -JEMALLOC_ALWAYS_INLINE bool -prof_gdump_get_unlocked(void) -{ - - /* - * No locking is used when reading prof_gdump_val in the fast path, so - * there are no guarantees regarding how long it will take for all - * threads to notice state changes. - */ - return (prof_gdump_val); -} - -JEMALLOC_ALWAYS_INLINE prof_tdata_t * -prof_tdata_get(tsd_t *tsd, bool create) -{ - prof_tdata_t *tdata; - - cassert(config_prof); - - tdata = tsd_prof_tdata_get(tsd); - if (create) { - if (unlikely(tdata == NULL)) { - if (tsd_nominal(tsd)) { - tdata = prof_tdata_init(tsd); - tsd_prof_tdata_set(tsd, tdata); - } - } else if (unlikely(tdata->expired)) { - tdata = prof_tdata_reinit(tsd, tdata); - tsd_prof_tdata_set(tsd, tdata); - } - assert(tdata == NULL || tdata->attached); - } - - return (tdata); -} - -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(const void *ptr) -{ - - cassert(config_prof); - assert(ptr != NULL); - - return (arena_prof_tctx_get(ptr)); -} - -JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - - arena_prof_tctx_set(ptr, usize, tctx); -} - -JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, - prof_tctx_t *old_tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - - arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx); -} - -JEMALLOC_ALWAYS_INLINE bool -prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out) -{ - prof_tdata_t *tdata; - - cassert(config_prof); - - tdata = prof_tdata_get(tsd, true); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) - tdata = NULL; - - if (tdata_out != NULL) - *tdata_out = tdata; - - if (tdata == NULL) - return (true); - - if (tdata->bytes_until_sample >= usize) { - if (update) - tdata->bytes_until_sample -= usize; - return (true); - } else { - /* Compute new sample threshold. */ - if (update) - prof_sample_threshold_update(tdata); - return (!tdata->active); - } -} - -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) -{ - prof_tctx_t *ret; - prof_tdata_t *tdata; - prof_bt_t bt; - - assert(usize == s2u(usize)); - - if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, - &tdata))) - ret = (prof_tctx_t *)(uintptr_t)1U; - else { - bt_init(&bt, tdata->vec); - prof_backtrace(&bt); - ret = prof_lookup(tsd, &bt); - } - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - assert(usize == isalloc(ptr, true)); - - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_malloc_sample_object(ptr, usize, tctx); - else - prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U); -} - -JEMALLOC_ALWAYS_INLINE void -prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, - bool prof_active, bool updated, const void *old_ptr, size_t old_usize, - prof_tctx_t *old_tctx) -{ - bool sampled, old_sampled; - - cassert(config_prof); - assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - - if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(ptr, true)); - if (prof_sample_accum_update(tsd, usize, true, NULL)) { - /* - * Don't sample. The usize passed to prof_alloc_prep() - * was larger than what actually got allocated, so a - * backtrace was captured for this allocation, even - * though its actual usize was insufficient to cross the - * sample threshold. - */ - tctx = (prof_tctx_t *)(uintptr_t)1U; - } - } - - sampled = ((uintptr_t)tctx > (uintptr_t)1U); - old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); - - if (unlikely(sampled)) - prof_malloc_sample_object(ptr, usize, tctx); - else - prof_tctx_reset(ptr, usize, old_ptr, old_tctx); - - if (unlikely(old_sampled)) - prof_free_sampled_object(tsd, old_usize, old_tctx); -} - -JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const void *ptr, size_t usize) -{ - prof_tctx_t *tctx = prof_tctx_get(ptr); - - cassert(config_prof); - assert(usize == isalloc(ptr, true)); - - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_free_sampled_object(tsd, usize, tctx); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/prof_externs.h b/deps/jemalloc/include/jemalloc/internal/prof_externs.h new file mode 100644 index 000000000..04348696f --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/prof_externs.h @@ -0,0 +1,92 @@ +#ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H +#define JEMALLOC_INTERNAL_PROF_EXTERNS_H + +#include "jemalloc/internal/mutex.h" + +extern malloc_mutex_t bt2gctx_mtx; + +extern bool opt_prof; +extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern char opt_prof_prefix[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +/* Accessed via prof_active_[gs]et{_unlocked,}(). */ +extern bool prof_active; + +/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ +extern bool prof_gdump_val; + +/* + * Profile dump interval, measured in bytes allocated. Each arena triggers a + * profile dump when it reaches this threshold. The effect is that the + * interval between profile dumps averages prof_interval, though the actual + * interval between dumps will tend to be sporadic, and the interval will be a + * maximum of approximately (prof_interval * narenas). + */ +extern uint64_t prof_interval; + +/* + * Initialized as opt_lg_prof_sample, and potentially modified during profiling + * resets. + */ +extern size_t lg_prof_sample; + +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); +void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(prof_bt_t *bt); +prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); +#ifdef JEMALLOC_JET +size_t prof_tdata_count(void); +size_t prof_bt_count(void); +#endif +typedef int (prof_dump_open_t)(bool, const char *); +extern prof_dump_open_t *JET_MUTABLE prof_dump_open; + +typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); +extern prof_dump_header_t *JET_MUTABLE prof_dump_header; +#ifdef JEMALLOC_JET +void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, + uint64_t *accumbytes); +#endif +bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); +void prof_idump(tsdn_t *tsdn); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); +void prof_reset(tsd_t *tsd, size_t lg_sample); +void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); +const char *prof_thread_name_get(tsd_t *tsd); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(tsd_t *tsd); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); +void prof_sample_threshold_update(prof_tdata_t *tdata); + +#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h b/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h new file mode 100644 index 000000000..a6efb4851 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h @@ -0,0 +1,83 @@ +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H +#define JEMALLOC_INTERNAL_PROF_INLINES_A_H + +#include "jemalloc/internal/mutex.h" + +static inline bool +prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { + cassert(config_prof); + + bool overflow; + uint64_t a0, a1; + + /* + * If the application allocates fast enough (and/or if idump is slow + * enough), extreme overflow here (a1 >= prof_interval * 2) can cause + * idump trigger coalescing. This is an intentional mechanism that + * avoids rate-limiting allocation. + */ +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); + do { + a1 = a0 + accumbytes; + assert(a1 >= a0); + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, + a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &prof_accum->mtx); + a0 = prof_accum->accumbytes; + a1 = a0 + accumbytes; + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + prof_accum->accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_accum->mtx); +#endif + return overflow; +} + +static inline void +prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { + cassert(config_prof); + + /* + * Cancel out as much of the excessive prof_accumbytes increase as + * possible without underflowing. Interval-triggered dumps occur + * slightly more often than intended as a result of incomplete + * canceling. + */ + uint64_t a0, a1; +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); + do { + a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - + usize) : 0; + } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, + a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &prof_accum->mtx); + a0 = prof_accum->accumbytes; + a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - usize) : + 0; + prof_accum->accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_accum->mtx); +#endif +} + +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) { + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. + */ + return prof_active; +} + +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h b/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h new file mode 100644 index 000000000..6ff465ad7 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h @@ -0,0 +1,206 @@ +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H +#define JEMALLOC_INTERNAL_PROF_INLINES_B_H + +#include "jemalloc/internal/sz.h" + +JEMALLOC_ALWAYS_INLINE bool +prof_gdump_get_unlocked(void) { + /* + * No locking is used when reading prof_gdump_val in the fast path, so + * there are no guarantees regarding how long it will take for all + * threads to notice state changes. + */ + return prof_gdump_val; +} + +JEMALLOC_ALWAYS_INLINE prof_tdata_t * +prof_tdata_get(tsd_t *tsd, bool create) { + prof_tdata_t *tdata; + + cassert(config_prof); + + tdata = tsd_prof_tdata_get(tsd); + if (create) { + if (unlikely(tdata == NULL)) { + if (tsd_nominal(tsd)) { + tdata = prof_tdata_init(tsd); + tsd_prof_tdata_set(tsd, tdata); + } + } else if (unlikely(tdata->expired)) { + tdata = prof_tdata_reinit(tsd, tdata); + tsd_prof_tdata_set(tsd, tdata); + } + assert(tdata == NULL || tdata->attached); + } + + return tdata; +} + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { + cassert(config_prof); + assert(ptr != NULL); + + return arena_prof_tctx_get(tsdn, ptr, alloc_ctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, + alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_set(tsdn, ptr, usize, alloc_ctx, tctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_reset(tsdn, ptr, tctx); +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, + prof_tdata_t **tdata_out) { + prof_tdata_t *tdata; + + cassert(config_prof); + + tdata = prof_tdata_get(tsd, true); + if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { + tdata = NULL; + } + + if (tdata_out != NULL) { + *tdata_out = tdata; + } + + if (unlikely(tdata == NULL)) { + return true; + } + + if (likely(tdata->bytes_until_sample >= usize)) { + if (update) { + tdata->bytes_until_sample -= usize; + } + return true; + } else { + if (tsd_reentrancy_level_get(tsd) > 0) { + return true; + } + /* Compute new sample threshold. */ + if (update) { + prof_sample_threshold_update(tdata); + } + return !tdata->active; + } +} + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { + prof_tctx_t *ret; + prof_tdata_t *tdata; + prof_bt_t bt; + + assert(usize == sz_s2u(usize)); + + if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, + &tdata))) { + ret = (prof_tctx_t *)(uintptr_t)1U; + } else { + bt_init(&bt, tdata->vec); + prof_backtrace(&bt); + ret = prof_lookup(tsd, &bt); + } + + return ret; +} + +JEMALLOC_ALWAYS_INLINE void +prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, + prof_tctx_t *tctx) { + cassert(config_prof); + assert(ptr != NULL); + assert(usize == isalloc(tsdn, ptr)); + + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { + prof_malloc_sample_object(tsdn, ptr, usize, tctx); + } else { + prof_tctx_set(tsdn, ptr, usize, alloc_ctx, + (prof_tctx_t *)(uintptr_t)1U); + } +} + +JEMALLOC_ALWAYS_INLINE void +prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, + bool prof_active, bool updated, const void *old_ptr, size_t old_usize, + prof_tctx_t *old_tctx) { + bool sampled, old_sampled, moved; + + cassert(config_prof); + assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); + + if (prof_active && !updated && ptr != NULL) { + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); + if (prof_sample_accum_update(tsd, usize, true, NULL)) { + /* + * Don't sample. The usize passed to prof_alloc_prep() + * was larger than what actually got allocated, so a + * backtrace was captured for this allocation, even + * though its actual usize was insufficient to cross the + * sample threshold. + */ + prof_alloc_rollback(tsd, tctx, true); + tctx = (prof_tctx_t *)(uintptr_t)1U; + } + } + + sampled = ((uintptr_t)tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + moved = (ptr != old_ptr); + + if (unlikely(sampled)) { + prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); + } else if (moved) { + prof_tctx_set(tsd_tsdn(tsd), ptr, usize, NULL, + (prof_tctx_t *)(uintptr_t)1U); + } else if (unlikely(old_sampled)) { + /* + * prof_tctx_set() would work for the !moved case as well, but + * prof_tctx_reset() is slightly cheaper, and the proper thing + * to do here in the presence of explicit knowledge re: moved + * state. + */ + prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx); + } else { + assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), ptr, NULL) == + (uintptr_t)1U); + } + + /* + * The prof_free_sampled_object() call must come after the + * prof_malloc_sample_object() call, because tctx and old_tctx may be + * the same, in which case reversing the call order could cause the tctx + * to be prematurely destroyed as a side effect of momentarily zeroed + * counters. + */ + if (unlikely(old_sampled)) { + prof_free_sampled_object(tsd, old_usize, old_tctx); + } +} + +JEMALLOC_ALWAYS_INLINE void +prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { + prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx); + + cassert(config_prof); + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); + + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { + prof_free_sampled_object(tsd, usize, tctx); + } +} + +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/prof_structs.h b/deps/jemalloc/include/jemalloc/internal/prof_structs.h new file mode 100644 index 000000000..0d58ae100 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/prof_structs.h @@ -0,0 +1,201 @@ +#ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H +#define JEMALLOC_INTERNAL_PROF_STRUCTS_H + +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/rb.h" + +struct prof_bt_s { + /* Backtrace, stored as len program counters. */ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + prof_bt_t *bt; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_accum_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; + uint64_t accumbytes; +#else + atomic_u64_t accumbytes; +#endif +}; + +struct prof_cnt_s { + /* Profiling counters. */ + uint64_t curobjs; + uint64_t curbytes; + uint64_t accumobjs; + uint64_t accumbytes; +}; + +typedef enum { + prof_tctx_state_initializing, + prof_tctx_state_nominal, + prof_tctx_state_dumping, + prof_tctx_state_purgatory /* Dumper must finish destroying. */ +} prof_tctx_state_t; + +struct prof_tctx_s { + /* Thread data for thread that performed the allocation. */ + prof_tdata_t *tdata; + + /* + * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be + * defunct during teardown. + */ + uint64_t thr_uid; + uint64_t thr_discrim; + + /* Profiling counters, protected by tdata->lock. */ + prof_cnt_t cnts; + + /* Associated global context. */ + prof_gctx_t *gctx; + + /* + * UID that distinguishes multiple tctx's created by the same thread, + * but coexisting in gctx->tctxs. There are two ways that such + * coexistence can occur: + * - A dumper thread can cause a tctx to be retained in the purgatory + * state. + * - Although a single "producer" thread must create all tctx's which + * share the same thr_uid, multiple "consumers" can each concurrently + * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only + * gets called once each time cnts.cur{objs,bytes} drop to 0, but this + * threshold can be hit again before the first consumer finishes + * executing prof_tctx_destroy(). + */ + uint64_t tctx_uid; + + /* Linkage into gctx's tctxs. */ + rb_node(prof_tctx_t) tctx_link; + + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + + /* Current dump-related state, protected by gctx->lock. */ + prof_tctx_state_t state; + + /* + * Copy of cnts snapshotted during early dump phase, protected by + * dump_mtx. + */ + prof_cnt_t dump_cnts; +}; +typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; + +struct prof_gctx_s { + /* Protects nlimbo, cnt_summed, and tctxs. */ + malloc_mutex_t *lock; + + /* + * Number of threads that currently cause this gctx to be in a state of + * limbo due to one of: + * - Initializing this gctx. + * - Initializing per thread counters associated with this gctx. + * - Preparing to destroy this gctx. + * - Dumping a heap profile that includes this gctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * gctx. + */ + unsigned nlimbo; + + /* + * Tree of profile counters, one for each thread that has allocated in + * this context. + */ + prof_tctx_tree_t tctxs; + + /* Linkage for tree of contexts to be dumped. */ + rb_node(prof_gctx_t) dump_link; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Associated backtrace. */ + prof_bt_t bt; + + /* Backtrace vector, variable size, referred to by bt. */ + void *vec[1]; +}; +typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; + +struct prof_tdata_s { + malloc_mutex_t *lock; + + /* Monotonically increasing unique thread identifier. */ + uint64_t thr_uid; + + /* + * Monotonically increasing discriminator among tdata structures + * associated with the same thr_uid. + */ + uint64_t thr_discrim; + + /* Included in heap profile dumps if non-NULL. */ + char *thread_name; + + bool attached; + bool expired; + + rb_node(prof_tdata_t) tdata_link; + + /* + * Counter used to initialize prof_tctx_t's tctx_uid. No locking is + * necessary when incrementing this field, because only one thread ever + * does so. + */ + uint64_t tctx_uid_next; + + /* + * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks + * backtraces for which it has non-zero allocation/deallocation counters + * associated with thread-specific prof_tctx_t objects. Other threads + * may write to prof_tctx_t contents when freeing associated objects. + */ + ckh_t bt2tctx; + + /* Sampling state. */ + uint64_t prng_state; + uint64_t bytes_until_sample; + + /* State used to avoid dumping while operating on prof internals. */ + bool enq; + bool enq_idump; + bool enq_gdump; + + /* + * Set to true during an early dump phase for tdata's which are + * currently being dumped. New threads' tdata's have this initialized + * to false so that they aren't accidentally included in later dump + * phases. + */ + bool dumping; + + /* + * True if profiling is active for this tdata's thread + * (thread.prof.active mallctl). + */ + bool active; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void *vec[PROF_BT_MAX]; +}; +typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; + +#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/prof_types.h b/deps/jemalloc/include/jemalloc/internal/prof_types.h new file mode 100644 index 000000000..1eff995ec --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/prof_types.h @@ -0,0 +1,56 @@ +#ifndef JEMALLOC_INTERNAL_PROF_TYPES_H +#define JEMALLOC_INTERNAL_PROF_TYPES_H + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_accum_s prof_accum_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_gctx_s prof_gctx_t; +typedef struct prof_tdata_s prof_tdata_t; + +/* Option defaults. */ +#ifdef JEMALLOC_PROF +# define PROF_PREFIX_DEFAULT "jeprof" +#else +# define PROF_PREFIX_DEFAULT "" +#endif +#define LG_PROF_SAMPLE_DEFAULT 19 +#define LG_PROF_INTERVAL_DEFAULT -1 + +/* + * Hard limit on stack backtrace depth. The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. + */ +#define PROF_BT_MAX 128 + +/* Initial hash table size. */ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. */ +#define PROF_DUMP_BUFSIZE 65536 + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 + +/* + * Number of mutexes shared among all gctx's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + +/* + * Number of mutexes shared among all tdata's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NTDATA_LOCKS 256 + +/* + * prof_tdata pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) +#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) +#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY + +#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/public_namespace.sh b/deps/jemalloc/include/jemalloc/internal/public_namespace.sh index 362109f71..4d415ba01 100755 --- a/deps/jemalloc/include/jemalloc/internal/public_namespace.sh +++ b/deps/jemalloc/include/jemalloc/internal/public_namespace.sh @@ -2,5 +2,5 @@ for nm in `cat $1` ; do n=`echo ${nm} |tr ':' ' ' |awk '{print $1}'` - echo "#define je_${n} JEMALLOC_N(${n})" + echo "#define je_${n} JEMALLOC_N(${n})" done diff --git a/deps/jemalloc/include/jemalloc/internal/ql.h b/deps/jemalloc/include/jemalloc/internal/ql.h index 1834bb855..802904077 100644 --- a/deps/jemalloc/include/jemalloc/internal/ql.h +++ b/deps/jemalloc/include/jemalloc/internal/ql.h @@ -1,59 +1,64 @@ +#ifndef JEMALLOC_INTERNAL_QL_H +#define JEMALLOC_INTERNAL_QL_H + +#include "jemalloc/internal/qr.h" + /* List definitions. */ -#define ql_head(a_type) \ +#define ql_head(a_type) \ struct { \ a_type *qlh_first; \ } -#define ql_head_initializer(a_head) {NULL} +#define ql_head_initializer(a_head) {NULL} -#define ql_elm(a_type) qr(a_type) +#define ql_elm(a_type) qr(a_type) /* List functions. */ -#define ql_new(a_head) do { \ +#define ql_new(a_head) do { \ (a_head)->qlh_first = NULL; \ } while (0) -#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) +#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) -#define ql_first(a_head) ((a_head)->qlh_first) +#define ql_first(a_head) ((a_head)->qlh_first) -#define ql_last(a_head, a_field) \ +#define ql_last(a_head, a_field) \ ((ql_first(a_head) != NULL) \ ? qr_prev(ql_first(a_head), a_field) : NULL) -#define ql_next(a_head, a_elm, a_field) \ +#define ql_next(a_head, a_elm, a_field) \ ((ql_last(a_head, a_field) != (a_elm)) \ ? qr_next((a_elm), a_field) : NULL) -#define ql_prev(a_head, a_elm, a_field) \ +#define ql_prev(a_head, a_elm, a_field) \ ((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \ : NULL) -#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ +#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ qr_before_insert((a_qlelm), (a_elm), a_field); \ if (ql_first(a_head) == (a_qlelm)) { \ ql_first(a_head) = (a_elm); \ } \ } while (0) -#define ql_after_insert(a_qlelm, a_elm, a_field) \ +#define ql_after_insert(a_qlelm, a_elm, a_field) \ qr_after_insert((a_qlelm), (a_elm), a_field) -#define ql_head_insert(a_head, a_elm, a_field) do { \ +#define ql_head_insert(a_head, a_elm, a_field) do { \ if (ql_first(a_head) != NULL) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = (a_elm); \ } while (0) -#define ql_tail_insert(a_head, a_elm, a_field) do { \ +#define ql_tail_insert(a_head, a_elm, a_field) do { \ if (ql_first(a_head) != NULL) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = qr_next((a_elm), a_field); \ } while (0) -#define ql_remove(a_head, a_elm, a_field) do { \ +#define ql_remove(a_head, a_elm, a_field) do { \ if (ql_first(a_head) == (a_elm)) { \ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ } \ @@ -64,18 +69,20 @@ struct { \ } \ } while (0) -#define ql_head_remove(a_head, a_type, a_field) do { \ +#define ql_head_remove(a_head, a_type, a_field) do { \ a_type *t = ql_first(a_head); \ ql_remove((a_head), t, a_field); \ } while (0) -#define ql_tail_remove(a_head, a_type, a_field) do { \ +#define ql_tail_remove(a_head, a_type, a_field) do { \ a_type *t = ql_last(a_head, a_field); \ ql_remove((a_head), t, a_field); \ } while (0) -#define ql_foreach(a_var, a_head, a_field) \ +#define ql_foreach(a_var, a_head, a_field) \ qr_foreach((a_var), ql_first(a_head), a_field) -#define ql_reverse_foreach(a_var, a_head, a_field) \ +#define ql_reverse_foreach(a_var, a_head, a_field) \ qr_reverse_foreach((a_var), ql_first(a_head), a_field) + +#endif /* JEMALLOC_INTERNAL_QL_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/qr.h b/deps/jemalloc/include/jemalloc/internal/qr.h index 0fbaec25e..1e1056b38 100644 --- a/deps/jemalloc/include/jemalloc/internal/qr.h +++ b/deps/jemalloc/include/jemalloc/internal/qr.h @@ -1,38 +1,39 @@ +#ifndef JEMALLOC_INTERNAL_QR_H +#define JEMALLOC_INTERNAL_QR_H + /* Ring definitions. */ -#define qr(a_type) \ +#define qr(a_type) \ struct { \ a_type *qre_next; \ a_type *qre_prev; \ } /* Ring functions. */ -#define qr_new(a_qr, a_field) do { \ +#define qr_new(a_qr, a_field) do { \ (a_qr)->a_field.qre_next = (a_qr); \ (a_qr)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) +#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) -#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) +#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) -#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ +#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ (a_qr)->a_field.qre_next = (a_qrelm); \ (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ (a_qrelm)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_after_insert(a_qrelm, a_qr, a_field) \ - do \ - { \ +#define qr_after_insert(a_qrelm, a_qr, a_field) do { \ (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ (a_qr)->a_field.qre_prev = (a_qrelm); \ (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ (a_qrelm)->a_field.qre_next = (a_qr); \ - } while (0) +} while (0) -#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ - void *t; \ +#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \ + a_type *t; \ (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ t = (a_qr_a)->a_field.qre_prev; \ @@ -44,10 +45,10 @@ struct { \ * qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. */ -#define qr_split(a_qr_a, a_qr_b, a_field) \ - qr_meld((a_qr_a), (a_qr_b), a_field) +#define qr_split(a_qr_a, a_qr_b, a_type, a_field) \ + qr_meld((a_qr_a), (a_qr_b), a_type, a_field) -#define qr_remove(a_qr, a_field) do { \ +#define qr_remove(a_qr, a_field) do { \ (a_qr)->a_field.qre_prev->a_field.qre_next \ = (a_qr)->a_field.qre_next; \ (a_qr)->a_field.qre_next->a_field.qre_prev \ @@ -56,14 +57,16 @@ struct { \ (a_qr)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_foreach(var, a_qr, a_field) \ +#define qr_foreach(var, a_qr, a_field) \ for ((var) = (a_qr); \ (var) != NULL; \ (var) = (((var)->a_field.qre_next != (a_qr)) \ ? (var)->a_field.qre_next : NULL)) -#define qr_reverse_foreach(var, a_qr, a_field) \ +#define qr_reverse_foreach(var, a_qr, a_field) \ for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ (var) != NULL; \ (var) = (((var) != (a_qr)) \ ? (var)->a_field.qre_prev : NULL)) + +#endif /* JEMALLOC_INTERNAL_QR_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/quarantine.h b/deps/jemalloc/include/jemalloc/internal/quarantine.h deleted file mode 100644 index ae607399f..000000000 --- a/deps/jemalloc/include/jemalloc/internal/quarantine.h +++ /dev/null @@ -1,60 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct quarantine_obj_s quarantine_obj_t; -typedef struct quarantine_s quarantine_t; - -/* Default per thread quarantine size if valgrind is enabled. */ -#define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct quarantine_obj_s { - void *ptr; - size_t usize; -}; - -struct quarantine_s { - size_t curbytes; - size_t curobjs; - size_t first; -#define LG_MAXOBJS_INIT 10 - size_t lg_maxobjs; - quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. */ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void quarantine_alloc_hook_work(tsd_t *tsd); -void quarantine(tsd_t *tsd, void *ptr); -void quarantine_cleanup(tsd_t *tsd); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void quarantine_alloc_hook(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_)) -JEMALLOC_ALWAYS_INLINE void -quarantine_alloc_hook(void) -{ - tsd_t *tsd; - - assert(config_fill && opt_quarantine); - - tsd = tsd_fetch(); - if (tsd_quarantine_get(tsd) == NULL) - quarantine_alloc_hook_work(tsd); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/deps/jemalloc/include/jemalloc/internal/rb.h b/deps/jemalloc/include/jemalloc/internal/rb.h index 2ca8e5933..47fa5ca99 100644 --- a/deps/jemalloc/include/jemalloc/internal/rb.h +++ b/deps/jemalloc/include/jemalloc/internal/rb.h @@ -20,17 +20,21 @@ */ #ifndef RB_H_ -#define RB_H_ +#define RB_H_ + +#ifndef __PGI +#define RB_COMPACT +#endif #ifdef RB_COMPACT /* Node structure. */ -#define rb_node(a_type) \ +#define rb_node(a_type) \ struct { \ a_type *rbn_left; \ a_type *rbn_right_red; \ } #else -#define rb_node(a_type) \ +#define rb_node(a_type) \ struct { \ a_type *rbn_left; \ a_type *rbn_right; \ @@ -39,111 +43,116 @@ struct { \ #endif /* Root structure. */ -#define rb_tree(a_type) \ +#define rb_tree(a_type) \ struct { \ a_type *rbt_root; \ - a_type rbt_nil; \ } /* Left accessors. */ -#define rbtn_left_get(a_type, a_field, a_node) \ +#define rbtn_left_get(a_type, a_field, a_node) \ ((a_node)->a_field.rbn_left) -#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \ +#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \ (a_node)->a_field.rbn_left = a_left; \ } while (0) #ifdef RB_COMPACT /* Right accessors. */ -#define rbtn_right_get(a_type, a_field, a_node) \ +#define rbtn_right_get(a_type, a_field, a_node) \ ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \ & ((ssize_t)-2))) -#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \ | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \ } while (0) /* Color accessors. */ -#define rbtn_red_get(a_type, a_field, a_node) \ +#define rbtn_red_get(a_type, a_field, a_node) \ ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \ & ((size_t)1))) -#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \ | ((ssize_t)a_red)); \ } while (0) -#define rbtn_red_set(a_type, a_field, a_node) do { \ +#define rbtn_red_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \ (a_node)->a_field.rbn_right_red) | ((size_t)1)); \ } while (0) -#define rbtn_black_set(a_type, a_field, a_node) do { \ +#define rbtn_black_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ } while (0) + +/* Node initializer. */ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ + /* Bookkeeping bit cannot be used by node pointer. */ \ + assert(((uintptr_t)(a_node) & 0x1) == 0); \ + rbtn_left_set(a_type, a_field, (a_node), NULL); \ + rbtn_right_set(a_type, a_field, (a_node), NULL); \ + rbtn_red_set(a_type, a_field, (a_node)); \ +} while (0) #else /* Right accessors. */ -#define rbtn_right_get(a_type, a_field, a_node) \ +#define rbtn_right_get(a_type, a_field, a_node) \ ((a_node)->a_field.rbn_right) -#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ (a_node)->a_field.rbn_right = a_right; \ } while (0) /* Color accessors. */ -#define rbtn_red_get(a_type, a_field, a_node) \ +#define rbtn_red_get(a_type, a_field, a_node) \ ((a_node)->a_field.rbn_red) -#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ (a_node)->a_field.rbn_red = (a_red); \ } while (0) -#define rbtn_red_set(a_type, a_field, a_node) do { \ +#define rbtn_red_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_red = true; \ } while (0) -#define rbtn_black_set(a_type, a_field, a_node) do { \ +#define rbtn_black_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_red = false; \ } while (0) -#endif /* Node initializer. */ -#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ - rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ - rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ + rbtn_left_set(a_type, a_field, (a_node), NULL); \ + rbtn_right_set(a_type, a_field, (a_node), NULL); \ rbtn_red_set(a_type, a_field, (a_node)); \ } while (0) +#endif /* Tree initializer. */ -#define rb_new(a_type, a_field, a_rbt) do { \ - (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \ - rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \ - rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \ +#define rb_new(a_type, a_field, a_rbt) do { \ + (a_rbt)->rbt_root = NULL; \ } while (0) /* Internal utility macros. */ -#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ +#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ + if ((r_node) != NULL) { \ for (; \ - rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\ + rbtn_left_get(a_type, a_field, (r_node)) != NULL; \ (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ } \ } \ } while (0) -#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ +#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ - for (; rbtn_right_get(a_type, a_field, (r_node)) != \ - &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \ - (r_node))) { \ + if ((r_node) != NULL) { \ + for (; rbtn_right_get(a_type, a_field, (r_node)) != NULL; \ + (r_node) = rbtn_right_get(a_type, a_field, (r_node))) { \ } \ } \ } while (0) -#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ +#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ (r_node) = rbtn_right_get(a_type, a_field, (a_node)); \ rbtn_right_set(a_type, a_field, (a_node), \ rbtn_left_get(a_type, a_field, (r_node))); \ rbtn_left_set(a_type, a_field, (r_node), (a_node)); \ } while (0) -#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ +#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ (r_node) = rbtn_left_get(a_type, a_field, (a_node)); \ rbtn_left_set(a_type, a_field, (a_node), \ rbtn_right_get(a_type, a_field, (r_node))); \ @@ -155,7 +164,7 @@ struct { \ * functions generated by an equivalently parameterized call to rb_gen(). */ -#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ +#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ a_attr void \ a_prefix##new(a_rbt_type *rbtree); \ a_attr bool \ @@ -169,11 +178,11 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node); \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node); \ a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key); \ +a_prefix##search(a_rbt_type *rbtree, const a_type *key); \ a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \ +a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key); \ a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \ +a_prefix##psearch(a_rbt_type *rbtree, const a_type *key); \ a_attr void \ a_prefix##insert(a_rbt_type *rbtree, a_type *node); \ a_attr void \ @@ -183,7 +192,10 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ a_rbt_type *, a_type *, void *), void *arg); \ a_attr a_type * \ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); \ +a_attr void \ +a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ + void *arg); /* * The rb_gen() macro generates a type-specific red-black tree implementation, @@ -254,7 +266,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * last/first. * * static ex_node_t * - * ex_search(ex_t *tree, ex_node_t *key); + * ex_search(ex_t *tree, const ex_node_t *key); * Description: Search for node that matches key. * Args: * tree: Pointer to an initialized red-black tree object. @@ -262,9 +274,9 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * Ret: Node in tree that matches key, or NULL if no match. * * static ex_node_t * - * ex_nsearch(ex_t *tree, ex_node_t *key); + * ex_nsearch(ex_t *tree, const ex_node_t *key); * static ex_node_t * - * ex_psearch(ex_t *tree, ex_node_t *key); + * ex_psearch(ex_t *tree, const ex_node_t *key); * Description: Search for node that matches key. If no match is found, * return what would be key's successor/predecessor, were * key in tree. @@ -312,44 +324,52 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * arg : Opaque pointer passed to cb(). * Ret: NULL if iteration completed, or the non-NULL callback return value * that caused termination of the iteration. + * + * static void + * ex_destroy(ex_t *tree, void (*cb)(ex_node_t *, void *), void *arg); + * Description: Iterate over the tree with post-order traversal, remove + * each node, and run the callback if non-null. This is + * used for destroying a tree without paying the cost to + * rebalance it. The tree must not be otherwise altered + * during traversal. + * Args: + * tree: Pointer to an initialized red-black tree object. + * cb : Callback function, which, if non-null, is called for each node + * during iteration. There is no way to stop iteration once it + * has begun. + * arg : Opaque pointer passed to cb(). */ -#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ +#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ a_attr void \ a_prefix##new(a_rbt_type *rbtree) { \ rb_new(a_type, a_field, rbtree); \ } \ a_attr bool \ a_prefix##empty(a_rbt_type *rbtree) { \ - return (rbtree->rbt_root == &rbtree->rbt_nil); \ + return (rbtree->rbt_root == NULL); \ } \ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##last(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ a_type *ret; \ - if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + if (rbtn_right_get(a_type, a_field, node) != NULL) { \ rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ a_field, node), ret); \ } else { \ a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ + assert(tnode != NULL); \ + ret = NULL; \ while (true) { \ int cmp = (a_cmp)(node, tnode); \ if (cmp < 0) { \ @@ -360,24 +380,21 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ } else { \ break; \ } \ - assert(tnode != &rbtree->rbt_nil); \ + assert(tnode != NULL); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ a_type *ret; \ - if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + if (rbtn_left_get(a_type, a_field, node) != NULL) { \ rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ a_field, node), ret); \ } else { \ a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ + assert(tnode != NULL); \ + ret = NULL; \ while (true) { \ int cmp = (a_cmp)(node, tnode); \ if (cmp < 0) { \ @@ -388,20 +405,17 @@ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ } else { \ break; \ } \ - assert(tnode != &rbtree->rbt_nil); \ + assert(tnode != NULL); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ int cmp; \ ret = rbtree->rbt_root; \ - while (ret != &rbtree->rbt_nil \ + while (ret != NULL \ && (cmp = (a_cmp)(key, ret)) != 0) { \ if (cmp < 0) { \ ret = rbtn_left_get(a_type, a_field, ret); \ @@ -409,17 +423,14 @@ a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ ret = rbtn_right_get(a_type, a_field, ret); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ + ret = NULL; \ + while (tnode != NULL) { \ int cmp = (a_cmp)(key, tnode); \ if (cmp < 0) { \ ret = tnode; \ @@ -431,17 +442,14 @@ a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ break; \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ + ret = NULL; \ + while (tnode != NULL) { \ int cmp = (a_cmp)(key, tnode); \ if (cmp < 0) { \ tnode = rbtn_left_get(a_type, a_field, tnode); \ @@ -453,10 +461,7 @@ a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ break; \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ + return ret; \ } \ a_attr void \ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ @@ -467,7 +472,7 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbt_node_new(a_type, a_field, rbtree, node); \ /* Wind. */ \ path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + for (pathp = path; pathp->node != NULL; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ assert(cmp != 0); \ if (cmp < 0) { \ @@ -487,7 +492,8 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_left_set(a_type, a_field, cnode, left); \ if (rbtn_red_get(a_type, a_field, left)) { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* Fix up 4-node. */ \ a_type *tnode; \ rbtn_black_set(a_type, a_field, leftleft); \ @@ -502,7 +508,8 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, cnode, right); \ if (rbtn_red_get(a_type, a_field, right)) { \ a_type *left = rbtn_left_get(a_type, a_field, cnode); \ - if (rbtn_red_get(a_type, a_field, left)) { \ + if (left != NULL && rbtn_red_get(a_type, a_field, \ + left)) { \ /* Split 4-node. */ \ rbtn_black_set(a_type, a_field, left); \ rbtn_black_set(a_type, a_field, right); \ @@ -535,7 +542,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Wind. */ \ nodep = NULL; /* Silence compiler warning. */ \ path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + for (pathp = path; pathp->node != NULL; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ if (cmp < 0) { \ pathp[1].node = rbtn_left_get(a_type, a_field, \ @@ -547,8 +554,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Find node's successor, in preparation for swap. */ \ pathp->cmp = 1; \ nodep = pathp; \ - for (pathp++; pathp->node != &rbtree->rbt_nil; \ - pathp++) { \ + for (pathp++; pathp->node != NULL; pathp++) { \ pathp->cmp = -1; \ pathp[1].node = rbtn_left_get(a_type, a_field, \ pathp->node); \ @@ -590,7 +596,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } else { \ a_type *left = rbtn_left_get(a_type, a_field, node); \ - if (left != &rbtree->rbt_nil) { \ + if (left != NULL) { \ /* node has no successor, but it has a left child. */\ /* Splice node out, without losing the left child. */\ assert(!rbtn_red_get(a_type, a_field, node)); \ @@ -610,33 +616,32 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } else if (pathp == path) { \ /* The tree only contained one node. */ \ - rbtree->rbt_root = &rbtree->rbt_nil; \ + rbtree->rbt_root = NULL; \ return; \ } \ } \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ /* Prune red node, which requires no fixup. */ \ assert(pathp[-1].cmp < 0); \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - &rbtree->rbt_nil); \ + rbtn_left_set(a_type, a_field, pathp[-1].node, NULL); \ return; \ } \ /* The node to be pruned is black, so unwind until balance is */\ /* restored. */\ - pathp->node = &rbtree->rbt_nil; \ + pathp->node = NULL; \ for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ assert(pathp->cmp != 0); \ if (pathp->cmp < 0) { \ rbtn_left_set(a_type, a_field, pathp->node, \ pathp[1].node); \ - assert(!rbtn_red_get(a_type, a_field, pathp[1].node)); \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *right = rbtn_right_get(a_type, a_field, \ pathp->node); \ a_type *rightleft = rbtn_left_get(a_type, a_field, \ right); \ a_type *tnode; \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ + if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ + rightleft)) { \ /* In the following diagrams, ||, //, and \\ */\ /* indicate the path to the removed node. */\ /* */\ @@ -679,7 +684,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ pathp->node); \ a_type *rightleft = rbtn_left_get(a_type, a_field, \ right); \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ + if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ + rightleft)) { \ /* || */\ /* pathp(b) */\ /* // \ */\ @@ -733,7 +739,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ left); \ a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ leftright); \ - if (rbtn_red_get(a_type, a_field, leftrightleft)) { \ + if (leftrightleft != NULL && rbtn_red_get(a_type, \ + a_field, leftrightleft)) { \ /* || */\ /* pathp(b) */\ /* / \\ */\ @@ -759,7 +766,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* (b) */\ /* / */\ /* (b) */\ - assert(leftright != &rbtree->rbt_nil); \ + assert(leftright != NULL); \ rbtn_red_set(a_type, a_field, leftright); \ rbtn_rotate_right(a_type, a_field, pathp->node, \ tnode); \ @@ -782,7 +789,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* || */\ /* pathp(r) */\ /* / \\ */\ @@ -820,7 +828,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } else { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* || */\ /* pathp(b) */\ /* / \\ */\ @@ -866,17 +875,17 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ a_attr a_type * \ a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ + if (node == NULL) { \ + return NULL; \ } else { \ a_type *ret; \ if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ - a_field, node), cb, arg)) != &rbtree->rbt_nil \ - || (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + a_field, node), cb, arg)) != NULL || (ret = cb(rbtree, node, \ + arg)) != NULL) { \ + return ret; \ } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ + return a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg); \ } \ } \ a_attr a_type * \ @@ -886,22 +895,22 @@ a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ if (cmp < 0) { \ a_type *ret; \ if ((ret = a_prefix##iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + rbtn_left_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ + return ret; \ } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ + return a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg); \ } else if (cmp > 0) { \ - return (a_prefix##iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##iter_start(rbtree, start, \ + rbtn_right_get(a_type, a_field, node), cb, arg); \ } else { \ a_type *ret; \ if ((ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ + return a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg); \ } \ } \ a_attr a_type * \ @@ -914,25 +923,22 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ } else { \ ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ + if (node == NULL) { \ + return NULL; \ } else { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ + return ret; \ } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg); \ } \ } \ a_attr a_type * \ @@ -943,22 +949,22 @@ a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ if (cmp > 0) { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ + return ret; \ } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg); \ } else if (cmp < 0) { \ - return (a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##reverse_iter_start(rbtree, start, \ + rbtn_left_get(a_type, a_field, node), cb, arg); \ } else { \ a_type *ret; \ if ((ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg); \ } \ } \ a_attr a_type * \ @@ -972,10 +978,29 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ cb, arg); \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ + return ret; \ +} \ +a_attr void \ +a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)( \ + a_type *, void *), void *arg) { \ + if (node == NULL) { \ + return; \ } \ - return (ret); \ + a_prefix##destroy_recurse(rbtree, rbtn_left_get(a_type, a_field, \ + node), cb, arg); \ + rbtn_left_set(a_type, a_field, (node), NULL); \ + a_prefix##destroy_recurse(rbtree, rbtn_right_get(a_type, a_field, \ + node), cb, arg); \ + rbtn_right_set(a_type, a_field, (node), NULL); \ + if (cb) { \ + cb(node, arg); \ + } \ +} \ +a_attr void \ +a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ + void *arg) { \ + a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \ + rbtree->rbt_root = NULL; \ } #endif /* RB_H_ */ diff --git a/deps/jemalloc/include/jemalloc/internal/rtree.h b/deps/jemalloc/include/jemalloc/internal/rtree.h index 28ae9d1dd..b59d33a80 100644 --- a/deps/jemalloc/include/jemalloc/internal/rtree.h +++ b/deps/jemalloc/include/jemalloc/internal/rtree.h @@ -1,74 +1,72 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_H +#define JEMALLOC_INTERNAL_RTREE_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree_tsd.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/tsd.h" + /* * This radix tree implementation is tailored to the singular purpose of - * associating metadata with chunks that are currently owned by jemalloc. + * associating metadata with extents that are currently owned by jemalloc. * ******************************************************************************* */ -#ifdef JEMALLOC_H_TYPES - -typedef struct rtree_node_elm_s rtree_node_elm_t; -typedef struct rtree_level_s rtree_level_t; -typedef struct rtree_s rtree_t; -/* - * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the - * machine address width. - */ -#define LG_RTREE_BITS_PER_LEVEL 4 -#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL) -#define RTREE_HEIGHT_MAX \ - ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) - -/* Used for two-stage lock-free node initialization. */ -#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) - -/* - * The node allocation callback function's argument is the number of contiguous - * rtree_node_elm_t structures to allocate, and the resulting memory must be - * zeroed. - */ -typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t); -typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *); +/* Number of high insignificant bits. */ +#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR) +/* Number of low insigificant bits. */ +#define RTREE_NLIB LG_PAGE +/* Number of significant bits. */ +#define RTREE_NSB (LG_VADDR - RTREE_NLIB) +/* Number of levels in radix tree. */ +#if RTREE_NSB <= 10 +# define RTREE_HEIGHT 1 +#elif RTREE_NSB <= 36 +# define RTREE_HEIGHT 2 +#elif RTREE_NSB <= 52 +# define RTREE_HEIGHT 3 +#else +# error Unsupported number of significant virtual address bits +#endif +/* Use compact leaf representation if virtual address encoding allows. */ +#if RTREE_NHIB >= LG_CEIL_NSIZES +# define RTREE_LEAF_COMPACT +#endif -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +/* Needed for initialization only. */ +#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) +typedef struct rtree_node_elm_s rtree_node_elm_t; struct rtree_node_elm_s { - union { - void *pun; - rtree_node_elm_t *child; - extent_node_t *val; - }; + atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ }; -struct rtree_level_s { +struct rtree_leaf_elm_s { +#ifdef RTREE_LEAF_COMPACT /* - * A non-NULL subtree points to a subtree rooted along the hypothetical - * path to the leaf node corresponding to key 0. Depending on what keys - * have been used to store to the tree, an arbitrary combination of - * subtree pointers may remain NULL. - * - * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. - * This results in a 3-level tree, and the leftmost leaf can be directly - * accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding - * 0x00000000) can be accessed via subtrees[1], and the remainder of the - * tree can be accessed via subtrees[0]. + * Single pointer-width field containing all three leaf element fields. + * For example, on a 64-bit x64 system with 48 significant virtual + * memory address bits, the index, extent, and slab fields are packed as + * such: * - * levels[0] : [<unused> | 0x0001******** | 0x0002******** | ...] + * x: index + * e: extent + * b: slab * - * levels[1] : [<unused> | 0x00000001**** | 0x00000002**** | ... ] - * - * levels[2] : [val(0x000000000000) | val(0x000000000001) | ...] - * - * This has practical implications on x64, which currently uses only the - * lower 47 bits of virtual address space in userland, thus leaving - * subtrees[0] unused and avoiding a level of tree traversal. + * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b */ - union { - void *subtree_pun; - rtree_node_elm_t *subtree; - }; + atomic_p_t le_bits; +#else + atomic_p_t le_extent; /* (extent_t *) */ + atomic_u_t le_szind; /* (szind_t) */ + atomic_b_t le_slab; /* (bool) */ +#endif +}; + +typedef struct rtree_level_s rtree_level_t; +struct rtree_level_s { /* Number of key bits distinguished by this level. */ unsigned bits; /* @@ -78,217 +76,417 @@ struct rtree_level_s { unsigned cumbits; }; +typedef struct rtree_s rtree_t; struct rtree_s { - rtree_node_alloc_t *alloc; - rtree_node_dalloc_t *dalloc; - unsigned height; - /* - * Precomputed table used to convert from the number of leading 0 key - * bits to which subtree level to start at. - */ - unsigned start_level[RTREE_HEIGHT_MAX]; - rtree_level_t levels[RTREE_HEIGHT_MAX]; + malloc_mutex_t init_lock; + /* Number of elements based on rtree_levels[0].bits. */ +#if RTREE_HEIGHT > 1 + rtree_node_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; +#else + rtree_leaf_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; +#endif }; -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, - rtree_node_dalloc_t *dalloc); -void rtree_delete(rtree_t *rtree); -rtree_node_elm_t *rtree_subtree_read_hard(rtree_t *rtree, - unsigned level); -rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree, - rtree_node_elm_t *elm, unsigned level); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -unsigned rtree_start_level(rtree_t *rtree, uintptr_t key); -uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); - -bool rtree_node_valid(rtree_node_elm_t *node); -rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm); -rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, - unsigned level); -extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, - bool dependent); -void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, - const extent_node_t *val); -rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level); -rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level); - -extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent); -bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); +/* + * Split the bits into one to three partitions depending on number of + * significant bits. It the number of bits does not divide evenly into the + * number of levels, place one remainder bit per level starting at the leaf + * level. + */ +static const rtree_level_t rtree_levels[] = { +#if RTREE_HEIGHT == 1 + {RTREE_NSB, RTREE_NHIB + RTREE_NSB} +#elif RTREE_HEIGHT == 2 + {RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2}, + {RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB} +#elif RTREE_HEIGHT == 3 + {RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3}, + {RTREE_NSB/3 + RTREE_NSB%3/2, + RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2}, + {RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB} +#else +# error Unsupported rtree height #endif +}; + +bool rtree_new(rtree_t *rtree, bool zeroed); + +typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); +extern rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc; -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) -JEMALLOC_INLINE unsigned -rtree_start_level(rtree_t *rtree, uintptr_t key) -{ - unsigned start_level; +typedef rtree_leaf_elm_t *(rtree_leaf_alloc_t)(tsdn_t *, rtree_t *, size_t); +extern rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc; - if (unlikely(key == 0)) - return (rtree->height - 1); +typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_node_elm_t *); +extern rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc; - start_level = rtree->start_level[lg_floor(key) >> - LG_RTREE_BITS_PER_LEVEL]; - assert(start_level < rtree->height); - return (start_level); +typedef void (rtree_leaf_dalloc_t)(tsdn_t *, rtree_t *, rtree_leaf_elm_t *); +extern rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc; +#ifdef JEMALLOC_JET +void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); +#endif +rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); + +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_leafkey(uintptr_t key) { + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); + unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - + rtree_levels[RTREE_HEIGHT-1].bits); + unsigned maskbits = ptrbits - cumbits; + uintptr_t mask = ~((ZU(1) << maskbits) - 1); + return (key & mask); } -JEMALLOC_INLINE uintptr_t -rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) -{ +JEMALLOC_ALWAYS_INLINE size_t +rtree_cache_direct_map(uintptr_t key) { + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); + unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - + rtree_levels[RTREE_HEIGHT-1].bits); + unsigned maskbits = ptrbits - cumbits; + return (size_t)((key >> maskbits) & (RTREE_CTX_NCACHE - 1)); +} - return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - - rtree->levels[level].cumbits)) & ((ZU(1) << - rtree->levels[level].bits) - 1)); +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_subkey(uintptr_t key, unsigned level) { + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); + unsigned cumbits = rtree_levels[level].cumbits; + unsigned shiftbits = ptrbits - cumbits; + unsigned maskbits = rtree_levels[level].bits; + uintptr_t mask = (ZU(1) << maskbits) - 1; + return ((key >> shiftbits) & mask); } -JEMALLOC_INLINE bool -rtree_node_valid(rtree_node_elm_t *node) -{ +/* + * Atomic getters. + * + * dependent: Reading a value on behalf of a pointer to a valid allocation + * is guaranteed to be a clean read even without synchronization, + * because the rtree update became visible in memory before the + * pointer came into existence. + * !dependent: An arbitrary read, e.g. on behalf of ivsalloc(), may not be + * dependent on a previous rtree write, which means a stale read + * could result if synchronization were omitted here. + */ +# ifdef RTREE_LEAF_COMPACT +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool dependent) { + return (uintptr_t)atomic_load_p(&elm->le_bits, dependent + ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); +} - return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING); +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_leaf_elm_bits_extent_get(uintptr_t bits) { +# ifdef __aarch64__ + /* + * aarch64 doesn't sign extend the highest virtual address bit to set + * the higher ones. Instead, the high bits gets zeroed. + */ + uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; + /* Mask off the slab bit. */ + uintptr_t low_bit_mask = ~(uintptr_t)1; + uintptr_t mask = high_bit_mask & low_bit_mask; + return (extent_t *)(bits & mask); +# else + /* Restore sign-extended high bits, mask slab bit. */ + return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> + RTREE_NHIB) & ~((uintptr_t)0x1)); +# endif } -JEMALLOC_INLINE rtree_node_elm_t * -rtree_child_tryread(rtree_node_elm_t *elm) -{ - rtree_node_elm_t *child; +JEMALLOC_ALWAYS_INLINE szind_t +rtree_leaf_elm_bits_szind_get(uintptr_t bits) { + return (szind_t)(bits >> LG_VADDR); +} - /* Double-checked read (first read may be stale. */ - child = elm->child; - if (!rtree_node_valid(child)) - child = atomic_read_p(&elm->pun); - return (child); +JEMALLOC_ALWAYS_INLINE bool +rtree_leaf_elm_bits_slab_get(uintptr_t bits) { + return (bool)(bits & (uintptr_t)0x1); } -JEMALLOC_INLINE rtree_node_elm_t * -rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) -{ - rtree_node_elm_t *child; +# endif + +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_leaf_elm_extent_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_bits_extent_get(bits); +#else + extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent + ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); + return extent; +#endif +} - child = rtree_child_tryread(elm); - if (unlikely(!rtree_node_valid(child))) - child = rtree_child_read_hard(rtree, elm, level); - return (child); +JEMALLOC_ALWAYS_INLINE szind_t +rtree_leaf_elm_szind_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_bits_szind_get(bits); +#else + return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED + : ATOMIC_ACQUIRE); +#endif } -JEMALLOC_INLINE extent_node_t * -rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent) -{ - - if (dependent) { - /* - * Reading a val on behalf of a pointer to a valid allocation is - * guaranteed to be a clean read even without synchronization, - * because the rtree update became visible in memory before the - * pointer came into existence. - */ - return (elm->val); - } else { - /* - * An arbitrary read, e.g. on behalf of ivsalloc(), may not be - * dependent on a previous rtree write, which means a stale read - * could result if synchronization were omitted here. - */ - return (atomic_read_p(&elm->pun)); - } +JEMALLOC_ALWAYS_INLINE bool +rtree_leaf_elm_slab_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_bits_slab_get(bits); +#else + return atomic_load_b(&elm->le_slab, dependent ? ATOMIC_RELAXED : + ATOMIC_ACQUIRE); +#endif } -JEMALLOC_INLINE void -rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) -{ +static inline void +rtree_leaf_elm_extent_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, extent_t *extent) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); + uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << + LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) + | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else + atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE); +#endif +} - atomic_write_p(&elm->pun, val); +static inline void +rtree_leaf_elm_szind_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, szind_t szind) { + assert(szind <= NSIZES); + +#ifdef RTREE_LEAF_COMPACT + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, + true); + uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | + ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + (((uintptr_t)0x1 << LG_VADDR) - 1)) | + ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else + atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE); +#endif } -JEMALLOC_INLINE rtree_node_elm_t * -rtree_subtree_tryread(rtree_t *rtree, unsigned level) -{ - rtree_node_elm_t *subtree; +static inline void +rtree_leaf_elm_slab_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, bool slab) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, + true); + uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << + LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab); + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else + atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE); +#endif +} - /* Double-checked read (first read may be stale. */ - subtree = rtree->levels[level].subtree; - if (!rtree_node_valid(subtree)) - subtree = atomic_read_p(&rtree->levels[level].subtree_pun); - return (subtree); +static inline void +rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + extent_t *extent, szind_t szind, bool slab) { +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | + ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | + ((uintptr_t)slab); + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else + rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); + rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); + /* + * Write extent last, since the element is atomically considered valid + * as soon as the extent field is non-NULL. + */ + rtree_leaf_elm_extent_write(tsdn, rtree, elm, extent); +#endif } -JEMALLOC_INLINE rtree_node_elm_t * -rtree_subtree_read(rtree_t *rtree, unsigned level) -{ - rtree_node_elm_t *subtree; +static inline void +rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, szind_t szind, bool slab) { + assert(!slab || szind < NBINS); - subtree = rtree_subtree_tryread(rtree, level); - if (unlikely(!rtree_node_valid(subtree))) - subtree = rtree_subtree_read_hard(rtree, level); - return (subtree); + /* + * The caller implicitly assures that it is the only writer to the szind + * and slab fields, and that the extent field cannot currently change. + */ + rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); + rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); } -JEMALLOC_INLINE extent_node_t * -rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) -{ - uintptr_t subkey; - unsigned i, start_level; - rtree_node_elm_t *node, *child; - - start_level = rtree_start_level(rtree, key); - - for (i = start_level, node = rtree_subtree_tryread(rtree, start_level); - /**/; i++, node = child) { - if (!dependent && unlikely(!rtree_node_valid(node))) - return (NULL); - subkey = rtree_subkey(rtree, key, i); - if (i == rtree->height - 1) { - /* - * node is a leaf, so it contains values rather than - * child pointers. - */ - return (rtree_val_read(rtree, &node[subkey], - dependent)); - } - assert(i < rtree->height - 1); - child = rtree_child_tryread(&node[subkey]); +JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * +rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, bool init_missing) { + assert(key != 0); + assert(!dependent || !init_missing); + + size_t slot = rtree_cache_direct_map(key); + uintptr_t leafkey = rtree_leafkey(key); + assert(leafkey != RTREE_LEAFKEY_INVALID); + + /* Fast path: L1 direct mapped cache. */ + if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { + rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; + assert(leaf != NULL); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + return &leaf[subkey]; + } + /* + * Search the L2 LRU cache. On hit, swap the matching element into the + * slot in L1 cache, and move the position in L2 up by 1. + */ +#define RTREE_CACHE_CHECK_L2(i) do { \ + if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) { \ + rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf; \ + assert(leaf != NULL); \ + if (i > 0) { \ + /* Bubble up by one. */ \ + rtree_ctx->l2_cache[i].leafkey = \ + rtree_ctx->l2_cache[i - 1].leafkey; \ + rtree_ctx->l2_cache[i].leaf = \ + rtree_ctx->l2_cache[i - 1].leaf; \ + rtree_ctx->l2_cache[i - 1].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[i - 1].leaf = \ + rtree_ctx->cache[slot].leaf; \ + } else { \ + rtree_ctx->l2_cache[0].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[0].leaf = \ + rtree_ctx->cache[slot].leaf; \ + } \ + rtree_ctx->cache[slot].leafkey = leafkey; \ + rtree_ctx->cache[slot].leaf = leaf; \ + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); \ + return &leaf[subkey]; \ + } \ +} while (0) + /* Check the first cache entry. */ + RTREE_CACHE_CHECK_L2(0); + /* Search the remaining cache elements. */ + for (unsigned i = 1; i < RTREE_CTX_NCACHE_L2; i++) { + RTREE_CACHE_CHECK_L2(i); } - not_reached(); +#undef RTREE_CACHE_CHECK_L2 + + return rtree_leaf_elm_lookup_hard(tsdn, rtree, rtree_ctx, key, + dependent, init_missing); } -JEMALLOC_INLINE bool -rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) -{ - uintptr_t subkey; - unsigned i, start_level; - rtree_node_elm_t *node, *child; - - start_level = rtree_start_level(rtree, key); - - node = rtree_subtree_read(rtree, start_level); - if (node == NULL) - return (true); - for (i = start_level; /**/; i++, node = child) { - subkey = rtree_subkey(rtree, key, i); - if (i == rtree->height - 1) { - /* - * node is a leaf, so it contains values rather than - * child pointers. - */ - rtree_val_write(rtree, &node[subkey], val); - return (false); - } - assert(i + 1 < rtree->height); - child = rtree_child_read(rtree, &node[subkey], i); - if (child == NULL) - return (true); +static inline bool +rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, + extent_t *extent, szind_t szind, bool slab) { + /* Use rtree_clear() to set the extent to NULL. */ + assert(extent != NULL); + + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, false, true); + if (elm == NULL) { + return true; } - not_reached(); + + assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) == NULL); + rtree_leaf_elm_write(tsdn, rtree, elm, extent, szind, slab); + + return false; } + +JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * +rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, + bool dependent) { + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, dependent, false); + if (!dependent && elm == NULL) { + return NULL; + } + assert(elm != NULL); + return elm; +} + +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return NULL; + } + return rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); +} + +JEMALLOC_ALWAYS_INLINE szind_t +rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return NSIZES; + } + return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); +} + +/* + * rtree_slab_read() is intentionally omitted because slab is always read in + * conjunction with szind, which makes rtree_szind_slab_read() a better choice. + */ + +JEMALLOC_ALWAYS_INLINE bool +rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, extent_t **r_extent, szind_t *r_szind) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return true; + } + *r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); + *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return true; + } +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + *r_szind = rtree_leaf_elm_bits_szind_get(bits); + *r_slab = rtree_leaf_elm_bits_slab_get(bits); +#else + *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); + *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); #endif + return false; +} + +static inline void +rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, szind_t szind, bool slab) { + assert(!slab || szind < NBINS); + + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); + rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab); +} + +static inline void +rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); + assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) != + NULL); + rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false); +} -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_RTREE_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h b/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h new file mode 100644 index 000000000..93a75173a --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h @@ -0,0 +1,50 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_CTX_H +#define JEMALLOC_INTERNAL_RTREE_CTX_H + +/* + * Number of leafkey/leaf pairs to cache in L1 and L2 level respectively. Each + * entry supports an entire leaf, so the cache hit rate is typically high even + * with a small number of entries. In rare cases extent activity will straddle + * the boundary between two leaf nodes. Furthermore, an arena may use a + * combination of dss and mmap. Note that as memory usage grows past the amount + * that this cache can directly cover, the cache will become less effective if + * locality of reference is low, but the consequence is merely cache misses + * while traversing the tree nodes. + * + * The L1 direct mapped cache offers consistent and low cost on cache hit. + * However collision could affect hit rate negatively. This is resolved by + * combining with a L2 LRU cache, which requires linear search and re-ordering + * on access but suffers no collision. Note that, the cache will itself suffer + * cache misses if made overly large, plus the cost of linear search in the LRU + * cache. + */ +#define RTREE_CTX_LG_NCACHE 4 +#define RTREE_CTX_NCACHE (1 << RTREE_CTX_LG_NCACHE) +#define RTREE_CTX_NCACHE_L2 8 + +/* + * Zero initializer required for tsd initialization only. Proper initialization + * done via rtree_ctx_data_init(). + */ +#define RTREE_CTX_ZERO_INITIALIZER {{{0}}, {{0}}} + + +typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; + +typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; +struct rtree_ctx_cache_elm_s { + uintptr_t leafkey; + rtree_leaf_elm_t *leaf; +}; + +typedef struct rtree_ctx_s rtree_ctx_t; +struct rtree_ctx_s { + /* Direct mapped cache. */ + rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; + /* L2 LRU cache. */ + rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; +}; + +void rtree_ctx_data_init(rtree_ctx_t *ctx); + +#endif /* JEMALLOC_INTERNAL_RTREE_CTX_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/size_classes.sh b/deps/jemalloc/include/jemalloc/internal/size_classes.sh index fc82036d3..998994d09 100755 --- a/deps/jemalloc/include/jemalloc/internal/size_classes.sh +++ b/deps/jemalloc/include/jemalloc/internal/size_classes.sh @@ -40,6 +40,54 @@ lg() { done } +lg_ceil() { + y=$1 + lg ${y}; lg_floor=${lg_result} + pow2 ${lg_floor}; pow2_floor=${pow2_result} + if [ ${pow2_floor} -lt ${y} ] ; then + lg_ceil_result=$((${lg_floor} + 1)) + else + lg_ceil_result=${lg_floor} + fi +} + +reg_size_compute() { + lg_grp=$1 + lg_delta=$2 + ndelta=$3 + + pow2 ${lg_grp}; grp=${pow2_result} + pow2 ${lg_delta}; delta=${pow2_result} + reg_size=$((${grp} + ${delta}*${ndelta})) +} + +slab_size() { + lg_p=$1 + lg_grp=$2 + lg_delta=$3 + ndelta=$4 + + pow2 ${lg_p}; p=${pow2_result} + reg_size_compute ${lg_grp} ${lg_delta} ${ndelta} + + # Compute smallest slab size that is an integer multiple of reg_size. + try_slab_size=${p} + try_nregs=$((${try_slab_size} / ${reg_size})) + perfect=0 + while [ ${perfect} -eq 0 ] ; do + perfect_slab_size=${try_slab_size} + perfect_nregs=${try_nregs} + + try_slab_size=$((${try_slab_size} + ${p})) + try_nregs=$((${try_slab_size} / ${reg_size})) + if [ ${perfect_slab_size} -eq $((${perfect_nregs} * ${reg_size})) ] ; then + perfect=1 + fi + done + + slab_size_pgs=$((${perfect_slab_size} / ${p})) +} + size_class() { index=$1 lg_grp=$2 @@ -48,6 +96,21 @@ size_class() { lg_p=$5 lg_kmax=$6 + if [ ${lg_delta} -ge ${lg_p} ] ; then + psz="yes" + else + pow2 ${lg_p}; p=${pow2_result} + pow2 ${lg_grp}; grp=${pow2_result} + pow2 ${lg_delta}; delta=${pow2_result} + sz=$((${grp} + ${delta} * ${ndelta})) + npgs=$((${sz} / ${p})) + if [ ${sz} -eq $((${npgs} * ${p})) ] ; then + psz="yes" + else + psz="no" + fi + fi + lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta} if [ ${pow2_result} -lt ${ndelta} ] ; then rem="yes" @@ -65,8 +128,10 @@ size_class() { if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then bin="yes" + slab_size ${lg_p} ${lg_grp} ${lg_delta} ${ndelta}; pgs=${slab_size_pgs} else bin="no" + pgs=0 fi if [ ${lg_size} -lt ${lg_kmax} \ -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then @@ -74,14 +139,16 @@ size_class() { else lg_delta_lookup="no" fi - printf ' SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup} + printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %3d, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${pgs} ${lg_delta_lookup} # Defined upon return: - # - lg_delta_lookup (${lg_delta} or "no") + # - psz ("yes" or "no") # - bin ("yes" or "no") + # - pgs + # - lg_delta_lookup (${lg_delta} or "no") } sep_line() { - echo " \\" + echo " \\" } size_classes() { @@ -94,13 +161,14 @@ size_classes() { pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result} pow2 ${lg_g}; g=${pow2_result} - echo "#define SIZE_CLASSES \\" - echo " /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\" + echo "#define SIZE_CLASSES \\" + echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup */ \\" ntbins=0 nlbins=0 lg_tiny_maxclass='"NA"' nbins=0 + npsizes=0 # Tiny size classes. ndelta=0 @@ -112,6 +180,9 @@ size_classes() { if [ ${lg_delta_lookup} != "no" ] ; then nlbins=$((${index} + 1)) fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) fi @@ -133,19 +204,25 @@ size_classes() { index=$((${index} + 1)) lg_grp=$((${lg_grp} + 1)) lg_delta=$((${lg_delta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi fi while [ ${ndelta} -lt ${g} ] ; do size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi done # All remaining groups. lg_grp=$((${lg_grp} + ${lg_g})) - while [ ${lg_grp} -lt ${ptr_bits} ] ; do + while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do sep_line ndelta=1 - if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then + if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then ndelta_limit=$((${g} - 1)) else ndelta_limit=${g} @@ -157,6 +234,9 @@ size_classes() { # Final written value is correct: lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) # Final written value is correct: @@ -168,7 +248,7 @@ size_classes() { fi fi # Final written value is correct: - huge_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + large_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) done @@ -177,51 +257,61 @@ size_classes() { done echo nsizes=${index} + lg_ceil ${nsizes}; lg_ceil_nsizes=${lg_ceil_result} # Defined upon completion: # - ntbins # - nlbins # - nbins # - nsizes + # - lg_ceil_nsizes + # - npsizes # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass # - lg_large_minclass - # - huge_maxclass + # - large_maxclass } cat <<EOF +#ifndef JEMALLOC_INTERNAL_SIZE_CLASSES_H +#define JEMALLOC_INTERNAL_SIZE_CLASSES_H + /* This file was automatically generated by size_classes.sh. */ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES + +#include "jemalloc/internal/jemalloc_internal_types.h" /* - * This header requires LG_SIZEOF_PTR, LG_TINY_MIN, LG_QUANTUM, and LG_PAGE to - * be defined prior to inclusion, and it in turn defines: + * This header file defines: * * LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling. - * SIZE_CLASSES: Complete table of - * SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) - * tuples. + * LG_TINY_MIN: Lg of minimum size class to support. + * SIZE_CLASSES: Complete table of SC(index, lg_grp, lg_delta, ndelta, psz, + * bin, pgs, lg_delta_lookup) tuples. * index: Size class index. * lg_grp: Lg group base size (no deltas added). * lg_delta: Lg delta to previous size class. * ndelta: Delta multiplier. size == 1<<lg_grp + ndelta<<lg_delta + * psz: 'yes' if a multiple of the page size, 'no' otherwise. * bin: 'yes' if a small bin size class, 'no' otherwise. + * pgs: Slab page count if a small bin size class, 0 otherwise. * lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no' * otherwise. * NTBINS: Number of tiny bins. * NLBINS: Number of bins supported by the lookup table. * NBINS: Number of small size class bins. * NSIZES: Number of size classes. + * LG_CEIL_NSIZES: Number of bits required to store NSIZES. + * NPSIZES: Number of size classes that are a multiple of (1U << LG_PAGE). * LG_TINY_MAXCLASS: Lg of maximum tiny size class. * LOOKUP_MAXCLASS: Maximum size class included in lookup table. * SMALL_MAXCLASS: Maximum small size class. * LG_LARGE_MINCLASS: Lg of minimum large size class. - * HUGE_MAXCLASS: Maximum (huge) size class. + * LARGE_MAXCLASS: Maximum (large) size class. */ -#define LG_SIZE_CLASS_GROUP ${lg_g} +#define LG_SIZE_CLASS_GROUP ${lg_g} +#define LG_TINY_MIN ${lg_tmin} EOF @@ -233,16 +323,19 @@ for lg_z in ${lg_zarr} ; do for lg_p in ${lg_parr} ; do echo "#if (LG_SIZEOF_PTR == ${lg_z} && LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})" size_classes ${lg_z} ${lg_q} ${lg_t} ${lg_p} ${lg_g} - echo "#define SIZE_CLASSES_DEFINED" - echo "#define NTBINS ${ntbins}" - echo "#define NLBINS ${nlbins}" - echo "#define NBINS ${nbins}" - echo "#define NSIZES ${nsizes}" - echo "#define LG_TINY_MAXCLASS ${lg_tiny_maxclass}" - echo "#define LOOKUP_MAXCLASS ${lookup_maxclass}" - echo "#define SMALL_MAXCLASS ${small_maxclass}" - echo "#define LG_LARGE_MINCLASS ${lg_large_minclass}" - echo "#define HUGE_MAXCLASS ${huge_maxclass}" + echo "#define SIZE_CLASSES_DEFINED" + echo "#define NTBINS ${ntbins}" + echo "#define NLBINS ${nlbins}" + echo "#define NBINS ${nbins}" + echo "#define NSIZES ${nsizes}" + echo "#define LG_CEIL_NSIZES ${lg_ceil_nsizes}" + echo "#define NPSIZES ${npsizes}" + echo "#define LG_TINY_MAXCLASS ${lg_tiny_maxclass}" + echo "#define LOOKUP_MAXCLASS ${lookup_maxclass}" + echo "#define SMALL_MAXCLASS ${small_maxclass}" + echo "#define LG_LARGE_MINCLASS ${lg_large_minclass}" + echo "#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS)" + echo "#define LARGE_MAXCLASS ${large_maxclass}" echo "#endif" echo done @@ -258,29 +351,11 @@ cat <<EOF #undef SIZE_CLASSES_DEFINED /* * The size2index_tab lookup table uses uint8_t to encode each bin index, so we - * cannot support more than 256 small size classes. Further constrain NBINS to - * 255 since all small size classes, plus a "not small" size class must be - * stored in 8 bits of arena_chunk_map_bits_t's bits field. + * cannot support more than 256 small size classes. */ -#if (NBINS > 255) +#if (NBINS > 256) # error "Too many small size classes" #endif -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_SIZE_CLASSES_H */ EOF diff --git a/deps/jemalloc/include/jemalloc/internal/smoothstep.h b/deps/jemalloc/include/jemalloc/internal/smoothstep.h new file mode 100644 index 000000000..2e14430f5 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/smoothstep.h @@ -0,0 +1,232 @@ +#ifndef JEMALLOC_INTERNAL_SMOOTHSTEP_H +#define JEMALLOC_INTERNAL_SMOOTHSTEP_H + +/* + * This file was generated by the following command: + * sh smoothstep.sh smoother 200 24 3 15 + */ +/******************************************************************************/ + +/* + * This header defines a precomputed table based on the smoothstep family of + * sigmoidal curves (https://en.wikipedia.org/wiki/Smoothstep) that grow from 0 + * to 1 in 0 <= x <= 1. The table is stored as integer fixed point values so + * that floating point math can be avoided. + * + * 3 2 + * smoothstep(x) = -2x + 3x + * + * 5 4 3 + * smootherstep(x) = 6x - 15x + 10x + * + * 7 6 5 4 + * smootheststep(x) = -20x + 70x - 84x + 35x + */ + +#define SMOOTHSTEP_VARIANT "smoother" +#define SMOOTHSTEP_NSTEPS 200 +#define SMOOTHSTEP_BFP 24 +#define SMOOTHSTEP \ + /* STEP(step, h, x, y) */ \ + STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ + STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ + STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ + STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ + STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ + STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ + STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ + STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \ + STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ + STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ + STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ + STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ + STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ + STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ + STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ + STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ + STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ + STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ + STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ + STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ + STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ + STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ + STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ + STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ + STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ + STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ + STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ + STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ + STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ + STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ + STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ + STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ + STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ + STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ + STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ + STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ + STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ + STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ + STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ + STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ + STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ + STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ + STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ + STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ + STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ + STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ + STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ + STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ + STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \ + STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ + STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ + STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ + STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \ + STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ + STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ + STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ + STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ + STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ + STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ + STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ + STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ + STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ + STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ + STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ + STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \ + STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ + STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ + STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ + STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ + STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ + STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ + STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ + STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ + STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ + STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ + STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ + STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ + STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \ + STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ + STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ + STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ + STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ + STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ + STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ + STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ + STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ + STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \ + STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ + STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ + STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ + STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ + STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ + STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ + STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ + STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ + STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ + STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \ + STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ + STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ + STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ + STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ + STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ + STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \ + STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ + STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ + STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \ + STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ + STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ + STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ + STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ + STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ + STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ + STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ + STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ + STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ + STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ + STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ + STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ + STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ + STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ + STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ + STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ + STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ + STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ + STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ + STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ + STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ + STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ + STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ + STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ + STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ + STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ + STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ + STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \ + STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ + STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ + STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \ + STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ + STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ + STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ + STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ + STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ + STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ + STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ + STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ + STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ + STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \ + STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ + STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ + STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ + STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ + STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ + STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ + STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ + STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ + STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ + STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ + STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ + STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ + STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \ + STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ + STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \ + STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ + STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ + STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ + STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ + STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ + STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ + STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ + STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ + STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ + STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ + STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ + STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ + STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ + STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ + STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ + STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ + STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ + STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ + STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ + STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ + STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ + STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ + STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ + STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ + STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \ + STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \ + STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \ + STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \ + STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \ + STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \ + STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \ + STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \ + STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \ + STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \ + STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \ + STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \ + STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ + STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \ + +#endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/smoothstep.sh b/deps/jemalloc/include/jemalloc/internal/smoothstep.sh new file mode 100755 index 000000000..65de97bf4 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/smoothstep.sh @@ -0,0 +1,101 @@ +#!/bin/sh +# +# Generate a discrete lookup table for a sigmoid function in the smoothstep +# family (https://en.wikipedia.org/wiki/Smoothstep), where the lookup table +# entries correspond to x in [1/nsteps, 2/nsteps, ..., nsteps/nsteps]. Encode +# the entries using a binary fixed point representation. +# +# Usage: smoothstep.sh <variant> <nsteps> <bfp> <xprec> <yprec> +# +# <variant> is in {smooth, smoother, smoothest}. +# <nsteps> must be greater than zero. +# <bfp> must be in [0..62]; reasonable values are roughly [10..30]. +# <xprec> is x decimal precision. +# <yprec> is y decimal precision. + +#set -x + +cmd="sh smoothstep.sh $*" +variant=$1 +nsteps=$2 +bfp=$3 +xprec=$4 +yprec=$5 + +case "${variant}" in + smooth) + ;; + smoother) + ;; + smoothest) + ;; + *) + echo "Unsupported variant" + exit 1 + ;; +esac + +smooth() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx _2 lx 3 ^ '*' 3 lx 2 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +smoother() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx 6 lx 5 ^ '*' _15 lx 4 ^ '*' + 10 lx 3 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +smoothest() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx _20 lx 7 ^ '*' 70 lx 6 ^ '*' + _84 lx 5 ^ '*' + 35 lx 4 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +cat <<EOF +#ifndef JEMALLOC_INTERNAL_SMOOTHSTEP_H +#define JEMALLOC_INTERNAL_SMOOTHSTEP_H + +/* + * This file was generated by the following command: + * $cmd + */ +/******************************************************************************/ + +/* + * This header defines a precomputed table based on the smoothstep family of + * sigmoidal curves (https://en.wikipedia.org/wiki/Smoothstep) that grow from 0 + * to 1 in 0 <= x <= 1. The table is stored as integer fixed point values so + * that floating point math can be avoided. + * + * 3 2 + * smoothstep(x) = -2x + 3x + * + * 5 4 3 + * smootherstep(x) = 6x - 15x + 10x + * + * 7 6 5 4 + * smootheststep(x) = -20x + 70x - 84x + 35x + */ + +#define SMOOTHSTEP_VARIANT "${variant}" +#define SMOOTHSTEP_NSTEPS ${nsteps} +#define SMOOTHSTEP_BFP ${bfp} +#define SMOOTHSTEP \\ + /* STEP(step, h, x, y) */ \\ +EOF + +s=1 +while [ $s -le $nsteps ] ; do + $variant ${s} + x=`echo ${xprec} k ${s} ${nsteps} / p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + printf ' STEP(%4d, UINT64_C(0x%016x), %s, %s) \\\n' ${s} ${h} ${x} ${y} + + s=$((s+1)) +done +echo + +cat <<EOF +#endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */ +EOF diff --git a/deps/jemalloc/include/jemalloc/internal/spin.h b/deps/jemalloc/include/jemalloc/internal/spin.h new file mode 100644 index 000000000..22804c687 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/spin.h @@ -0,0 +1,40 @@ +#ifndef JEMALLOC_INTERNAL_SPIN_H +#define JEMALLOC_INTERNAL_SPIN_H + +#define SPIN_INITIALIZER {0U} + +typedef struct { + unsigned iteration; +} spin_t; + +static inline void +spin_cpu_spinwait() { +# if HAVE_CPU_SPINWAIT + CPU_SPINWAIT; +# else + volatile int x = 0; + x = x; +# endif +} + +static inline void +spin_adaptive(spin_t *spin) { + volatile uint32_t i; + + if (spin->iteration < 5) { + for (i = 0; i < (1U << spin->iteration); i++) { + spin_cpu_spinwait(); + } + spin->iteration++; + } else { +#ifdef _WIN32 + SwitchToThread(); +#else + sched_yield(); +#endif + } +} + +#undef SPIN_INLINE + +#endif /* JEMALLOC_INTERNAL_SPIN_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/stats.h b/deps/jemalloc/include/jemalloc/internal/stats.h index c91dba99d..852e34269 100644 --- a/deps/jemalloc/include/jemalloc/internal/stats.h +++ b/deps/jemalloc/include/jemalloc/internal/stats.h @@ -1,183 +1,30 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct tcache_bin_stats_s tcache_bin_stats_t; -typedef struct malloc_bin_stats_s malloc_bin_stats_t; -typedef struct malloc_large_stats_s malloc_large_stats_t; -typedef struct malloc_huge_stats_s malloc_huge_stats_t; -typedef struct arena_stats_s arena_stats_t; -typedef struct chunk_stats_s chunk_stats_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct tcache_bin_stats_s { - /* - * Number of allocation requests that corresponded to the size of this - * bin. - */ - uint64_t nrequests; -}; - -struct malloc_bin_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the bin. Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* - * Number of allocation requests that correspond to the size of this - * bin. This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - uint64_t nrequests; - - /* - * Current number of regions of this size class, including regions - * currently cached by tcache. - */ - size_t curregs; - - /* Number of tcache fills from this bin. */ - uint64_t nfills; - - /* Number of tcache flushes to this bin. */ - uint64_t nflushes; - - /* Total number of runs created for this bin's size class. */ - uint64_t nruns; - - /* - * Total number of runs reused by extracting them from the runs tree for - * this bin's size class. - */ - uint64_t reruns; - - /* Current number of runs in this bin. */ - size_t curruns; -}; - -struct malloc_large_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the arena. Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* - * Number of allocation requests that correspond to this size class. - * This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - uint64_t nrequests; - - /* - * Current number of runs of this size class, including runs currently - * cached by tcache. - */ - size_t curruns; -}; - -struct malloc_huge_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the arena. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* Current number of (multi-)chunk allocations of this size class. */ - size_t curhchunks; +#ifndef JEMALLOC_INTERNAL_STATS_H +#define JEMALLOC_INTERNAL_STATS_H + +/* OPTION(opt, var_name, default, set_value_to) */ +#define STATS_PRINT_OPTIONS \ + OPTION('J', json, false, true) \ + OPTION('g', general, true, false) \ + OPTION('m', merged, config_stats, false) \ + OPTION('d', destroyed, config_stats, false) \ + OPTION('a', unmerged, config_stats, false) \ + OPTION('b', bins, true, false) \ + OPTION('l', large, true, false) \ + OPTION('x', mutex, true, false) + +enum { +#define OPTION(o, v, d, s) stats_print_option_num_##v, + STATS_PRINT_OPTIONS +#undef OPTION + stats_print_tot_num_options }; -struct arena_stats_s { - /* Number of bytes currently mapped. */ - size_t mapped; +/* Options for stats_print. */ +extern bool opt_stats_print; +extern char opt_stats_print_opts[stats_print_tot_num_options+1]; - /* - * Total number of purge sweeps, total number of madvise calls made, - * and total pages purged in order to keep dirty unused memory under - * control. - */ - uint64_t npurge; - uint64_t nmadvise; - uint64_t purged; - - /* - * Number of bytes currently mapped purely for metadata purposes, and - * number of bytes currently allocated for internal metadata. - */ - size_t metadata_mapped; - size_t metadata_allocated; /* Protected via atomic_*_z(). */ - - /* Per-size-category statistics. */ - size_t allocated_large; - uint64_t nmalloc_large; - uint64_t ndalloc_large; - uint64_t nrequests_large; - - size_t allocated_huge; - uint64_t nmalloc_huge; - uint64_t ndalloc_huge; - - /* One element for each large size class. */ - malloc_large_stats_t *lstats; - - /* One element for each huge size class. */ - malloc_huge_stats_t *hstats; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_stats_print; - -extern size_t stats_cactive; - -void stats_print(void (*write)(void *, const char *), void *cbopaque, +/* Implements je_malloc_stats_print. */ +void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts); -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -size_t stats_cactive_get(void); -void stats_cactive_add(size_t size); -void stats_cactive_sub(size_t size); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_)) -JEMALLOC_INLINE size_t -stats_cactive_get(void) -{ - - return (atomic_read_z(&stats_cactive)); -} - -JEMALLOC_INLINE void -stats_cactive_add(size_t size) -{ - - atomic_add_z(&stats_cactive, size); -} - -JEMALLOC_INLINE void -stats_cactive_sub(size_t size) -{ - - atomic_sub_z(&stats_cactive, size); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_STATS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/sz.h b/deps/jemalloc/include/jemalloc/internal/sz.h new file mode 100644 index 000000000..979462898 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/sz.h @@ -0,0 +1,317 @@ +#ifndef JEMALLOC_INTERNAL_SIZE_H +#define JEMALLOC_INTERNAL_SIZE_H + +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/util.h" + +/* + * sz module: Size computations. + * + * Some abbreviations used here: + * p: Page + * ind: Index + * s, sz: Size + * u: Usable size + * a: Aligned + * + * These are not always used completely consistently, but should be enough to + * interpret function names. E.g. sz_psz2ind converts page size to page size + * index; sz_sa2u converts a (size, alignment) allocation request to the usable + * size that would result from such an allocation. + */ + +/* + * sz_pind2sz_tab encodes the same information as could be computed by + * sz_pind2sz_compute(). + */ +extern size_t const sz_pind2sz_tab[NPSIZES+1]; +/* + * sz_index2size_tab encodes the same information as could be computed (at + * unacceptable cost in some code paths) by sz_index2size_compute(). + */ +extern size_t const sz_index2size_tab[NSIZES]; +/* + * sz_size2index_tab is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via sz_size2index(). + */ +extern uint8_t const sz_size2index_tab[]; + +static const size_t sz_large_pad = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + PAGE +#else + 0 +#endif + ; + +JEMALLOC_ALWAYS_INLINE pszind_t +sz_psz2ind(size_t psz) { + if (unlikely(psz > LARGE_MAXCLASS)) { + return NPSIZES; + } + { + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - + (LG_SIZE_CLASS_GROUP + LG_PAGE); + pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + + pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZU(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + pszind_t ind = grp + mod; + return ind; + } +} + +static inline size_t +sz_pind2sz_compute(pszind_t pind) { + if (unlikely(pind == NPSIZES)) { + return LARGE_MAXCLASS + PAGE; + } + { + size_t grp = pind >> LG_SIZE_CLASS_GROUP; + size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t sz = grp_size + mod_size; + return sz; + } +} + +static inline size_t +sz_pind2sz_lookup(pszind_t pind) { + size_t ret = (size_t)sz_pind2sz_tab[pind]; + assert(ret == sz_pind2sz_compute(pind)); + return ret; +} + +static inline size_t +sz_pind2sz(pszind_t pind) { + assert(pind < NPSIZES+1); + return sz_pind2sz_lookup(pind); +} + +static inline size_t +sz_psz2u(size_t psz) { + if (unlikely(psz > LARGE_MAXCLASS)) { + return LARGE_MAXCLASS + PAGE; + } + { + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return usize; + } +} + +static inline szind_t +sz_size2index_compute(size_t size) { + if (unlikely(size > LARGE_MAXCLASS)) { + return NSIZES; + } +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); + } +#endif + { + szind_t x = lg_floor((size<<1)-1); + szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + szind_t grp = shift << LG_SIZE_CLASS_GROUP; + + szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZU(-1) << lg_delta; + szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + szind_t index = NTBINS + grp + mod; + return index; + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +sz_size2index_lookup(size_t size) { + assert(size <= LOOKUP_MAXCLASS); + { + szind_t ret = (sz_size2index_tab[(size-1) >> LG_TINY_MIN]); + assert(ret == sz_size2index_compute(size)); + return ret; + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +sz_size2index(size_t size) { + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) { + return sz_size2index_lookup(size); + } + return sz_size2index_compute(size); +} + +static inline size_t +sz_index2size_compute(szind_t index) { +#if (NTBINS > 0) + if (index < NTBINS) { + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); + } +#endif + { + size_t reduced_index = index - NTBINS; + size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return usize; + } +} + +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size_lookup(szind_t index) { + size_t ret = (size_t)sz_index2size_tab[index]; + assert(ret == sz_index2size_compute(index)); + return ret; +} + +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size(szind_t index) { + assert(index < NSIZES); + return sz_index2size_lookup(index); +} + +JEMALLOC_ALWAYS_INLINE size_t +sz_s2u_compute(size_t size) { + if (unlikely(size > LARGE_MAXCLASS)) { + return 0; + } +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return usize; + } +} + +JEMALLOC_ALWAYS_INLINE size_t +sz_s2u_lookup(size_t size) { + size_t ret = sz_index2size_lookup(sz_size2index_lookup(size)); + + assert(ret == sz_s2u_compute(size)); + return ret; +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size. + */ +JEMALLOC_ALWAYS_INLINE size_t +sz_s2u(size_t size) { + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) { + return sz_s2u_lookup(size); + } + return sz_s2u_compute(size); +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size and alignment. + */ +JEMALLOC_ALWAYS_INLINE size_t +sz_sa2u(size_t size, size_t alignment) { + size_t usize; + + assert(alignment != 0 && ((alignment - 1) & alignment) == 0); + + /* Try for a small size class. */ + if (size <= SMALL_MAXCLASS && alignment < PAGE) { + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each + * small size class, every object is aligned at the smallest + * power of two that is non-zero in the base two representation + * of the size. For example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = sz_s2u(ALIGNMENT_CEILING(size, alignment)); + if (usize < LARGE_MINCLASS) { + return usize; + } + } + + /* Large size class. Beware of overflow. */ + + if (unlikely(alignment > LARGE_MAXCLASS)) { + return 0; + } + + /* Make sure result is a large size class. */ + if (size <= LARGE_MINCLASS) { + usize = LARGE_MINCLASS; + } else { + usize = sz_s2u(size); + if (usize < size) { + /* size_t overflow. */ + return 0; + } + } + + /* + * Calculate the multi-page mapping that large_palloc() would need in + * order to guarantee the alignment. + */ + if (usize + sz_large_pad + PAGE_CEILING(alignment) - PAGE < usize) { + /* size_t overflow. */ + return 0; + } + return usize; +} + +#endif /* JEMALLOC_INTERNAL_SIZE_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/tcache.h b/deps/jemalloc/include/jemalloc/internal/tcache.h deleted file mode 100644 index 5079cd266..000000000 --- a/deps/jemalloc/include/jemalloc/internal/tcache.h +++ /dev/null @@ -1,426 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct tcache_bin_info_s tcache_bin_info_t; -typedef struct tcache_bin_s tcache_bin_t; -typedef struct tcache_s tcache_t; -typedef struct tcaches_s tcaches_t; - -/* - * tcache pointers close to NULL are used to encode state information that is - * used for two purposes: preventing thread caching on a per thread basis and - * cleaning up during thread shutdown. - */ -#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) -#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) -#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) -#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY - -/* - * Absolute minimum number of cache slots for each small bin. - */ -#define TCACHE_NSLOTS_SMALL_MIN 20 - -/* - * Absolute maximum number of cache slots for each small bin in the thread - * cache. This is an additional constraint beyond that imposed as: twice the - * number of regions per run for this size class. - * - * This constant must be an even number. - */ -#define TCACHE_NSLOTS_SMALL_MAX 200 - -/* Number of cache slots for large size classes. */ -#define TCACHE_NSLOTS_LARGE 20 - -/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ -#define LG_TCACHE_MAXCLASS_DEFAULT 15 - -/* - * TCACHE_GC_SWEEP is the approximate number of allocation events between - * full GC sweeps. Integer rounding may cause the actual number to be - * slightly higher, since GC is performed incrementally. - */ -#define TCACHE_GC_SWEEP 8192 - -/* Number of tcache allocation/deallocation events between incremental GCs. */ -#define TCACHE_GC_INCR \ - ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -typedef enum { - tcache_enabled_false = 0, /* Enable cast to/from bool. */ - tcache_enabled_true = 1, - tcache_enabled_default = 2 -} tcache_enabled_t; - -/* - * Read-only information associated with each element of tcache_t's tbins array - * is stored separately, mainly to reduce memory usage. - */ -struct tcache_bin_info_s { - unsigned ncached_max; /* Upper limit on ncached. */ -}; - -struct tcache_bin_s { - tcache_bin_stats_t tstats; - int low_water; /* Min # cached since last GC. */ - unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ - unsigned ncached; /* # of cached objects. */ - void **avail; /* Stack of available objects. */ -}; - -struct tcache_s { - ql_elm(tcache_t) link; /* Used for aggregating stats. */ - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - unsigned ev_cnt; /* Event count since incremental GC. */ - szind_t next_gc_bin; /* Next bin to GC. */ - tcache_bin_t tbins[1]; /* Dynamically sized. */ - /* - * The pointer stacks associated with tbins follow as a contiguous - * array. During tcache initialization, the avail pointer in each - * element of tbins is initialized to point to the proper offset within - * this array. - */ -}; - -/* Linkage for list of available (previously used) explicit tcache IDs. */ -struct tcaches_s { - union { - tcache_t *tcache; - tcaches_t *next; - }; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_tcache; -extern ssize_t opt_lg_tcache_max; - -extern tcache_bin_info_t *tcache_bin_info; - -/* - * Number of tcache bins. There are NBINS small-object bins, plus 0 or more - * large-object bins. - */ -extern size_t nhbins; - -/* Maximum cached size class. */ -extern size_t tcache_maxclass; - -/* - * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and - * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are - * completely disjoint from this data structure. tcaches starts off as a sparse - * array, so it has no physical memory footprint until individual pages are - * touched. This allows the entire array to be allocated the first time an - * explicit tcache is created without a disproportionate impact on memory usage. - */ -extern tcaches_t *tcaches; - -size_t tcache_salloc(const void *ptr); -void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); -void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, - unsigned rem, tcache_t *tcache); -void tcache_arena_associate(tcache_t *tcache, arena_t *arena); -void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, - arena_t *newarena); -void tcache_arena_dissociate(tcache_t *tcache, arena_t *arena); -tcache_t *tcache_get_hard(tsd_t *tsd); -tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); -void tcache_cleanup(tsd_t *tsd); -void tcache_enabled_cleanup(tsd_t *tsd); -void tcache_stats_merge(tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); -void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void tcache_event(tsd_t *tsd, tcache_t *tcache); -void tcache_flush(void); -bool tcache_enabled_get(void); -tcache_t *tcache_get(tsd_t *tsd, bool create); -void tcache_enabled_set(bool enabled); -void *tcache_alloc_easy(tcache_bin_t *tbin); -void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, bool zero); -void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, bool zero); -void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, - szind_t binind); -void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, - size_t size); -tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) -JEMALLOC_INLINE void -tcache_flush(void) -{ - tsd_t *tsd; - - cassert(config_tcache); - - tsd = tsd_fetch(); - tcache_cleanup(tsd); -} - -JEMALLOC_INLINE bool -tcache_enabled_get(void) -{ - tsd_t *tsd; - tcache_enabled_t tcache_enabled; - - cassert(config_tcache); - - tsd = tsd_fetch(); - tcache_enabled = tsd_tcache_enabled_get(tsd); - if (tcache_enabled == tcache_enabled_default) { - tcache_enabled = (tcache_enabled_t)opt_tcache; - tsd_tcache_enabled_set(tsd, tcache_enabled); - } - - return ((bool)tcache_enabled); -} - -JEMALLOC_INLINE void -tcache_enabled_set(bool enabled) -{ - tsd_t *tsd; - tcache_enabled_t tcache_enabled; - - cassert(config_tcache); - - tsd = tsd_fetch(); - - tcache_enabled = (tcache_enabled_t)enabled; - tsd_tcache_enabled_set(tsd, tcache_enabled); - - if (!enabled) - tcache_cleanup(tsd); -} - -JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get(tsd_t *tsd, bool create) -{ - tcache_t *tcache; - - if (!config_tcache) - return (NULL); - - tcache = tsd_tcache_get(tsd); - if (!create) - return (tcache); - if (unlikely(tcache == NULL) && tsd_nominal(tsd)) { - tcache = tcache_get_hard(tsd); - tsd_tcache_set(tsd, tcache); - } - - return (tcache); -} - -JEMALLOC_ALWAYS_INLINE void -tcache_event(tsd_t *tsd, tcache_t *tcache) -{ - - if (TCACHE_GC_INCR == 0) - return; - - tcache->ev_cnt++; - assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR)) - tcache_event_hard(tsd, tcache); -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin) -{ - void *ret; - - if (unlikely(tbin->ncached == 0)) { - tbin->low_water = -1; - return (NULL); - } - tbin->ncached--; - if (unlikely((int)tbin->ncached < tbin->low_water)) - tbin->low_water = tbin->ncached; - ret = tbin->avail[tbin->ncached]; - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - bool zero) -{ - void *ret; - szind_t binind; - size_t usize; - tcache_bin_t *tbin; - - binind = size2index(size); - assert(binind < NBINS); - tbin = &tcache->tbins[binind]; - usize = index2size(binind); - ret = tcache_alloc_easy(tbin); - if (unlikely(ret == NULL)) { - ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind); - if (ret == NULL) - return (NULL); - } - assert(tcache_salloc(ret) == usize); - - if (likely(!zero)) { - if (config_fill) { - if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); - } else if (unlikely(opt_zero)) - memset(ret, 0, usize); - } - } else { - if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], - true); - } - memset(ret, 0, usize); - } - - if (config_stats) - tbin->tstats.nrequests++; - if (config_prof) - tcache->prof_accumbytes += usize; - tcache_event(tsd, tcache); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - bool zero) -{ - void *ret; - szind_t binind; - size_t usize; - tcache_bin_t *tbin; - - binind = size2index(size); - usize = index2size(binind); - assert(usize <= tcache_maxclass); - assert(binind < nhbins); - tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin); - if (unlikely(ret == NULL)) { - /* - * Only allocate one large object at a time, because it's quite - * expensive to create one and not use it. - */ - ret = arena_malloc_large(arena, usize, zero); - if (ret == NULL) - return (NULL); - } else { - if (config_prof && usize == LARGE_MINCLASS) { - arena_chunk_t *chunk = - (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> - LG_PAGE); - arena_mapbits_large_binind_set(chunk, pageind, - BININD_INVALID); - } - if (likely(!zero)) { - if (config_fill) { - if (unlikely(opt_junk_alloc)) - memset(ret, 0xa5, usize); - else if (unlikely(opt_zero)) - memset(ret, 0, usize); - } - } else - memset(ret, 0, usize); - - if (config_stats) - tbin->tstats.nrequests++; - if (config_prof) - tcache->prof_accumbytes += usize; - } - - tcache_event(tsd, tcache); - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) -{ - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; - - assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - - if (config_fill && unlikely(opt_junk_free)) - arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); - - tbin = &tcache->tbins[binind]; - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_small(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)); - } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; - tbin->ncached++; - - tcache_event(tsd, tcache); -} - -JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) -{ - szind_t binind; - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; - - assert((size & PAGE_MASK) == 0); - assert(tcache_salloc(ptr) > SMALL_MAXCLASS); - assert(tcache_salloc(ptr) <= tcache_maxclass); - - binind = size2index(size); - - if (config_fill && unlikely(opt_junk_free)) - arena_dalloc_junk_large(ptr, size); - - tbin = &tcache->tbins[binind]; - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tsd, tbin, binind, - (tbin_info->ncached_max >> 1), tcache); - } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; - tbin->ncached++; - - tcache_event(tsd, tcache); -} - -JEMALLOC_ALWAYS_INLINE tcache_t * -tcaches_get(tsd_t *tsd, unsigned ind) -{ - tcaches_t *elm = &tcaches[ind]; - if (unlikely(elm->tcache == NULL)) - elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); - return (elm->tcache); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_externs.h b/deps/jemalloc/include/jemalloc/internal/tcache_externs.h new file mode 100644 index 000000000..790367bd4 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/tcache_externs.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H +#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H + +#include "jemalloc/internal/size_classes.h" + +extern bool opt_tcache; +extern ssize_t opt_lg_tcache_max; + +extern cache_bin_info_t *tcache_bin_info; + +/* + * Number of tcache bins. There are NBINS small-object bins, plus 0 or more + * large-object bins. + */ +extern unsigned nhbins; + +/* Maximum cached size class. */ +extern size_t tcache_maxclass; + +/* + * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and + * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are + * completely disjoint from this data structure. tcaches starts off as a sparse + * array, so it has no physical memory footprint until individual pages are + * touched. This allows the entire array to be allocated the first time an + * explicit tcache is created without a disproportionate impact on memory usage. + */ +extern tcaches_t *tcaches; + +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + cache_bin_t *tbin, szind_t binind, bool *tcache_success); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, + szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, + unsigned rem, tcache_t *tcache); +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, + arena_t *arena); +tcache_t *tcache_create_explicit(tsd_t *tsd); +void tcache_cleanup(tsd_t *tsd); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn); +void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +void tcache_prefork(tsdn_t *tsdn); +void tcache_postfork_parent(tsdn_t *tsdn); +void tcache_postfork_child(tsdn_t *tsdn); +void tcache_flush(tsd_t *tsd); +bool tsd_tcache_data_init(tsd_t *tsd); +bool tsd_tcache_enabled_data_init(tsd_t *tsd); + +#endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h b/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h new file mode 100644 index 000000000..0f6ab8cb5 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h @@ -0,0 +1,223 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H +#define JEMALLOC_INTERNAL_TCACHE_INLINES_H + +#include "jemalloc/internal/bin.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/ticker.h" +#include "jemalloc/internal/util.h" + +static inline bool +tcache_enabled_get(tsd_t *tsd) { + return tsd_tcache_enabled_get(tsd); +} + +static inline void +tcache_enabled_set(tsd_t *tsd, bool enabled) { + bool was_enabled = tsd_tcache_enabled_get(tsd); + + if (!was_enabled && enabled) { + tsd_tcache_data_init(tsd); + } else if (was_enabled && !enabled) { + tcache_cleanup(tsd); + } + /* Commit the state last. Above calls check current state. */ + tsd_tcache_enabled_set(tsd, enabled); + tsd_slow_update(tsd); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_event(tsd_t *tsd, tcache_t *tcache) { + if (TCACHE_GC_INCR == 0) { + return; + } + + if (unlikely(ticker_tick(&tcache->gc_ticker))) { + tcache_event_hard(tsd, tcache); + } +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + UNUSED size_t size, szind_t binind, bool zero, bool slow_path) { + void *ret; + cache_bin_t *bin; + bool tcache_success; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + + assert(binind < NBINS); + bin = tcache_small_bin_get(tcache, binind); + ret = cache_bin_alloc_easy(bin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { + bool tcache_hard_success; + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) { + return NULL; + } + + ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, + bin, binind, &tcache_hard_success); + if (tcache_hard_success == false) { + return NULL; + } + } + + assert(ret); + /* + * Only compute usize if required. The checks in the following if + * statement are all static. + */ + if (config_prof || (slow_path && config_fill) || unlikely(zero)) { + usize = sz_index2size(binind); + assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); + } + + if (likely(!zero)) { + if (slow_path && config_fill) { + if (unlikely(opt_junk_alloc)) { + arena_alloc_junk_small(ret, &bin_infos[binind], + false); + } else if (unlikely(opt_zero)) { + memset(ret, 0, usize); + } + } + } else { + if (slow_path && config_fill && unlikely(opt_junk_alloc)) { + arena_alloc_junk_small(ret, &bin_infos[binind], true); + } + memset(ret, 0, usize); + } + + if (config_stats) { + bin->tstats.nrequests++; + } + if (config_prof) { + tcache->prof_accumbytes += usize; + } + tcache_event(tsd, tcache); + return ret; +} + +JEMALLOC_ALWAYS_INLINE void * +tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + szind_t binind, bool zero, bool slow_path) { + void *ret; + cache_bin_t *bin; + bool tcache_success; + + assert(binind >= NBINS &&binind < nhbins); + bin = tcache_large_bin_get(tcache, binind); + ret = cache_bin_alloc_easy(bin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { + /* + * Only allocate one large object at a time, because it's quite + * expensive to create one and not use it. + */ + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) { + return NULL; + } + + ret = large_malloc(tsd_tsdn(tsd), arena, sz_s2u(size), zero); + if (ret == NULL) { + return NULL; + } + } else { + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + + /* Only compute usize on demand */ + if (config_prof || (slow_path && config_fill) || + unlikely(zero)) { + usize = sz_index2size(binind); + assert(usize <= tcache_maxclass); + } + + if (likely(!zero)) { + if (slow_path && config_fill) { + if (unlikely(opt_junk_alloc)) { + memset(ret, JEMALLOC_ALLOC_JUNK, + usize); + } else if (unlikely(opt_zero)) { + memset(ret, 0, usize); + } + } + } else { + memset(ret, 0, usize); + } + + if (config_stats) { + bin->tstats.nrequests++; + } + if (config_prof) { + tcache->prof_accumbytes += usize; + } + } + + tcache_event(tsd, tcache); + return ret; +} + +JEMALLOC_ALWAYS_INLINE void +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, + bool slow_path) { + cache_bin_t *bin; + cache_bin_info_t *bin_info; + + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); + + if (slow_path && config_fill && unlikely(opt_junk_free)) { + arena_dalloc_junk_small(ptr, &bin_infos[binind]); + } + + bin = tcache_small_bin_get(tcache, binind); + bin_info = &tcache_bin_info[binind]; + if (unlikely(bin->ncached == bin_info->ncached_max)) { + tcache_bin_flush_small(tsd, tcache, bin, binind, + (bin_info->ncached_max >> 1)); + } + assert(bin->ncached < bin_info->ncached_max); + bin->ncached++; + *(bin->avail - bin->ncached) = ptr; + + tcache_event(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE void +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, + bool slow_path) { + cache_bin_t *bin; + cache_bin_info_t *bin_info; + + assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); + + if (slow_path && config_fill && unlikely(opt_junk_free)) { + large_dalloc_junk(ptr, sz_index2size(binind)); + } + + bin = tcache_large_bin_get(tcache, binind); + bin_info = &tcache_bin_info[binind]; + if (unlikely(bin->ncached == bin_info->ncached_max)) { + tcache_bin_flush_large(tsd, bin, binind, + (bin_info->ncached_max >> 1), tcache); + } + assert(bin->ncached < bin_info->ncached_max); + bin->ncached++; + *(bin->avail - bin->ncached) = ptr; + + tcache_event(tsd, tcache); +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcaches_get(tsd_t *tsd, unsigned ind) { + tcaches_t *elm = &tcaches[ind]; + if (unlikely(elm->tcache == NULL)) { + elm->tcache = tcache_create_explicit(tsd); + } + return elm->tcache; +} + +#endif /* JEMALLOC_INTERNAL_TCACHE_INLINES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_structs.h b/deps/jemalloc/include/jemalloc/internal/tcache_structs.h new file mode 100644 index 000000000..07b738705 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/tcache_structs.h @@ -0,0 +1,61 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H +#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H + +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/ticker.h" + +struct tcache_s { + /* + * To minimize our cache-footprint, we put the frequently accessed data + * together at the start of this struct. + */ + + /* Cleared after arena_prof_accum(). */ + uint64_t prof_accumbytes; + /* Drives incremental GC. */ + ticker_t gc_ticker; + /* + * The pointer stacks associated with bins follow as a contiguous array. + * During tcache initialization, the avail pointer in each element of + * tbins is initialized to point to the proper offset within this array. + */ + cache_bin_t bins_small[NBINS]; + + /* + * This data is less hot; we can be a little less careful with our + * footprint here. + */ + /* Lets us track all the tcaches in an arena. */ + ql_elm(tcache_t) link; + /* + * The descriptor lets the arena find our cache bins without seeing the + * tcache definition. This enables arenas to aggregate stats across + * tcaches without having a tcache dependency. + */ + cache_bin_array_descriptor_t cache_bin_array_descriptor; + + /* The arena this tcache is associated with. */ + arena_t *arena; + /* Next bin to GC. */ + szind_t next_gc_bin; + /* For small bins, fill (ncached_max >> lg_fill_div). */ + uint8_t lg_fill_div[NBINS]; + /* + * We put the cache bins for large size classes at the end of the + * struct, since some of them might not get used. This might end up + * letting us avoid touching an extra page if we don't have to. + */ + cache_bin_t bins_large[NSIZES-NBINS]; +}; + +/* Linkage for list of available (previously used) explicit tcache IDs. */ +struct tcaches_s { + union { + tcache_t *tcache; + tcaches_t *next; + }; +}; + +#endif /* JEMALLOC_INTERNAL_TCACHE_STRUCTS_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_types.h b/deps/jemalloc/include/jemalloc/internal/tcache_types.h new file mode 100644 index 000000000..e49bc9d79 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/tcache_types.h @@ -0,0 +1,56 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H +#define JEMALLOC_INTERNAL_TCACHE_TYPES_H + +#include "jemalloc/internal/size_classes.h" + +typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; + +/* + * tcache pointers close to NULL are used to encode state information that is + * used for two purposes: preventing thread caching on a per thread basis and + * cleaning up during thread shutdown. + */ +#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) +#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) +#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) +#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY + +/* + * Absolute minimum number of cache slots for each small bin. + */ +#define TCACHE_NSLOTS_SMALL_MIN 20 + +/* + * Absolute maximum number of cache slots for each small bin in the thread + * cache. This is an additional constraint beyond that imposed as: twice the + * number of regions per slab for this size class. + * + * This constant must be an even number. + */ +#define TCACHE_NSLOTS_SMALL_MAX 200 + +/* Number of cache slots for large size classes. */ +#define TCACHE_NSLOTS_LARGE 20 + +/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ +#define LG_TCACHE_MAXCLASS_DEFAULT 15 + +/* + * TCACHE_GC_SWEEP is the approximate number of allocation events between + * full GC sweeps. Integer rounding may cause the actual number to be + * slightly higher, since GC is performed incrementally. + */ +#define TCACHE_GC_SWEEP 8192 + +/* Number of tcache allocation/deallocation events between incremental GCs. */ +#define TCACHE_GC_INCR \ + ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) + +/* Used in TSD static initializer only. Real init in tcache_data_init(). */ +#define TCACHE_ZERO_INITIALIZER {0} + +/* Used in TSD static initializer only. Will be initialized to opt_tcache. */ +#define TCACHE_ENABLED_ZERO_INITIALIZER false + +#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/ticker.h b/deps/jemalloc/include/jemalloc/internal/ticker.h new file mode 100644 index 000000000..4b3604708 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/ticker.h @@ -0,0 +1,78 @@ +#ifndef JEMALLOC_INTERNAL_TICKER_H +#define JEMALLOC_INTERNAL_TICKER_H + +#include "jemalloc/internal/util.h" + +/** + * A ticker makes it easy to count-down events until some limit. You + * ticker_init the ticker to trigger every nticks events. You then notify it + * that an event has occurred with calls to ticker_tick (or that nticks events + * have occurred with a call to ticker_ticks), which will return true (and reset + * the counter) if the countdown hit zero. + */ + +typedef struct { + int32_t tick; + int32_t nticks; +} ticker_t; + +static inline void +ticker_init(ticker_t *ticker, int32_t nticks) { + ticker->tick = nticks; + ticker->nticks = nticks; +} + +static inline void +ticker_copy(ticker_t *ticker, const ticker_t *other) { + *ticker = *other; +} + +static inline int32_t +ticker_read(const ticker_t *ticker) { + return ticker->tick; +} + +/* + * Not intended to be a public API. Unfortunately, on x86, neither gcc nor + * clang seems smart enough to turn + * ticker->tick -= nticks; + * if (unlikely(ticker->tick < 0)) { + * fixup ticker + * return true; + * } + * return false; + * into + * subq %nticks_reg, (%ticker_reg) + * js fixup ticker + * + * unless we force "fixup ticker" out of line. In that case, gcc gets it right, + * but clang now does worse than before. So, on x86 with gcc, we force it out + * of line, but otherwise let the inlining occur. Ordinarily this wouldn't be + * worth the hassle, but this is on the fast path of both malloc and free (via + * tcache_event). + */ +#if defined(__GNUC__) && !defined(__clang__) \ + && (defined(__x86_64__) || defined(__i386__)) +JEMALLOC_NOINLINE +#endif +static bool +ticker_fixup(ticker_t *ticker) { + ticker->tick = ticker->nticks; + return true; +} + +static inline bool +ticker_ticks(ticker_t *ticker, int32_t nticks) { + ticker->tick -= nticks; + if (unlikely(ticker->tick < 0)) { + return ticker_fixup(ticker); + } + return false; +} + +static inline bool +ticker_tick(ticker_t *ticker) { + return ticker_ticks(ticker, 1); +} + +#endif /* JEMALLOC_INTERNAL_TICKER_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h index eed7aa013..0b9841aa7 100644 --- a/deps/jemalloc/include/jemalloc/internal/tsd.h +++ b/deps/jemalloc/include/jemalloc/internal/tsd.h @@ -1,665 +1,326 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_TSD_H +#define JEMALLOC_INTERNAL_TSD_H -/* Maximum number of malloc_tsd users with cleanup functions. */ -#define MALLOC_TSD_CLEANUPS_MAX 2 - -typedef bool (*malloc_tsd_cleanup_t)(void); - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -typedef struct tsd_init_block_s tsd_init_block_t; -typedef struct tsd_init_head_s tsd_init_head_t; -#endif - -typedef struct tsd_s tsd_t; - -typedef enum { - tsd_state_uninitialized, - tsd_state_nominal, - tsd_state_purgatory, - tsd_state_reincarnated -} tsd_state_t; +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/prof_types.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/rtree_tsd.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/witness.h" /* - * TLS/TSD-agnostic macro-based implementation of thread-specific data. There - * are five macros that support (at least) three use cases: file-private, - * library-private, and library-private inlined. Following is an example - * library-private tsd variable: - * - * In example.h: - * typedef struct { - * int x; - * int y; - * } example_t; - * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) - * malloc_tsd_types(example_, example_t) - * malloc_tsd_protos(, example_, example_t) - * malloc_tsd_externs(example_, example_t) - * In example.c: - * malloc_tsd_data(, example_, example_t, EX_INITIALIZER) - * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER, - * example_tsd_cleanup) - * - * The result is a set of generated functions, e.g.: - * - * bool example_tsd_boot(void) {...} - * example_t *example_tsd_get() {...} - * void example_tsd_set(example_t *val) {...} - * - * Note that all of the functions deal in terms of (a_type *) rather than - * (a_type) so that it is possible to support non-pointer types (unlike - * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is - * cast to (void *). This means that the cleanup function needs to cast the - * function argument to (a_type *), then dereference the resulting pointer to - * access fields, e.g. + * Thread-Specific-Data layout + * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof --- + * s: state + * e: tcache_enabled + * m: thread_allocated (config_stats) + * f: thread_deallocated (config_stats) + * p: prof_tdata (config_prof) + * c: rtree_ctx (rtree cache accessed on deallocation) + * t: tcache + * --- data not accessed on tcache fast path: arena-related fields --- + * d: arenas_tdata_bypass + * r: reentrancy_level + * x: narenas_tdata + * i: iarena + * a: arena + * o: arenas_tdata + * Loading TSD data is on the critical path of basically all malloc operations. + * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective. + * Use a compact layout to reduce cache footprint. + * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ + * |---------------------------- 1st cacheline ----------------------------| + * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] | + * |---------------------------- 2nd cacheline ----------------------------| + * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | + * |---------------------------- 3nd cacheline ----------------------------| + * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... | + * +-------------------------------------------------------------------------+ + * Note: the entire tcache is embedded into TSD and spans multiple cachelines. * - * void - * example_tsd_cleanup(void *arg) - * { - * example_t *example = (example_t *)arg; - * - * example->x = 42; - * [...] - * if ([want the cleanup function to be called again]) - * example_tsd_set(example); - * } - * - * If example_tsd_set() is called within example_tsd_cleanup(), it will be - * called again. This is similar to how pthreads TSD destruction works, except - * that pthreads only calls the cleanup function again if the value was set to - * non-NULL. + * The last 3 members (i, a and o) before tcache isn't really needed on tcache + * fast path. However we have a number of unused tcache bins and witnesses + * (never touched unless config_debug) at the end of tcache, so we place them + * there to avoid breaking the cachelines and possibly paging in an extra page. */ - -/* malloc_tsd_types(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_types(a_name, a_type) -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_types(a_name, a_type) -#elif (defined(_WIN32)) -#define malloc_tsd_types(a_name, a_type) \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; +#ifdef JEMALLOC_JET +typedef void (*test_callback_t)(int *); +# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 +# define MALLOC_TEST_TSD \ + O(test_data, int, int) \ + O(test_callback, test_callback_t, int) +# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL #else -#define malloc_tsd_types(a_name, a_type) \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; +# define MALLOC_TEST_TSD +# define MALLOC_TEST_TSD_INITIALIZER #endif -/* malloc_tsd_protos(). */ -#define malloc_tsd_protos(a_attr, a_name, a_type) \ -a_attr bool \ -a_name##tsd_boot0(void); \ -a_attr void \ -a_name##tsd_boot1(void); \ -a_attr bool \ -a_name##tsd_boot(void); \ -a_attr a_type * \ -a_name##tsd_get(void); \ -a_attr void \ -a_name##tsd_set(a_type *val); - -/* malloc_tsd_externs(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##tsd_tls; \ -extern __thread bool a_name##tsd_initialized; \ -extern bool a_name##tsd_booted; -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##tsd_tls; \ -extern pthread_key_t a_name##tsd_tsd; \ -extern bool a_name##tsd_booted; -#elif (defined(_WIN32)) -#define malloc_tsd_externs(a_name, a_type) \ -extern DWORD a_name##tsd_tsd; \ -extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ -extern bool a_name##tsd_booted; -#else -#define malloc_tsd_externs(a_name, a_type) \ -extern pthread_key_t a_name##tsd_tsd; \ -extern tsd_init_head_t a_name##tsd_init_head; \ -extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ -extern bool a_name##tsd_booted; -#endif +/* O(name, type, nullable type */ +#define MALLOC_TSD \ + O(tcache_enabled, bool, bool) \ + O(arenas_tdata_bypass, bool, bool) \ + O(reentrancy_level, int8_t, int8_t) \ + O(narenas_tdata, uint32_t, uint32_t) \ + O(offset_state, uint64_t, uint64_t) \ + O(thread_allocated, uint64_t, uint64_t) \ + O(thread_deallocated, uint64_t, uint64_t) \ + O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ + O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ + O(iarena, arena_t *, arena_t *) \ + O(arena, arena_t *, arena_t *) \ + O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ + O(tcache, tcache_t, tcache_t) \ + O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ + MALLOC_TEST_TSD -/* malloc_tsd_data(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##tsd_tls = a_initializer; \ -a_attr __thread bool JEMALLOC_TLS_MODEL \ - a_name##tsd_initialized = false; \ -a_attr bool a_name##tsd_booted = false; -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##tsd_tls = a_initializer; \ -a_attr pthread_key_t a_name##tsd_tsd; \ -a_attr bool a_name##tsd_booted = false; -#elif (defined(_WIN32)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr DWORD a_name##tsd_tsd; \ -a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ - false, \ - a_initializer \ -}; \ -a_attr bool a_name##tsd_booted = false; -#else -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr pthread_key_t a_name##tsd_tsd; \ -a_attr tsd_init_head_t a_name##tsd_init_head = { \ - ql_head_initializer(blocks), \ - MALLOC_MUTEX_INITIALIZER \ -}; \ -a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ - false, \ - a_initializer \ -}; \ -a_attr bool a_name##tsd_booted = false; -#endif - -/* malloc_tsd_funcs(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_cleanup_wrapper(void) \ -{ \ - \ - if (a_name##tsd_initialized) { \ - a_name##tsd_initialized = false; \ - a_cleanup(&a_name##tsd_tls); \ - } \ - return (a_name##tsd_initialized); \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ - \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##tsd_cleanup_wrapper); \ - } \ - a_name##tsd_booted = true; \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_boot1(void) \ -{ \ - \ - /* Do nothing. */ \ -} \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - return (a_name##tsd_boot0()); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(void) \ -{ \ - \ - assert(a_name##tsd_booted); \ - return (&a_name##tsd_tls); \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ - \ - assert(a_name##tsd_booted); \ - a_name##tsd_tls = (*val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ - a_name##tsd_initialized = true; \ -} -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ - \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ - 0) \ - return (true); \ - } \ - a_name##tsd_booted = true; \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_boot1(void) \ -{ \ - \ - /* Do nothing. */ \ -} \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - return (a_name##tsd_boot0()); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(void) \ -{ \ - \ - assert(a_name##tsd_booted); \ - return (&a_name##tsd_tls); \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ - \ - assert(a_name##tsd_booted); \ - a_name##tsd_tls = (*val); \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)(&a_name##tsd_tls))) { \ - malloc_write("<jemalloc>: Error" \ - " setting TSD for "#a_name"\n"); \ - if (opt_abort) \ - abort(); \ - } \ - } \ -} -#elif (defined(_WIN32)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_cleanup_wrapper(void) \ -{ \ - DWORD error = GetLastError(); \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - TlsGetValue(a_name##tsd_tsd); \ - SetLastError(error); \ - \ - if (wrapper == NULL) \ - return (false); \ - if (a_cleanup != malloc_tsd_no_cleanup && \ - wrapper->initialized) { \ - wrapper->initialized = false; \ - a_cleanup(&wrapper->val); \ - if (wrapper->initialized) { \ - /* Trigger another cleanup round. */ \ - return (true); \ - } \ - } \ - malloc_tsd_dalloc(wrapper); \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ -{ \ - \ - if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ - malloc_write("<jemalloc>: Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ -} \ -a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ -{ \ - DWORD error = GetLastError(); \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - TlsGetValue(a_name##tsd_tsd); \ - SetLastError(error); \ - \ - if (unlikely(wrapper == NULL)) { \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write("<jemalloc>: Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } else { \ - wrapper->initialized = false; \ - wrapper->val = a_initializer; \ - } \ - a_name##tsd_wrapper_set(wrapper); \ - } \ - return (wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ - \ - a_name##tsd_tsd = TlsAlloc(); \ - if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ - return (true); \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##tsd_cleanup_wrapper); \ - } \ - a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ - a_name##tsd_booted = true; \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_boot1(void) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write("<jemalloc>: Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ - memcpy(wrapper, &a_name##tsd_boot_wrapper, \ - sizeof(a_name##tsd_wrapper_t)); \ - a_name##tsd_wrapper_set(wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - if (a_name##tsd_boot0()) \ - return (true); \ - a_name##tsd_boot1(); \ - return (false); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(void) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ - return (&wrapper->val); \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ - wrapper->val = *(val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ - wrapper->initialized = true; \ -} -#else -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr void \ -a_name##tsd_cleanup_wrapper(void *arg) \ -{ \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \ - \ - if (a_cleanup != malloc_tsd_no_cleanup && \ - wrapper->initialized) { \ - wrapper->initialized = false; \ - a_cleanup(&wrapper->val); \ - if (wrapper->initialized) { \ - /* Trigger another cleanup round. */ \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)wrapper)) { \ - malloc_write("<jemalloc>: Error" \ - " setting TSD for "#a_name"\n"); \ - if (opt_abort) \ - abort(); \ - } \ - return; \ - } \ - } \ - malloc_tsd_dalloc(wrapper); \ -} \ -a_attr void \ -a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ -{ \ - \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)wrapper)) { \ - malloc_write("<jemalloc>: Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ -} \ -a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ -{ \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - pthread_getspecific(a_name##tsd_tsd); \ - \ - if (unlikely(wrapper == NULL)) { \ - tsd_init_block_t block; \ - wrapper = tsd_init_check_recursion( \ - &a_name##tsd_init_head, &block); \ - if (wrapper) \ - return (wrapper); \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - block.data = wrapper; \ - if (wrapper == NULL) { \ - malloc_write("<jemalloc>: Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } else { \ - wrapper->initialized = false; \ - wrapper->val = a_initializer; \ - } \ - a_name##tsd_wrapper_set(wrapper); \ - tsd_init_finish(&a_name##tsd_init_head, &block); \ - } \ - return (wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ - \ - if (pthread_key_create(&a_name##tsd_tsd, \ - a_name##tsd_cleanup_wrapper) != 0) \ - return (true); \ - a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ - a_name##tsd_booted = true; \ - return (false); \ -} \ -a_attr void \ -a_name##tsd_boot1(void) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write("<jemalloc>: Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ - memcpy(wrapper, &a_name##tsd_boot_wrapper, \ - sizeof(a_name##tsd_wrapper_t)); \ - a_name##tsd_wrapper_set(wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - \ - if (a_name##tsd_boot0()) \ - return (true); \ - a_name##tsd_boot1(); \ - return (false); \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(void) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ - return (&wrapper->val); \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ - wrapper->val = *(val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ - wrapper->initialized = true; \ -} -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -struct tsd_init_block_s { - ql_elm(tsd_init_block_t) link; - pthread_t thread; - void *data; -}; -struct tsd_init_head_s { - ql_head(tsd_init_block_t) blocks; - malloc_mutex_t lock; -}; -#endif - -#define MALLOC_TSD \ -/* O(name, type) */ \ - O(tcache, tcache_t *) \ - O(thread_allocated, uint64_t) \ - O(thread_deallocated, uint64_t) \ - O(prof_tdata, prof_tdata_t *) \ - O(arena, arena_t *) \ - O(arenas_cache, arena_t **) \ - O(narenas_cache, unsigned) \ - O(arenas_cache_bypass, bool) \ - O(tcache_enabled, tcache_enabled_t) \ - O(quarantine, quarantine_t *) \ - -#define TSD_INITIALIZER { \ +#define TSD_INITIALIZER { \ tsd_state_uninitialized, \ - NULL, \ + TCACHE_ENABLED_ZERO_INITIALIZER, \ + false, \ + 0, \ + 0, \ + 0, \ 0, \ 0, \ NULL, \ + RTREE_CTX_ZERO_INITIALIZER, \ NULL, \ NULL, \ - 0, \ - false, \ - tcache_enabled_default, \ - NULL \ + NULL, \ + TCACHE_ZERO_INITIALIZER, \ + WITNESS_TSD_INITIALIZER \ + MALLOC_TEST_TSD_INITIALIZER \ } +enum { + tsd_state_nominal = 0, /* Common case --> jnz. */ + tsd_state_nominal_slow = 1, /* Initialized but on slow path. */ + /* the above 2 nominal states should be lower values. */ + tsd_state_nominal_max = 1, /* used for comparison only. */ + tsd_state_minimal_initialized = 2, + tsd_state_purgatory = 3, + tsd_state_reincarnated = 4, + tsd_state_uninitialized = 5 +}; + +/* Manually limit tsd_state_t to a single byte. */ +typedef uint8_t tsd_state_t; + +/* The actual tsd. */ struct tsd_s { + /* + * The contents should be treated as totally opaque outside the tsd + * module. Access any thread-local state through the getters and + * setters below. + */ tsd_state_t state; -#define O(n, t) \ - t n; +#define O(n, t, nt) \ + t use_a_getter_or_setter_instead_##n; MALLOC_TSD #undef O }; -static const tsd_t tsd_initializer = TSD_INITIALIZER; - -malloc_tsd_types(, tsd_t) - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_no_cleanup(void *arg); -void malloc_tsd_cleanup_register(bool (*f)(void)); -bool malloc_tsd_boot0(void); -void malloc_tsd_boot1(void); -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -void *tsd_init_check_recursion(tsd_init_head_t *head, - tsd_init_block_t *block); -void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); +/* + * Wrapper around tsd_t that makes it possible to avoid implicit conversion + * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be + * explicitly converted to tsd_t, which is non-nullable. + */ +struct tsdn_s { + tsd_t tsd; +}; +#define TSDN_NULL ((tsdn_t *)0) +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) { + return (tsdn_t *)tsd; +} + +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) { + return tsdn == NULL; +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsdn_tsd(tsdn_t *tsdn) { + assert(!tsdn_null(tsdn)); + + return &tsdn->tsd; +} + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_cleanup_register(bool (*f)(void)); +tsd_t *malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); +void tsd_cleanup(void *arg); +tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal); +void tsd_slow_update(tsd_t *tsd); + +/* + * We put the platform-specific data declarations and inlines into their own + * header files to avoid cluttering this file. They define tsd_boot0, + * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set. + */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#include "jemalloc/internal/tsd_malloc_thread_cleanup.h" +#elif (defined(JEMALLOC_TLS)) +#include "jemalloc/internal/tsd_tls.h" +#elif (defined(_WIN32)) +#include "jemalloc/internal/tsd_win.h" +#else +#include "jemalloc/internal/tsd_generic.h" #endif -void tsd_cleanup(void *arg); -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +/* + * tsd_foop_get_unsafe(tsd) returns a pointer to the thread-local instance of + * foo. This omits some safety checks, and so can be used during tsd + * initialization and cleanup. + */ +#define O(n, t, nt) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get_unsafe(tsd_t *tsd) { \ + return &tsd->use_a_getter_or_setter_instead_##n; \ +} +MALLOC_TSD +#undef O + +/* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */ +#define O(n, t, nt) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get(tsd_t *tsd) { \ + assert(tsd->state == tsd_state_nominal || \ + tsd->state == tsd_state_nominal_slow || \ + tsd->state == tsd_state_reincarnated || \ + tsd->state == tsd_state_minimal_initialized); \ + return tsd_##n##p_get_unsafe(tsd); \ +} +MALLOC_TSD +#undef O -#ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) +/* + * tsdn_foop_get(tsdn) returns either the thread-local instance of foo (if tsdn + * isn't NULL), or NULL (if tsdn is NULL), cast to the nullable pointer type. + */ +#define O(n, t, nt) \ +JEMALLOC_ALWAYS_INLINE nt * \ +tsdn_##n##p_get(tsdn_t *tsdn) { \ + if (tsdn_null(tsdn)) { \ + return NULL; \ + } \ + tsd_t *tsd = tsdn_tsd(tsdn); \ + return (nt *)tsd_##n##p_get(tsd); \ +} +MALLOC_TSD +#undef O -tsd_t *tsd_fetch(void); -bool tsd_nominal(tsd_t *tsd); -#define O(n, t) \ -t *tsd_##n##p_get(tsd_t *tsd); \ -t tsd_##n##_get(tsd_t *tsd); \ -void tsd_##n##_set(tsd_t *tsd, t n); +/* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */ +#define O(n, t, nt) \ +JEMALLOC_ALWAYS_INLINE t \ +tsd_##n##_get(tsd_t *tsd) { \ + return *tsd_##n##p_get(tsd); \ +} MALLOC_TSD #undef O -#endif -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) -malloc_tsd_externs(, tsd_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) +/* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */ +#define O(n, t, nt) \ +JEMALLOC_ALWAYS_INLINE void \ +tsd_##n##_set(tsd_t *tsd, t val) { \ + assert(tsd->state != tsd_state_reincarnated && \ + tsd->state != tsd_state_minimal_initialized); \ + *tsd_##n##p_get(tsd) = val; \ +} +MALLOC_TSD +#undef O + +JEMALLOC_ALWAYS_INLINE void +tsd_assert_fast(tsd_t *tsd) { + assert(!malloc_slow && tsd_tcache_enabled_get(tsd) && + tsd_reentrancy_level_get(tsd) == 0); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_fast(tsd_t *tsd) { + bool fast = (tsd->state == tsd_state_nominal); + if (fast) { + tsd_assert_fast(tsd); + } + + return fast; +} JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) -{ - tsd_t *tsd = tsd_get(); +tsd_fetch_impl(bool init, bool minimal) { + tsd_t *tsd = tsd_get(init); + + if (!init && tsd_get_allocates() && tsd == NULL) { + return NULL; + } + assert(tsd != NULL); if (unlikely(tsd->state != tsd_state_nominal)) { - if (tsd->state == tsd_state_uninitialized) { - tsd->state = tsd_state_nominal; - /* Trigger cleanup handler registration. */ - tsd_set(tsd); - } else if (tsd->state == tsd_state_purgatory) { - tsd->state = tsd_state_reincarnated; - tsd_set(tsd); - } else - assert(tsd->state == tsd_state_reincarnated); + return tsd_fetch_slow(tsd, minimal); } + assert(tsd_fast(tsd)); + tsd_assert_fast(tsd); + + return tsd; +} - return (tsd); +/* Get a minimal TSD that requires no cleanup. See comments in free(). */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch_min(void) { + return tsd_fetch_impl(true, true); } -JEMALLOC_INLINE bool -tsd_nominal(tsd_t *tsd) -{ +/* For internal background threads use only. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_internal_fetch(void) { + tsd_t *tsd = tsd_fetch_min(); + /* Use reincarnated state to prevent full initialization. */ + tsd->state = tsd_state_reincarnated; - return (tsd->state == tsd_state_nominal); + return tsd; } -#define O(n, t) \ -JEMALLOC_ALWAYS_INLINE t * \ -tsd_##n##p_get(tsd_t *tsd) \ -{ \ - \ - return (&tsd->n); \ -} \ - \ -JEMALLOC_ALWAYS_INLINE t \ -tsd_##n##_get(tsd_t *tsd) \ -{ \ - \ - return (*tsd_##n##p_get(tsd)); \ -} \ - \ -JEMALLOC_ALWAYS_INLINE void \ -tsd_##n##_set(tsd_t *tsd, t n) \ -{ \ - \ - assert(tsd->state == tsd_state_nominal); \ - tsd->n = n; \ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) { + return tsd_fetch_impl(true, false); +} + +static inline bool +tsd_nominal(tsd_t *tsd) { + return (tsd->state <= tsd_state_nominal_max); +} + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsdn_fetch(void) { + if (!tsd_booted_get()) { + return NULL; + } + + return tsd_tsdn(tsd_fetch_impl(false, false)); +} + +JEMALLOC_ALWAYS_INLINE rtree_ctx_t * +tsd_rtree_ctx(tsd_t *tsd) { + return tsd_rtree_ctxp_get(tsd); +} + +JEMALLOC_ALWAYS_INLINE rtree_ctx_t * +tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { + /* + * If tsd cannot be accessed, initialize the fallback rtree_ctx and + * return a pointer to it. + */ + if (unlikely(tsdn_null(tsdn))) { + rtree_ctx_data_init(fallback); + return fallback; + } + return tsd_rtree_ctx(tsdn_tsd(tsdn)); } -MALLOC_TSD -#undef O -#endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_TSD_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_generic.h b/deps/jemalloc/include/jemalloc/internal/tsd_generic.h new file mode 100644 index 000000000..1e52ef767 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/tsd_generic.h @@ -0,0 +1,157 @@ +#ifdef JEMALLOC_INTERNAL_TSD_GENERIC_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_GENERIC_H + +typedef struct tsd_init_block_s tsd_init_block_t; +struct tsd_init_block_s { + ql_elm(tsd_init_block_t) link; + pthread_t thread; + void *data; +}; + +/* Defined in tsd.c, to allow the mutex headers to have tsd dependencies. */ +typedef struct tsd_init_head_s tsd_init_head_t; + +typedef struct { + bool initialized; + tsd_t val; +} tsd_wrapper_t; + +void *tsd_init_check_recursion(tsd_init_head_t *head, + tsd_init_block_t *block); +void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); + +extern pthread_key_t tsd_tsd; +extern tsd_init_head_t tsd_init_head; +extern tsd_wrapper_t tsd_boot_wrapper; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE void +tsd_cleanup_wrapper(void *arg) { + tsd_wrapper_t *wrapper = (tsd_wrapper_t *)arg; + + if (wrapper->initialized) { + wrapper->initialized = false; + tsd_cleanup(&wrapper->val); + if (wrapper->initialized) { + /* Trigger another cleanup round. */ + if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) + { + malloc_write("<jemalloc>: Error setting TSD\n"); + if (opt_abort) { + abort(); + } + } + return; + } + } + malloc_tsd_dalloc(wrapper); +} + +JEMALLOC_ALWAYS_INLINE void +tsd_wrapper_set(tsd_wrapper_t *wrapper) { + if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) { + malloc_write("<jemalloc>: Error setting TSD\n"); + abort(); + } +} + +JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * +tsd_wrapper_get(bool init) { + tsd_wrapper_t *wrapper = (tsd_wrapper_t *)pthread_getspecific(tsd_tsd); + + if (init && unlikely(wrapper == NULL)) { + tsd_init_block_t block; + wrapper = (tsd_wrapper_t *) + tsd_init_check_recursion(&tsd_init_head, &block); + if (wrapper) { + return wrapper; + } + wrapper = (tsd_wrapper_t *) + malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + block.data = (void *)wrapper; + if (wrapper == NULL) { + malloc_write("<jemalloc>: Error allocating TSD\n"); + abort(); + } else { + wrapper->initialized = false; + tsd_t initializer = TSD_INITIALIZER; + wrapper->val = initializer; + } + tsd_wrapper_set(wrapper); + tsd_init_finish(&tsd_init_head, &block); + } + return wrapper; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + if (pthread_key_create(&tsd_tsd, tsd_cleanup_wrapper) != 0) { + return true; + } + tsd_wrapper_set(&tsd_boot_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + tsd_wrapper_t *wrapper; + wrapper = (tsd_wrapper_t *)malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + if (wrapper == NULL) { + malloc_write("<jemalloc>: Error allocating TSD\n"); + abort(); + } + tsd_boot_wrapper.initialized = false; + tsd_cleanup(&tsd_boot_wrapper.val); + wrapper->initialized = false; + tsd_t initializer = TSD_INITIALIZER; + wrapper->val = initializer; + tsd_wrapper_set(wrapper); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + if (tsd_boot0()) { + return true; + } + tsd_boot1(); + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return true; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(init); + if (tsd_get_allocates() && !init && wrapper == NULL) { + return NULL; + } + return &wrapper->val; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(true); + if (likely(&wrapper->val != val)) { + wrapper->val = *(val); + } + wrapper->initialized = true; +} diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h new file mode 100644 index 000000000..beb467a67 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -0,0 +1,60 @@ +#ifdef JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H + +extern __thread tsd_t tsd_tls; +extern __thread bool tsd_initialized; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE bool +tsd_cleanup_wrapper(void) { + if (tsd_initialized) { + tsd_initialized = false; + tsd_cleanup(&tsd_tls); + } + return tsd_initialized; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + /* Do nothing. */ +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + return tsd_boot0(); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return false; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + assert(tsd_booted); + return &tsd_tls; +} +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + assert(tsd_booted); + if (likely(&tsd_tls != val)) { + tsd_tls = (*val); + } + tsd_initialized = true; +} diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_tls.h b/deps/jemalloc/include/jemalloc/internal/tsd_tls.h new file mode 100644 index 000000000..0de64b7b8 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/tsd_tls.h @@ -0,0 +1,59 @@ +#ifdef JEMALLOC_INTERNAL_TSD_TLS_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_TLS_H + +extern __thread tsd_t tsd_tls; +extern pthread_key_t tsd_tsd; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + if (pthread_key_create(&tsd_tsd, &tsd_cleanup) != 0) { + return true; + } + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + /* Do nothing. */ +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + return tsd_boot0(); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return false; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(UNUSED bool init) { + assert(tsd_booted); + return &tsd_tls; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + assert(tsd_booted); + if (likely(&tsd_tls != val)) { + tsd_tls = (*val); + } + if (pthread_setspecific(tsd_tsd, (void *)(&tsd_tls)) != 0) { + malloc_write("<jemalloc>: Error setting tsd.\n"); + if (opt_abort) { + abort(); + } + } +} diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_types.h b/deps/jemalloc/include/jemalloc/internal/tsd_types.h new file mode 100644 index 000000000..6200af61f --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/tsd_types.h @@ -0,0 +1,10 @@ +#ifndef JEMALLOC_INTERNAL_TSD_TYPES_H +#define JEMALLOC_INTERNAL_TSD_TYPES_H + +#define MALLOC_TSD_CLEANUPS_MAX 2 + +typedef struct tsd_s tsd_t; +typedef struct tsdn_s tsdn_t; +typedef bool (*malloc_tsd_cleanup_t)(void); + +#endif /* JEMALLOC_INTERNAL_TSD_TYPES_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_win.h b/deps/jemalloc/include/jemalloc/internal/tsd_win.h new file mode 100644 index 000000000..cf30d18e3 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/tsd_win.h @@ -0,0 +1,139 @@ +#ifdef JEMALLOC_INTERNAL_TSD_WIN_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_WIN_H + +typedef struct { + bool initialized; + tsd_t val; +} tsd_wrapper_t; + +extern DWORD tsd_tsd; +extern tsd_wrapper_t tsd_boot_wrapper; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE bool +tsd_cleanup_wrapper(void) { + DWORD error = GetLastError(); + tsd_wrapper_t *wrapper = (tsd_wrapper_t *)TlsGetValue(tsd_tsd); + SetLastError(error); + + if (wrapper == NULL) { + return false; + } + + if (wrapper->initialized) { + wrapper->initialized = false; + tsd_cleanup(&wrapper->val); + if (wrapper->initialized) { + /* Trigger another cleanup round. */ + return true; + } + } + malloc_tsd_dalloc(wrapper); + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_wrapper_set(tsd_wrapper_t *wrapper) { + if (!TlsSetValue(tsd_tsd, (void *)wrapper)) { + malloc_write("<jemalloc>: Error setting TSD\n"); + abort(); + } +} + +JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * +tsd_wrapper_get(bool init) { + DWORD error = GetLastError(); + tsd_wrapper_t *wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd); + SetLastError(error); + + if (init && unlikely(wrapper == NULL)) { + wrapper = (tsd_wrapper_t *) + malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + if (wrapper == NULL) { + malloc_write("<jemalloc>: Error allocating TSD\n"); + abort(); + } else { + wrapper->initialized = false; + /* MSVC is finicky about aggregate initialization. */ + tsd_t tsd_initializer = TSD_INITIALIZER; + wrapper->val = tsd_initializer; + } + tsd_wrapper_set(wrapper); + } + return wrapper; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + tsd_tsd = TlsAlloc(); + if (tsd_tsd == TLS_OUT_OF_INDEXES) { + return true; + } + malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); + tsd_wrapper_set(&tsd_boot_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + tsd_wrapper_t *wrapper; + wrapper = (tsd_wrapper_t *) + malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + if (wrapper == NULL) { + malloc_write("<jemalloc>: Error allocating TSD\n"); + abort(); + } + tsd_boot_wrapper.initialized = false; + tsd_cleanup(&tsd_boot_wrapper.val); + wrapper->initialized = false; + tsd_t initializer = TSD_INITIALIZER; + wrapper->val = initializer; + tsd_wrapper_set(wrapper); +} +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + if (tsd_boot0()) { + return true; + } + tsd_boot1(); + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return true; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(init); + if (tsd_get_allocates() && !init && wrapper == NULL) { + return NULL; + } + return &wrapper->val; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(true); + if (likely(&wrapper->val != val)) { + wrapper->val = *(val); + } + wrapper->initialized = true; +} diff --git a/deps/jemalloc/include/jemalloc/internal/util.h b/deps/jemalloc/include/jemalloc/internal/util.h index b2ea740fd..304cb545a 100644 --- a/deps/jemalloc/include/jemalloc/internal/util.h +++ b/deps/jemalloc/include/jemalloc/internal/util.h @@ -1,295 +1,50 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_UTIL_H +#define JEMALLOC_INTERNAL_UTIL_H -#ifdef _WIN32 -# ifdef _WIN64 -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "ll" -# else -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "" -# endif -# define FMTd32 "d" -# define FMTu32 "u" -# define FMTx32 "x" -# define FMTd64 FMT64_PREFIX "d" -# define FMTu64 FMT64_PREFIX "u" -# define FMTx64 FMT64_PREFIX "x" -# define FMTdPTR FMTPTR_PREFIX "d" -# define FMTuPTR FMTPTR_PREFIX "u" -# define FMTxPTR FMTPTR_PREFIX "x" -#else -# include <inttypes.h> -# define FMTd32 PRId32 -# define FMTu32 PRIu32 -# define FMTx32 PRIx32 -# define FMTd64 PRId64 -# define FMTu64 PRIu64 -# define FMTx64 PRIx64 -# define FMTdPTR PRIdPTR -# define FMTuPTR PRIuPTR -# define FMTxPTR PRIxPTR -#endif +#define UTIL_INLINE static inline -/* Size of stack-allocated buffer passed to buferror(). */ -#define BUFERROR_BUF 64 - -/* - * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be - * large enough for all possible uses within jemalloc. - */ -#define MALLOC_PRINTF_BUFSIZE 4096 +/* Junk fill patterns. */ +#ifndef JEMALLOC_ALLOC_JUNK +# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) +#endif +#ifndef JEMALLOC_FREE_JUNK +# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) +#endif /* * Wrap a cpp argument that contains commas such that it isn't broken up into * multiple arguments. */ -#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ +#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ + +/* cpp macro definition stringification. */ +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) /* * Silence compiler warnings due to uninitialized values. This is used * wherever the compiler fails to recognize that the variable is never used * uninitialized. */ -#ifdef JEMALLOC_CC_SILENCE -# define JEMALLOC_CC_SILENCE_INIT(v) = v -#else -# define JEMALLOC_CC_SILENCE_INIT(v) -#endif - -#define JEMALLOC_GNUC_PREREQ(major, minor) \ - (!defined(__clang__) && \ - (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) -#ifndef __has_builtin -# define __has_builtin(builtin) (0) -#endif -#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \ - (defined(__clang__) && __has_builtin(builtin)) +#define JEMALLOC_CC_SILENCE_INIT(v) = v #ifdef __GNUC__ -# define likely(x) __builtin_expect(!!(x), 1) -# define unlikely(x) __builtin_expect(!!(x), 0) -# if JEMALLOC_GNUC_PREREQ(4, 6) || \ - JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable) -# define unreachable() __builtin_unreachable() -# else -# define unreachable() -# endif -#else -# define likely(x) !!(x) -# define unlikely(x) !!(x) -# define unreachable() -#endif - -/* - * Define a custom assert() in order to reduce the chances of deadlock during - * assertion failure. - */ -#ifndef assert -#define assert(e) do { \ - if (unlikely(config_debug && !(e))) { \ - malloc_printf( \ - "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \ - __FILE__, __LINE__, #e); \ - abort(); \ - } \ -} while (0) -#endif - -#ifndef not_reached -#define not_reached() do { \ - if (config_debug) { \ - malloc_printf( \ - "<jemalloc>: %s:%d: Unreachable code reached\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ - unreachable(); \ -} while (0) -#endif - -#ifndef not_implemented -#define not_implemented() do { \ - if (config_debug) { \ - malloc_printf("<jemalloc>: %s:%d: Not implemented\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ -} while (0) -#endif - -#ifndef assert_not_implemented -#define assert_not_implemented(e) do { \ - if (unlikely(config_debug && !(e))) \ - not_implemented(); \ -} while (0) -#endif - -/* Use to assert a particular configuration, e.g., cassert(config_debug). */ -#define cassert(c) do { \ - if (unlikely(!(c))) \ - not_reached(); \ -} while (0) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -int buferror(int err, char *buf, size_t buflen); -uintmax_t malloc_strtoumax(const char *restrict nptr, - char **restrict endptr, int base); -void malloc_write(const char *s); - -/* - * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating - * point math. - */ -int malloc_vsnprintf(char *str, size_t size, const char *format, - va_list ap); -int malloc_snprintf(char *str, size_t size, const char *format, ...) - JEMALLOC_FORMAT_PRINTF(3, 4); -void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap); -void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); -void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -int jemalloc_ffsl(long bitmap); -int jemalloc_ffs(int bitmap); -size_t pow2_ceil(size_t x); -size_t lg_floor(size_t x); -void set_errno(int errnum); -int get_errno(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) - -/* Sanity check. */ -#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS) -# error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure -#endif - -JEMALLOC_ALWAYS_INLINE int -jemalloc_ffsl(long bitmap) -{ - - return (JEMALLOC_INTERNAL_FFSL(bitmap)); -} - -JEMALLOC_ALWAYS_INLINE int -jemalloc_ffs(int bitmap) -{ - - return (JEMALLOC_INTERNAL_FFS(bitmap)); -} - -/* Compute the smallest power of 2 that is >= x. */ -JEMALLOC_INLINE size_t -pow2_ceil(size_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; -#if (LG_SIZEOF_PTR == 3) - x |= x >> 32; -#endif - x++; - return (x); -} - -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE size_t -lg_floor(size_t x) -{ - size_t ret; - - assert(x != 0); - - asm ("bsr %1, %0" - : "=r"(ret) // Outputs. - : "r"(x) // Inputs. - ); - return (ret); -} -#elif (defined(_MSC_VER)) -JEMALLOC_INLINE size_t -lg_floor(size_t x) -{ - unsigned long ret; - - assert(x != 0); - -#if (LG_SIZEOF_PTR == 3) - _BitScanReverse64(&ret, x); -#elif (LG_SIZEOF_PTR == 2) - _BitScanReverse(&ret, x); +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) #else -# error "Unsupported type sizes for lg_floor()" +# define likely(x) !!(x) +# define unlikely(x) !!(x) #endif - return (ret); -} -#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) -JEMALLOC_INLINE size_t -lg_floor(size_t x) -{ - - assert(x != 0); -#if (LG_SIZEOF_PTR == LG_SIZEOF_INT) - return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x)); -#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) - return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); -#else -# error "Unsupported type sizes for lg_floor()" +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure #endif -} -#else -JEMALLOC_INLINE size_t -lg_floor(size_t x) -{ - assert(x != 0); - - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); -#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG) - x |= (x >> 32); - if (x == KZU(0xffffffffffffffff)) - return (63); - x++; - return (jemalloc_ffsl(x) - 2); -#elif (LG_SIZEOF_PTR == 2) - if (x == KZU(0xffffffff)) - return (31); - x++; - return (jemalloc_ffs(x) - 2); -#else -# error "Unsupported type sizes for lg_floor()" -#endif -} -#endif +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() /* Set error code. */ -JEMALLOC_INLINE void -set_errno(int errnum) -{ - +UTIL_INLINE void +set_errno(int errnum) { #ifdef _WIN32 SetLastError(errnum); #else @@ -298,17 +53,15 @@ set_errno(int errnum) } /* Get last error code. */ -JEMALLOC_INLINE int -get_errno(void) -{ - +UTIL_INLINE int +get_errno(void) { #ifdef _WIN32 - return (GetLastError()); + return GetLastError(); #else - return (errno); + return errno; #endif } -#endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#undef UTIL_INLINE + +#endif /* JEMALLOC_INTERNAL_UTIL_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/valgrind.h b/deps/jemalloc/include/jemalloc/internal/valgrind.h deleted file mode 100644 index a3380df92..000000000 --- a/deps/jemalloc/include/jemalloc/internal/valgrind.h +++ /dev/null @@ -1,112 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#ifdef JEMALLOC_VALGRIND -#include <valgrind/valgrind.h> - -/* - * The size that is reported to Valgrind must be consistent through a chain of - * malloc..realloc..realloc calls. Request size isn't recorded anywhere in - * jemalloc, so it is critical that all callers of these macros provide usize - * rather than request size. As a result, buffer overflow detection is - * technically weakened for the standard API, though it is generally accepted - * practice to consider any extra bytes reported by malloc_usable_size() as - * usable space. - */ -#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do { \ - if (unlikely(in_valgrind)) \ - valgrind_make_mem_noaccess(ptr, usize); \ -} while (0) -#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do { \ - if (unlikely(in_valgrind)) \ - valgrind_make_mem_undefined(ptr, usize); \ -} while (0) -#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do { \ - if (unlikely(in_valgrind)) \ - valgrind_make_mem_defined(ptr, usize); \ -} while (0) -/* - * The VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_RESIZEINPLACE_BLOCK() macro - * calls must be embedded in macros rather than in functions so that when - * Valgrind reports errors, there are no extra stack frames in the backtraces. - */ -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ - if (unlikely(in_valgrind && cond)) \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ -} while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ - ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ - zero) do { \ - if (unlikely(in_valgrind)) { \ - size_t rzsize = p2rz(ptr); \ - \ - if (!maybe_moved || ptr == old_ptr) { \ - VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ - usize, rzsize); \ - if (zero && old_usize < usize) { \ - valgrind_make_mem_defined( \ - (void *)((uintptr_t)ptr + \ - old_usize), usize - old_usize); \ - } \ - } else { \ - if (!old_ptr_maybe_null || old_ptr != NULL) { \ - valgrind_freelike_block(old_ptr, \ - old_rzsize); \ - } \ - if (!ptr_maybe_null || ptr != NULL) { \ - size_t copy_size = (old_usize < usize) \ - ? old_usize : usize; \ - size_t tail_size = usize - copy_size; \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ - rzsize, false); \ - if (copy_size > 0) { \ - valgrind_make_mem_defined(ptr, \ - copy_size); \ - } \ - if (zero && tail_size > 0) { \ - valgrind_make_mem_defined( \ - (void *)((uintptr_t)ptr + \ - copy_size), tail_size); \ - } \ - } \ - } \ - } \ -} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ - if (unlikely(in_valgrind)) \ - valgrind_freelike_block(ptr, rzsize); \ -} while (0) -#else -#define RUNNING_ON_VALGRIND ((unsigned)0) -#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ - ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ - zero) do {} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#ifdef JEMALLOC_VALGRIND -void valgrind_make_mem_noaccess(void *ptr, size_t usize); -void valgrind_make_mem_undefined(void *ptr, size_t usize); -void valgrind_make_mem_defined(void *ptr, size_t usize); -void valgrind_freelike_block(void *ptr, size_t usize); -#endif - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/deps/jemalloc/include/jemalloc/internal/witness.h b/deps/jemalloc/include/jemalloc/internal/witness.h new file mode 100644 index 000000000..7ace8ae4a --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/witness.h @@ -0,0 +1,346 @@ +#ifndef JEMALLOC_INTERNAL_WITNESS_H +#define JEMALLOC_INTERNAL_WITNESS_H + +#include "jemalloc/internal/ql.h" + +/******************************************************************************/ +/* LOCK RANKS */ +/******************************************************************************/ + +/* + * Witnesses with rank WITNESS_RANK_OMIT are completely ignored by the witness + * machinery. + */ + +#define WITNESS_RANK_OMIT 0U + +#define WITNESS_RANK_MIN 1U + +#define WITNESS_RANK_INIT 1U +#define WITNESS_RANK_CTL 1U +#define WITNESS_RANK_TCACHES 2U +#define WITNESS_RANK_ARENAS 3U + +#define WITNESS_RANK_BACKGROUND_THREAD_GLOBAL 4U + +#define WITNESS_RANK_PROF_DUMP 5U +#define WITNESS_RANK_PROF_BT2GCTX 6U +#define WITNESS_RANK_PROF_TDATAS 7U +#define WITNESS_RANK_PROF_TDATA 8U +#define WITNESS_RANK_PROF_GCTX 9U + +#define WITNESS_RANK_BACKGROUND_THREAD 10U + +/* + * Used as an argument to witness_assert_depth_to_rank() in order to validate + * depth excluding non-core locks with lower ranks. Since the rank argument to + * witness_assert_depth_to_rank() is inclusive rather than exclusive, this + * definition can have the same value as the minimally ranked core lock. + */ +#define WITNESS_RANK_CORE 11U + +#define WITNESS_RANK_DECAY 11U +#define WITNESS_RANK_TCACHE_QL 12U +#define WITNESS_RANK_EXTENT_GROW 13U +#define WITNESS_RANK_EXTENTS 14U +#define WITNESS_RANK_EXTENT_AVAIL 15U + +#define WITNESS_RANK_EXTENT_POOL 16U +#define WITNESS_RANK_RTREE 17U +#define WITNESS_RANK_BASE 18U +#define WITNESS_RANK_ARENA_LARGE 19U + +#define WITNESS_RANK_LEAF 0xffffffffU +#define WITNESS_RANK_BIN WITNESS_RANK_LEAF +#define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF +#define WITNESS_RANK_DSS WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF + +/******************************************************************************/ +/* PER-WITNESS DATA */ +/******************************************************************************/ +#if defined(JEMALLOC_DEBUG) +# define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} +#else +# define WITNESS_INITIALIZER(name, rank) +#endif + +typedef struct witness_s witness_t; +typedef unsigned witness_rank_t; +typedef ql_head(witness_t) witness_list_t; +typedef int witness_comp_t (const witness_t *, void *, const witness_t *, + void *); + +struct witness_s { + /* Name, used for printing lock order reversal messages. */ + const char *name; + + /* + * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses + * must be acquired in order of increasing rank. + */ + witness_rank_t rank; + + /* + * If two witnesses are of equal rank and they have the samp comp + * function pointer, it is called as a last attempt to differentiate + * between witnesses of equal rank. + */ + witness_comp_t *comp; + + /* Opaque data, passed to comp(). */ + void *opaque; + + /* Linkage for thread's currently owned locks. */ + ql_elm(witness_t) link; +}; + +/******************************************************************************/ +/* PER-THREAD DATA */ +/******************************************************************************/ +typedef struct witness_tsd_s witness_tsd_t; +struct witness_tsd_s { + witness_list_t witnesses; + bool forking; +}; + +#define WITNESS_TSD_INITIALIZER { ql_head_initializer(witnesses), false } +#define WITNESS_TSDN_NULL ((witness_tsdn_t *)0) + +/******************************************************************************/ +/* (PER-THREAD) NULLABILITY HELPERS */ +/******************************************************************************/ +typedef struct witness_tsdn_s witness_tsdn_t; +struct witness_tsdn_s { + witness_tsd_t witness_tsd; +}; + +JEMALLOC_ALWAYS_INLINE witness_tsdn_t * +witness_tsd_tsdn(witness_tsd_t *witness_tsd) { + return (witness_tsdn_t *)witness_tsd; +} + +JEMALLOC_ALWAYS_INLINE bool +witness_tsdn_null(witness_tsdn_t *witness_tsdn) { + return witness_tsdn == NULL; +} + +JEMALLOC_ALWAYS_INLINE witness_tsd_t * +witness_tsdn_tsd(witness_tsdn_t *witness_tsdn) { + assert(!witness_tsdn_null(witness_tsdn)); + return &witness_tsdn->witness_tsd; +} + +/******************************************************************************/ +/* API */ +/******************************************************************************/ +void witness_init(witness_t *witness, const char *name, witness_rank_t rank, + witness_comp_t *comp, void *opaque); + +typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); +extern witness_lock_error_t *JET_MUTABLE witness_lock_error; + +typedef void (witness_owner_error_t)(const witness_t *); +extern witness_owner_error_t *JET_MUTABLE witness_owner_error; + +typedef void (witness_not_owner_error_t)(const witness_t *); +extern witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error; + +typedef void (witness_depth_error_t)(const witness_list_t *, + witness_rank_t rank_inclusive, unsigned depth); +extern witness_depth_error_t *JET_MUTABLE witness_depth_error; + +void witnesses_cleanup(witness_tsd_t *witness_tsd); +void witness_prefork(witness_tsd_t *witness_tsd); +void witness_postfork_parent(witness_tsd_t *witness_tsd); +void witness_postfork_child(witness_tsd_t *witness_tsd); + +/* Helper, not intended for direct use. */ +static inline bool +witness_owner(witness_tsd_t *witness_tsd, const witness_t *witness) { + witness_list_t *witnesses; + witness_t *w; + + cassert(config_debug); + + witnesses = &witness_tsd->witnesses; + ql_foreach(w, witnesses, link) { + if (w == witness) { + return true; + } + } + + return false; +} + +static inline void +witness_assert_owner(witness_tsdn_t *witness_tsdn, const witness_t *witness) { + witness_tsd_t *witness_tsd; + + if (!config_debug) { + return; + } + + if (witness_tsdn_null(witness_tsdn)) { + return; + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + if (witness->rank == WITNESS_RANK_OMIT) { + return; + } + + if (witness_owner(witness_tsd, witness)) { + return; + } + witness_owner_error(witness); +} + +static inline void +witness_assert_not_owner(witness_tsdn_t *witness_tsdn, + const witness_t *witness) { + witness_tsd_t *witness_tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) { + return; + } + + if (witness_tsdn_null(witness_tsdn)) { + return; + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + if (witness->rank == WITNESS_RANK_OMIT) { + return; + } + + witnesses = &witness_tsd->witnesses; + ql_foreach(w, witnesses, link) { + if (w == witness) { + witness_not_owner_error(witness); + } + } +} + +static inline void +witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn, + witness_rank_t rank_inclusive, unsigned depth) { + witness_tsd_t *witness_tsd; + unsigned d; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) { + return; + } + + if (witness_tsdn_null(witness_tsdn)) { + return; + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + + d = 0; + witnesses = &witness_tsd->witnesses; + w = ql_last(witnesses, link); + if (w != NULL) { + ql_reverse_foreach(w, witnesses, link) { + if (w->rank < rank_inclusive) { + break; + } + d++; + } + } + if (d != depth) { + witness_depth_error(witnesses, rank_inclusive, depth); + } +} + +static inline void +witness_assert_depth(witness_tsdn_t *witness_tsdn, unsigned depth) { + witness_assert_depth_to_rank(witness_tsdn, WITNESS_RANK_MIN, depth); +} + +static inline void +witness_assert_lockless(witness_tsdn_t *witness_tsdn) { + witness_assert_depth(witness_tsdn, 0); +} + +static inline void +witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { + witness_tsd_t *witness_tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) { + return; + } + + if (witness_tsdn_null(witness_tsdn)) { + return; + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + if (witness->rank == WITNESS_RANK_OMIT) { + return; + } + + witness_assert_not_owner(witness_tsdn, witness); + + witnesses = &witness_tsd->witnesses; + w = ql_last(witnesses, link); + if (w == NULL) { + /* No other locks; do nothing. */ + } else if (witness_tsd->forking && w->rank <= witness->rank) { + /* Forking, and relaxed ranking satisfied. */ + } else if (w->rank > witness->rank) { + /* Not forking, rank order reversal. */ + witness_lock_error(witnesses, witness); + } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != + witness->comp || w->comp(w, w->opaque, witness, witness->opaque) > + 0)) { + /* + * Missing/incompatible comparison function, or comparison + * function indicates rank order reversal. + */ + witness_lock_error(witnesses, witness); + } + + ql_elm_new(witness, link); + ql_tail_insert(witnesses, witness, link); +} + +static inline void +witness_unlock(witness_tsdn_t *witness_tsdn, witness_t *witness) { + witness_tsd_t *witness_tsd; + witness_list_t *witnesses; + + if (!config_debug) { + return; + } + + if (witness_tsdn_null(witness_tsdn)) { + return; + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + if (witness->rank == WITNESS_RANK_OMIT) { + return; + } + + /* + * Check whether owner before removal, rather than relying on + * witness_assert_owner() to abort, so that unit tests can test this + * function's failure mode without causing undefined behavior. + */ + if (witness_owner(witness_tsd, witness)) { + witnesses = &witness_tsd->witnesses; + ql_remove(witnesses, witness, link); + } else { + witness_assert_owner(witness_tsdn, witness); + } +} + +#endif /* JEMALLOC_INTERNAL_WITNESS_H */ diff --git a/deps/jemalloc/include/jemalloc/jemalloc.sh b/deps/jemalloc/include/jemalloc/jemalloc.sh index c085814f2..b19b1548b 100755 --- a/deps/jemalloc/include/jemalloc/jemalloc.sh +++ b/deps/jemalloc/include/jemalloc/jemalloc.sh @@ -4,7 +4,7 @@ objroot=$1 cat <<EOF #ifndef JEMALLOC_H_ -#define JEMALLOC_H_ +#define JEMALLOC_H_ #ifdef __cplusplus extern "C" { #endif @@ -15,7 +15,6 @@ for hdr in jemalloc_defs.h jemalloc_rename.h jemalloc_macros.h \ jemalloc_protos.h jemalloc_typedefs.h jemalloc_mangle.h ; do cat "${objroot}include/jemalloc/${hdr}" \ | grep -v 'Generated from .* by configure\.' \ - | sed -e 's/^#define /#define /g' \ | sed -e 's/ $//g' echo done diff --git a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in index ab13c3758..6d89435c2 100644 --- a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in +++ b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in @@ -33,5 +33,13 @@ */ #undef JEMALLOC_USE_CXX_THROW +#ifdef _MSC_VER +# ifdef _WIN64 +# define LG_SIZEOF_PTR_WIN 3 +# else +# define LG_SIZEOF_PTR_WIN 2 +# endif +#endif + /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ #undef LG_SIZEOF_PTR diff --git a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in index a7028db34..aee55438c 100644 --- a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in +++ b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in @@ -4,31 +4,51 @@ #include <limits.h> #include <strings.h> -#define JEMALLOC_VERSION "@jemalloc_version@" -#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ -#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ -#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ -#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ -#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" +#define JEMALLOC_VERSION "@jemalloc_version@" +#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ +#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ +#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ +#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ +#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" -# define MALLOCX_LG_ALIGN(la) (la) -# if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) (ffs(a)-1) -# else -# define MALLOCX_ALIGN(a) \ - ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) -# endif -# define MALLOCX_ZERO ((int)0x40) +#define MALLOCX_LG_ALIGN(la) ((int)(la)) +#if LG_SIZEOF_PTR == 2 +# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) +#else +# define MALLOCX_ALIGN(a) \ + ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ + ffs((int)(((size_t)(a))>>32))+31)) +#endif +#define MALLOCX_ZERO ((int)0x40) /* * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 * encodes MALLOCX_TCACHE_NONE. */ -# define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) -# define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) +#define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) +#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) /* * Bias arena index bits so that 0 encodes "use an automatically chosen arena". */ -# define MALLOCX_ARENA(a) ((int)(((a)+1) << 20)) +#define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) + +/* + * Use as arena index in "arena.<i>.{purge,decay,dss}" and + * "stats.arenas.<i>.*" mallctl interfaces to select all arenas. This + * definition is intentionally specified in raw decimal format to support + * cpp-based string concatenation, e.g. + * + * #define STRINGIFY_HELPER(x) #x + * #define STRINGIFY(x) STRINGIFY_HELPER(x) + * + * mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, + * 0); + */ +#define MALLCTL_ARENAS_ALL 4096 +/* + * Use as arena index in "stats.arenas.<i>.*" mallctl interfaces to select + * destroyed arenas. + */ +#define MALLCTL_ARENAS_DESTROYED 4097 #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) # define JEMALLOC_CXX_THROW throw() @@ -36,32 +56,7 @@ # define JEMALLOC_CXX_THROW #endif -#ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE -# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) -# else -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# endif -# ifndef JEMALLOC_EXPORT -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) -# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) -# else -# define JEMALLOC_FORMAT_PRINTF(s, i) -# endif -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#elif _MSC_VER +#if defined(_MSC_VER) # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) __declspec(align(s)) # define JEMALLOC_ALLOC_SIZE(s) @@ -87,6 +82,31 @@ # else # define JEMALLOC_ALLOCATOR # endif +#elif defined(JEMALLOC_HAVE_ATTR) +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) +# else +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# endif +# ifndef JEMALLOC_EXPORT +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) +# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) +# else +# define JEMALLOC_FORMAT_PRINTF(s, i) +# endif +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_RESTRICT_RETURN +# define JEMALLOC_ALLOCATOR #else # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) @@ -100,7 +120,3 @@ # define JEMALLOC_RESTRICT_RETURN # define JEMALLOC_ALLOCATOR #endif - -/* This version of Jemalloc, modified for Redis, has the je_get_defrag_hint() - * function. */ -#define JEMALLOC_FRAG_HINT diff --git a/deps/jemalloc/include/jemalloc/jemalloc_mangle.sh b/deps/jemalloc/include/jemalloc/jemalloc_mangle.sh index df328b78d..c675bb469 100755 --- a/deps/jemalloc/include/jemalloc/jemalloc_mangle.sh +++ b/deps/jemalloc/include/jemalloc/jemalloc_mangle.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/sh -eu public_symbols_txt=$1 symbol_prefix=$2 diff --git a/deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in b/deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in index fa7b350ad..1a5887430 100644 --- a/deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in +++ b/deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in @@ -1,57 +1,77 @@ +typedef struct extent_hooks_s extent_hooks_t; + /* * void * - * chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, - * bool *commit, unsigned arena_ind); + * extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, + * size_t alignment, bool *zero, bool *commit, unsigned arena_ind); */ -typedef void *(chunk_alloc_t)(void *, size_t, size_t, bool *, bool *, unsigned); +typedef void *(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, + bool *, unsigned); /* * bool - * chunk_dalloc(void *chunk, size_t size, bool committed, unsigned arena_ind); + * extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, + * bool committed, unsigned arena_ind); */ -typedef bool (chunk_dalloc_t)(void *, size_t, bool, unsigned); +typedef bool (extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, + unsigned); + +/* + * void + * extent_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, + * bool committed, unsigned arena_ind); + */ +typedef void (extent_destroy_t)(extent_hooks_t *, void *, size_t, bool, + unsigned); /* * bool - * chunk_commit(void *chunk, size_t size, size_t offset, size_t length, - * unsigned arena_ind); + * extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t offset, size_t length, unsigned arena_ind); */ -typedef bool (chunk_commit_t)(void *, size_t, size_t, size_t, unsigned); +typedef bool (extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, + unsigned); /* * bool - * chunk_decommit(void *chunk, size_t size, size_t offset, size_t length, - * unsigned arena_ind); + * extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t offset, size_t length, unsigned arena_ind); */ -typedef bool (chunk_decommit_t)(void *, size_t, size_t, size_t, unsigned); +typedef bool (extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t, + size_t, unsigned); /* * bool - * chunk_purge(void *chunk, size_t size, size_t offset, size_t length, - * unsigned arena_ind); + * extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t offset, size_t length, unsigned arena_ind); */ -typedef bool (chunk_purge_t)(void *, size_t, size_t, size_t, unsigned); +typedef bool (extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t, + unsigned); /* * bool - * chunk_split(void *chunk, size_t size, size_t size_a, size_t size_b, - * bool committed, unsigned arena_ind); + * extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t size_a, size_t size_b, bool committed, unsigned arena_ind); */ -typedef bool (chunk_split_t)(void *, size_t, size_t, size_t, bool, unsigned); +typedef bool (extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t, + bool, unsigned); /* * bool - * chunk_merge(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, - * bool committed, unsigned arena_ind); + * extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + * void *addr_b, size_t size_b, bool committed, unsigned arena_ind); */ -typedef bool (chunk_merge_t)(void *, size_t, void *, size_t, bool, unsigned); - -typedef struct { - chunk_alloc_t *alloc; - chunk_dalloc_t *dalloc; - chunk_commit_t *commit; - chunk_decommit_t *decommit; - chunk_purge_t *purge; - chunk_split_t *split; - chunk_merge_t *merge; -} chunk_hooks_t; +typedef bool (extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, + bool, unsigned); + +struct extent_hooks_s { + extent_alloc_t *alloc; + extent_dalloc_t *dalloc; + extent_destroy_t *destroy; + extent_commit_t *commit; + extent_decommit_t *decommit; + extent_purge_t *purge_lazy; + extent_purge_t *purge_forced; + extent_split_t *split; + extent_merge_t *merge; +}; diff --git a/deps/jemalloc/include/msvc_compat/strings.h b/deps/jemalloc/include/msvc_compat/strings.h index f01ffdd18..996f256ce 100644 --- a/deps/jemalloc/include/msvc_compat/strings.h +++ b/deps/jemalloc/include/msvc_compat/strings.h @@ -6,22 +6,51 @@ #ifdef _MSC_VER # include <intrin.h> # pragma intrinsic(_BitScanForward) -static __forceinline int ffsl(long x) -{ +static __forceinline int ffsl(long x) { unsigned long i; - if (_BitScanForward(&i, x)) - return (i + 1); - return (0); + if (_BitScanForward(&i, x)) { + return i + 1; + } + return 0; } -static __forceinline int ffs(int x) -{ +static __forceinline int ffs(int x) { + return ffsl(x); +} + +# ifdef _M_X64 +# pragma intrinsic(_BitScanForward64) +# endif - return (ffsl(x)); +static __forceinline int ffsll(unsigned __int64 x) { + unsigned long i; +#ifdef _M_X64 + if (_BitScanForward64(&i, x)) { + return i + 1; + } + return 0; +#else +// Fallback for 32-bit build where 64-bit version not available +// assuming little endian + union { + unsigned __int64 ll; + unsigned long l[2]; + } s; + + s.ll = x; + + if (_BitScanForward(&i, s.l[0])) { + return i + 1; + } else if(_BitScanForward(&i, s.l[1])) { + return i + 33; + } + return 0; +#endif } #else +# define ffsll(x) __builtin_ffsll(x) # define ffsl(x) __builtin_ffsl(x) # define ffs(x) __builtin_ffs(x) #endif diff --git a/deps/jemalloc/include/msvc_compat/windows_extra.h b/deps/jemalloc/include/msvc_compat/windows_extra.h index 0c5e323ff..a6ebb9306 100644 --- a/deps/jemalloc/include/msvc_compat/windows_extra.h +++ b/deps/jemalloc/include/msvc_compat/windows_extra.h @@ -1,26 +1,6 @@ #ifndef MSVC_COMPAT_WINDOWS_EXTRA_H -#define MSVC_COMPAT_WINDOWS_EXTRA_H +#define MSVC_COMPAT_WINDOWS_EXTRA_H -#ifndef ENOENT -# define ENOENT ERROR_PATH_NOT_FOUND -#endif -#ifndef EINVAL -# define EINVAL ERROR_BAD_ARGUMENTS -#endif -#ifndef EAGAIN -# define EAGAIN ERROR_OUTOFMEMORY -#endif -#ifndef EPERM -# define EPERM ERROR_WRITE_FAULT -#endif -#ifndef EFAULT -# define EFAULT ERROR_INVALID_ADDRESS -#endif -#ifndef ENOMEM -# define ENOMEM ERROR_NOT_ENOUGH_MEMORY -#endif -#ifndef ERANGE -# define ERANGE ERROR_INVALID_DATA -#endif +#include <errno.h> #endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */ |