diff options
author | antirez <antirez@gmail.com> | 2012-05-15 15:27:12 +0200 |
---|---|---|
committer | antirez <antirez@gmail.com> | 2012-05-16 11:09:45 +0200 |
commit | ad4c0b4117ec15c0061b702f230caf1bc5eb4e06 (patch) | |
tree | 76b8e044bab3cec328dc8c8c9210a3088f9d1849 /deps/jemalloc/include | |
parent | 3c72c94aaeb98b0c4c1a98992539f8839cea1d74 (diff) | |
download | redis-ad4c0b4117ec15c0061b702f230caf1bc5eb4e06.tar.gz |
Jemalloc updated to 3.0.0.
Full changelog here:
http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git;a=blob_plain;f=ChangeLog;hb=master
Notable improvements from the point of view of Redis:
1) Bugfixing.
2) Support for Valgrind.
3) Support for OSX Lion, FreeBSD.
Diffstat (limited to 'deps/jemalloc/include')
31 files changed, 3251 insertions, 1183 deletions
diff --git a/deps/jemalloc/include/jemalloc/internal/arena.h b/deps/jemalloc/include/jemalloc/internal/arena.h index b80c118d8..0b0f640a4 100644 --- a/deps/jemalloc/include/jemalloc/internal/arena.h +++ b/deps/jemalloc/include/jemalloc/internal/arena.h @@ -2,41 +2,6 @@ #ifdef JEMALLOC_H_TYPES /* - * Subpages are an artificially designated partitioning of pages. Their only - * purpose is to support subpage-spaced size classes. - * - * There must be at least 4 subpages per page, due to the way size classes are - * handled. - */ -#define LG_SUBPAGE 8 -#define SUBPAGE ((size_t)(1U << LG_SUBPAGE)) -#define SUBPAGE_MASK (SUBPAGE - 1) - -/* Return the smallest subpage multiple that is >= s. */ -#define SUBPAGE_CEILING(s) \ - (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK) - -#ifdef JEMALLOC_TINY - /* Smallest size class to support. */ -# define LG_TINY_MIN LG_SIZEOF_PTR -# define TINY_MIN (1U << LG_TINY_MIN) -#endif - -/* - * Maximum size class that is a multiple of the quantum, but not (necessarily) - * a power of 2. Above this size, allocations are rounded up to the nearest - * power of 2. - */ -#define LG_QSPACE_MAX_DEFAULT 7 - -/* - * Maximum size class that is a multiple of the cacheline, but not (necessarily) - * a power of 2. Above this size, allocations are rounded up to the nearest - * power of 2. - */ -#define LG_CSPACE_MAX_DEFAULT 9 - -/* * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized * as small as possible such that this setting is still honored, without * violating other constraints. The goal is to make runs as small as possible @@ -51,7 +16,7 @@ * constraint is relaxed (ignored) for runs that are so small that the * per-region overhead is greater than: * - * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)) + * (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)) */ #define RUN_BFP 12 /* \/ Implicit binary fixed point. */ @@ -63,6 +28,12 @@ #define RUN_MAXREGS (1U << LG_RUN_MAXREGS) /* + * Minimum redzone size. Redzones may be larger than this if necessary to + * preserve region alignment. + */ +#define REDZONE_MINSIZE 16 + +/* * The minimum ratio of active:dirty pages per arena is computed as: * * (nactive >> opt_lg_dirty_mult) >= ndirty @@ -85,6 +56,15 @@ typedef struct arena_s arena_t; /* Each element of the chunk map corresponds to one page within the chunk. */ struct arena_chunk_map_s { +#ifndef JEMALLOC_PROF + /* + * Overlay prof_ctx in order to allow it to be referenced by dead code. + * Such antics aren't warranted for per arena data structures, but + * chunk map overhead accounts for a percentage of memory, rather than + * being just a fixed cost. + */ + union { +#endif union { /* * Linkage for run trees. There are two disjoint uses: @@ -103,22 +83,23 @@ struct arena_chunk_map_s { ql_elm(arena_chunk_map_t) ql_link; } u; -#ifdef JEMALLOC_PROF /* Profile counters, used for large object runs. */ prof_ctx_t *prof_ctx; +#ifndef JEMALLOC_PROF + }; /* union { ... }; */ #endif /* * Run address (or size) and various flags are stored together. The bit * layout looks like (assuming 32-bit system): * - * ???????? ???????? ????---- ----dula + * ???????? ???????? ????nnnn nnnndula * * ? : Unallocated: Run address for first/last pages, unset for internal * pages. * Small: Run page offset. * Large: Run size for first page, unset for trailing pages. - * - : Unused. + * n : binind for small size class, BININD_INVALID for large size class. * d : dirty? * u : unzeroed? * l : large? @@ -128,7 +109,8 @@ struct arena_chunk_map_s { * * p : run page offset * s : run size - * c : (binind+1) for size class (used only if prof_promote is true) + * n : binind for size class; large objects set these to BININD_INVALID + * except for promoted allocations (see prof_promote) * x : don't care * - : 0 * + : 1 @@ -136,37 +118,38 @@ struct arena_chunk_map_s { * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss---- ----du-a - * xxxxxxxx xxxxxxxx xxxx---- -----Uxx - * ssssssss ssssssss ssss---- ----dU-a + * ssssssss ssssssss ssss++++ ++++du-a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx + * ssssssss ssssssss ssss++++ ++++dU-a * * Unallocated (dirty): - * ssssssss ssssssss ssss---- ----D--a - * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * ssssssss ssssssss ssss---- ----D--a + * ssssssss ssssssss ssss++++ ++++D--a + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * ssssssss ssssssss ssss++++ ++++D--a * * Small: - * pppppppp pppppppp pppp---- ----d--A - * pppppppp pppppppp pppp---- -------A - * pppppppp pppppppp pppp---- ----d--A + * pppppppp pppppppp ppppnnnn nnnnd--A + * pppppppp pppppppp ppppnnnn nnnn---A + * pppppppp pppppppp ppppnnnn nnnnd--A * * Large: - * ssssssss ssssssss ssss---- ----D-LA - * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * -------- -------- -------- ----D-LA + * ssssssss ssssssss ssss++++ ++++D-LA + * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * -------- -------- ----++++ ++++D-LA * - * Large (sampled, size <= PAGE_SIZE): - * ssssssss ssssssss sssscccc ccccD-LA + * Large (sampled, size <= PAGE): + * ssssssss ssssssss ssssnnnn nnnnD-LA * - * Large (not sampled, size == PAGE_SIZE): - * ssssssss ssssssss ssss---- ----D-LA + * Large (not sampled, size == PAGE): + * ssssssss ssssssss ssss++++ ++++D-LA */ size_t bits; -#ifdef JEMALLOC_PROF -#define CHUNK_MAP_CLASS_SHIFT 4 -#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U) -#endif -#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) +#define CHUNK_MAP_BININD_SHIFT 4 +#define BININD_INVALID ((size_t)0xffU) +/* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */ +#define CHUNK_MAP_BININD_MASK ((size_t)0xff0U) +#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK +#define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU) #define CHUNK_MAP_DIRTY ((size_t)0x8U) #define CHUNK_MAP_UNZEROED ((size_t)0x4U) #define CHUNK_MAP_LARGE ((size_t)0x2U) @@ -205,11 +188,6 @@ struct arena_chunk_s { typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; struct arena_run_s { -#ifdef JEMALLOC_DEBUG - uint32_t magic; -# define ARENA_RUN_MAGIC 0x384adf93 -#endif - /* Bin this run is associated with. */ arena_bin_t *bin; @@ -224,11 +202,50 @@ struct arena_run_s { * Read-only information associated with each element of arena_t's bins array * is stored separately, partly to reduce memory usage (only one copy, rather * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each run has the following layout: + * + * /--------------------\ + * | arena_run_t header | + * | ... | + * bitmap_offset | bitmap | + * | ... | + * ctx0_offset | ctx map | + * | ... | + * |--------------------| + * | redzone | + * reg0_offset | region 0 | + * | redzone | + * |--------------------| \ + * | redzone | | + * | region 1 | > reg_interval + * | redzone | / + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | redzone | + * | region nregs-1 | + * | redzone | + * |--------------------| + * | alignment pad? | + * \--------------------/ + * + * reg_interval has at least the same minimum alignment as reg_size; this + * preserves the alignment constraint that sa2u() depends on. Alignment pad is + * either 0 or redzone_size; it is present only if needed to align reg0_offset. */ struct arena_bin_info_s { /* Size of regions in a run for this bin's size class. */ size_t reg_size; + /* Redzone size. */ + size_t redzone_size; + + /* Interval between regions (reg_size + (redzone_size << 1)). */ + size_t reg_interval; + /* Total size of a run for this bin's size class. */ size_t run_size; @@ -247,13 +264,11 @@ struct arena_bin_info_s { */ bitmap_info_t bitmap_info; -#ifdef JEMALLOC_PROF /* * Offset of first (prof_ctx_t *) in a run header for this bin's size - * class, or 0 if (opt_prof == false). + * class, or 0 if (config_prof == false || opt_prof == false). */ uint32_t ctx0_offset; -#endif /* Offset of first region in a run for this bin's size class. */ uint32_t reg0_offset; @@ -283,18 +298,11 @@ struct arena_bin_s { */ arena_run_tree_t runs; -#ifdef JEMALLOC_STATS /* Bin statistics. */ malloc_bin_stats_t stats; -#endif }; struct arena_s { -#ifdef JEMALLOC_DEBUG - uint32_t magic; -# define ARENA_MAGIC 0x947d3d24 -#endif - /* This arena's index within the arenas array. */ unsigned ind; @@ -314,20 +322,14 @@ struct arena_s { */ malloc_mutex_t lock; -#ifdef JEMALLOC_STATS arena_stats_t stats; -# ifdef JEMALLOC_TCACHE /* * List of tcaches for extant threads associated with this arena. * Stats from these are merged incrementally, and at exit. */ ql_head(tcache_t) tcache_ql; -# endif -#endif -#ifdef JEMALLOC_PROF uint64_t prof_accumbytes; -#endif /* List of dirty-page-containing chunks this arena manages. */ ql_head(arena_chunk_t) chunks_dirty; @@ -378,140 +380,334 @@ struct arena_s { arena_avail_tree_t runs_avail_clean; arena_avail_tree_t runs_avail_dirty; - /* - * bins is used to store trees of free regions of the following sizes, - * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and - * default MALLOC_CONF. - * - * bins[i] | size | - * --------+--------+ - * 0 | 8 | - * --------+--------+ - * 1 | 16 | - * 2 | 32 | - * 3 | 48 | - * : : - * 6 | 96 | - * 7 | 112 | - * 8 | 128 | - * --------+--------+ - * 9 | 192 | - * 10 | 256 | - * 11 | 320 | - * 12 | 384 | - * 13 | 448 | - * 14 | 512 | - * --------+--------+ - * 15 | 768 | - * 16 | 1024 | - * 17 | 1280 | - * : : - * 25 | 3328 | - * 26 | 3584 | - * 27 | 3840 | - * --------+--------+ - */ - arena_bin_t bins[1]; /* Dynamically sized. */ + /* bins is used to store trees of free regions. */ + arena_bin_t bins[NBINS]; }; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern size_t opt_lg_qspace_max; -extern size_t opt_lg_cspace_max; extern ssize_t opt_lg_dirty_mult; /* * small_size2bin is a compact lookup table that rounds request sizes up to * size classes. In order to reduce cache footprint, the table is compressed, * and all accesses are via the SMALL_SIZE2BIN macro. */ -extern uint8_t const *small_size2bin; +extern uint8_t const small_size2bin[]; #define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) -extern arena_bin_info_t *arena_bin_info; - -/* Various bin-related settings. */ -#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */ -# define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN)) -#else -# define ntbins 0 -#endif -extern unsigned nqbins; /* Number of quantum-spaced bins. */ -extern unsigned ncbins; /* Number of cacheline-spaced bins. */ -extern unsigned nsbins; /* Number of subpage-spaced bins. */ -extern unsigned nbins; -#ifdef JEMALLOC_TINY -# define tspace_max ((size_t)(QUANTUM >> 1)) -#endif -#define qspace_min QUANTUM -extern size_t qspace_max; -extern size_t cspace_min; -extern size_t cspace_max; -extern size_t sspace_min; -extern size_t sspace_max; -#define small_maxclass sspace_max +extern arena_bin_info_t arena_bin_info[NBINS]; +/* Number of large size classes. */ #define nlclasses (chunk_npages - map_bias) void arena_purge_all(arena_t *arena); -#ifdef JEMALLOC_PROF void arena_prof_accum(arena_t *arena, uint64_t accumbytes); -#endif -#ifdef JEMALLOC_TCACHE void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - size_t binind -# ifdef JEMALLOC_PROF - , uint64_t prof_accumbytes -# endif - ); -#endif + size_t binind, uint64_t prof_accumbytes); +void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, + bool zero); +void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_malloc(size_t size, bool zero); -void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, - size_t alignment, bool zero); -size_t arena_salloc(const void *ptr); -#ifdef JEMALLOC_PROF +void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); void arena_prof_promoted(const void *ptr, size_t size); -size_t arena_salloc_demote(const void *ptr); -#endif -void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, +void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); +void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind, arena_chunk_map_t *mapelm); +void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t pageind); +void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, + void *ptr); void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); -#ifdef JEMALLOC_STATS void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); -#endif void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); + size_t alignment, bool zero, bool try_tcache); bool arena_new(arena_t *arena, unsigned ind); -bool arena_boot(void); +void arena_boot(void); +void arena_prefork(arena_t *arena); +void arena_postfork_parent(arena_t *arena); +void arena_postfork_child(arena_t *arena); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind); +size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); +void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size); +void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, + size_t size, size_t flags); +void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + size_t binind); +void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, + size_t runind, size_t binind, size_t flags); +void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, + size_t unzeroed); +size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); -# ifdef JEMALLOC_PROF prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -# endif -void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); +void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); +size_t arena_salloc(const void *ptr, bool demote); +void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, + bool try_tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +# ifdef JEMALLOC_ARENA_INLINE_A +JEMALLOC_INLINE arena_chunk_map_t * +arena_mapp_get(arena_chunk_t *chunk, size_t pageind) +{ + + assert(pageind >= map_bias); + assert(pageind < chunk_npages); + + return (&chunk->map[pageind-map_bias]); +} + +JEMALLOC_INLINE size_t * +arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) +{ + + return (&arena_mapp_get(chunk, pageind)->bits); +} + +JEMALLOC_INLINE size_t +arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) +{ + + return (*arena_mapbitsp_get(chunk, pageind)); +} + +JEMALLOC_INLINE size_t +arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + return (mapbits & ~PAGE_MASK); +} + +JEMALLOC_INLINE size_t +arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); + return (mapbits & ~PAGE_MASK); +} + +JEMALLOC_INLINE size_t +arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == + CHUNK_MAP_ALLOCATED); + return (mapbits >> LG_PAGE); +} + +JEMALLOC_INLINE size_t +arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + size_t binind; + + mapbits = arena_mapbits_get(chunk, pageind); + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + assert(binind < NBINS || binind == BININD_INVALID); + return (binind); +} + +JEMALLOC_INLINE size_t +arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_DIRTY); +} + +JEMALLOC_INLINE size_t +arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_UNZEROED); +} + +JEMALLOC_INLINE size_t +arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_LARGE); +} + +JEMALLOC_INLINE size_t +arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) +{ + size_t mapbits; + + mapbits = arena_mapbits_get(chunk, pageind); + return (mapbits & CHUNK_MAP_ALLOCATED); +} + +JEMALLOC_INLINE void +arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0); + assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags); + *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags; +} + +JEMALLOC_INLINE void +arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, + size_t size) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((*mapbitsp & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); + *mapbitsp = size | (*mapbitsp & PAGE_MASK); +} + +JEMALLOC_INLINE void +arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, + size_t flags) +{ + size_t *mapbitsp; + size_t unzeroed; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert((size & PAGE_MASK) == 0); + assert((flags & CHUNK_MAP_DIRTY) == flags); + unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ + *mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | unzeroed | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; +} + +JEMALLOC_INLINE void +arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, + size_t binind) +{ + size_t *mapbitsp; + + assert(binind <= BININD_INVALID); + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE); + *mapbitsp = (*mapbitsp & ~CHUNK_MAP_BININD_MASK) | (binind << + CHUNK_MAP_BININD_SHIFT); +} + +JEMALLOC_INLINE void +arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, + size_t binind, size_t flags) +{ + size_t *mapbitsp; + size_t unzeroed; + + assert(binind < BININD_INVALID); + mapbitsp = arena_mapbitsp_get(chunk, pageind); + assert(pageind - runind >= map_bias); + assert((flags & CHUNK_MAP_DIRTY) == flags); + unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */ + *mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) | + flags | unzeroed | CHUNK_MAP_ALLOCATED; +} + +JEMALLOC_INLINE void +arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind, + size_t unzeroed) +{ + size_t *mapbitsp; + + mapbitsp = arena_mapbitsp_get(chunk, pageind); + *mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed; +} + +JEMALLOC_INLINE size_t +arena_ptr_small_binind_get(const void *ptr, size_t mapbits) +{ + size_t binind; + + binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; + + if (config_debug) { + arena_chunk_t *chunk; + arena_t *arena; + size_t pageind; + size_t actual_mapbits; + arena_run_t *run; + arena_bin_t *bin; + size_t actual_binind; + arena_bin_info_t *bin_info; + + assert(binind != BININD_INVALID); + assert(binind < NBINS); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + actual_mapbits = arena_mapbits_get(chunk, pageind); + assert(mapbits == actual_mapbits); + assert(arena_mapbits_large_get(chunk, pageind) == 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + (actual_mapbits >> LG_PAGE)) << LG_PAGE)); + bin = run->bin; + actual_binind = bin - arena->bins; + assert(binind == actual_binind); + bin_info = &arena_bin_info[actual_binind]; + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval + == 0); + } + + return (binind); +} +# endif /* JEMALLOC_ARENA_INLINE_A */ + +# ifdef JEMALLOC_ARENA_INLINE_B JEMALLOC_INLINE size_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { size_t binind = bin - arena->bins; - assert(binind < nbins); + assert(binind < NBINS); return (binind); } @@ -519,9 +715,8 @@ JEMALLOC_INLINE unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) { unsigned shift, diff, regind; - size_t size; + size_t interval; - dassert(run->magic == ARENA_RUN_MAGIC); /* * Freeing a pointer lower than region zero can cause assertion * failure. @@ -537,12 +732,12 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ - size = bin_info->reg_size; - shift = ffs(size) - 1; + interval = bin_info->reg_interval; + shift = ffs(interval) - 1; diff >>= shift; - size >>= shift; + interval >>= shift; - if (size == 1) { + if (interval == 1) { /* The divisor was a power of 2. */ regind = diff; } else { @@ -554,7 +749,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * * becomes * - * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT + * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT * * We can omit the first three elements, because we never * divide by 0, and 1 and 2 are both powers of two, which are @@ -562,7 +757,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) */ #define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) #define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) - static const unsigned size_invs[] = { + static const unsigned interval_invs[] = { SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), @@ -573,20 +768,21 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) }; - if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2)) - regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT; - else - regind = diff / size; + if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) + + 2)) { + regind = (diff * interval_invs[interval - 3]) >> + SIZE_INV_SHIFT; + } else + regind = diff / interval; #undef SIZE_INV #undef SIZE_INV_SHIFT } - assert(diff == regind * size); + assert(diff == regind * interval); assert(regind < bin_info->nregs); return (regind); } -#ifdef JEMALLOC_PROF JEMALLOC_INLINE prof_ctx_t * arena_prof_ctx_get(const void *ptr) { @@ -594,32 +790,33 @@ arena_prof_ctx_get(const void *ptr) arena_chunk_t *chunk; size_t pageind, mapbits; + cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - mapbits = chunk->map[pageind-map_bias].bits; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote) ret = (prof_ctx_t *)(uintptr_t)1U; else { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); - size_t binind = arena_bin_index(chunk->arena, run->bin); + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << + LG_PAGE)); + size_t binind = arena_ptr_small_binind_get(ptr, + mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind; - dassert(run->magic == ARENA_RUN_MAGIC); regind = arena_run_regind(run, bin_info, ptr); ret = *(prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t *))); } } else - ret = chunk->map[pageind-map_bias].prof_ctx; + ret = arena_mapp_get(chunk, pageind)->prof_ctx; return (ret); } @@ -630,25 +827,24 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_chunk_t *chunk; size_t pageind, mapbits; + cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - mapbits = chunk->map[pageind-map_bias].bits; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); if ((mapbits & CHUNK_MAP_LARGE) == 0) { if (prof_promote == false) { arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); - arena_bin_t *bin = run->bin; + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << + LG_PAGE)); size_t binind; arena_bin_info_t *bin_info; unsigned regind; - dassert(run->magic == ARENA_RUN_MAGIC); - binind = arena_bin_index(chunk->arena, bin); + binind = arena_ptr_small_binind_get(ptr, mapbits); bin_info = &arena_bin_info[binind]; regind = arena_run_regind(run, bin_info, ptr); @@ -657,86 +853,122 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) } else assert((uintptr_t)ctx == (uintptr_t)1U); } else - chunk->map[pageind-map_bias].prof_ctx = ctx; + arena_mapp_get(chunk, pageind)->prof_ctx = ctx; +} + +JEMALLOC_INLINE void * +arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) +{ + tcache_t *tcache; + + assert(size != 0); + assert(size <= arena_maxclass); + + if (size <= SMALL_MAXCLASS) { + if (try_tcache && (tcache = tcache_get(true)) != NULL) + return (tcache_alloc_small(tcache, size, zero)); + else { + return (arena_malloc_small(choose_arena(arena), size, + zero)); + } + } else { + /* + * Initialize tcache after checking size in order to avoid + * infinite recursion during tcache initialization. + */ + if (try_tcache && size <= tcache_maxclass && (tcache = + tcache_get(true)) != NULL) + return (tcache_alloc_large(tcache, size, zero)); + else { + return (arena_malloc_large(choose_arena(arena), size, + zero)); + } + } +} + +/* Return the size of the allocation pointed to by ptr. */ +JEMALLOC_INLINE size_t +arena_salloc(const void *ptr, bool demote) +{ + size_t ret; + arena_chunk_t *chunk; + size_t pageind, binind; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + binind = arena_mapbits_binind_get(chunk, pageind); + if (binind == BININD_INVALID || (config_prof && demote == false && + prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) { + /* + * Large allocation. In the common case (demote == true), and + * as this is an inline function, most callers will only end up + * looking at binind to determine that ptr is a small + * allocation. + */ + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + ret = arena_mapbits_large_size_get(chunk, pageind); + assert(ret != 0); + assert(pageind + (ret>>LG_PAGE) <= chunk_npages); + assert(ret == PAGE || arena_mapbits_large_size_get(chunk, + pageind+(ret>>LG_PAGE)-1) == 0); + assert(binind == arena_mapbits_binind_get(chunk, + pageind+(ret>>LG_PAGE)-1)); + assert(arena_mapbits_dirty_get(chunk, pageind) == + arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1)); + } else { + /* + * Small allocation (possibly promoted to a large object due to + * prof_promote). + */ + assert(arena_mapbits_large_get(chunk, pageind) != 0 || + arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, + pageind)) == binind); + ret = arena_bin_info[binind].reg_size; + } + + return (ret); } -#endif JEMALLOC_INLINE void -arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) { - size_t pageind; - arena_chunk_map_t *mapelm; + size_t pageind, mapbits; + tcache_t *tcache; assert(arena != NULL); - dassert(arena->magic == ARENA_MAGIC); assert(chunk->arena == arena); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - mapelm = &chunk->map[pageind-map_bias]; - assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = arena_mapbits_get(chunk, pageind); + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. */ -#ifdef JEMALLOC_TCACHE - tcache_t *tcache; + if (try_tcache && (tcache = tcache_get(false)) != NULL) { + size_t binind; - if ((tcache = tcache_get()) != NULL) - tcache_dalloc_small(tcache, ptr); - else { -#endif - arena_run_t *run; - arena_bin_t *bin; - - run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> - PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - bin = run->bin; -#ifdef JEMALLOC_DEBUG - { - size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % - bin_info->reg_size == 0); - } -#endif - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin(arena, chunk, ptr, mapelm); - malloc_mutex_unlock(&bin->lock); -#ifdef JEMALLOC_TCACHE - } -#endif + binind = arena_ptr_small_binind_get(ptr, mapbits); + tcache_dalloc_small(tcache, ptr, binind); + } else + arena_dalloc_small(arena, chunk, ptr, pageind); } else { -#ifdef JEMALLOC_TCACHE - size_t size = mapelm->bits & ~PAGE_MASK; + size_t size = arena_mapbits_large_size_get(chunk, pageind); assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (size <= tcache_maxclass) { - tcache_t *tcache; - - if ((tcache = tcache_get()) != NULL) - tcache_dalloc_large(tcache, ptr, size); - else { - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); - } - } else { - malloc_mutex_lock(&arena->lock); + + if (try_tcache && size <= tcache_maxclass && (tcache = + tcache_get(false)) != NULL) { + tcache_dalloc_large(tcache, ptr, size); + } else arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); - } -#else - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); -#endif } } +# endif /* JEMALLOC_ARENA_INLINE_B */ #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/deps/jemalloc/include/jemalloc/internal/atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic.h index 9a298623f..11a7b47fe 100644 --- a/deps/jemalloc/include/jemalloc/internal/atomic.h +++ b/deps/jemalloc/include/jemalloc/internal/atomic.h @@ -11,22 +11,8 @@ #define atomic_read_uint64(p) atomic_add_uint64(p, 0) #define atomic_read_uint32(p) atomic_add_uint32(p, 0) - -#if (LG_SIZEOF_PTR == 3) -# define atomic_read_z(p) \ - (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0) -# define atomic_add_z(p, x) \ - (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x) -# define atomic_sub_z(p, x) \ - (size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x) -#elif (LG_SIZEOF_PTR == 2) -# define atomic_read_z(p) \ - (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0) -# define atomic_add_z(p, x) \ - (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x) -# define atomic_sub_z(p, x) \ - (size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x) -#endif +#define atomic_read_z(p) atomic_add_z(p, 0) +#define atomic_read_u(p) atomic_add_u(p, 0) #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -37,12 +23,17 @@ uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); +size_t atomic_add_z(size_t *p, size_t x); +size_t atomic_sub_z(size_t *p, size_t x); +unsigned atomic_add_u(unsigned *p, unsigned x); +unsigned atomic_sub_u(unsigned *p, unsigned x); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. */ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -56,6 +47,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (__sync_sub_and_fetch(p, x)); } +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (InterlockedExchangeAdd64(p, -((int64_t)x))); +} #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -70,7 +75,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); } -#elif (defined(__amd64_) || defined(__x86_64__)) +# elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -97,8 +102,43 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (x); } -#else -# if (LG_SIZEOF_PTR == 3) +# elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + /* + * atomic_fetchadd_64() doesn't exist, but we only ever use this + * function on LP64 systems, so atomic_fetchadd_long() will do. + */ + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); +} +# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +# else # error "Missing implementation for 64-bit atomic operations" # endif #endif @@ -119,6 +159,20 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (__sync_sub_and_fetch(p, x)); } +#elif (defined(_MSC_VER)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (InterlockedExchangeAdd(p, -((int32_t)x))); +} #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) @@ -133,7 +187,7 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); } -#elif (defined(__i386__) || defined(__amd64_) || defined(__x86_64__)) +#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { @@ -160,9 +214,90 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (x); } +#elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); +} +#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} #else # error "Missing implementation for 32-bit atomic operations" #endif + +/******************************************************************************/ +/* size_t operations. */ +JEMALLOC_INLINE size_t +atomic_add_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE size_t +atomic_sub_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} + +/******************************************************************************/ +/* unsigned operations. */ +JEMALLOC_INLINE unsigned +atomic_add_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE unsigned +atomic_sub_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} +/******************************************************************************/ #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/deps/jemalloc/include/jemalloc/internal/base.h b/deps/jemalloc/include/jemalloc/internal/base.h index e353f309b..9cf75ffb0 100644 --- a/deps/jemalloc/include/jemalloc/internal/base.h +++ b/deps/jemalloc/include/jemalloc/internal/base.h @@ -9,12 +9,14 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern malloc_mutex_t base_mtx; - void *base_alloc(size_t size); +void *base_calloc(size_t number, size_t size); extent_node_t *base_node_alloc(void); void base_node_dealloc(extent_node_t *node); bool base_boot(void); +void base_prefork(void); +void base_postfork_parent(void); +void base_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/chunk.h b/deps/jemalloc/include/jemalloc/internal/chunk.h index 54b6a3ec8..8fb1fe6d1 100644 --- a/deps/jemalloc/include/jemalloc/internal/chunk.h +++ b/deps/jemalloc/include/jemalloc/internal/chunk.h @@ -28,20 +28,13 @@ #ifdef JEMALLOC_H_EXTERNS extern size_t opt_lg_chunk; -#ifdef JEMALLOC_SWAP -extern bool opt_overcommit; -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) /* Protects stats_chunks; currently not used for any other purpose. */ extern malloc_mutex_t chunks_mtx; /* Chunk statistics. */ extern chunk_stats_t stats_chunks; -#endif -#ifdef JEMALLOC_IVSALLOC extern rtree_t *chunks_rtree; -#endif extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ @@ -49,7 +42,7 @@ extern size_t chunk_npages; extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. */ -void *chunk_alloc(size_t size, bool base, bool *zero); +void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero); void chunk_dealloc(void *chunk, size_t size, bool unmap); bool chunk_boot(void); @@ -60,6 +53,5 @@ bool chunk_boot(void); #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#include "jemalloc/internal/chunk_swap.h" #include "jemalloc/internal/chunk_dss.h" #include "jemalloc/internal/chunk_mmap.h" diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h index 6f0052221..6e2643b24 100644 --- a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h +++ b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h @@ -1,4 +1,3 @@ -#ifdef JEMALLOC_DSS /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -10,16 +9,12 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -/* - * Protects sbrk() calls. This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. - */ -extern malloc_mutex_t dss_mtx; - -void *chunk_alloc_dss(size_t size, bool *zero); +void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero); bool chunk_in_dss(void *chunk); -bool chunk_dealloc_dss(void *chunk, size_t size); bool chunk_dss_boot(void); +void chunk_dss_prefork(void); +void chunk_dss_postfork_parent(void); +void chunk_dss_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -27,4 +22,3 @@ bool chunk_dss_boot(void); #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#endif /* JEMALLOC_DSS */ diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h index 07b50a4dc..b29f39e9e 100644 --- a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h +++ b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h @@ -9,11 +9,10 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *chunk_alloc_mmap(size_t size); -void *chunk_alloc_mmap_noreserve(size_t size); -void chunk_dealloc_mmap(void *chunk, size_t size); +void pages_purge(void *addr, size_t length); -bool chunk_mmap_boot(void); +void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero); +bool chunk_dealloc_mmap(void *chunk, size_t size); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_swap.h b/deps/jemalloc/include/jemalloc/internal/chunk_swap.h deleted file mode 100644 index 9faa739f7..000000000 --- a/deps/jemalloc/include/jemalloc/internal/chunk_swap.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifdef JEMALLOC_SWAP -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern malloc_mutex_t swap_mtx; -extern bool swap_enabled; -extern bool swap_prezeroed; -extern size_t swap_nfds; -extern int *swap_fds; -#ifdef JEMALLOC_STATS -extern size_t swap_avail; -#endif - -void *chunk_alloc_swap(size_t size, bool *zero); -bool chunk_in_swap(void *chunk); -bool chunk_dealloc_swap(void *chunk, size_t size); -bool chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed); -bool chunk_swap_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ -#endif /* JEMALLOC_SWAP */ diff --git a/deps/jemalloc/include/jemalloc/internal/ckh.h b/deps/jemalloc/include/jemalloc/internal/ckh.h index 3e4ad4c85..05d1fc03e 100644 --- a/deps/jemalloc/include/jemalloc/internal/ckh.h +++ b/deps/jemalloc/include/jemalloc/internal/ckh.h @@ -30,11 +30,6 @@ struct ckhc_s { }; struct ckh_s { -#ifdef JEMALLOC_DEBUG -#define CKH_MAGIC 0x3af2489d - uint32_t magic; -#endif - #ifdef CKH_COUNT /* Counters used to get an idea of performance. */ uint64_t ngrows; @@ -47,7 +42,7 @@ struct ckh_s { /* Used for pseudo-random number generation. */ #define CKH_A 1103515241 #define CKH_C 12347 - uint32_t prn_state; + uint32_t prng_state; /* Total number of items. */ size_t count; diff --git a/deps/jemalloc/include/jemalloc/internal/ctl.h b/deps/jemalloc/include/jemalloc/internal/ctl.h index f1f5eb70a..adf3827f0 100644 --- a/deps/jemalloc/include/jemalloc/internal/ctl.h +++ b/deps/jemalloc/include/jemalloc/internal/ctl.h @@ -2,6 +2,8 @@ #ifdef JEMALLOC_H_TYPES typedef struct ctl_node_s ctl_node_t; +typedef struct ctl_named_node_s ctl_named_node_t; +typedef struct ctl_indexed_node_s ctl_indexed_node_t; typedef struct ctl_arena_stats_s ctl_arena_stats_t; typedef struct ctl_stats_s ctl_stats_t; @@ -11,20 +13,21 @@ typedef struct ctl_stats_s ctl_stats_t; struct ctl_node_s { bool named; - union { - struct { - const char *name; - /* If (nchildren == 0), this is a terminal node. */ - unsigned nchildren; - const ctl_node_t *children; - } named; - struct { - const ctl_node_t *(*index)(const size_t *, size_t, - size_t); - } indexed; - } u; - int (*ctl)(const size_t *, size_t, void *, size_t *, void *, - size_t); +}; + +struct ctl_named_node_s { + struct ctl_node_s node; + const char *name; + /* If (nchildren == 0), this is a terminal node. */ + unsigned nchildren; + const ctl_node_t *children; + int (*ctl)(const size_t *, size_t, void *, size_t *, + void *, size_t); +}; + +struct ctl_indexed_node_s { + struct ctl_node_s node; + const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); }; struct ctl_arena_stats_s { @@ -32,7 +35,6 @@ struct ctl_arena_stats_s { unsigned nthreads; size_t pactive; size_t pdirty; -#ifdef JEMALLOC_STATS arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. */ @@ -41,13 +43,11 @@ struct ctl_arena_stats_s { uint64_t ndalloc_small; uint64_t nrequests_small; - malloc_bin_stats_t *bstats; /* nbins elements. */ + malloc_bin_stats_t bstats[NBINS]; malloc_large_stats_t *lstats; /* nlclasses elements. */ -#endif }; struct ctl_stats_s { -#ifdef JEMALLOC_STATS size_t allocated; size_t active; size_t mapped; @@ -61,11 +61,7 @@ struct ctl_stats_s { uint64_t nmalloc; /* huge_nmalloc */ uint64_t ndalloc; /* huge_ndalloc */ } huge; -#endif ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ -#ifdef JEMALLOC_SWAP - size_t swap_avail; -#endif }; #endif /* JEMALLOC_H_STRUCTS */ @@ -81,27 +77,25 @@ int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, bool ctl_boot(void); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ - if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen) \ + if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ != 0) { \ - malloc_write("<jemalloc>: Failure in xmallctl(\""); \ - malloc_write(name); \ - malloc_write("\", ...)\n"); \ + malloc_printf( \ + "<jemalloc>: Failure in xmallctl(\"%s\", ...)\n", \ + name); \ abort(); \ } \ } while (0) #define xmallctlnametomib(name, mibp, miblenp) do { \ - if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) { \ - malloc_write( \ - "<jemalloc>: Failure in xmallctlnametomib(\""); \ - malloc_write(name); \ - malloc_write("\", ...)\n"); \ + if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ + malloc_printf("<jemalloc>: Failure in " \ + "xmallctlnametomib(\"%s\", ...)\n", name); \ abort(); \ } \ } while (0) #define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ - if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp, \ + if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ newlen) != 0) { \ malloc_write( \ "<jemalloc>: Failure in xmallctlbymib()\n"); \ diff --git a/deps/jemalloc/include/jemalloc/internal/extent.h b/deps/jemalloc/include/jemalloc/internal/extent.h index 6fe9702b5..36af8be89 100644 --- a/deps/jemalloc/include/jemalloc/internal/extent.h +++ b/deps/jemalloc/include/jemalloc/internal/extent.h @@ -9,18 +9,14 @@ typedef struct extent_node_s extent_node_t; /* Tree of extents. */ struct extent_node_s { -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) /* Linkage for the size/address-ordered tree. */ rb_node(extent_node_t) link_szad; -#endif /* Linkage for the address-ordered tree. */ rb_node(extent_node_t) link_ad; -#ifdef JEMALLOC_PROF /* Profile counters, used for huge objects. */ prof_ctx_t *prof_ctx; -#endif /* Pointer to the extent that this tree node is responsible for. */ void *addr; @@ -34,9 +30,7 @@ typedef rb_tree(extent_node_t) extent_tree_t; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) -#endif rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) diff --git a/deps/jemalloc/include/jemalloc/internal/hash.h b/deps/jemalloc/include/jemalloc/internal/hash.h index 8a46ce308..2f501f5d4 100644 --- a/deps/jemalloc/include/jemalloc/internal/hash.h +++ b/deps/jemalloc/include/jemalloc/internal/hash.h @@ -26,7 +26,7 @@ uint64_t hash(const void *key, size_t len, uint64_t seed); JEMALLOC_INLINE uint64_t hash(const void *key, size_t len, uint64_t seed) { - const uint64_t m = 0xc6a4a7935bd1e995LLU; + const uint64_t m = UINT64_C(0xc6a4a7935bd1e995); const int r = 47; uint64_t h = seed ^ (len * m); const uint64_t *data = (const uint64_t *)key; @@ -48,14 +48,14 @@ hash(const void *key, size_t len, uint64_t seed) data2 = (const unsigned char *)data; switch(len & 7) { - case 7: h ^= ((uint64_t)(data2[6])) << 48; - case 6: h ^= ((uint64_t)(data2[5])) << 40; - case 5: h ^= ((uint64_t)(data2[4])) << 32; - case 4: h ^= ((uint64_t)(data2[3])) << 24; - case 3: h ^= ((uint64_t)(data2[2])) << 16; - case 2: h ^= ((uint64_t)(data2[1])) << 8; - case 1: h ^= ((uint64_t)(data2[0])); - h *= m; + case 7: h ^= ((uint64_t)(data2[6])) << 48; + case 6: h ^= ((uint64_t)(data2[5])) << 40; + case 5: h ^= ((uint64_t)(data2[4])) << 32; + case 4: h ^= ((uint64_t)(data2[3])) << 24; + case 3: h ^= ((uint64_t)(data2[2])) << 16; + case 2: h ^= ((uint64_t)(data2[1])) << 8; + case 1: h ^= ((uint64_t)(data2[0])); + h *= m; } h ^= h >> r; diff --git a/deps/jemalloc/include/jemalloc/internal/huge.h b/deps/jemalloc/include/jemalloc/internal/huge.h index 66544cf8d..e8513c933 100644 --- a/deps/jemalloc/include/jemalloc/internal/huge.h +++ b/deps/jemalloc/include/jemalloc/internal/huge.h @@ -9,12 +9,10 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -#ifdef JEMALLOC_STATS /* Huge allocation statistics. */ extern uint64_t huge_nmalloc; extern uint64_t huge_ndalloc; extern size_t huge_allocated; -#endif /* Protects chunk-related data structures. */ extern malloc_mutex_t huge_mtx; @@ -27,11 +25,12 @@ void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero); void huge_dalloc(void *ptr, bool unmap); size_t huge_salloc(const void *ptr); -#ifdef JEMALLOC_PROF prof_ctx_t *huge_prof_ctx_get(const void *ptr); void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -#endif bool huge_boot(void); +void huge_prefork(void); +void huge_postfork_parent(void); +void huge_postfork_child(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in index a44f0978a..268cd146f 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,17 +1,33 @@ -#include <sys/mman.h> -#include <sys/param.h> -#include <sys/time.h> +#ifndef JEMALLOC_INTERNAL_H +#define JEMALLOC_INTERNAL_H +#include <math.h> +#ifdef _WIN32 +# include <windows.h> +# define ENOENT ERROR_PATH_NOT_FOUND +# define EINVAL ERROR_BAD_ARGUMENTS +# define EAGAIN ERROR_OUTOFMEMORY +# define EPERM ERROR_WRITE_FAULT +# define EFAULT ERROR_INVALID_ADDRESS +# define ENOMEM ERROR_NOT_ENOUGH_MEMORY +# undef ERANGE +# define ERANGE ERROR_INVALID_DATA +#else +# include <sys/param.h> +# include <sys/mman.h> +# include <sys/syscall.h> +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# include <sys/uio.h> +# include <pthread.h> +# include <errno.h> +#endif #include <sys/types.h> -#include <sys/sysctl.h> -#include <sys/uio.h> -#include <errno.h> #include <limits.h> #ifndef SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX #endif -#include <pthread.h> -#include <sched.h> #include <stdarg.h> #include <stdbool.h> #include <stdio.h> @@ -25,16 +41,156 @@ #include <string.h> #include <strings.h> #include <ctype.h> -#include <unistd.h> +#ifdef _MSC_VER +# include <io.h> +typedef intptr_t ssize_t; +# define PATH_MAX 1024 +# define STDERR_FILENO 2 +# define __func__ __FUNCTION__ +/* Disable warnings about deprecated system functions */ +# pragma warning(disable: 4996) +#else +# include <unistd.h> +#endif #include <fcntl.h> -#include <pthread.h> -#include <math.h> -#define JEMALLOC_MANGLE +#define JEMALLOC_NO_DEMANGLE #include "../jemalloc@install_suffix@.h" +#ifdef JEMALLOC_UTRACE +#include <sys/ktrace.h> +#endif + +#ifdef JEMALLOC_VALGRIND +#include <valgrind/valgrind.h> +#include <valgrind/memcheck.h> +#endif + #include "jemalloc/internal/private_namespace.h" +#ifdef JEMALLOC_CC_SILENCE +#define UNUSED JEMALLOC_ATTR(unused) +#else +#define UNUSED +#endif + +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +static const bool config_dss = +#ifdef JEMALLOC_DSS + true +#else + false +#endif + ; +static const bool config_fill = +#ifdef JEMALLOC_FILL + true +#else + false +#endif + ; +static const bool config_lazy_lock = +#ifdef JEMALLOC_LAZY_LOCK + true +#else + false +#endif + ; +static const bool config_prof = +#ifdef JEMALLOC_PROF + true +#else + false +#endif + ; +static const bool config_prof_libgcc = +#ifdef JEMALLOC_PROF_LIBGCC + true +#else + false +#endif + ; +static const bool config_prof_libunwind = +#ifdef JEMALLOC_PROF_LIBUNWIND + true +#else + false +#endif + ; +static const bool config_mremap = +#ifdef JEMALLOC_MREMAP + true +#else + false +#endif + ; +static const bool config_munmap = +#ifdef JEMALLOC_MUNMAP + true +#else + false +#endif + ; +static const bool config_stats = +#ifdef JEMALLOC_STATS + true +#else + false +#endif + ; +static const bool config_tcache = +#ifdef JEMALLOC_TCACHE + true +#else + false +#endif + ; +static const bool config_tls = +#ifdef JEMALLOC_TLS + true +#else + false +#endif + ; +static const bool config_utrace = +#ifdef JEMALLOC_UTRACE + true +#else + false +#endif + ; +static const bool config_valgrind = +#ifdef JEMALLOC_VALGRIND + true +#else + false +#endif + ; +static const bool config_xmalloc = +#ifdef JEMALLOC_XMALLOC + true +#else + false +#endif + ; +static const bool config_ivsalloc = +#ifdef JEMALLOC_IVSALLOC + true +#else + false +#endif + ; + +#ifdef JEMALLOC_ATOMIC9 +#include <machine/atomic.h> +#endif + #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) #include <libkern/OSAtomic.h> #endif @@ -46,48 +202,11 @@ #include <malloc/malloc.h> #endif -#ifdef JEMALLOC_LAZY_LOCK -#include <dlfcn.h> -#endif - #define RB_COMPACT #include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" -extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); - -/* - * Define a custom assert() in order to reduce the chances of deadlock during - * assertion failure. - */ -#ifndef assert -# ifdef JEMALLOC_DEBUG -# define assert(e) do { \ - if (!(e)) { \ - char line_buf[UMAX2S_BUFSIZE]; \ - malloc_write("<jemalloc>: "); \ - malloc_write(__FILE__); \ - malloc_write(":"); \ - malloc_write(u2s(__LINE__, 10, line_buf)); \ - malloc_write(": Failed assertion: "); \ - malloc_write("\""); \ - malloc_write(#e); \ - malloc_write("\"\n"); \ - abort(); \ - } \ -} while (0) -# else -# define assert(e) -# endif -#endif - -#ifdef JEMALLOC_DEBUG -# define dassert(e) assert(e) -#else -# define dassert(e) -#endif - /* * jemalloc can conceptually be broken into components (arena, tcache, etc.), * but there are circular dependencies that cannot be broken without @@ -119,38 +238,56 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #else # define JEMALLOC_ENABLE_INLINE # define JEMALLOC_INLINE static inline +# ifdef _MSC_VER +# define inline _inline +# endif #endif -/* Size of stack-allocated buffer passed to buferror(). */ -#define BUFERROR_BUF 64 +/* Smallest size class to support. */ +#define LG_TINY_MIN 3 +#define TINY_MIN (1U << LG_TINY_MIN) -/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */ -#ifdef __i386__ -# define LG_QUANTUM 4 -#endif -#ifdef __ia64__ -# define LG_QUANTUM 4 -#endif -#ifdef __alpha__ -# define LG_QUANTUM 4 -#endif -#ifdef __sparc64__ -# define LG_QUANTUM 4 -#endif -#if (defined(__amd64__) || defined(__x86_64__)) -# define LG_QUANTUM 4 -#endif -#ifdef __arm__ -# define LG_QUANTUM 3 -#endif -#ifdef __mips__ -# define LG_QUANTUM 3 -#endif -#ifdef __powerpc__ -# define LG_QUANTUM 4 -#endif -#ifdef __s390x__ -# define LG_QUANTUM 4 +/* + * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#ifndef LG_QUANTUM +# if (defined(__i386__) || defined(_M_IX86)) +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# ifdef __sparc64__ +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) +# define LG_QUANTUM 4 +# endif +# ifdef __arm__ +# define LG_QUANTUM 3 +# endif +# ifdef __mips__ +# define LG_QUANTUM 3 +# endif +# ifdef __powerpc__ +# define LG_QUANTUM 4 +# endif +# ifdef __s390x__ +# define LG_QUANTUM 4 +# endif +# ifdef __SH4__ +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifndef LG_QUANTUM +# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" +# endif #endif #define QUANTUM ((size_t)(1U << LG_QUANTUM)) @@ -164,67 +301,149 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define LONG_MASK (LONG - 1) /* Return the smallest long multiple that is >= a. */ -#define LONG_CEILING(a) \ +#define LONG_CEILING(a) \ (((a) + LONG_MASK) & ~LONG_MASK) #define SIZEOF_PTR (1U << LG_SIZEOF_PTR) #define PTR_MASK (SIZEOF_PTR - 1) /* Return the smallest (void *) multiple that is >= a. */ -#define PTR_CEILING(a) \ +#define PTR_CEILING(a) \ (((a) + PTR_MASK) & ~PTR_MASK) /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing. * In addition, this controls the spacing of cacheline-spaced size classes. + * + * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can + * only handle raw constants. */ #define LG_CACHELINE 6 -#define CACHELINE ((size_t)(1U << LG_CACHELINE)) +#define CACHELINE 64 #define CACHELINE_MASK (CACHELINE - 1) /* Return the smallest cacheline multiple that is >= s. */ #define CACHELINE_CEILING(s) \ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) -/* - * Page size. STATIC_PAGE_SHIFT is determined by the configure script. If - * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where - * compile-time values are required for the purposes of defining data - * structures. - */ -#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) -#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) - -#ifdef PAGE_SHIFT -# undef PAGE_SHIFT -#endif -#ifdef PAGE_SIZE -# undef PAGE_SIZE -#endif +/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */ #ifdef PAGE_MASK # undef PAGE_MASK #endif - -#ifdef DYNAMIC_PAGE_SHIFT -# define PAGE_SHIFT lg_pagesize -# define PAGE_SIZE pagesize -# define PAGE_MASK pagesize_mask -#else -# define PAGE_SHIFT STATIC_PAGE_SHIFT -# define PAGE_SIZE STATIC_PAGE_SIZE -# define PAGE_MASK STATIC_PAGE_MASK -#endif +#define LG_PAGE STATIC_PAGE_SHIFT +#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT)) +#define PAGE_MASK ((size_t)(PAGE - 1)) /* Return the smallest pagesize multiple that is >= s. */ #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) +/* Return the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2BASE(a, alignment) \ + ((void *)((uintptr_t)(a) & (-(alignment)))) + +/* Return the offset between a and the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ + ((size_t)((uintptr_t)(a) & (alignment - 1))) + +/* Return the smallest alignment multiple that is >= s. */ +#define ALIGNMENT_CEILING(s, alignment) \ + (((s) + (alignment - 1)) & (-(alignment))) + +/* Declare a variable length array */ +#if __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# include <malloc.h> +# define alloca _alloca +# else +# include <alloca.h> +# endif +# define VARIABLE_ARRAY(type, name, count) \ + type *name = alloca(sizeof(type) * count) +#else +# define VARIABLE_ARRAY(type, name, count) type name[count] +#endif + +#ifdef JEMALLOC_VALGRIND +/* + * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions + * so that when Valgrind reports errors, there are no extra stack frames + * in the backtraces. + * + * The size that is reported to valgrind must be consistent through a chain of + * malloc..realloc..realloc calls. Request size isn't recorded anywhere in + * jemalloc, so it is critical that all callers of these macros provide usize + * rather than request size. As a result, buffer overflow detection is + * technically weakened for the standard API, though it is generally accepted + * practice to consider any extra bytes reported by malloc_usable_size() as + * usable space. + */ +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ + if (config_valgrind && opt_valgrind && cond) \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ +} while (0) +#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ + old_rzsize, zero) do { \ + if (config_valgrind && opt_valgrind) { \ + size_t rzsize = p2rz(ptr); \ + \ + if (ptr == old_ptr) { \ + VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ + usize, rzsize); \ + if (zero && old_usize < usize) { \ + VALGRIND_MAKE_MEM_DEFINED( \ + (void *)((uintptr_t)ptr + \ + old_usize), usize - old_usize); \ + } \ + } else { \ + if (old_ptr != NULL) { \ + VALGRIND_FREELIKE_BLOCK(old_ptr, \ + old_rzsize); \ + } \ + if (ptr != NULL) { \ + size_t copy_size = (old_usize < usize) \ + ? old_usize : usize; \ + size_t tail_size = usize - copy_size; \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ + rzsize, false); \ + if (copy_size > 0) { \ + VALGRIND_MAKE_MEM_DEFINED(ptr, \ + copy_size); \ + } \ + if (zero && tail_size > 0) { \ + VALGRIND_MAKE_MEM_DEFINED( \ + (void *)((uintptr_t)ptr + \ + copy_size), tail_size); \ + } \ + } \ + } \ + } \ +} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ + if (config_valgrind && opt_valgrind) \ + VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \ +} while (0) +#else +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) +#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) +#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) +#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) +#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ + old_rzsize, zero) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) +#endif + +#include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/arena.h" @@ -235,21 +454,22 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif +#include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_TYPES /******************************************************************************/ #define JEMALLOC_H_STRUCTS +#include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" @@ -260,66 +480,37 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif +#include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" -#ifdef JEMALLOC_STATS typedef struct { uint64_t allocated; uint64_t deallocated; } thread_allocated_t; -#endif +/* + * The JEMALLOC_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro + * argument. + */ +#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_CONCAT({0, 0}) #undef JEMALLOC_H_STRUCTS /******************************************************************************/ #define JEMALLOC_H_EXTERNS extern bool opt_abort; -#ifdef JEMALLOC_FILL extern bool opt_junk; -#endif -#ifdef JEMALLOC_SYSV -extern bool opt_sysv; -#endif -#ifdef JEMALLOC_XMALLOC +extern size_t opt_quarantine; +extern bool opt_redzone; +extern bool opt_utrace; +extern bool opt_valgrind; extern bool opt_xmalloc; -#endif -#ifdef JEMALLOC_FILL extern bool opt_zero; -#endif extern size_t opt_narenas; -#ifdef DYNAMIC_PAGE_SHIFT -extern size_t pagesize; -extern size_t pagesize_mask; -extern size_t lg_pagesize; -#endif - /* Number of CPUs. */ extern unsigned ncpus; extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ -extern pthread_key_t arenas_tsd; -#ifndef NO_TLS -/* - * Map of pthread_self() --> arenas[???], used for selecting an arena to use - * for allocations. - */ -extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); -# define ARENA_GET() arenas_tls -# define ARENA_SET(v) do { \ - arenas_tls = (v); \ - pthread_setspecific(arenas_tsd, (void *)(v)); \ -} while (0) -#else -# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) -# define ARENA_SET(v) do { \ - pthread_setspecific(arenas_tsd, (void *)(v)); \ -} while (0) -#endif - /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. @@ -327,45 +518,22 @@ extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); extern arena_t **arenas; extern unsigned narenas; -#ifdef JEMALLOC_STATS -# ifndef NO_TLS -extern __thread thread_allocated_t thread_allocated_tls; -# define ALLOCATED_GET() (thread_allocated_tls.allocated) -# define ALLOCATEDP_GET() (&thread_allocated_tls.allocated) -# define DEALLOCATED_GET() (thread_allocated_tls.deallocated) -# define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated) -# define ALLOCATED_ADD(a, d) do { \ - thread_allocated_tls.allocated += a; \ - thread_allocated_tls.deallocated += d; \ -} while (0) -# else -extern pthread_key_t thread_allocated_tsd; -thread_allocated_t *thread_allocated_get_hard(void); - -# define ALLOCATED_GET() (thread_allocated_get()->allocated) -# define ALLOCATEDP_GET() (&thread_allocated_get()->allocated) -# define DEALLOCATED_GET() (thread_allocated_get()->deallocated) -# define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated) -# define ALLOCATED_ADD(a, d) do { \ - thread_allocated_t *thread_allocated = thread_allocated_get(); \ - thread_allocated->allocated += (a); \ - thread_allocated->deallocated += (d); \ -} while (0) -# endif -#endif - arena_t *arenas_extend(unsigned ind); +void arenas_cleanup(void *arg); arena_t *choose_arena_hard(void); -int buferror(int errnum, char *buf, size_t buflen); void jemalloc_prefork(void); -void jemalloc_postfork(void); +void jemalloc_postfork_parent(void); +void jemalloc_postfork_child(void); +#include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" @@ -376,21 +544,22 @@ void jemalloc_postfork(void); #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif +#include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_EXTERNS /******************************************************************************/ #define JEMALLOC_H_INLINES +#include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" @@ -398,34 +567,20 @@ void jemalloc_postfork(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE -size_t pow2_ceil(size_t x); +malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) + size_t s2u(size_t size); -size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); -void malloc_write(const char *s); -arena_t *choose_arena(void); -# if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -thread_allocated_t *thread_allocated_get(void); -# endif +size_t sa2u(size_t size, size_t alignment); +arena_t *choose_arena(arena_t *arena); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -/* Compute the smallest power of 2 that is >= x. */ -JEMALLOC_INLINE size_t -pow2_ceil(size_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; -#if (LG_SIZEOF_PTR == 3) - x |= x >> 32; -#endif - x++; - return (x); -} +/* + * Map of pthread_self() --> arenas[???], used for selecting an arena to use + * for allocations. + */ +malloc_tsd_externs(arenas, arena_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, arenas, arena_t *, NULL, arenas_cleanup) /* * Compute usable size that would result from allocating an object with the @@ -435,7 +590,7 @@ JEMALLOC_INLINE size_t s2u(size_t size) { - if (size <= small_maxclass) + if (size <= SMALL_MAXCLASS) return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); if (size <= arena_maxclass) return (PAGE_CEILING(size)); @@ -447,10 +602,12 @@ s2u(size_t size) * specified size and alignment. */ JEMALLOC_INLINE size_t -sa2u(size_t size, size_t alignment, size_t *run_size_p) +sa2u(size_t size, size_t alignment) { size_t usize; + assert(alignment != 0 && ((alignment - 1) & alignment) == 0); + /* * Round size up to the nearest multiple of alignment. * @@ -464,12 +621,8 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) * 96 | 1100000 | 32 * 144 | 10100000 | 32 * 192 | 11000000 | 64 - * - * Depending on runtime settings, it is possible that arena_malloc() - * will further round up to a power of two, but that never causes - * correctness issues. */ - usize = (size + (alignment - 1)) & (-alignment); + usize = ALIGNMENT_CEILING(size, alignment); /* * (usize < size) protects against the combination of maximal * alignment and size greater than maximal alignment. @@ -479,8 +632,8 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) return (0); } - if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { - if (usize <= small_maxclass) + if (usize <= arena_maxclass && alignment <= PAGE) { + if (usize <= SMALL_MAXCLASS) return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); return (PAGE_CEILING(usize)); } else { @@ -494,7 +647,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) usize = PAGE_CEILING(size); /* * (usize < size) protects against very large sizes within - * PAGE_SIZE of SIZE_T_MAX. + * PAGE of SIZE_T_MAX. * * (usize + alignment < usize) protects against the * combination of maximal alignment and usize large enough @@ -512,93 +665,63 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) /* * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. + * If the run wouldn't fit within a chunk, round up to a huge + * allocation size. */ - if (usize >= alignment) - run_size = usize + alignment - PAGE_SIZE; - else { - /* - * It is possible that (alignment << 1) will cause - * overflow, but it doesn't matter because we also - * subtract PAGE_SIZE, which in the case of overflow - * leaves us with a very large run_size. That causes - * the first conditional below to fail, which means - * that the bogus run_size value never gets used for - * anything important. - */ - run_size = (alignment << 1) - PAGE_SIZE; - } - if (run_size_p != NULL) - *run_size_p = run_size; - + run_size = usize + alignment - PAGE; if (run_size <= arena_maxclass) return (PAGE_CEILING(usize)); return (CHUNK_CEILING(usize)); } } -/* - * Wrapper around malloc_message() that avoids the need for - * JEMALLOC_P(malloc_message)(...) throughout the code. - */ -JEMALLOC_INLINE void -malloc_write(const char *s) -{ - - JEMALLOC_P(malloc_message)(NULL, s); -} - -/* - * Choose an arena based on a per-thread value (fast-path code, calls slow-path - * code if necessary). - */ +/* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * -choose_arena(void) +choose_arena(arena_t *arena) { arena_t *ret; - ret = ARENA_GET(); - if (ret == NULL) { + if (arena != NULL) + return (arena); + + if ((ret = *arenas_tsd_get()) == NULL) { ret = choose_arena_hard(); assert(ret != NULL); } return (ret); } - -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -JEMALLOC_INLINE thread_allocated_t * -thread_allocated_get(void) -{ - thread_allocated_t *thread_allocated = (thread_allocated_t *) - pthread_getspecific(thread_allocated_tsd); - - if (thread_allocated == NULL) - return (thread_allocated_get_hard()); - return (thread_allocated); -} -#endif #endif #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/rtree.h" +/* + * Include arena.h twice in order to resolve circular dependencies with + * tcache.h. + */ +#define JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_A #include "jemalloc/internal/tcache.h" +#define JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/arena.h" +#undef JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif +#include "jemalloc/internal/quarantine.h" #ifndef JEMALLOC_ENABLE_INLINE void *imalloc(size_t size); void *icalloc(size_t size); void *ipalloc(size_t usize, size_t alignment, bool zero); -size_t isalloc(const void *ptr); -# ifdef JEMALLOC_IVSALLOC -size_t ivsalloc(const void *ptr); -# endif +size_t isalloc(const void *ptr, bool demote); +size_t ivsalloc(const void *ptr, bool demote); +size_t u2rz(size_t usize); +size_t p2rz(const void *ptr); void idalloc(void *ptr); +void iqalloc(void *ptr); void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move); +malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -609,7 +732,7 @@ imalloc(size_t size) assert(size != 0); if (size <= arena_maxclass) - return (arena_malloc(size, false)); + return (arena_malloc(NULL, size, false, true)); else return (huge_malloc(size, false)); } @@ -619,7 +742,7 @@ icalloc(size_t size) { if (size <= arena_maxclass) - return (arena_malloc(size, true)); + return (arena_malloc(NULL, size, true, true)); else return (huge_malloc(size, true)); } @@ -630,75 +753,80 @@ ipalloc(size_t usize, size_t alignment, bool zero) void *ret; assert(usize != 0); - assert(usize == sa2u(usize, alignment, NULL)); + assert(usize == sa2u(usize, alignment)); - if (usize <= arena_maxclass && alignment <= PAGE_SIZE) - ret = arena_malloc(usize, zero); + if (usize <= arena_maxclass && alignment <= PAGE) + ret = arena_malloc(NULL, usize, zero, true); else { - size_t run_size -#ifdef JEMALLOC_CC_SILENCE - = 0 -#endif - ; - - /* - * Ideally we would only ever call sa2u() once per aligned - * allocation request, and the caller of this function has - * already done so once. However, it's rather burdensome to - * require every caller to pass in run_size, especially given - * that it's only relevant to large allocations. Therefore, - * just call it again here in order to get run_size. - */ - sa2u(usize, alignment, &run_size); - if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(), usize, run_size, - alignment, zero); + if (usize <= arena_maxclass) { + ret = arena_palloc(choose_arena(NULL), usize, alignment, + zero); } else if (alignment <= chunksize) ret = huge_malloc(usize, zero); else ret = huge_palloc(usize, alignment, zero); } - assert(((uintptr_t)ret & (alignment - 1)) == 0); + assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); return (ret); } +/* + * Typical usage: + * void *ptr = [...] + * size_t sz = isalloc(ptr, config_prof); + */ JEMALLOC_INLINE size_t -isalloc(const void *ptr) +isalloc(const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; assert(ptr != NULL); + /* Demotion only makes sense if config_prof is true. */ + assert(config_prof || demote == false); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); - -#ifdef JEMALLOC_PROF - ret = arena_salloc_demote(ptr); -#else - ret = arena_salloc(ptr); -#endif - } else + if (chunk != ptr) + ret = arena_salloc(ptr, demote); + else ret = huge_salloc(ptr); return (ret); } -#ifdef JEMALLOC_IVSALLOC JEMALLOC_INLINE size_t -ivsalloc(const void *ptr) +ivsalloc(const void *ptr, bool demote) { /* Return 0 if ptr is not within a chunk managed by jemalloc. */ if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL) return (0); - return (isalloc(ptr)); + return (isalloc(ptr, demote)); +} + +JEMALLOC_INLINE size_t +u2rz(size_t usize) +{ + size_t ret; + + if (usize <= SMALL_MAXCLASS) { + size_t binind = SMALL_SIZE2BIN(usize); + ret = arena_bin_info[binind].redzone_size; + } else + ret = 0; + + return (ret); +} + +JEMALLOC_INLINE size_t +p2rz(const void *ptr) +{ + size_t usize = isalloc(ptr, false); + + return (u2rz(usize)); } -#endif JEMALLOC_INLINE void idalloc(void *ptr) @@ -709,11 +837,21 @@ idalloc(void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr); + arena_dalloc(chunk->arena, chunk, ptr, true); else huge_dalloc(ptr, true); } +JEMALLOC_INLINE void +iqalloc(void *ptr) +{ + + if (config_fill && opt_quarantine) + quarantine(ptr); + else + idalloc(ptr); +} + JEMALLOC_INLINE void * iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, bool no_move) @@ -724,19 +862,19 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, assert(ptr != NULL); assert(size != 0); - oldsize = isalloc(ptr); + oldsize = isalloc(ptr, config_prof); if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { size_t usize, copysize; /* - * Existing object alignment is inadquate; allocate new space + * Existing object alignment is inadequate; allocate new space * and copy. */ if (no_move) return (NULL); - usize = sa2u(size + extra, alignment, NULL); + usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); @@ -744,7 +882,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, if (extra == 0) return (NULL); /* Try again, without extra this time. */ - usize = sa2u(size, alignment, NULL); + usize = sa2u(size, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); @@ -758,7 +896,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, */ copysize = (size < oldsize) ? size : oldsize; memcpy(ret, ptr, copysize); - idalloc(ptr); + iqalloc(ptr); return (ret); } @@ -773,16 +911,21 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, } else { if (size + extra <= arena_maxclass) { return (arena_ralloc(ptr, oldsize, size, extra, - alignment, zero)); + alignment, zero, true)); } else { return (huge_ralloc(ptr, oldsize, size, extra, alignment, zero)); } } } + +malloc_tsd_externs(thread_allocated, thread_allocated_t) +malloc_tsd_funcs(JEMALLOC_INLINE, thread_allocated, thread_allocated_t, + THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup) #endif #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_INLINES /******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_H */ diff --git a/deps/jemalloc/include/jemalloc/internal/mb.h b/deps/jemalloc/include/jemalloc/internal/mb.h index dc9f2a542..3cfa78729 100644 --- a/deps/jemalloc/include/jemalloc/internal/mb.h +++ b/deps/jemalloc/include/jemalloc/internal/mb.h @@ -54,7 +54,7 @@ mb_write(void) ); #endif } -#elif (defined(__amd64_) || defined(__x86_64__)) +#elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE void mb_write(void) { @@ -87,6 +87,13 @@ mb_write(void) : "memory" /* Clobbers. */ ); } +#elif defined(__tile__) +JEMALLOC_INLINE void +mb_write(void) +{ + + __sync_synchronize(); +} #else /* * This is much slower than a simple memory barrier, but the semantics of mutex diff --git a/deps/jemalloc/include/jemalloc/internal/mutex.h b/deps/jemalloc/include/jemalloc/internal/mutex.h index 62947ced5..de44e1435 100644 --- a/deps/jemalloc/include/jemalloc/internal/mutex.h +++ b/deps/jemalloc/include/jemalloc/internal/mutex.h @@ -1,22 +1,42 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#ifdef JEMALLOC_OSSPIN -typedef OSSpinLock malloc_mutex_t; -#else -typedef pthread_mutex_t malloc_mutex_t; -#endif +typedef struct malloc_mutex_s malloc_mutex_t; -#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP -# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +#ifdef _WIN32 +# define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_INITIALIZER {0} +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} #else -# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ + defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} +# else +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER} +# endif #endif #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +struct malloc_mutex_s { +#ifdef _WIN32 + CRITICAL_SECTION lock; +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLock lock; +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; +#else + pthread_mutex_t lock; +#endif +}; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS @@ -24,11 +44,15 @@ typedef pthread_mutex_t malloc_mutex_t; #ifdef JEMALLOC_LAZY_LOCK extern bool isthreaded; #else +# undef isthreaded /* Undo private_namespace.h definition. */ # define isthreaded true #endif bool malloc_mutex_init(malloc_mutex_t *mutex); -void malloc_mutex_destroy(malloc_mutex_t *mutex); +void malloc_mutex_prefork(malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(malloc_mutex_t *mutex); +bool mutex_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -36,7 +60,6 @@ void malloc_mutex_destroy(malloc_mutex_t *mutex); #ifndef JEMALLOC_ENABLE_INLINE void malloc_mutex_lock(malloc_mutex_t *mutex); -bool malloc_mutex_trylock(malloc_mutex_t *mutex); void malloc_mutex_unlock(malloc_mutex_t *mutex); #endif @@ -46,37 +69,27 @@ malloc_mutex_lock(malloc_mutex_t *mutex) { if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - OSSpinLockLock(mutex); +#ifdef _WIN32 + EnterCriticalSection(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLockLock(&mutex->lock); #else - pthread_mutex_lock(mutex); + pthread_mutex_lock(&mutex->lock); #endif } } -JEMALLOC_INLINE bool -malloc_mutex_trylock(malloc_mutex_t *mutex) -{ - - if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - return (OSSpinLockTry(mutex) == false); -#else - return (pthread_mutex_trylock(mutex) != 0); -#endif - } else - return (false); -} - JEMALLOC_INLINE void malloc_mutex_unlock(malloc_mutex_t *mutex) { if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - OSSpinLockUnlock(mutex); +#ifdef _WIN32 + LeaveCriticalSection(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLockUnlock(&mutex->lock); #else - pthread_mutex_unlock(mutex); + pthread_mutex_unlock(&mutex->lock); #endif } } diff --git a/deps/jemalloc/include/jemalloc/internal/private_namespace.h b/deps/jemalloc/include/jemalloc/internal/private_namespace.h index d4f5f96d7..b8166470d 100644 --- a/deps/jemalloc/include/jemalloc/internal/private_namespace.h +++ b/deps/jemalloc/include/jemalloc/internal/private_namespace.h @@ -1,36 +1,84 @@ +#define a0calloc JEMALLOC_N(a0calloc) +#define a0free JEMALLOC_N(a0free) +#define a0malloc JEMALLOC_N(a0malloc) +#define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) #define arena_bin_index JEMALLOC_N(arena_bin_index) +#define arena_bin_info JEMALLOC_N(arena_bin_info) #define arena_boot JEMALLOC_N(arena_boot) #define arena_dalloc JEMALLOC_N(arena_dalloc) #define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) +#define arena_dalloc_bin_locked JEMALLOC_N(arena_dalloc_bin_locked) +#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) #define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) +#define arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked) +#define arena_dalloc_small JEMALLOC_N(arena_dalloc_small) #define arena_malloc JEMALLOC_N(arena_malloc) #define arena_malloc_large JEMALLOC_N(arena_malloc_large) #define arena_malloc_small JEMALLOC_N(arena_malloc_small) +#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get) +#define arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get) +#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get) +#define arena_mapbits_get JEMALLOC_N(arena_mapbits_get) +#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set) +#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get) +#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set) +#define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get) +#define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get) +#define arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set) +#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set) +#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get) +#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set) +#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get) +#define arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set) +#define arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get) +#define arena_mapp_get JEMALLOC_N(arena_mapp_get) +#define arena_maxclass JEMALLOC_N(arena_maxclass) #define arena_new JEMALLOC_N(arena_new) #define arena_palloc JEMALLOC_N(arena_palloc) +#define arena_postfork_child JEMALLOC_N(arena_postfork_child) +#define arena_postfork_parent JEMALLOC_N(arena_postfork_parent) +#define arena_prefork JEMALLOC_N(arena_prefork) #define arena_prof_accum JEMALLOC_N(arena_prof_accum) #define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) #define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) #define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) +#define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get) #define arena_purge_all JEMALLOC_N(arena_purge_all) #define arena_ralloc JEMALLOC_N(arena_ralloc) #define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) #define arena_run_regind JEMALLOC_N(arena_run_regind) #define arena_salloc JEMALLOC_N(arena_salloc) -#define arena_salloc_demote JEMALLOC_N(arena_salloc_demote) #define arena_stats_merge JEMALLOC_N(arena_stats_merge) #define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) +#define arenas JEMALLOC_N(arenas) #define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) +#define arenas_booted JEMALLOC_N(arenas_booted) +#define arenas_cleanup JEMALLOC_N(arenas_cleanup) #define arenas_extend JEMALLOC_N(arenas_extend) +#define arenas_initialized JEMALLOC_N(arenas_initialized) +#define arenas_lock JEMALLOC_N(arenas_lock) #define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) +#define arenas_tls JEMALLOC_N(arenas_tls) +#define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) +#define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper) +#define arenas_tsd_get JEMALLOC_N(arenas_tsd_get) +#define arenas_tsd_set JEMALLOC_N(arenas_tsd_set) +#define atomic_add_u JEMALLOC_N(atomic_add_u) #define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) #define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) +#define atomic_add_z JEMALLOC_N(atomic_add_z) +#define atomic_sub_u JEMALLOC_N(atomic_sub_u) #define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) #define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) +#define atomic_sub_z JEMALLOC_N(atomic_sub_z) #define base_alloc JEMALLOC_N(base_alloc) #define base_boot JEMALLOC_N(base_boot) +#define base_calloc JEMALLOC_N(base_calloc) #define base_node_alloc JEMALLOC_N(base_node_alloc) #define base_node_dealloc JEMALLOC_N(base_node_dealloc) +#define base_postfork_child JEMALLOC_N(base_postfork_child) +#define base_postfork_parent JEMALLOC_N(base_postfork_parent) +#define base_prefork JEMALLOC_N(base_prefork) #define bitmap_full JEMALLOC_N(bitmap_full) #define bitmap_get JEMALLOC_N(bitmap_get) #define bitmap_info_init JEMALLOC_N(bitmap_info_init) @@ -47,19 +95,19 @@ #define chunk_alloc JEMALLOC_N(chunk_alloc) #define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve) -#define chunk_alloc_swap JEMALLOC_N(chunk_alloc_swap) #define chunk_boot JEMALLOC_N(chunk_boot) #define chunk_dealloc JEMALLOC_N(chunk_dealloc) -#define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) #define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) -#define chunk_dealloc_swap JEMALLOC_N(chunk_dealloc_swap) #define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) +#define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child) +#define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent) +#define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) #define chunk_in_dss JEMALLOC_N(chunk_in_dss) -#define chunk_in_swap JEMALLOC_N(chunk_in_swap) -#define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) -#define chunk_swap_boot JEMALLOC_N(chunk_swap_boot) -#define chunk_swap_enable JEMALLOC_N(chunk_swap_enable) +#define chunk_npages JEMALLOC_N(chunk_npages) +#define chunks_mtx JEMALLOC_N(chunks_mtx) +#define chunks_rtree JEMALLOC_N(chunks_rtree) +#define chunksize JEMALLOC_N(chunksize) +#define chunksize_mask JEMALLOC_N(chunksize_mask) #define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) #define ckh_count JEMALLOC_N(ckh_count) #define ckh_delete JEMALLOC_N(ckh_delete) @@ -77,7 +125,6 @@ #define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) #define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert) #define ckh_try_insert JEMALLOC_N(ckh_try_insert) -#define create_zone JEMALLOC_N(create_zone) #define ctl_boot JEMALLOC_N(ctl_boot) #define ctl_bymib JEMALLOC_N(ctl_bymib) #define ctl_byname JEMALLOC_N(ctl_byname) @@ -115,10 +162,17 @@ #define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start) #define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) #define hash JEMALLOC_N(hash) +#define huge_allocated JEMALLOC_N(huge_allocated) #define huge_boot JEMALLOC_N(huge_boot) #define huge_dalloc JEMALLOC_N(huge_dalloc) #define huge_malloc JEMALLOC_N(huge_malloc) +#define huge_mtx JEMALLOC_N(huge_mtx) +#define huge_ndalloc JEMALLOC_N(huge_ndalloc) +#define huge_nmalloc JEMALLOC_N(huge_nmalloc) #define huge_palloc JEMALLOC_N(huge_palloc) +#define huge_postfork_child JEMALLOC_N(huge_postfork_child) +#define huge_postfork_parent JEMALLOC_N(huge_postfork_parent) +#define huge_prefork JEMALLOC_N(huge_prefork) #define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get) #define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set) #define huge_ralloc JEMALLOC_N(huge_ralloc) @@ -129,21 +183,63 @@ #define idalloc JEMALLOC_N(idalloc) #define imalloc JEMALLOC_N(imalloc) #define ipalloc JEMALLOC_N(ipalloc) +#define iqalloc JEMALLOC_N(iqalloc) #define iralloc JEMALLOC_N(iralloc) #define isalloc JEMALLOC_N(isalloc) +#define isthreaded JEMALLOC_N(isthreaded) #define ivsalloc JEMALLOC_N(ivsalloc) -#define jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init) -#define jemalloc_postfork JEMALLOC_N(jemalloc_postfork) +#define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) +#define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) #define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) #define malloc_cprintf JEMALLOC_N(malloc_cprintf) -#define malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy) #define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) #define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) -#define malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock) +#define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child) +#define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent) +#define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork) #define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) #define malloc_printf JEMALLOC_N(malloc_printf) +#define malloc_snprintf JEMALLOC_N(malloc_snprintf) +#define malloc_strtoumax JEMALLOC_N(malloc_strtoumax) +#define malloc_tsd_boot JEMALLOC_N(malloc_tsd_boot) +#define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register) +#define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc) +#define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc) +#define malloc_tsd_no_cleanup JEMALLOC_N(malloc_tsd_no_cleanup) +#define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) +#define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) #define malloc_write JEMALLOC_N(malloc_write) +#define map_bias JEMALLOC_N(map_bias) #define mb_write JEMALLOC_N(mb_write) +#define mutex_boot JEMALLOC_N(mutex_boot) +#define narenas JEMALLOC_N(narenas) +#define ncpus JEMALLOC_N(ncpus) +#define nhbins JEMALLOC_N(nhbins) +#define opt_abort JEMALLOC_N(opt_abort) +#define opt_junk JEMALLOC_N(opt_junk) +#define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) +#define opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult) +#define opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval) +#define opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample) +#define opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max) +#define opt_narenas JEMALLOC_N(opt_narenas) +#define opt_prof JEMALLOC_N(opt_prof) +#define opt_prof_accum JEMALLOC_N(opt_prof_accum) +#define opt_prof_active JEMALLOC_N(opt_prof_active) +#define opt_prof_final JEMALLOC_N(opt_prof_final) +#define opt_prof_gdump JEMALLOC_N(opt_prof_gdump) +#define opt_prof_leak JEMALLOC_N(opt_prof_leak) +#define opt_prof_prefix JEMALLOC_N(opt_prof_prefix) +#define opt_quarantine JEMALLOC_N(opt_quarantine) +#define opt_redzone JEMALLOC_N(opt_redzone) +#define opt_stats_print JEMALLOC_N(opt_stats_print) +#define opt_tcache JEMALLOC_N(opt_tcache) +#define opt_utrace JEMALLOC_N(opt_utrace) +#define opt_valgrind JEMALLOC_N(opt_valgrind) +#define opt_xmalloc JEMALLOC_N(opt_xmalloc) +#define opt_zero JEMALLOC_N(opt_zero) +#define p2rz JEMALLOC_N(p2rz) +#define pages_purge JEMALLOC_N(pages_purge) #define pow2_ceil JEMALLOC_N(pow2_ceil) #define prof_backtrace JEMALLOC_N(prof_backtrace) #define prof_boot0 JEMALLOC_N(prof_boot0) @@ -154,14 +250,31 @@ #define prof_free JEMALLOC_N(prof_free) #define prof_gdump JEMALLOC_N(prof_gdump) #define prof_idump JEMALLOC_N(prof_idump) +#define prof_interval JEMALLOC_N(prof_interval) #define prof_lookup JEMALLOC_N(prof_lookup) #define prof_malloc JEMALLOC_N(prof_malloc) #define prof_mdump JEMALLOC_N(prof_mdump) +#define prof_promote JEMALLOC_N(prof_promote) #define prof_realloc JEMALLOC_N(prof_realloc) #define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) #define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) +#define prof_tdata_booted JEMALLOC_N(prof_tdata_booted) +#define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) +#define prof_tdata_get JEMALLOC_N(prof_tdata_get) #define prof_tdata_init JEMALLOC_N(prof_tdata_init) -#define pthread_create JEMALLOC_N(pthread_create) +#define prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized) +#define prof_tdata_tls JEMALLOC_N(prof_tdata_tls) +#define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot) +#define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) +#define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) +#define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) +#define quarantine JEMALLOC_N(quarantine) +#define quarantine_boot JEMALLOC_N(quarantine_boot) +#define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) +#define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper) +#define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get) +#define quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set) +#define register_zone JEMALLOC_N(register_zone) #define rtree_get JEMALLOC_N(rtree_get) #define rtree_get_locked JEMALLOC_N(rtree_get_locked) #define rtree_new JEMALLOC_N(rtree_new) @@ -171,25 +284,56 @@ #define stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index) #define stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index) #define stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index) +#define stats_cactive JEMALLOC_N(stats_cactive) #define stats_cactive_add JEMALLOC_N(stats_cactive_add) #define stats_cactive_get JEMALLOC_N(stats_cactive_get) #define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) +#define stats_chunks JEMALLOC_N(stats_chunks) #define stats_print JEMALLOC_N(stats_print) -#define szone2ozone JEMALLOC_N(szone2ozone) #define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) #define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) #define tcache_alloc_small JEMALLOC_N(tcache_alloc_small) #define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) +#define tcache_arena_associate JEMALLOC_N(tcache_arena_associate) +#define tcache_arena_dissociate JEMALLOC_N(tcache_arena_dissociate) #define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) #define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) -#define tcache_boot JEMALLOC_N(tcache_boot) +#define tcache_bin_info JEMALLOC_N(tcache_bin_info) +#define tcache_boot0 JEMALLOC_N(tcache_boot0) +#define tcache_boot1 JEMALLOC_N(tcache_boot1) +#define tcache_booted JEMALLOC_N(tcache_booted) #define tcache_create JEMALLOC_N(tcache_create) #define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) #define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) #define tcache_destroy JEMALLOC_N(tcache_destroy) +#define tcache_enabled_booted JEMALLOC_N(tcache_enabled_booted) +#define tcache_enabled_get JEMALLOC_N(tcache_enabled_get) +#define tcache_enabled_initialized JEMALLOC_N(tcache_enabled_initialized) +#define tcache_enabled_set JEMALLOC_N(tcache_enabled_set) +#define tcache_enabled_tls JEMALLOC_N(tcache_enabled_tls) +#define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot) +#define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) +#define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) +#define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) #define tcache_event JEMALLOC_N(tcache_event) +#define tcache_event_hard JEMALLOC_N(tcache_event_hard) +#define tcache_flush JEMALLOC_N(tcache_flush) #define tcache_get JEMALLOC_N(tcache_get) +#define tcache_initialized JEMALLOC_N(tcache_initialized) +#define tcache_maxclass JEMALLOC_N(tcache_maxclass) +#define tcache_salloc JEMALLOC_N(tcache_salloc) #define tcache_stats_merge JEMALLOC_N(tcache_stats_merge) -#define thread_allocated_get JEMALLOC_N(thread_allocated_get) -#define thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard) -#define u2s JEMALLOC_N(u2s) +#define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) +#define tcache_tls JEMALLOC_N(tcache_tls) +#define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) +#define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) +#define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) +#define tcache_tsd_set JEMALLOC_N(tcache_tsd_set) +#define thread_allocated_booted JEMALLOC_N(thread_allocated_booted) +#define thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized) +#define thread_allocated_tls JEMALLOC_N(thread_allocated_tls) +#define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot) +#define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) +#define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get) +#define thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set) +#define u2rz JEMALLOC_N(u2rz) diff --git a/deps/jemalloc/include/jemalloc/internal/prn.h b/deps/jemalloc/include/jemalloc/internal/prng.h index 0709d7080..83a5462b4 100644 --- a/deps/jemalloc/include/jemalloc/internal/prn.h +++ b/deps/jemalloc/include/jemalloc/internal/prng.h @@ -4,7 +4,7 @@ /* * Simple linear congruential pseudo-random number generator: * - * prn(y) = (a*x + c) % m + * prng(y) = (a*x + c) % m * * where the following constants ensure maximal period: * @@ -25,7 +25,7 @@ * uint32_t state : Seed value. * const uint32_t a, c : See above discussion. */ -#define prn32(r, lg_range, state, a, c) do { \ +#define prng32(r, lg_range, state, a, c) do { \ assert(lg_range > 0); \ assert(lg_range <= 32); \ \ @@ -34,8 +34,8 @@ r >>= (32 - lg_range); \ } while (false) -/* Same as prn32(), but 64 bits of pseudo-randomness, using uint64_t. */ -#define prn64(r, lg_range, state, a, c) do { \ +/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ +#define prng64(r, lg_range, state, a, c) do { \ assert(lg_range > 0); \ assert(lg_range <= 64); \ \ diff --git a/deps/jemalloc/include/jemalloc/internal/prof.h b/deps/jemalloc/include/jemalloc/internal/prof.h index e9064ba6e..c3e3f9e4b 100644 --- a/deps/jemalloc/include/jemalloc/internal/prof.h +++ b/deps/jemalloc/include/jemalloc/internal/prof.h @@ -1,4 +1,3 @@ -#ifdef JEMALLOC_PROF /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -10,28 +9,41 @@ typedef struct prof_tdata_s prof_tdata_t; /* Option defaults. */ #define PROF_PREFIX_DEFAULT "jeprof" -#define LG_PROF_BT_MAX_DEFAULT 7 -#define LG_PROF_SAMPLE_DEFAULT 0 +#define LG_PROF_SAMPLE_DEFAULT 19 #define LG_PROF_INTERVAL_DEFAULT -1 -#define LG_PROF_TCMAX_DEFAULT -1 /* - * Hard limit on stack backtrace depth. Note that the version of - * prof_backtrace() that is based on __builtin_return_address() necessarily has - * a hard-coded number of backtrace frame handlers. + * Hard limit on stack backtrace depth. The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. */ -#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND)) -# define LG_PROF_BT_MAX ((ZU(1) << (LG_SIZEOF_PTR+3)) - 1) -#else -# define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */ -#endif -#define PROF_BT_MAX (1U << LG_PROF_BT_MAX) +#define PROF_BT_MAX 128 + +/* Maximum number of backtraces to store in each per thread LRU cache. */ +#define PROF_TCMAX 1024 /* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 +#define PROF_CKH_MINITEMS 64 /* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUF_SIZE 65536 +#define PROF_DUMP_BUFSIZE 65536 + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 + +/* + * Number of mutexes shared among all ctx's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + +/* + * prof_tdata pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) +#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) +#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -109,8 +121,18 @@ struct prof_ctx_s { /* Associated backtrace. */ prof_bt_t *bt; - /* Protects cnt_merged and cnts_ql. */ - malloc_mutex_t lock; + /* Protects nlimbo, cnt_merged, and cnts_ql. */ + malloc_mutex_t *lock; + + /* + * Number of threads that currently cause this ctx to be in a state of + * limbo due to one of: + * - Initializing per thread counters associated with this ctx. + * - Preparing to destroy this ctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * ctx. + */ + unsigned nlimbo; /* Temporary storage for summation during dump. */ prof_cnt_t cnt_summed; @@ -145,9 +167,14 @@ struct prof_tdata_s { void **vec; /* Sampling state. */ - uint64_t prn_state; + uint64_t prng_state; uint64_t threshold; uint64_t accum; + + /* State used to avoid dumping while operating on prof internals. */ + bool enq; + bool enq_idump; + bool enq_gdump; }; #endif /* JEMALLOC_H_STRUCTS */ @@ -162,13 +189,12 @@ extern bool opt_prof; * to notice state changes. */ extern bool opt_prof_active; -extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */ extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ extern bool opt_prof_leak; /* Dump leak summary at exit. */ extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern ssize_t opt_lg_prof_tcmax; /* lg(max per thread bactrace cache) */ extern char opt_prof_prefix[PATH_MAX + 1]; /* @@ -186,39 +212,14 @@ extern uint64_t prof_interval; */ extern bool prof_promote; -/* (1U << opt_lg_prof_bt_max). */ -extern unsigned prof_bt_max; - -/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ -#ifndef NO_TLS -extern __thread prof_tdata_t *prof_tdata_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -# define PROF_TCACHE_GET() prof_tdata_tls -# define PROF_TCACHE_SET(v) do { \ - prof_tdata_tls = (v); \ - pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ -} while (0) -#else -# define PROF_TCACHE_GET() \ - ((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd)) -# define PROF_TCACHE_SET(v) do { \ - pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ -} while (0) -#endif -/* - * Same contents as b2cnt_tls, but initialized such that the TSD destructor is - * called when a thread exits, so that prof_tdata_tls contents can be merged, - * unlinked, and deallocated. - */ -extern pthread_key_t prof_tdata_tsd; - void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); +void prof_backtrace(prof_bt_t *bt, unsigned nignore); prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); void prof_idump(void); bool prof_mdump(const char *filename); void prof_gdump(void); prof_tdata_t *prof_tdata_init(void); +void prof_tdata_cleanup(void *arg); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(void); @@ -233,13 +234,13 @@ bool prof_boot2(void); \ assert(size == s2u(size)); \ \ - prof_tdata = PROF_TCACHE_GET(); \ - if (prof_tdata == NULL) { \ - prof_tdata = prof_tdata_init(); \ - if (prof_tdata == NULL) { \ + prof_tdata = prof_tdata_get(); \ + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \ + if (prof_tdata != NULL) \ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + else \ ret = NULL; \ - break; \ - } \ + break; \ } \ \ if (opt_prof_active == false) { \ @@ -249,13 +250,13 @@ bool prof_boot2(void); /* Don't bother with sampling logic, since sampling */\ /* interval is 1. */\ bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore, prof_bt_max); \ + prof_backtrace(&bt, nignore); \ ret = prof_lookup(&bt); \ } else { \ if (prof_tdata->threshold == 0) { \ /* Initialize. Seed the prng differently for */\ /* each thread. */\ - prof_tdata->prn_state = \ + prof_tdata->prng_state = \ (uint64_t)(uintptr_t)&size; \ prof_sample_threshold_update(prof_tdata); \ } \ @@ -272,7 +273,7 @@ bool prof_boot2(void); if (size >= prof_tdata->threshold - \ prof_tdata->accum) { \ bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt, nignore, prof_bt_max); \ + prof_backtrace(&bt, nignore); \ ret = prof_lookup(&bt); \ } else \ ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ @@ -280,6 +281,9 @@ bool prof_boot2(void); } while (0) #ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) + +prof_tdata_t *prof_tdata_get(void); void prof_sample_threshold_update(prof_tdata_t *prof_tdata); prof_ctx_t *prof_ctx_get(const void *ptr); void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); @@ -291,12 +295,35 @@ void prof_free(const void *ptr, size_t size); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ +malloc_tsd_externs(prof_tdata, prof_tdata_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, + prof_tdata_cleanup) + +JEMALLOC_INLINE prof_tdata_t * +prof_tdata_get(void) +{ + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + prof_tdata = *prof_tdata_tsd_get(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { + if (prof_tdata == NULL) + prof_tdata = prof_tdata_init(); + } + + return (prof_tdata); +} + JEMALLOC_INLINE void prof_sample_threshold_update(prof_tdata_t *prof_tdata) { uint64_t r; double u; + cassert(config_prof); + /* * Compute sample threshold as a geometrically distributed random * variable with mean (2^opt_lg_prof_sample). @@ -315,8 +342,8 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata) * pp 500 * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) */ - prn64(r, 53, prof_tdata->prn_state, - (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); + prng64(r, 53, prof_tdata->prng_state, + UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); u = (double)r * (1.0/9007199254740992.0L); prof_tdata->threshold = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) @@ -329,13 +356,12 @@ prof_ctx_get(const void *ptr) prof_ctx_t *ret; arena_chunk_t *chunk; + cassert(config_prof); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); - ret = arena_prof_ctx_get(ptr); } else ret = huge_prof_ctx_get(ptr); @@ -348,13 +374,12 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) { arena_chunk_t *chunk; + cassert(config_prof); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) { /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); - arena_prof_ctx_set(ptr, ctx); } else huge_prof_ctx_set(ptr, ctx); @@ -365,11 +390,13 @@ prof_sample_accum_update(size_t size) { prof_tdata_t *prof_tdata; + cassert(config_prof); /* Sampling logic is unnecessary if the interval is 1. */ assert(opt_lg_prof_sample != 0); - prof_tdata = PROF_TCACHE_GET(); - assert(prof_tdata != NULL); + prof_tdata = *prof_tdata_tsd_get(); + if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + return (true); /* Take care to avoid integer overflow. */ if (size >= prof_tdata->threshold - prof_tdata->accum) { @@ -391,8 +418,9 @@ JEMALLOC_INLINE void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) { + cassert(config_prof); assert(ptr != NULL); - assert(size == isalloc(ptr)); + assert(size == isalloc(ptr, true)); if (opt_lg_prof_sample != 0) { if (prof_sample_accum_update(size)) { @@ -437,10 +465,11 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, { prof_thr_cnt_t *told_cnt; + cassert(config_prof); assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); if (ptr != NULL) { - assert(size == isalloc(ptr)); + assert(size == isalloc(ptr, true)); if (opt_lg_prof_sample != 0) { if (prof_sample_accum_update(size)) { /* @@ -463,10 +492,10 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, * It's too late to propagate OOM for this realloc(), * so operate directly on old_cnt->ctx->cnt_merged. */ - malloc_mutex_lock(&old_ctx->lock); + malloc_mutex_lock(old_ctx->lock); old_ctx->cnt_merged.curobjs--; old_ctx->cnt_merged.curbytes -= old_size; - malloc_mutex_unlock(&old_ctx->lock); + malloc_mutex_unlock(old_ctx->lock); told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; } } else @@ -510,9 +539,12 @@ prof_free(const void *ptr, size_t size) { prof_ctx_t *ctx = prof_ctx_get(ptr); + cassert(config_prof); + if ((uintptr_t)ctx > (uintptr_t)1) { - assert(size == isalloc(ptr)); - prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); + prof_thr_cnt_t *tcnt; + assert(size == isalloc(ptr, true)); + tcnt = prof_lookup(ctx->bt); if (tcnt != NULL) { tcnt->epoch++; @@ -533,10 +565,10 @@ prof_free(const void *ptr, size_t size) * OOM during free() cannot be propagated, so operate * directly on cnt->ctx->cnt_merged. */ - malloc_mutex_lock(&ctx->lock); + malloc_mutex_lock(ctx->lock); ctx->cnt_merged.curobjs--; ctx->cnt_merged.curbytes -= size; - malloc_mutex_unlock(&ctx->lock); + malloc_mutex_unlock(ctx->lock); } } } @@ -544,4 +576,3 @@ prof_free(const void *ptr, size_t size) #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#endif /* JEMALLOC_PROF */ diff --git a/deps/jemalloc/include/jemalloc/internal/zone.h b/deps/jemalloc/include/jemalloc/internal/quarantine.h index 859b529d4..38f3d696e 100644 --- a/deps/jemalloc/include/jemalloc/internal/zone.h +++ b/deps/jemalloc/include/jemalloc/internal/quarantine.h @@ -1,9 +1,9 @@ -#ifndef JEMALLOC_ZONE -# error "This source file is for zones on Darwin (OS X)." -#endif /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +/* Default per thread quarantine size if valgrind is enabled. */ +#define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24) + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -12,8 +12,8 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -malloc_zone_t *create_zone(void); -void szone2ozone(malloc_zone_t *zone); +void quarantine(void *ptr); +bool quarantine_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -21,3 +21,4 @@ void szone2ozone(malloc_zone_t *zone); #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ + diff --git a/deps/jemalloc/include/jemalloc/internal/rb.h b/deps/jemalloc/include/jemalloc/internal/rb.h index ee9b009d2..7b675f090 100644 --- a/deps/jemalloc/include/jemalloc/internal/rb.h +++ b/deps/jemalloc/include/jemalloc/internal/rb.h @@ -223,88 +223,88 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * The following API is generated: * * static void - * ex_new(ex_t *extree); + * ex_new(ex_t *tree); * Description: Initialize a red-black tree structure. * Args: - * extree: Pointer to an uninitialized red-black tree object. + * tree: Pointer to an uninitialized red-black tree object. * * static ex_node_t * - * ex_first(ex_t *extree); + * ex_first(ex_t *tree); * static ex_node_t * - * ex_last(ex_t *extree); - * Description: Get the first/last node in extree. + * ex_last(ex_t *tree); + * Description: Get the first/last node in tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * Ret: First/last node in extree, or NULL if extree is empty. + * tree: Pointer to an initialized red-black tree object. + * Ret: First/last node in tree, or NULL if tree is empty. * * static ex_node_t * - * ex_next(ex_t *extree, ex_node_t *node); + * ex_next(ex_t *tree, ex_node_t *node); * static ex_node_t * - * ex_prev(ex_t *extree, ex_node_t *node); + * ex_prev(ex_t *tree, ex_node_t *node); * Description: Get node's successor/predecessor. * Args: - * extree: Pointer to an initialized red-black tree object. - * node : A node in extree. - * Ret: node's successor/predecessor in extree, or NULL if node is + * tree: Pointer to an initialized red-black tree object. + * node: A node in tree. + * Ret: node's successor/predecessor in tree, or NULL if node is * last/first. * * static ex_node_t * - * ex_search(ex_t *extree, ex_node_t *key); + * ex_search(ex_t *tree, ex_node_t *key); * Description: Search for node that matches key. * Args: - * extree: Pointer to an initialized red-black tree object. - * key : Search key. - * Ret: Node in extree that matches key, or NULL if no match. + * tree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in tree that matches key, or NULL if no match. * * static ex_node_t * - * ex_nsearch(ex_t *extree, ex_node_t *key); + * ex_nsearch(ex_t *tree, ex_node_t *key); * static ex_node_t * - * ex_psearch(ex_t *extree, ex_node_t *key); + * ex_psearch(ex_t *tree, ex_node_t *key); * Description: Search for node that matches key. If no match is found, * return what would be key's successor/predecessor, were - * key in extree. + * key in tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * key : Search key. - * Ret: Node in extree that matches key, or if no match, hypothetical - * node's successor/predecessor (NULL if no successor/predecessor). + * tree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in tree that matches key, or if no match, hypothetical node's + * successor/predecessor (NULL if no successor/predecessor). * * static void - * ex_insert(ex_t *extree, ex_node_t *node); - * Description: Insert node into extree. + * ex_insert(ex_t *tree, ex_node_t *node); + * Description: Insert node into tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * node : Node to be inserted into extree. + * tree: Pointer to an initialized red-black tree object. + * node: Node to be inserted into tree. * * static void - * ex_remove(ex_t *extree, ex_node_t *node); - * Description: Remove node from extree. + * ex_remove(ex_t *tree, ex_node_t *node); + * Description: Remove node from tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * node : Node in extree to be removed. + * tree: Pointer to an initialized red-black tree object. + * node: Node in tree to be removed. * * static ex_node_t * - * ex_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, + * ex_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, * ex_node_t *, void *), void *arg); * static ex_node_t * - * ex_reverse_iter(ex_t *extree, ex_node_t *start, ex_node *(*cb)(ex_t *, + * ex_reverse_iter(ex_t *tree, ex_node_t *start, ex_node *(*cb)(ex_t *, * ex_node_t *, void *), void *arg); - * Description: Iterate forward/backward over extree, starting at node. - * If extree is modified, iteration must be immediately + * Description: Iterate forward/backward over tree, starting at node. If + * tree is modified, iteration must be immediately * terminated by the callback function that causes the * modification. * Args: - * extree: Pointer to an initialized red-black tree object. - * start : Node at which to start iteration, or NULL to start at - * first/last node. - * cb : Callback function, which is called for each node during - * iteration. Under normal circumstances the callback function - * should return NULL, which causes iteration to continue. If a - * callback function returns non-NULL, iteration is immediately - * terminated and the non-NULL return value is returned by the - * iterator. This is useful for re-starting iteration after - * modifying extree. - * arg : Opaque pointer passed to cb(). + * tree : Pointer to an initialized red-black tree object. + * start: Node at which to start iteration, or NULL to start at + * first/last node. + * cb : Callback function, which is called for each node during + * iteration. Under normal circumstances the callback function + * should return NULL, which causes iteration to continue. If a + * callback function returns non-NULL, iteration is immediately + * terminated and the non-NULL return value is returned by the + * iterator. This is useful for re-starting iteration after + * modifying tree. + * arg : Opaque pointer passed to cb(). * Ret: NULL if iteration completed, or the non-NULL callback return value * that caused termination of the iteration. */ diff --git a/deps/jemalloc/include/jemalloc/internal/size_classes.sh b/deps/jemalloc/include/jemalloc/internal/size_classes.sh new file mode 100755 index 000000000..29c80c1fb --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/size_classes.sh @@ -0,0 +1,122 @@ +#!/bin/sh + +# The following limits are chosen such that they cover all supported platforms. + +# Range of quanta. +lg_qmin=3 +lg_qmax=4 + +# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)]. +lg_tmin=3 + +# Range of page sizes. +lg_pmin=12 +lg_pmax=16 + +pow2() { + e=$1 + pow2_result=1 + while [ ${e} -gt 0 ] ; do + pow2_result=$((${pow2_result} + ${pow2_result})) + e=$((${e} - 1)) + done +} + +cat <<EOF +/* This file was automatically generated by size_classes.sh. */ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +EOF + +lg_q=${lg_qmin} +while [ ${lg_q} -le ${lg_qmax} ] ; do + lg_t=${lg_tmin} + while [ ${lg_t} -le ${lg_q} ] ; do + lg_p=${lg_pmin} + while [ ${lg_p} -le ${lg_pmax} ] ; do + echo "#if (LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})" + echo "#define SIZE_CLASSES_DEFINED" + pow2 ${lg_q}; q=${pow2_result} + pow2 ${lg_t}; t=${pow2_result} + pow2 ${lg_p}; p=${pow2_result} + bin=0 + psz=0 + sz=${t} + delta=$((${sz} - ${psz})) + echo "/* SIZE_CLASS(bin, delta, sz) */" + echo "#define SIZE_CLASSES \\" + + # Tiny size classes. + while [ ${sz} -lt ${q} ] ; do + echo " SIZE_CLASS(${bin}, ${delta}, ${sz}) \\" + bin=$((${bin} + 1)) + psz=${sz} + sz=$((${sz} + ${sz})) + delta=$((${sz} - ${psz})) + done + # Quantum-multiple size classes. For each doubling of sz, as many as 4 + # size classes exist. Their spacing is the greater of: + # - q + # - sz/4, where sz is a power of 2 + while [ ${sz} -lt ${p} ] ; do + if [ ${sz} -ge $((${q} * 4)) ] ; then + i=$((${sz} / 4)) + else + i=${q} + fi + next_2pow=$((${sz} * 2)) + while [ ${sz} -lt $next_2pow ] ; do + echo " SIZE_CLASS(${bin}, ${delta}, ${sz}) \\" + bin=$((${bin} + 1)) + psz=${sz} + sz=$((${sz} + ${i})) + delta=$((${sz} - ${psz})) + done + done + echo + echo "#define NBINS ${bin}" + echo "#define SMALL_MAXCLASS ${psz}" + echo "#endif" + echo + lg_p=$((${lg_p} + 1)) + done + lg_t=$((${lg_t} + 1)) + done + lg_q=$((${lg_q} + 1)) +done + +cat <<EOF +#ifndef SIZE_CLASSES_DEFINED +# error "No size class definitions match configuration" +#endif +#undef SIZE_CLASSES_DEFINED +/* + * The small_size2bin lookup table uses uint8_t to encode each bin index, so we + * cannot support more than 256 small size classes. Further constrain NBINS to + * 255 to support prof_promote, since all small size classes, plus a "not + * small" size class must be stored in 8 bits of arena_chunk_map_t's bits + * field. + */ +#if (NBINS > 255) +# error "Too many small size classes" +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +EOF diff --git a/deps/jemalloc/include/jemalloc/internal/stats.h b/deps/jemalloc/include/jemalloc/internal/stats.h index 2a9b31d9f..27f68e368 100644 --- a/deps/jemalloc/include/jemalloc/internal/stats.h +++ b/deps/jemalloc/include/jemalloc/internal/stats.h @@ -1,25 +1,16 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#define UMAX2S_BUFSIZE 65 - -#ifdef JEMALLOC_STATS typedef struct tcache_bin_stats_s tcache_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t; typedef struct malloc_large_stats_s malloc_large_stats_t; typedef struct arena_stats_s arena_stats_t; -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) typedef struct chunk_stats_s chunk_stats_t; -#endif #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS -#ifdef JEMALLOC_STATS - -#ifdef JEMALLOC_TCACHE struct tcache_bin_stats_s { /* * Number of allocation requests that corresponded to the size of this @@ -27,7 +18,6 @@ struct tcache_bin_stats_s { */ uint64_t nrequests; }; -#endif struct malloc_bin_stats_s { /* @@ -52,13 +42,11 @@ struct malloc_bin_stats_s { */ uint64_t nrequests; -#ifdef JEMALLOC_TCACHE /* Number of tcache fills from this bin. */ uint64_t nfills; /* Number of tcache flushes to this bin. */ uint64_t nflushes; -#endif /* Total number of runs created for this bin's size class. */ uint64_t nruns; @@ -69,9 +57,6 @@ struct malloc_bin_stats_s { */ uint64_t reruns; - /* High-water mark for this bin. */ - size_t highruns; - /* Current number of runs in this bin. */ size_t curruns; }; @@ -93,9 +78,6 @@ struct malloc_large_stats_s { */ uint64_t nrequests; - /* High-water mark for this size class. */ - size_t highruns; - /* Current number of runs of this size class. */ size_t curruns; }; @@ -127,14 +109,10 @@ struct arena_stats_s { */ malloc_large_stats_t *lstats; }; -#endif /* JEMALLOC_STATS */ -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) struct chunk_stats_s { -# ifdef JEMALLOC_STATS /* Number of chunks that were allocated. */ uint64_t nchunks; -# endif /* High-water mark for number of chunks allocated. */ size_t highchunks; @@ -146,7 +124,6 @@ struct chunk_stats_s { */ size_t curchunks; }; -#endif /* JEMALLOC_STATS */ #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ @@ -154,24 +131,14 @@ struct chunk_stats_s { extern bool opt_stats_print; -#ifdef JEMALLOC_STATS extern size_t stats_cactive; -#endif -char *u2s(uint64_t x, unsigned base, char *s); -#ifdef JEMALLOC_STATS -void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); -void malloc_printf(const char *format, ...) - JEMALLOC_ATTR(format(printf, 1, 2)); -#endif void stats_print(void (*write)(void *, const char *), void *cbopaque, const char *opts); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#ifdef JEMALLOC_STATS #ifndef JEMALLOC_ENABLE_INLINE size_t stats_cactive_get(void); @@ -202,6 +169,5 @@ stats_cactive_sub(size_t size) } #endif -#endif /* JEMALLOC_STATS */ #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/tcache.h b/deps/jemalloc/include/jemalloc/internal/tcache.h index da3c68c57..38d735c86 100644 --- a/deps/jemalloc/include/jemalloc/internal/tcache.h +++ b/deps/jemalloc/include/jemalloc/internal/tcache.h @@ -1,4 +1,3 @@ -#ifdef JEMALLOC_TCACHE /******************************************************************************/ #ifdef JEMALLOC_H_TYPES @@ -7,6 +6,16 @@ typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; /* + * tcache pointers close to NULL are used to encode state information that is + * used for two purposes: preventing thread caching on a per thread basis and + * cleaning up during thread shutdown. + */ +#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) +#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) +#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) +#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY + +/* * Absolute maximum number of cache slots for each small bin in the thread * cache. This is an additional constraint beyond that imposed as: twice the * number of regions per run for this size class. @@ -22,17 +31,26 @@ typedef struct tcache_s tcache_t; #define LG_TCACHE_MAXCLASS_DEFAULT 15 /* - * (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation - * events between full GC sweeps (-1: disabled). Integer rounding may cause - * the actual number to be slightly higher, since GC is performed - * incrementally. + * TCACHE_GC_SWEEP is the approximate number of allocation events between + * full GC sweeps. Integer rounding may cause the actual number to be + * slightly higher, since GC is performed incrementally. */ -#define LG_TCACHE_GC_SWEEP_DEFAULT 13 +#define TCACHE_GC_SWEEP 8192 + +/* Number of tcache allocation/deallocation events between incremental GCs. */ +#define TCACHE_GC_INCR \ + ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +typedef enum { + tcache_enabled_false = 0, /* Enable cast to/from bool. */ + tcache_enabled_true = 1, + tcache_enabled_default = 2 +} tcache_enabled_t; + /* * Read-only information associated with each element of tcache_t's tbins array * is stored separately, mainly to reduce memory usage. @@ -42,9 +60,7 @@ struct tcache_bin_info_s { }; struct tcache_bin_s { -# ifdef JEMALLOC_STATS tcache_bin_stats_t tstats; -# endif int low_water; /* Min # cached since last GC. */ unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ unsigned ncached; /* # of cached objects. */ @@ -52,12 +68,8 @@ struct tcache_bin_s { }; struct tcache_s { -# ifdef JEMALLOC_STATS ql_elm(tcache_t) link; /* Used for aggregating stats. */ -# endif -# ifdef JEMALLOC_PROF uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */ -# endif arena_t *arena; /* This thread's arena. */ unsigned ev_cnt; /* Event count since incremental GC. */ unsigned next_gc_bin; /* Next bin to GC. */ @@ -76,29 +88,11 @@ struct tcache_s { extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; -extern ssize_t opt_lg_tcache_gc_sweep; extern tcache_bin_info_t *tcache_bin_info; -/* Map of thread-specific caches. */ -#ifndef NO_TLS -extern __thread tcache_t *tcache_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -# define TCACHE_GET() tcache_tls -# define TCACHE_SET(v) do { \ - tcache_tls = (tcache_t *)(v); \ - pthread_setspecific(tcache_tsd, (void *)(v)); \ -} while (0) -#else -# define TCACHE_GET() ((tcache_t *)pthread_getspecific(tcache_tsd)) -# define TCACHE_SET(v) do { \ - pthread_setspecific(tcache_tsd, (void *)(v)); \ -} while (0) -#endif -extern pthread_key_t tcache_tsd; - /* - * Number of tcache bins. There are nbins small-object bins, plus 0 or more + * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. */ extern size_t nhbins; @@ -106,68 +100,159 @@ extern size_t nhbins; /* Maximum cached size class. */ extern size_t tcache_maxclass; -/* Number of tcache allocation/deallocation events between incremental GCs. */ -extern unsigned tcache_gc_incr; - -void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ); -void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ); -tcache_t *tcache_create(arena_t *arena); +size_t tcache_salloc(const void *ptr); +void tcache_event_hard(tcache_t *tcache); void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind); +void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache); +void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache); +void tcache_arena_associate(tcache_t *tcache, arena_t *arena); +void tcache_arena_dissociate(tcache_t *tcache); +tcache_t *tcache_create(arena_t *arena); void tcache_destroy(tcache_t *tcache); -#ifdef JEMALLOC_STATS +void tcache_thread_cleanup(void *arg); void tcache_stats_merge(tcache_t *tcache, arena_t *arena); -#endif -bool tcache_boot(void); +bool tcache_boot0(void); +bool tcache_boot1(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *) +malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t) + void tcache_event(tcache_t *tcache); -tcache_t *tcache_get(void); +void tcache_flush(void); +bool tcache_enabled_get(void); +tcache_t *tcache_get(bool create); +void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin); void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); -void tcache_dalloc_small(tcache_t *tcache, void *ptr); +void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind); void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) +/* Map of thread-specific caches. */ +malloc_tsd_externs(tcache, tcache_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, tcache, tcache_t *, NULL, + tcache_thread_cleanup) +/* Per thread flag that allows thread caches to be disabled. */ +malloc_tsd_externs(tcache_enabled, tcache_enabled_t) +malloc_tsd_funcs(JEMALLOC_INLINE, tcache_enabled, tcache_enabled_t, + tcache_enabled_default, malloc_tsd_no_cleanup) + +JEMALLOC_INLINE void +tcache_flush(void) +{ + tcache_t *tcache; + + cassert(config_tcache); + + tcache = *tcache_tsd_get(); + if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) + return; + tcache_destroy(tcache); + tcache = NULL; + tcache_tsd_set(&tcache); +} + +JEMALLOC_INLINE bool +tcache_enabled_get(void) +{ + tcache_enabled_t tcache_enabled; + + cassert(config_tcache); + + tcache_enabled = *tcache_enabled_tsd_get(); + if (tcache_enabled == tcache_enabled_default) { + tcache_enabled = (tcache_enabled_t)opt_tcache; + tcache_enabled_tsd_set(&tcache_enabled); + } + + return ((bool)tcache_enabled); +} + +JEMALLOC_INLINE void +tcache_enabled_set(bool enabled) +{ + tcache_enabled_t tcache_enabled; + tcache_t *tcache; + + cassert(config_tcache); + + tcache_enabled = (tcache_enabled_t)enabled; + tcache_enabled_tsd_set(&tcache_enabled); + tcache = *tcache_tsd_get(); + if (enabled) { + if (tcache == TCACHE_STATE_DISABLED) { + tcache = NULL; + tcache_tsd_set(&tcache); + } + } else /* disabled */ { + if (tcache > TCACHE_STATE_MAX) { + tcache_destroy(tcache); + tcache = NULL; + } + if (tcache == NULL) { + tcache = TCACHE_STATE_DISABLED; + tcache_tsd_set(&tcache); + } + } +} + JEMALLOC_INLINE tcache_t * -tcache_get(void) +tcache_get(bool create) { tcache_t *tcache; - if ((isthreaded & opt_tcache) == false) + if (config_tcache == false) + return (NULL); + if (config_lazy_lock && isthreaded == false) return (NULL); - tcache = TCACHE_GET(); - if ((uintptr_t)tcache <= (uintptr_t)2) { + tcache = *tcache_tsd_get(); + if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) { + if (tcache == TCACHE_STATE_DISABLED) + return (NULL); if (tcache == NULL) { - tcache = tcache_create(choose_arena()); - if (tcache == NULL) - return (NULL); - } else { - if (tcache == (void *)(uintptr_t)1) { + if (create == false) { /* - * Make a note that an allocator function was - * called after the tcache_thread_cleanup() was - * called. + * Creating a tcache here would cause + * allocation as a side effect of free(). + * Ordinarily that would be okay since + * tcache_create() failure is a soft failure + * that doesn't propagate. However, if TLS + * data are freed via free() as in glibc, + * subtle corruption could result from setting + * a TLS variable after its backing memory is + * freed. */ - TCACHE_SET((uintptr_t)2); + return (NULL); + } + if (tcache_enabled_get() == false) { + tcache_enabled_set(false); /* Memoize. */ + return (NULL); } + return (tcache_create(choose_arena(NULL))); + } + if (tcache == TCACHE_STATE_PURGATORY) { + /* + * Make a note that an allocator function was called + * after tcache_thread_cleanup() was called. + */ + tcache = TCACHE_STATE_REINCARNATED; + tcache_tsd_set(&tcache); return (NULL); } + if (tcache == TCACHE_STATE_REINCARNATED) + return (NULL); + not_reached(); } return (tcache); @@ -177,60 +262,13 @@ JEMALLOC_INLINE void tcache_event(tcache_t *tcache) { - if (tcache_gc_incr == 0) + if (TCACHE_GC_INCR == 0) return; tcache->ev_cnt++; - assert(tcache->ev_cnt <= tcache_gc_incr); - if (tcache->ev_cnt == tcache_gc_incr) { - size_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin = &tcache->tbins[binind]; - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; - - if (tbin->low_water > 0) { - /* - * Flush (ceiling) 3/4 of the objects below the low - * water mark. - */ - if (binind < nbins) { - tcache_bin_flush_small(tbin, binind, - tbin->ncached - tbin->low_water + - (tbin->low_water >> 2) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); - } else { - tcache_bin_flush_large(tbin, binind, - tbin->ncached - tbin->low_water + - (tbin->low_water >> 2) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); - } - /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. - */ - if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) - >= 1) - tbin->lg_fill_div++; - } else if (tbin->low_water < 0) { - /* - * Increase fill count by 2X. Make sure lg_fill_div - * stays greater than 0. - */ - if (tbin->lg_fill_div > 1) - tbin->lg_fill_div--; - } - tbin->low_water = tbin->ncached; - - tcache->next_gc_bin++; - if (tcache->next_gc_bin == nhbins) - tcache->next_gc_bin = 0; - tcache->ev_cnt = 0; - } + assert(tcache->ev_cnt <= TCACHE_GC_INCR); + if (tcache->ev_cnt == TCACHE_GC_INCR) + tcache_event_hard(tcache); } JEMALLOC_INLINE void * @@ -257,7 +295,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) tcache_bin_t *tbin; binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); + assert(binind < NBINS); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); if (ret == NULL) { @@ -265,24 +303,29 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } - assert(arena_salloc(ret) == arena_bin_info[binind].reg_size); + assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size); if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif - } else + if (config_fill) { + if (opt_junk) { + arena_alloc_junk_small(ret, + &arena_bin_info[binind], false); + } else if (opt_zero) + memset(ret, 0, size); + } + } else { + if (config_fill && opt_junk) { + arena_alloc_junk_small(ret, &arena_bin_info[binind], + true); + } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + } -#ifdef JEMALLOC_STATS - tbin->tstats.nrequests++; -#endif -#ifdef JEMALLOC_PROF - tcache->prof_accumbytes += arena_bin_info[binind].reg_size; -#endif + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += arena_bin_info[binind].reg_size; tcache_event(tcache); return (ret); } @@ -296,7 +339,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) size = PAGE_CEILING(size); assert(size <= tcache_maxclass); - binind = nbins + (size >> PAGE_SHIFT) - 1; + binind = NBINS + (size >> LG_PAGE) - 1; assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -309,28 +352,30 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) if (ret == NULL) return (NULL); } else { -#ifdef JEMALLOC_PROF - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> - PAGE_SHIFT); - chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK; -#endif + if (config_prof && prof_promote && size == PAGE) { + arena_chunk_t *chunk = + (arena_chunk_t *)CHUNK_ADDR2BASE(ret); + size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> + LG_PAGE); + arena_mapbits_large_binind_set(chunk, pageind, + BININD_INVALID); + } if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif - } else + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } + } else { + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); memset(ret, 0, size); + } -#ifdef JEMALLOC_STATS - tbin->tstats.nrequests++; -#endif -#ifdef JEMALLOC_PROF - tcache->prof_accumbytes += size; -#endif + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += size; } tcache_event(tcache); @@ -338,45 +383,21 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } JEMALLOC_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr) +tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind) { - arena_t *arena; - arena_chunk_t *chunk; - arena_run_t *run; - arena_bin_t *bin; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; - size_t pageind, binind; - arena_chunk_map_t *mapelm; - - assert(arena_salloc(ptr) <= small_maxclass); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - mapelm = &chunk->map[pageind-map_bias]; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - bin = run->bin; - binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / - sizeof(arena_bin_t); - assert(binind < nbins); - -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ptr, 0x5a, arena_bin_info[binind].reg_size); -#endif + + assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); + + if (config_fill && opt_junk) + arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (tbin->ncached == tbin_info->ncached_max) { tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> - 1) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; @@ -388,35 +409,24 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) JEMALLOC_INLINE void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) { - arena_t *arena; - arena_chunk_t *chunk; - size_t pageind, binind; + size_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(arena_salloc(ptr) > small_maxclass); - assert(arena_salloc(ptr) <= tcache_maxclass); + assert(tcache_salloc(ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(ptr) <= tcache_maxclass); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - binind = nbins + (size >> PAGE_SHIFT) - 1; + binind = NBINS + (size >> LG_PAGE) - 1; -#ifdef JEMALLOC_FILL - if (opt_junk) + if (config_fill && opt_junk) memset(ptr, 0x5a, size); -#endif tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (tbin->ncached == tbin_info->ncached_max) { tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> - 1) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); + 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); tbin->avail[tbin->ncached] = ptr; @@ -428,4 +438,3 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ -#endif /* JEMALLOC_TCACHE */ diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h new file mode 100644 index 000000000..0037cf35e --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/tsd.h @@ -0,0 +1,397 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Maximum number of malloc_tsd users with cleanup functions. */ +#define MALLOC_TSD_CLEANUPS_MAX 8 + +typedef bool (*malloc_tsd_cleanup_t)(void); + +/* + * TLS/TSD-agnostic macro-based implementation of thread-specific data. There + * are four macros that support (at least) three use cases: file-private, + * library-private, and library-private inlined. Following is an example + * library-private tsd variable: + * + * In example.h: + * typedef struct { + * int x; + * int y; + * } example_t; + * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) + * malloc_tsd_protos(, example, example_t *) + * malloc_tsd_externs(example, example_t *) + * In example.c: + * malloc_tsd_data(, example, example_t *, EX_INITIALIZER) + * malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER, + * example_tsd_cleanup) + * + * The result is a set of generated functions, e.g.: + * + * bool example_tsd_boot(void) {...} + * example_t **example_tsd_get() {...} + * void example_tsd_set(example_t **val) {...} + * + * Note that all of the functions deal in terms of (a_type *) rather than + * (a_type) so that it is possible to support non-pointer types (unlike + * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is + * cast to (void *). This means that the cleanup function needs to cast *and* + * dereference the function argument, e.g.: + * + * void + * example_tsd_cleanup(void *arg) + * { + * example_t *example = *(example_t **)arg; + * + * [...] + * if ([want the cleanup function to be called again]) { + * example_tsd_set(&example); + * } + * } + * + * If example_tsd_set() is called within example_tsd_cleanup(), it will be + * called again. This is similar to how pthreads TSD destruction works, except + * that pthreads only calls the cleanup function again if the value was set to + * non-NULL. + */ + +/* malloc_tsd_protos(). */ +#define malloc_tsd_protos(a_attr, a_name, a_type) \ +a_attr bool \ +a_name##_tsd_boot(void); \ +a_attr a_type * \ +a_name##_tsd_get(void); \ +a_attr void \ +a_name##_tsd_set(a_type *val); + +/* malloc_tsd_externs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##_tls; \ +extern __thread bool a_name##_initialized; \ +extern bool a_name##_booted; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##_tls; \ +extern pthread_key_t a_name##_tsd; \ +extern bool a_name##_booted; +#elif (defined(_WIN32)) +#define malloc_tsd_externs(a_name, a_type) \ +extern DWORD a_name##_tsd; \ +extern bool a_name##_booted; +#else +#define malloc_tsd_externs(a_name, a_type) \ +extern pthread_key_t a_name##_tsd; \ +extern bool a_name##_booted; +#endif + +/* malloc_tsd_data(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ + a_name##_tls = a_initializer; \ +a_attr __thread bool JEMALLOC_TLS_MODEL \ + a_name##_initialized = false; \ +a_attr bool a_name##_booted = false; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ + a_name##_tls = a_initializer; \ +a_attr pthread_key_t a_name##_tsd; \ +a_attr bool a_name##_booted = false; +#elif (defined(_WIN32)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr DWORD a_name##_tsd; \ +a_attr bool a_name##_booted = false; +#else +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr pthread_key_t a_name##_tsd; \ +a_attr bool a_name##_booted = false; +#endif + +/* malloc_tsd_funcs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##_tsd_cleanup_wrapper(void) \ +{ \ + \ + if (a_name##_initialized) { \ + a_name##_initialized = false; \ + a_cleanup(&a_name##_tls); \ + } \ + return (a_name##_initialized); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##_tsd_cleanup_wrapper); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + \ + assert(a_name##_booted); \ + return (&a_name##_tls); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##_booted); \ + a_name##_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + a_name##_initialized = true; \ +} +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \ + return (true); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + \ + assert(a_name##_booted); \ + return (&a_name##_tls); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##_booted); \ + a_name##_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)(&a_name##_tls))) { \ + malloc_write("<jemalloc>: Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + } \ +} +#elif (defined(_WIN32)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Data structure. */ \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##_tsd_wrapper_t; \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##_tsd_cleanup_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \ + if (wrapper == NULL) \ + return (false); \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + a_type val = wrapper->val; \ + a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + a_cleanup(&val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + return (true); \ + } \ + } \ + malloc_tsd_dalloc(wrapper); \ + return (false); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + a_name##_tsd = TlsAlloc(); \ + if (a_name##_tsd == TLS_OUT_OF_INDEXES) \ + return (true); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##_tsd_cleanup_wrapper); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_name##_tsd_wrapper_t * \ +a_name##_tsd_get_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ + TlsGetValue(a_name##_tsd); \ + \ + if (wrapper == NULL) { \ + wrapper = (a_name##_tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } else { \ + static a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + } \ + if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + } \ + return (wrapper); \ +} \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} +#else +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Data structure. */ \ +typedef struct { \ + bool initialized; \ + a_type val; \ +} a_name##_tsd_wrapper_t; \ +/* Initialization/cleanup. */ \ +a_attr void \ +a_name##_tsd_cleanup_wrapper(void *arg) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\ + \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + wrapper->initialized = false; \ + a_cleanup(&wrapper->val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + return; \ + } \ + } \ + malloc_tsd_dalloc(wrapper); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (pthread_key_create(&a_name##_tsd, \ + a_name##_tsd_cleanup_wrapper) != 0) \ + return (true); \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_name##_tsd_wrapper_t * \ +a_name##_tsd_get_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ + pthread_getspecific(a_name##_tsd); \ + \ + if (wrapper == NULL) { \ + wrapper = (a_name##_tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + malloc_write("<jemalloc>: Error allocating" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } else { \ + static a_type tsd_static_data = a_initializer; \ + wrapper->initialized = false; \ + wrapper->val = tsd_static_data; \ + } \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)wrapper)) { \ + malloc_write("<jemalloc>: Error setting" \ + " TSD for "#a_name"\n"); \ + abort(); \ + } \ + } \ + return (wrapper); \ +} \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_no_cleanup(void *); +void malloc_tsd_cleanup_register(bool (*f)(void)); +void malloc_tsd_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/internal/util.h b/deps/jemalloc/include/jemalloc/internal/util.h new file mode 100644 index 000000000..847969363 --- /dev/null +++ b/deps/jemalloc/include/jemalloc/internal/util.h @@ -0,0 +1,160 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 + +/* + * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be + * large enough for all possible uses within jemalloc. + */ +#define MALLOC_PRINTF_BUFSIZE 4096 + +/* + * Wrap a cpp argument that contains commas such that it isn't broken up into + * multiple arguments. + */ +#define JEMALLOC_CONCAT(...) __VA_ARGS__ + +/* + * Silence compiler warnings due to uninitialized values. This is used + * wherever the compiler fails to recognize that the variable is never used + * uninitialized. + */ +#ifdef JEMALLOC_CC_SILENCE +# define JEMALLOC_CC_SILENCE_INIT(v) = v +#else +# define JEMALLOC_CC_SILENCE_INIT(v) +#endif + +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. + */ +#ifndef assert +#define assert(e) do { \ + if (config_debug && !(e)) { \ + malloc_printf( \ + "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) +#endif + +/* Use to assert a particular configuration, e.g., cassert(config_debug). */ +#define cassert(c) do { \ + if ((c) == false) \ + assert(false); \ +} while (0) + +#ifndef not_reached +#define not_reached() do { \ + if (config_debug) { \ + malloc_printf( \ + "<jemalloc>: %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef not_implemented +#define not_implemented() do { \ + if (config_debug) { \ + malloc_printf("<jemalloc>: %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#define assert_not_implemented(e) do { \ + if (config_debug && !(e)) \ + not_implemented(); \ +} while (0) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +int buferror(char *buf, size_t buflen); +uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); +void malloc_write(const char *s); + +/* + * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating + * point math. + */ +int malloc_vsnprintf(char *str, size_t size, const char *format, + va_list ap); +int malloc_snprintf(char *str, size_t size, const char *format, ...) + JEMALLOC_ATTR(format(printf, 3, 4)); +void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap); +void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); +void malloc_printf(const char *format, ...) + JEMALLOC_ATTR(format(printf, 1, 2)); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +size_t pow2_ceil(size_t x); +void malloc_write(const char *s); +void set_errno(int errnum); +int get_errno(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) +/* Compute the smallest power of 2 that is >= x. */ +JEMALLOC_INLINE size_t +pow2_ceil(size_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +#if (LG_SIZEOF_PTR == 3) + x |= x >> 32; +#endif + x++; + return (x); +} + +/* Sets error code */ +JEMALLOC_INLINE void +set_errno(int errnum) +{ + +#ifdef _WIN32 + SetLastError(errnum); +#else + errno = errnum; +#endif +} + +/* Get last error code */ +JEMALLOC_INLINE int +get_errno(void) +{ + +#ifdef _WIN32 + return (GetLastError()); +#else + return (errno); +#endif +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/deps/jemalloc/include/jemalloc/jemalloc.h.in b/deps/jemalloc/include/jemalloc/jemalloc.h.in index 580a5ec5d..ad0694858 100644 --- a/deps/jemalloc/include/jemalloc/jemalloc.h.in +++ b/deps/jemalloc/include/jemalloc/jemalloc.h.in @@ -15,10 +15,8 @@ extern "C" { #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" #include "jemalloc_defs@install_suffix@.h" -#ifndef JEMALLOC_P -# define JEMALLOC_P(s) s -#endif +#ifdef JEMALLOC_EXPERIMENTAL #define ALLOCM_LG_ALIGN(la) (la) #if LG_SIZEOF_PTR == 2 #define ALLOCM_ALIGN(a) (ffs(a)-1) @@ -31,34 +29,124 @@ extern "C" { #define ALLOCM_SUCCESS 0 #define ALLOCM_ERR_OOM 1 #define ALLOCM_ERR_NOT_MOVED 2 +#endif -extern const char *JEMALLOC_P(malloc_conf); -extern void (*JEMALLOC_P(malloc_message))(void *, const char *); +/* + * The je_ prefix on the following public symbol declarations is an artifact of + * namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see below). + */ +extern JEMALLOC_EXPORT const char *je_malloc_conf; +extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, + const char *s); -void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc); -void *JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc); -int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) - JEMALLOC_ATTR(nonnull(1)); -void *JEMALLOC_P(realloc)(void *ptr, size_t size); -void JEMALLOC_P(free)(void *ptr); +JEMALLOC_EXPORT void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *je_calloc(size_t num, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT int je_posix_memalign(void **memptr, size_t alignment, + size_t size) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT void *je_aligned_alloc(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +JEMALLOC_EXPORT void *je_realloc(void *ptr, size_t size); +JEMALLOC_EXPORT void je_free(void *ptr); -size_t JEMALLOC_P(malloc_usable_size)(const void *ptr); -void JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), - void *cbopaque, const char *opts); -int JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); -int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, - size_t *miblenp); -int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_EXPORT void * je_memalign(size_t alignment, size_t size) + JEMALLOC_ATTR(malloc); +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_EXPORT void * je_valloc(size_t size) JEMALLOC_ATTR(malloc); +#endif + +JEMALLOC_EXPORT size_t je_malloc_usable_size(const void *ptr); +JEMALLOC_EXPORT void je_malloc_stats_print(void (*write_cb)(void *, + const char *), void *je_cbopaque, const char *opts); +JEMALLOC_EXPORT int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +JEMALLOC_EXPORT int je_mallctlnametomib(const char *name, size_t *mibp, + size_t *miblenp); +JEMALLOC_EXPORT int je_mallctlbymib(const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen); -int JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) - JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, +#ifdef JEMALLOC_EXPERIMENTAL +JEMALLOC_EXPORT int je_allocm(void **ptr, size_t *rsize, size_t size, + int flags) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) +JEMALLOC_EXPORT int je_sallocm(const void *ptr, size_t *rsize, int flags) JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_dallocm(void *ptr, int flags) + JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int je_nallocm(size_t *rsize, size_t size, int flags); +#endif + +/* + * By default application code must explicitly refer to mangled symbol names, + * so that it is possible to use jemalloc in conjunction with another allocator + * in the same application. Define JEMALLOC_MANGLE in order to cause automatic + * name mangling that matches the API prefixing that happened as a result of + * --with-mangling and/or --with-jemalloc-prefix configuration settings. + */ +#ifdef JEMALLOC_MANGLE +#ifndef JEMALLOC_NO_DEMANGLE +#define JEMALLOC_NO_DEMANGLE +#endif +#define malloc_conf je_malloc_conf +#define malloc_message je_malloc_message +#define malloc je_malloc +#define calloc je_calloc +#define posix_memalign je_posix_memalign +#define aligned_alloc je_aligned_alloc +#define realloc je_realloc +#define free je_free +#define malloc_usable_size je_malloc_usable_size +#define malloc_stats_print je_malloc_stats_print +#define mallctl je_mallctl +#define mallctlnametomib je_mallctlnametomib +#define mallctlbymib je_mallctlbymib +#define memalign je_memalign +#define valloc je_valloc +#ifdef JEMALLOC_EXPERIMENTAL +#define allocm je_allocm +#define rallocm je_rallocm +#define sallocm je_sallocm +#define dallocm je_dallocm +#define nallocm je_nallocm +#endif +#endif + +/* + * The je_* macros can be used as stable alternative names for the public + * jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily meant + * for use in jemalloc itself, but it can be used by application code to + * provide isolation from the name mangling specified via --with-mangling + * and/or --with-jemalloc-prefix. + */ +#ifndef JEMALLOC_NO_DEMANGLE +#undef je_malloc_conf +#undef je_malloc_message +#undef je_malloc +#undef je_calloc +#undef je_posix_memalign +#undef je_aligned_alloc +#undef je_realloc +#undef je_free +#undef je_malloc_usable_size +#undef je_malloc_stats_print +#undef je_mallctl +#undef je_mallctlnametomib +#undef je_mallctlbymib +#undef je_memalign +#undef je_valloc +#ifdef JEMALLOC_EXPERIMENTAL +#undef je_allocm +#undef je_rallocm +#undef je_sallocm +#undef je_dallocm +#undef je_nallocm +#endif +#endif #ifdef __cplusplus }; diff --git a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in index 9ac7e1c20..c469142a5 100644 --- a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in +++ b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in @@ -1,22 +1,36 @@ -#ifndef JEMALLOC_DEFS_H_ -#define JEMALLOC_DEFS_H_ - /* - * If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed. - * This makes it possible, with some care, to use multiple allocators - * simultaneously. - * - * In many cases it is more convenient to manually prefix allocator function - * calls than to let macros do it automatically, particularly when using - * multiple allocators simultaneously. Define JEMALLOC_MANGLE before - * #include'ing jemalloc.h in order to cause name mangling that corresponds to - * the API prefixing. + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. */ #undef JEMALLOC_PREFIX #undef JEMALLOC_CPREFIX -#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE)) -#undef JEMALLOC_P -#endif + +/* + * Name mangling for public symbols is controlled by --with-mangling and + * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by + * these macro definitions. + */ +#undef je_malloc_conf +#undef je_malloc_message +#undef je_malloc +#undef je_calloc +#undef je_posix_memalign +#undef je_aligned_alloc +#undef je_realloc +#undef je_free +#undef je_malloc_usable_size +#undef je_malloc_stats_print +#undef je_mallctl +#undef je_mallctlnametomib +#undef je_mallctlbymib +#undef je_memalign +#undef je_valloc +#undef je_allocm +#undef je_rallocm +#undef je_sallocm +#undef je_dallocm +#undef je_nallocm /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. @@ -33,6 +47,9 @@ */ #undef CPU_SPINWAIT +/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */ +#undef JEMALLOC_ATOMIC9 + /* * Defined if OSAtomic*() functions are available, as provided by Darwin, and * documented in the atomic(3) manual page. @@ -40,19 +57,82 @@ #undef JEMALLOC_OSATOMIC /* + * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and + * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the + * functions are defined in libgcc instead of being inlines) + */ +#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 + +/* + * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and + * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the + * functions are defined in libgcc instead of being inlines) + */ +#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 + +/* * Defined if OSSpin*() functions are available, as provided by Darwin, and * documented in the spinlock(3) manual page. */ #undef JEMALLOC_OSSPIN +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +#undef JEMALLOC_MALLOC_THREAD_CLEANUP + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#undef JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +#undef JEMALLOC_MUTEX_INIT_CB + /* Defined if __attribute__((...)) syntax is supported. */ #undef JEMALLOC_HAVE_ATTR #ifdef JEMALLOC_HAVE_ATTR # define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +#elif _MSC_VER +# define JEMALLOC_ATTR(s) +# ifdef DLLEXPORT +# define JEMALLOC_EXPORT __declspec(dllexport) +# else +# define JEMALLOC_EXPORT __declspec(dllimport) +# endif +# define JEMALLOC_ALIGNED(s) __declspec(align(s)) +# define JEMALLOC_SECTION(s) __declspec(allocate(s)) +# define JEMALLOC_NOINLINE __declspec(noinline) #else # define JEMALLOC_ATTR(s) +# define JEMALLOC_EXPORT +# define JEMALLOC_ALIGNED(s) +# define JEMALLOC_SECTION(s) +# define JEMALLOC_NOINLINE #endif +/* Defined if sbrk() is supported. */ +#undef JEMALLOC_HAVE_SBRK + +/* Non-empty if the tls_model attribute is supported. */ +#undef JEMALLOC_TLS_MODEL + /* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ #undef JEMALLOC_CC_SILENCE @@ -78,12 +158,6 @@ #undef JEMALLOC_PROF_GCC /* - * JEMALLOC_TINY enables support for tiny objects, which are smaller than one - * quantum. - */ -#undef JEMALLOC_TINY - -/* * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. * This makes it possible to allocate/deallocate objects without any locking * when the cache is in the steady state. @@ -96,29 +170,43 @@ */ #undef JEMALLOC_DSS -/* JEMALLOC_SWAP enables mmap()ed swap file support. */ -#undef JEMALLOC_SWAP - -/* Support memory filling (junk/zero). */ +/* Support memory filling (junk/zero/quarantine/redzone). */ #undef JEMALLOC_FILL +/* Support the experimental API. */ +#undef JEMALLOC_EXPERIMENTAL + +/* Support utrace(2)-based tracing. */ +#undef JEMALLOC_UTRACE + +/* Support Valgrind. */ +#undef JEMALLOC_VALGRIND + /* Support optional abort() on OOM. */ #undef JEMALLOC_XMALLOC -/* Support SYSV semantics. */ -#undef JEMALLOC_SYSV - /* Support lazy locking (avoid locking unless a second thread is launched). */ #undef JEMALLOC_LAZY_LOCK -/* Determine page size at run time if defined. */ -#undef DYNAMIC_PAGE_SHIFT - /* One page is 2^STATIC_PAGE_SHIFT bytes. */ #undef STATIC_PAGE_SHIFT +/* + * If defined, use munmap() to unmap freed chunks, rather than storing them for + * later reuse. This is disabled by default on Linux because common sequences + * of mmap()/munmap() calls will cause virtual memory map holes. + */ +#undef JEMALLOC_MUNMAP + +/* + * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is + * disabled by default because it is Linux-specific and it will cause virtual + * memory map holes, much like munmap(2) does. + */ +#undef JEMALLOC_MREMAP + /* TLS is used to map arenas and magazine caches to threads. */ -#undef NO_TLS +#undef JEMALLOC_TLS /* * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside @@ -139,9 +227,6 @@ #undef JEMALLOC_ZONE #undef JEMALLOC_ZONE_VERSION -/* If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). */ -#undef JEMALLOC_MREMAP_FIXED - /* * Methods for purging unused pages differ between operating systems. * @@ -164,4 +249,5 @@ /* sizeof(long) == 2^LG_SIZEOF_LONG. */ #undef LG_SIZEOF_LONG -#endif /* JEMALLOC_DEFS_H_ */ +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#undef LG_SIZEOF_INTMAX_T diff --git a/deps/jemalloc/include/msvc_compat/inttypes.h b/deps/jemalloc/include/msvc_compat/inttypes.h new file mode 100644 index 000000000..a4e6b75cb --- /dev/null +++ b/deps/jemalloc/include/msvc_compat/inttypes.h @@ -0,0 +1,313 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "stdint.h" + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 + +#ifdef _WIN64 +# define __PRI64_PREFIX "l" +# define __PRIPTR_PREFIX "l" +#else +# define __PRI64_PREFIX "ll" +# define __PRIPTR_PREFIX +#endif + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "d" +#define PRIi32 "i" +#define PRIdLEAST32 "d" +#define PRIiLEAST32 "i" +#define PRIdFAST32 "d" +#define PRIiFAST32 "i" + +#define PRId64 __PRI64_PREFIX "d" +#define PRIi64 __PRI64_PREFIX "i" +#define PRIdLEAST64 __PRI64_PREFIX "d" +#define PRIiLEAST64 __PRI64_PREFIX "i" +#define PRIdFAST64 __PRI64_PREFIX "d" +#define PRIiFAST64 __PRI64_PREFIX "i" + +#define PRIdMAX __PRI64_PREFIX "d" +#define PRIiMAX __PRI64_PREFIX "i" + +#define PRIdPTR __PRIPTR_PREFIX "d" +#define PRIiPTR __PRIPTR_PREFIX "i" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "o" +#define PRIu32 "u" +#define PRIx32 "x" +#define PRIX32 "X" +#define PRIoLEAST32 "o" +#define PRIuLEAST32 "u" +#define PRIxLEAST32 "x" +#define PRIXLEAST32 "X" +#define PRIoFAST32 "o" +#define PRIuFAST32 "u" +#define PRIxFAST32 "x" +#define PRIXFAST32 "X" + +#define PRIo64 __PRI64_PREFIX "o" +#define PRIu64 __PRI64_PREFIX "u" +#define PRIx64 __PRI64_PREFIX "x" +#define PRIX64 __PRI64_PREFIX "X" +#define PRIoLEAST64 __PRI64_PREFIX "o" +#define PRIuLEAST64 __PRI64_PREFIX "u" +#define PRIxLEAST64 __PRI64_PREFIX "x" +#define PRIXLEAST64 __PRI64_PREFIX "X" +#define PRIoFAST64 __PRI64_PREFIX "o" +#define PRIuFAST64 __PRI64_PREFIX "u" +#define PRIxFAST64 __PRI64_PREFIX "x" +#define PRIXFAST64 __PRI64_PREFIX "X" + +#define PRIoMAX __PRI64_PREFIX "o" +#define PRIuMAX __PRI64_PREFIX "u" +#define PRIxMAX __PRI64_PREFIX "x" +#define PRIXMAX __PRI64_PREFIX "X" + +#define PRIoPTR __PRIPTR_PREFIX "o" +#define PRIuPTR __PRIPTR_PREFIX "u" +#define PRIxPTR __PRIPTR_PREFIX "x" +#define PRIXPTR __PRIPTR_PREFIX "X" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +#endif // __STDC_FORMAT_MACROS ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ] diff --git a/deps/jemalloc/include/msvc_compat/stdbool.h b/deps/jemalloc/include/msvc_compat/stdbool.h new file mode 100644 index 000000000..da9ee8b80 --- /dev/null +++ b/deps/jemalloc/include/msvc_compat/stdbool.h @@ -0,0 +1,16 @@ +#ifndef stdbool_h +#define stdbool_h + +#include <wtypes.h> + +/* MSVC doesn't define _Bool or bool in C, but does have BOOL */ +/* Note this doesn't pass autoconf's test because (bool) 0.5 != true */ +typedef BOOL _Bool; + +#define bool _Bool +#define true 1 +#define false 0 + +#define __bool_true_false_are_defined 1 + +#endif /* stdbool_h */ diff --git a/deps/jemalloc/include/msvc_compat/stdint.h b/deps/jemalloc/include/msvc_compat/stdint.h new file mode 100644 index 000000000..d02608a59 --- /dev/null +++ b/deps/jemalloc/include/msvc_compat/stdint.h @@ -0,0 +1,247 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2008 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include <limits.h> + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include <wchar.h> +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. +#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h> +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +#define INTMAX_C INT64_C +#define UINTMAX_C UINT64_C + +#endif // __STDC_CONSTANT_MACROS ] + + +#endif // _MSC_STDINT_H_ ] diff --git a/deps/jemalloc/include/msvc_compat/strings.h b/deps/jemalloc/include/msvc_compat/strings.h new file mode 100644 index 000000000..c84975b6b --- /dev/null +++ b/deps/jemalloc/include/msvc_compat/strings.h @@ -0,0 +1,23 @@ +#ifndef strings_h +#define strings_h + +/* MSVC doesn't define ffs/ffsl. This dummy strings.h header is provided + * for both */ +#include <intrin.h> +#pragma intrinsic(_BitScanForward) +static __forceinline int ffsl(long x) +{ + unsigned long i; + + if (_BitScanForward(&i, x)) + return (i + 1); + return (0); +} + +static __forceinline int ffs(int x) +{ + + return (ffsl(x)); +} + +#endif |