path: root/deps/jemalloc/include/jemalloc/internal
author     antirez <antirez@gmail.com>    2015-10-06 16:18:30 +0200
committer  antirez <antirez@gmail.com>    2015-10-06 16:55:37 +0200
commit     a9951b1b6a326532163e0fe4ee1a26e972258a1e (patch)
tree       ca555f37238537175cc1b34aa62a9f873026047f /deps/jemalloc/include/jemalloc/internal
parent     e3ded0273c43986a49ddd9d5fb4a20d187d015de (diff)
download   redis-a9951b1b6a326532163e0fe4ee1a26e972258a1e.tar.gz
Jemalloc updated to 4.0.3.
Diffstat (limited to 'deps/jemalloc/include/jemalloc/internal')
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/arena.h                      | 1032
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/atomic.h                     |  477
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/base.h                       |    4
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/bitmap.h                     |   58
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/chunk.h                      |   62
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/chunk_dss.h                  |    3
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/chunk_mmap.h                 |    7
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/ckh.h                        |    8
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/ctl.h                        |   14
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/extent.h                     |  217
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/hash.h                       |   13
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/huge.h                       |   36
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in       |  878
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h    |   64
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in  |   91
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h   |    6
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/mutex.h                      |   14
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/pages.h                      |   26
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/private_symbols.txt          |  336
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/prng.h                       |   14
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/prof.h                       |  702
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/ql.h                         |    4
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/qr.h                         |    6
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/quarantine.h                 |   21
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/rb.h                         |   24
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/rtree.h                      |  366
-rwxr-xr-x  deps/jemalloc/include/jemalloc/internal/size_classes.sh              |  290
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/stats.h                      |   64
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/tcache.h                     |  305
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/tsd.h                        |  539
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/util.h                       |  174
-rw-r--r--  deps/jemalloc/include/jemalloc/internal/valgrind.h                   |  112
32 files changed, 3962 insertions(+), 2005 deletions(-)
diff --git a/deps/jemalloc/include/jemalloc/internal/arena.h b/deps/jemalloc/include/jemalloc/internal/arena.h
index 9d000c03d..12c617979 100644
--- a/deps/jemalloc/include/jemalloc/internal/arena.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena.h
@@ -1,30 +1,10 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
-/*
- * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
- * as small as possible such that this setting is still honored, without
- * violating other constraints. The goal is to make runs as small as possible
- * without exceeding a per run external fragmentation threshold.
- *
- * We use binary fixed point math for overhead computations, where the binary
- * point is implicitly RUN_BFP bits to the left.
- *
- * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
- * honored for some/all object sizes, since when heap profiling is enabled
- * there is one pointer of header overhead per object (plus a constant). This
- * constraint is relaxed (ignored) for runs that are so small that the
- * per-region overhead is greater than:
- *
- * (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP))
- */
-#define RUN_BFP 12
-/* \/ Implicit binary fixed point. */
-#define RUN_MAX_OVRHD 0x0000003dU
-#define RUN_MAX_OVRHD_RELAX 0x00001800U
+#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS)
/* Maximum number of regions in one run. */
-#define LG_RUN_MAXREGS 11
+#define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN)
#define RUN_MAXREGS (1U << LG_RUN_MAXREGS)
/*
@@ -36,16 +16,18 @@
/*
* The minimum ratio of active:dirty pages per arena is computed as:
*
- * (nactive >> opt_lg_dirty_mult) >= ndirty
+ * (nactive >> lg_dirty_mult) >= ndirty
*
- * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times
- * as many active pages as dirty pages.
+ * So, supposing that lg_dirty_mult is 3, there can be no less than 8 times as
+ * many active pages as dirty pages.
*/
#define LG_DIRTY_MULT_DEFAULT 3
-typedef struct arena_chunk_map_s arena_chunk_map_t;
-typedef struct arena_chunk_s arena_chunk_t;
+typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t;
typedef struct arena_run_s arena_run_t;
+typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t;
+typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t;
+typedef struct arena_chunk_s arena_chunk_t;
typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;
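[Editor's note] The active:dirty ratio described above is what drives purging: with the default lg_dirty_mult of 3, an arena starts purging once fewer than 8 active pages exist per dirty page. A minimal sketch of that trigger as a hypothetical standalone helper (the actual check presumably lives behind arena_maybe_purge(), declared further down in this header):

static bool
arena_should_purge(size_t nactive, size_t ndirty, ssize_t lg_dirty_mult)
{
	/* A negative multiplier conventionally disables purging. */
	if (lg_dirty_mult < 0)
		return (false);
	/* Purge once ndirty exceeds nactive >> lg_dirty_mult. */
	return (ndirty > (nactive >> lg_dirty_mult));
}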
@@ -54,54 +36,34 @@ typedef struct arena_s arena_t;
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
-/* Each element of the chunk map corresponds to one page within the chunk. */
-struct arena_chunk_map_s {
-#ifndef JEMALLOC_PROF
- /*
- * Overlay prof_ctx in order to allow it to be referenced by dead code.
- * Such antics aren't warranted for per arena data structures, but
- * chunk map overhead accounts for a percentage of memory, rather than
- * being just a fixed cost.
- */
- union {
-#endif
- union {
- /*
- * Linkage for run trees. There are two disjoint uses:
- *
- * 1) arena_t's runs_avail tree.
- * 2) arena_run_t conceptually uses this linkage for in-use
- * non-full runs, rather than directly embedding linkage.
- */
- rb_node(arena_chunk_map_t) rb_link;
- /*
- * List of runs currently in purgatory. arena_chunk_purge()
- * temporarily allocates runs that contain dirty pages while
- * purging, so that other threads cannot use the runs while the
- * purging thread is operating without the arena lock held.
- */
- ql_elm(arena_chunk_map_t) ql_link;
- } u;
+#ifdef JEMALLOC_ARENA_STRUCTS_A
+struct arena_run_s {
+ /* Index of bin this run is associated with. */
+ szind_t binind;
- /* Profile counters, used for large object runs. */
- prof_ctx_t *prof_ctx;
-#ifndef JEMALLOC_PROF
- }; /* union { ... }; */
-#endif
+ /* Number of free regions in run. */
+ unsigned nfree;
+ /* Per region allocated/deallocated bitmap. */
+ bitmap_t bitmap[BITMAP_GROUPS_MAX];
+};
+
+/* Each element of the chunk map corresponds to one page within the chunk. */
+struct arena_chunk_map_bits_s {
/*
* Run address (or size) and various flags are stored together. The bit
* layout looks like (assuming 32-bit system):
*
- * ???????? ???????? ????nnnn nnnndula
+ * ???????? ???????? ???nnnnn nnndumla
*
* ? : Unallocated: Run address for first/last pages, unset for internal
* pages.
* Small: Run page offset.
- * Large: Run size for first page, unset for trailing pages.
+ * Large: Run page count for first page, unset for trailing pages.
* n : binind for small size class, BININD_INVALID for large size class.
* d : dirty?
* u : unzeroed?
+ * m : decommitted?
* l : large?
* a : allocated?
*
@@ -110,78 +72,109 @@ struct arena_chunk_map_s {
* p : run page offset
* s : run size
* n : binind for size class; large objects set these to BININD_INVALID
- * except for promoted allocations (see prof_promote)
* x : don't care
* - : 0
* + : 1
- * [DULA] : bit set
- * [dula] : bit unset
+ * [DUMLA] : bit set
+ * [dumla] : bit unset
*
* Unallocated (clean):
- * ssssssss ssssssss ssss++++ ++++du-a
- * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx
- * ssssssss ssssssss ssss++++ ++++dU-a
+ * ssssssss ssssssss sss+++++ +++dum-a
+ * xxxxxxxx xxxxxxxx xxxxxxxx xxx-Uxxx
+ * ssssssss ssssssss sss+++++ +++dUm-a
*
* Unallocated (dirty):
- * ssssssss ssssssss ssss++++ ++++D--a
+ * ssssssss ssssssss sss+++++ +++D-m-a
* xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
- * ssssssss ssssssss ssss++++ ++++D--a
+ * ssssssss ssssssss sss+++++ +++D-m-a
*
* Small:
- * pppppppp pppppppp ppppnnnn nnnnd--A
- * pppppppp pppppppp ppppnnnn nnnn---A
- * pppppppp pppppppp ppppnnnn nnnnd--A
+ * pppppppp pppppppp pppnnnnn nnnd---A
+ * pppppppp pppppppp pppnnnnn nnn----A
+ * pppppppp pppppppp pppnnnnn nnnd---A
*
* Large:
- * ssssssss ssssssss ssss++++ ++++D-LA
+ * ssssssss ssssssss sss+++++ +++D--LA
* xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
- * -------- -------- ----++++ ++++D-LA
+ * -------- -------- ---+++++ +++D--LA
*
- * Large (sampled, size <= PAGE):
- * ssssssss ssssssss ssssnnnn nnnnD-LA
+ * Large (sampled, size <= LARGE_MINCLASS):
+ * ssssssss ssssssss sssnnnnn nnnD--LA
+ * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+ * -------- -------- ---+++++ +++D--LA
*
- * Large (not sampled, size == PAGE):
- * ssssssss ssssssss ssss++++ ++++D-LA
+ * Large (not sampled, size == LARGE_MINCLASS):
+ * ssssssss ssssssss sss+++++ +++D--LA
+ * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+ * -------- -------- ---+++++ +++D--LA
*/
size_t bits;
-#define CHUNK_MAP_BININD_SHIFT 4
+#define CHUNK_MAP_ALLOCATED ((size_t)0x01U)
+#define CHUNK_MAP_LARGE ((size_t)0x02U)
+#define CHUNK_MAP_STATE_MASK ((size_t)0x3U)
+
+#define CHUNK_MAP_DECOMMITTED ((size_t)0x04U)
+#define CHUNK_MAP_UNZEROED ((size_t)0x08U)
+#define CHUNK_MAP_DIRTY ((size_t)0x10U)
+#define CHUNK_MAP_FLAGS_MASK ((size_t)0x1cU)
+
+#define CHUNK_MAP_BININD_SHIFT 5
#define BININD_INVALID ((size_t)0xffU)
-/* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */
-#define CHUNK_MAP_BININD_MASK ((size_t)0xff0U)
+#define CHUNK_MAP_BININD_MASK (BININD_INVALID << CHUNK_MAP_BININD_SHIFT)
#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK
-#define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU)
-#define CHUNK_MAP_DIRTY ((size_t)0x8U)
-#define CHUNK_MAP_UNZEROED ((size_t)0x4U)
-#define CHUNK_MAP_LARGE ((size_t)0x2U)
-#define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
-#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED
+
+#define CHUNK_MAP_RUNIND_SHIFT (CHUNK_MAP_BININD_SHIFT + 8)
+#define CHUNK_MAP_SIZE_SHIFT (CHUNK_MAP_RUNIND_SHIFT - LG_PAGE)
+#define CHUNK_MAP_SIZE_MASK \
+ (~(CHUNK_MAP_BININD_MASK | CHUNK_MAP_FLAGS_MASK | CHUNK_MAP_STATE_MASK))
};
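[Editor's note] To make the widened bit layout concrete: for a small-run page, the run page offset now sits above CHUNK_MAP_RUNIND_SHIFT and the bin index occupies the 8 bits above CHUNK_MAP_BININD_SHIFT. An illustrative unpacking helper, not part of this header; it simply mirrors the arena_mapbits_*_get() inlines defined later in the file:

static void
mapbits_unpack_small(size_t mapbits, size_t *runind, szind_t *binind,
    bool *dirty)
{
	*runind = mapbits >> CHUNK_MAP_RUNIND_SHIFT;
	*binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
	*dirty = (mapbits & CHUNK_MAP_DIRTY) != 0;
}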
-typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
-typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
-typedef ql_head(arena_chunk_map_t) arena_chunk_mapelms_t;
-/* Arena chunk header. */
-struct arena_chunk_s {
- /* Arena that owns the chunk. */
- arena_t *arena;
+struct arena_runs_dirty_link_s {
+ qr(arena_runs_dirty_link_t) rd_link;
+};
- /* Linkage for tree of arena chunks that contain dirty runs. */
- rb_node(arena_chunk_t) dirty_link;
+/*
+ * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just
+ * like arena_chunk_map_bits_t. Two separate arrays are stored within each
+ * chunk header in order to improve cache locality.
+ */
+struct arena_chunk_map_misc_s {
+ /*
+ * Linkage for run trees. There are two disjoint uses:
+ *
+ * 1) arena_t's runs_avail tree.
+ * 2) arena_run_t conceptually uses this linkage for in-use non-full
+ * runs, rather than directly embedding linkage.
+ */
+ rb_node(arena_chunk_map_misc_t) rb_link;
- /* Number of dirty pages. */
- size_t ndirty;
+ union {
+ /* Linkage for list of dirty runs. */
+ arena_runs_dirty_link_t rd;
- /* Number of available runs. */
- size_t nruns_avail;
+ /* Profile counters, used for large object runs. */
+ union {
+ void *prof_tctx_pun;
+ prof_tctx_t *prof_tctx;
+ };
+ /* Small region run metadata. */
+ arena_run_t run;
+ };
+};
+typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t;
+typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t;
+#endif /* JEMALLOC_ARENA_STRUCTS_A */
+
+#ifdef JEMALLOC_ARENA_STRUCTS_B
+/* Arena chunk header. */
+struct arena_chunk_s {
/*
- * Number of available run adjacencies that purging could coalesce.
- * Clean and dirty available runs are not coalesced, which causes
- * virtual memory fragmentation. The ratio of
- * (nruns_avail-nruns_adjac):nruns_adjac is used for tracking this
- * fragmentation.
+ * A pointer to the arena that owns the chunk is stored within the node.
+ * This field as a whole is used by chunks_rtree to support both
+ * ivsalloc() and core-based debugging.
*/
- size_t nruns_adjac;
+ extent_node_t node;
/*
* Map of pages within chunk that keeps track of free/large/small. The
@@ -189,19 +182,7 @@ struct arena_chunk_s {
* need to be tracked in the map. This omission saves a header page
* for common chunk sizes (e.g. 4 MiB).
*/
- arena_chunk_map_t map[1]; /* Dynamically sized. */
-};
-typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
-
-struct arena_run_s {
- /* Bin this run is associated with. */
- arena_bin_t *bin;
-
- /* Index of next region that has never been allocated, or nregs. */
- uint32_t nextind;
-
- /* Number of free regions in run. */
- unsigned nfree;
+ arena_chunk_map_bits_t map_bits[1]; /* Dynamically sized. */
};
/*
@@ -212,12 +193,7 @@ struct arena_run_s {
* Each run has the following layout:
*
* /--------------------\
- * | arena_run_t header |
- * | ... |
- * bitmap_offset | bitmap |
- * | ... |
- * ctx0_offset | ctx map |
- * | ... |
+ * | pad? |
* |--------------------|
* | redzone |
* reg0_offset | region 0 |
@@ -259,23 +235,11 @@ struct arena_bin_info_s {
uint32_t nregs;
/*
- * Offset of first bitmap_t element in a run header for this bin's size
- * class.
- */
- uint32_t bitmap_offset;
-
- /*
* Metadata used to manipulate bitmaps for runs associated with this
* bin.
*/
bitmap_info_t bitmap_info;
- /*
- * Offset of first (prof_ctx_t *) in a run header for this bin's size
- * class, or 0 if (config_prof == false || opt_prof == false).
- */
- uint32_t ctx0_offset;
-
/* Offset of first region in a run for this bin's size class. */
uint32_t reg0_offset;
};
@@ -321,8 +285,7 @@ struct arena_s {
/*
* There are three classes of arena operations from a locking
* perspective:
- * 1) Thread asssignment (modifies nthreads) is protected by
- * arenas_lock.
+ * 1) Thread assignment (modifies nthreads) is protected by arenas_lock.
* 2) Bin-related operations are protected by bin locks.
* 3) Chunk- and run-related operations are protected by this mutex.
*/
@@ -331,16 +294,20 @@ struct arena_s {
arena_stats_t stats;
/*
* List of tcaches for extant threads associated with this arena.
- * Stats from these are merged incrementally, and at exit.
+ * Stats from these are merged incrementally, and at exit if
+ * opt_stats_print is enabled.
*/
ql_head(tcache_t) tcache_ql;
uint64_t prof_accumbytes;
- dss_prec_t dss_prec;
+ /*
+ * PRNG state for cache index randomization of large allocation base
+ * pointers.
+ */
+ uint64_t offset_state;
- /* Tree of dirty-page-containing chunks this arena manages. */
- arena_chunk_tree_t chunks_dirty;
+ dss_prec_t dss_prec;
/*
* In order to avoid rapid chunk allocation/deallocation when an arena
@@ -354,7 +321,13 @@ struct arena_s {
*/
arena_chunk_t *spare;
- /* Number of pages in active runs. */
+ /* Minimum ratio (log base 2) of nactive:ndirty. */
+ ssize_t lg_dirty_mult;
+
+ /* True if a thread is currently executing arena_purge(). */
+ bool purging;
+
+ /* Number of pages in active runs and huge regions. */
size_t nactive;
/*
@@ -366,44 +339,116 @@ struct arena_s {
size_t ndirty;
/*
- * Approximate number of pages being purged. It is possible for
- * multiple threads to purge dirty pages concurrently, and they use
- * npurgatory to indicate the total number of pages all threads are
- * attempting to purge.
+ * Size/address-ordered tree of this arena's available runs. The tree
+ * is used for first-best-fit run allocation.
*/
- size_t npurgatory;
+ arena_avail_tree_t runs_avail;
/*
- * Size/address-ordered trees of this arena's available runs. The trees
- * are used for first-best-fit run allocation.
+ * Unused dirty memory this arena manages. Dirty memory is conceptually
+ * tracked as an arbitrarily interleaved LRU of dirty runs and cached
+ * chunks, but the list linkage is actually semi-duplicated in order to
+ * avoid extra arena_chunk_map_misc_t space overhead.
+ *
+ * LRU-----------------------------------------------------------MRU
+ *
+ * /-- arena ---\
+ * | |
+ * | |
+ * |------------| /- chunk -\
+ * ...->|chunks_cache|<--------------------------->| /----\ |<--...
+ * |------------| | |node| |
+ * | | | | | |
+ * | | /- run -\ /- run -\ | | | |
+ * | | | | | | | | | |
+ * | | | | | | | | | |
+ * |------------| |-------| |-------| | |----| |
+ * ...->|runs_dirty |<-->|rd |<-->|rd |<---->|rd |<----...
+ * |------------| |-------| |-------| | |----| |
+ * | | | | | | | | | |
+ * | | | | | | | \----/ |
+ * | | \-------/ \-------/ | |
+ * | | | |
+ * | | | |
+ * \------------/ \---------/
*/
- arena_avail_tree_t runs_avail;
+ arena_runs_dirty_link_t runs_dirty;
+ extent_node_t chunks_cache;
+
+ /* Extant huge allocations. */
+ ql_head(extent_node_t) huge;
+ /* Synchronizes all huge allocation/update/deallocation. */
+ malloc_mutex_t huge_mtx;
+
+ /*
+ * Trees of chunks that were previously allocated (trees differ only in
+ * node ordering). These are used when allocating chunks, in an attempt
+ * to re-use address space. Depending on function, different tree
+ * orderings are needed, which is why there are two trees with the same
+ * contents.
+ */
+ extent_tree_t chunks_szad_cached;
+ extent_tree_t chunks_ad_cached;
+ extent_tree_t chunks_szad_retained;
+ extent_tree_t chunks_ad_retained;
+
+ malloc_mutex_t chunks_mtx;
+ /* Cache of nodes that were allocated via base_alloc(). */
+ ql_head(extent_node_t) node_cache;
+ malloc_mutex_t node_cache_mtx;
+
+ /* User-configurable chunk hook functions. */
+ chunk_hooks_t chunk_hooks;
/* bins is used to store trees of free regions. */
arena_bin_t bins[NBINS];
};
+#endif /* JEMALLOC_ARENA_STRUCTS_B */
#endif /* JEMALLOC_H_STRUCTS */
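[Editor's note] The runs_dirty/chunks_cache ring shown in the diagram above is intrusive: only an arena_runs_dirty_link_t is linked into the list, and the enclosing arena_chunk_map_misc_t is recovered with offsetof(), exactly as the arena_rd_to_miscelm() inline further down does. A self-contained sketch of the idiom:

#include <stddef.h>	/* offsetof */

static arena_chunk_map_misc_t *
rd_owner(arena_runs_dirty_link_t *rd)
{
	/* Back up from the embedded link to its containing element. */
	return ((arena_chunk_map_misc_t *)((uintptr_t)rd -
	    offsetof(arena_chunk_map_misc_t, rd)));
}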
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-extern ssize_t opt_lg_dirty_mult;
-/*
- * small_size2bin is a compact lookup table that rounds request sizes up to
- * size classes. In order to reduce cache footprint, the table is compressed,
- * and all accesses are via the SMALL_SIZE2BIN macro.
- */
-extern uint8_t const small_size2bin[];
-#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN])
+static const size_t large_pad =
+#ifdef JEMALLOC_CACHE_OBLIVIOUS
+ PAGE
+#else
+ 0
+#endif
+ ;
-extern arena_bin_info_t arena_bin_info[NBINS];
+extern ssize_t opt_lg_dirty_mult;
-/* Number of large size classes. */
-#define nlclasses (chunk_npages - map_bias)
+extern arena_bin_info_t arena_bin_info[NBINS];
+extern size_t map_bias; /* Number of arena chunk header pages. */
+extern size_t map_misc_offset;
+extern size_t arena_maxrun; /* Max run size for arenas. */
+extern size_t large_maxclass; /* Max large size class. */
+extern unsigned nlclasses; /* Number of large size classes. */
+extern unsigned nhclasses; /* Number of huge size classes. */
+
+void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node,
+ bool cache);
+void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node,
+ bool cache);
+extent_node_t *arena_node_alloc(arena_t *arena);
+void arena_node_dalloc(arena_t *arena, extent_node_t *node);
+void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment,
+ bool *zero);
+void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize);
+void arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk,
+ size_t oldsize, size_t usize);
+void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk,
+ size_t oldsize, size_t usize);
+bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk,
+ size_t oldsize, size_t usize, bool *zero);
+ssize_t arena_lg_dirty_mult_get(arena_t *arena);
+bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult);
+void arena_maybe_purge(arena_t *arena);
void arena_purge_all(arena_t *arena);
void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
- size_t binind, uint64_t prof_accumbytes);
+ szind_t binind, uint64_t prof_accumbytes);
void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
bool zero);
#ifdef JEMALLOC_JET
@@ -418,19 +463,22 @@ void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
void arena_quarantine_junk_small(void *ptr, size_t usize);
void *arena_malloc_small(arena_t *arena, size_t size, bool zero);
void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
-void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero);
+void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize,
+ size_t alignment, bool zero, tcache_t *tcache);
void arena_prof_promoted(const void *ptr, size_t size);
-void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- arena_chunk_map_t *mapelm);
+void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk,
+ void *ptr, arena_chunk_map_bits_t *bitselm);
void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- size_t pageind, arena_chunk_map_t *mapelm);
+ size_t pageind, arena_chunk_map_bits_t *bitselm);
void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
size_t pageind);
#ifdef JEMALLOC_JET
typedef void (arena_dalloc_junk_large_t)(void *, size_t);
extern arena_dalloc_junk_large_t *arena_dalloc_junk_large;
+#else
+void arena_dalloc_junk_large(void *ptr, size_t usize);
#endif
-void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk,
+void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk,
void *ptr);
void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#ifdef JEMALLOC_JET
@@ -439,16 +487,18 @@ extern arena_ralloc_junk_large_t *arena_ralloc_junk_large;
#endif
bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
size_t extra, bool zero);
-void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size,
- size_t extra, size_t alignment, bool zero, bool try_tcache_alloc,
- bool try_tcache_dalloc);
+void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
+ size_t size, size_t alignment, bool zero, tcache_t *tcache);
dss_prec_t arena_dss_prec_get(arena_t *arena);
-void arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
-void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive,
- size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
- malloc_large_stats_t *lstats);
-bool arena_new(arena_t *arena, unsigned ind);
-void arena_boot(void);
+bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
+ssize_t arena_lg_dirty_mult_default_get(void);
+bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult);
+void arena_stats_merge(arena_t *arena, const char **dss,
+ ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty,
+ arena_stats_t *astats, malloc_bin_stats_t *bstats,
+ malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats);
+arena_t *arena_new(unsigned ind);
+bool arena_boot(void);
void arena_prefork(arena_t *arena);
void arena_postfork_parent(arena_t *arena);
void arena_postfork_child(arena_t *arena);
@@ -458,64 +508,138 @@ void arena_postfork_child(arena_t *arena);
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
-arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind);
+arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk,
+ size_t pageind);
+arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk,
+ size_t pageind);
+size_t arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm);
+void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm);
+arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd);
+arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run);
size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
size_t arena_mapbitsp_read(size_t *mapbitsp);
size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind);
+size_t arena_mapbits_size_decode(size_t mapbits);
size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk,
size_t pageind);
size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind);
size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind);
-size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind);
+szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind);
size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind);
size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind);
+size_t arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind);
size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind);
size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind);
void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits);
+size_t arena_mapbits_size_encode(size_t size);
void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind,
size_t size, size_t flags);
void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
size_t size);
+void arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind,
+ size_t flags);
void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind,
size_t size, size_t flags);
void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
- size_t binind);
+ szind_t binind);
void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind,
- size_t runind, size_t binind, size_t flags);
-void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
- size_t unzeroed);
+ size_t runind, szind_t binind, size_t flags);
+void arena_metadata_allocated_add(arena_t *arena, size_t size);
+void arena_metadata_allocated_sub(arena_t *arena, size_t size);
+size_t arena_metadata_allocated_get(arena_t *arena);
bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes);
bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes);
bool arena_prof_accum(arena_t *arena, uint64_t accumbytes);
-size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
-size_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
+szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
+szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
const void *ptr);
-prof_ctx_t *arena_prof_ctx_get(const void *ptr);
-void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
-void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
+prof_tctx_t *arena_prof_tctx_get(const void *ptr);
+void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
+void arena_prof_tctx_reset(const void *ptr, size_t usize,
+ const void *old_ptr, prof_tctx_t *old_tctx);
+void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
+ tcache_t *tcache);
+arena_t *arena_aalloc(const void *ptr);
size_t arena_salloc(const void *ptr, bool demote);
-void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- bool try_tcache);
+void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
# ifdef JEMALLOC_ARENA_INLINE_A
-JEMALLOC_ALWAYS_INLINE arena_chunk_map_t *
-arena_mapp_get(arena_chunk_t *chunk, size_t pageind)
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t *
+arena_bitselm_get(arena_chunk_t *chunk, size_t pageind)
{
assert(pageind >= map_bias);
assert(pageind < chunk_npages);
- return (&chunk->map[pageind-map_bias]);
+ return (&chunk->map_bits[pageind-map_bias]);
+}
+
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
+arena_miscelm_get(arena_chunk_t *chunk, size_t pageind)
+{
+
+ assert(pageind >= map_bias);
+ assert(pageind < chunk_npages);
+
+ return ((arena_chunk_map_misc_t *)((uintptr_t)chunk +
+ (uintptr_t)map_misc_offset) + pageind-map_bias);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm)
+{
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
+ size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk +
+ map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias;
+
+ assert(pageind >= map_bias);
+ assert(pageind < chunk_npages);
+
+ return (pageind);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm)
+{
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
+ size_t pageind = arena_miscelm_to_pageind(miscelm);
+
+ return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE)));
+}
+
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
+arena_rd_to_miscelm(arena_runs_dirty_link_t *rd)
+{
+ arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t
+ *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd));
+
+ assert(arena_miscelm_to_pageind(miscelm) >= map_bias);
+ assert(arena_miscelm_to_pageind(miscelm) < chunk_npages);
+
+ return (miscelm);
+}
+
+JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t *
+arena_run_to_miscelm(arena_run_t *run)
+{
+ arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t
+ *)((uintptr_t)run - offsetof(arena_chunk_map_misc_t, run));
+
+ assert(arena_miscelm_to_pageind(miscelm) >= map_bias);
+ assert(arena_miscelm_to_pageind(miscelm) < chunk_npages);
+
+ return (miscelm);
}
JEMALLOC_ALWAYS_INLINE size_t *
arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind)
{
- return (&arena_mapp_get(chunk, pageind)->bits);
+ return (&arena_bitselm_get(chunk, pageind)->bits);
}
JEMALLOC_ALWAYS_INLINE size_t
@@ -533,13 +657,29 @@ arena_mapbits_get(arena_chunk_t *chunk, size_t pageind)
}
JEMALLOC_ALWAYS_INLINE size_t
+arena_mapbits_size_decode(size_t mapbits)
+{
+ size_t size;
+
+#if CHUNK_MAP_SIZE_SHIFT > 0
+ size = (mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT;
+#elif CHUNK_MAP_SIZE_SHIFT == 0
+ size = mapbits & CHUNK_MAP_SIZE_MASK;
+#else
+ size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT;
+#endif
+
+ return (size);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind)
{
size_t mapbits;
mapbits = arena_mapbits_get(chunk, pageind);
assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
- return (mapbits & ~PAGE_MASK);
+ return (arena_mapbits_size_decode(mapbits));
}
JEMALLOC_ALWAYS_INLINE size_t
@@ -550,7 +690,7 @@ arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind)
mapbits = arena_mapbits_get(chunk, pageind);
assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
(CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED));
- return (mapbits & ~PAGE_MASK);
+ return (arena_mapbits_size_decode(mapbits));
}
JEMALLOC_ALWAYS_INLINE size_t
@@ -561,14 +701,14 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind)
mapbits = arena_mapbits_get(chunk, pageind);
assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
CHUNK_MAP_ALLOCATED);
- return (mapbits >> LG_PAGE);
+ return (mapbits >> CHUNK_MAP_RUNIND_SHIFT);
}
-JEMALLOC_ALWAYS_INLINE size_t
+JEMALLOC_ALWAYS_INLINE szind_t
arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind)
{
size_t mapbits;
- size_t binind;
+ szind_t binind;
mapbits = arena_mapbits_get(chunk, pageind);
binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
@@ -582,6 +722,8 @@ arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind)
size_t mapbits;
mapbits = arena_mapbits_get(chunk, pageind);
+ assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits &
+ (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
return (mapbits & CHUNK_MAP_DIRTY);
}
@@ -591,10 +733,23 @@ arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind)
size_t mapbits;
mapbits = arena_mapbits_get(chunk, pageind);
+ assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits &
+ (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
return (mapbits & CHUNK_MAP_UNZEROED);
}
JEMALLOC_ALWAYS_INLINE size_t
+arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind)
+{
+ size_t mapbits;
+
+ mapbits = arena_mapbits_get(chunk, pageind);
+ assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits &
+ (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
+ return (mapbits & CHUNK_MAP_DECOMMITTED);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
{
size_t mapbits;
@@ -619,6 +774,23 @@ arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits)
*mapbitsp = mapbits;
}
+JEMALLOC_ALWAYS_INLINE size_t
+arena_mapbits_size_encode(size_t size)
+{
+ size_t mapbits;
+
+#if CHUNK_MAP_SIZE_SHIFT > 0
+ mapbits = size << CHUNK_MAP_SIZE_SHIFT;
+#elif CHUNK_MAP_SIZE_SHIFT == 0
+ mapbits = size;
+#else
+ mapbits = size >> -CHUNK_MAP_SIZE_SHIFT;
+#endif
+
+ assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0);
+ return (mapbits);
+}
+
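[Editor's note] Worked example for the encode/decode pair above, assuming 4 KiB pages (LG_PAGE == 12), which gives CHUNK_MAP_RUNIND_SHIFT == 13 and CHUNK_MAP_SIZE_SHIFT == 1; the asserts are illustrative, not part of the header:

	size_t size = (size_t)2 << LG_PAGE;			/* 0x2000, a two-page run */
	size_t mapbits = arena_mapbits_size_encode(size);	/* 0x4000 */
	assert(arena_mapbits_size_decode(mapbits) == size);
	/* The low 13 bits stay free for the state/flag/binind fields. */
	assert((mapbits & (CHUNK_MAP_STATE_MASK | CHUNK_MAP_FLAGS_MASK |
	    CHUNK_MAP_BININD_MASK)) == 0);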
JEMALLOC_ALWAYS_INLINE void
arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
size_t flags)
@@ -626,9 +798,11 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
assert((size & PAGE_MASK) == 0);
- assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0);
- assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags);
- arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags);
+ assert((flags & CHUNK_MAP_FLAGS_MASK) == flags);
+ assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags &
+ (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
+ arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) |
+ CHUNK_MAP_BININD_INVALID | flags);
}
JEMALLOC_ALWAYS_INLINE void
@@ -640,7 +814,17 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
assert((size & PAGE_MASK) == 0);
assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
- arena_mapbitsp_write(mapbitsp, size | (mapbits & PAGE_MASK));
+ arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) |
+ (mapbits & ~CHUNK_MAP_SIZE_MASK));
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags)
+{
+ size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
+
+ assert((flags & CHUNK_MAP_UNZEROED) == flags);
+ arena_mapbitsp_write(mapbitsp, flags);
}
JEMALLOC_ALWAYS_INLINE void
@@ -648,54 +832,62 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
size_t flags)
{
size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
- size_t mapbits = arena_mapbitsp_read(mapbitsp);
- size_t unzeroed;
assert((size & PAGE_MASK) == 0);
- assert((flags & CHUNK_MAP_DIRTY) == flags);
- unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
- arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags
- | unzeroed | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED);
+ assert((flags & CHUNK_MAP_FLAGS_MASK) == flags);
+ assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags &
+ (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0);
+ arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) |
+ CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE |
+ CHUNK_MAP_ALLOCATED);
}
JEMALLOC_ALWAYS_INLINE void
arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
- size_t binind)
+ szind_t binind)
{
size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
size_t mapbits = arena_mapbitsp_read(mapbitsp);
assert(binind <= BININD_INVALID);
- assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE);
+ assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS +
+ large_pad);
arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) |
(binind << CHUNK_MAP_BININD_SHIFT));
}
JEMALLOC_ALWAYS_INLINE void
arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind,
- size_t binind, size_t flags)
+ szind_t binind, size_t flags)
{
size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
- size_t mapbits = arena_mapbitsp_read(mapbitsp);
- size_t unzeroed;
assert(binind < BININD_INVALID);
assert(pageind - runind >= map_bias);
- assert((flags & CHUNK_MAP_DIRTY) == flags);
- unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
- arena_mapbitsp_write(mapbitsp, (runind << LG_PAGE) | (binind <<
- CHUNK_MAP_BININD_SHIFT) | flags | unzeroed | CHUNK_MAP_ALLOCATED);
+ assert((flags & CHUNK_MAP_UNZEROED) == flags);
+ arena_mapbitsp_write(mapbitsp, (runind << CHUNK_MAP_RUNIND_SHIFT) |
+ (binind << CHUNK_MAP_BININD_SHIFT) | flags | CHUNK_MAP_ALLOCATED);
}
-JEMALLOC_ALWAYS_INLINE void
-arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
- size_t unzeroed)
+JEMALLOC_INLINE void
+arena_metadata_allocated_add(arena_t *arena, size_t size)
+{
+
+ atomic_add_z(&arena->stats.metadata_allocated, size);
+}
+
+JEMALLOC_INLINE void
+arena_metadata_allocated_sub(arena_t *arena, size_t size)
{
- size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
- size_t mapbits = arena_mapbitsp_read(mapbitsp);
- arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_UNZEROED) |
- unzeroed);
+ atomic_sub_z(&arena->stats.metadata_allocated, size);
+}
+
+JEMALLOC_INLINE size_t
+arena_metadata_allocated_get(arena_t *arena)
+{
+
+ return (atomic_read_z(&arena->stats.metadata_allocated));
}
JEMALLOC_INLINE bool
@@ -719,7 +911,7 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes)
cassert(config_prof);
- if (prof_interval == 0)
+ if (likely(prof_interval == 0))
return (false);
return (arena_prof_accum_impl(arena, accumbytes));
}
@@ -730,7 +922,7 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes)
cassert(config_prof);
- if (prof_interval == 0)
+ if (likely(prof_interval == 0))
return (false);
{
@@ -743,10 +935,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes)
}
}
-JEMALLOC_ALWAYS_INLINE size_t
+JEMALLOC_ALWAYS_INLINE szind_t
arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
{
- size_t binind;
+ szind_t binind;
binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
@@ -755,27 +947,34 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
arena_t *arena;
size_t pageind;
size_t actual_mapbits;
+ size_t rpages_ind;
arena_run_t *run;
arena_bin_t *bin;
- size_t actual_binind;
+ szind_t run_binind, actual_binind;
arena_bin_info_t *bin_info;
+ arena_chunk_map_misc_t *miscelm;
+ void *rpages;
assert(binind != BININD_INVALID);
assert(binind < NBINS);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- arena = chunk->arena;
+ arena = extent_node_arena_get(&chunk->node);
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
actual_mapbits = arena_mapbits_get(chunk, pageind);
assert(mapbits == actual_mapbits);
assert(arena_mapbits_large_get(chunk, pageind) == 0);
assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
- run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
- (actual_mapbits >> LG_PAGE)) << LG_PAGE));
- bin = run->bin;
+ rpages_ind = pageind - arena_mapbits_small_runind_get(chunk,
+ pageind);
+ miscelm = arena_miscelm_get(chunk, rpages_ind);
+ run = &miscelm->run;
+ run_binind = run->binind;
+ bin = &arena->bins[run_binind];
actual_binind = bin - arena->bins;
- assert(binind == actual_binind);
+ assert(run_binind == actual_binind);
bin_info = &arena_bin_info[actual_binind];
- assert(((uintptr_t)ptr - ((uintptr_t)run +
+ rpages = arena_miscelm_to_rpages(miscelm);
+ assert(((uintptr_t)ptr - ((uintptr_t)rpages +
(uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval
== 0);
}
@@ -785,10 +984,10 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
# endif /* JEMALLOC_ARENA_INLINE_A */
# ifdef JEMALLOC_ARENA_INLINE_B
-JEMALLOC_INLINE size_t
+JEMALLOC_INLINE szind_t
arena_bin_index(arena_t *arena, arena_bin_t *bin)
{
- size_t binind = bin - arena->bins;
+ szind_t binind = bin - arena->bins;
assert(binind < NBINS);
return (binind);
}
@@ -798,24 +997,26 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
{
unsigned shift, diff, regind;
size_t interval;
+ arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run);
+ void *rpages = arena_miscelm_to_rpages(miscelm);
/*
* Freeing a pointer lower than region zero can cause assertion
* failure.
*/
- assert((uintptr_t)ptr >= (uintptr_t)run +
+ assert((uintptr_t)ptr >= (uintptr_t)rpages +
(uintptr_t)bin_info->reg0_offset);
/*
* Avoid doing division with a variable divisor if possible. Using
* actual division here can reduce allocator throughput by over 20%!
*/
- diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
+ diff = (unsigned)((uintptr_t)ptr - (uintptr_t)rpages -
bin_info->reg0_offset);
/* Rescale (factor powers of 2 out of the numerator and denominator). */
interval = bin_info->reg_interval;
- shift = ffs(interval) - 1;
+ shift = jemalloc_ffs(interval) - 1;
diff >>= shift;
interval >>= shift;
@@ -850,8 +1051,8 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
};
- if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) +
- 2)) {
+ if (likely(interval <= ((sizeof(interval_invs) /
+ sizeof(unsigned)) + 2))) {
regind = (diff * interval_invs[interval - 3]) >>
SIZE_INV_SHIFT;
} else
@@ -865,113 +1066,138 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
return (regind);
}
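[Editor's note] The comment above explains that arena_run_regind() avoids dividing by a variable interval by multiplying with a precomputed reciprocal and shifting. A small numeric illustration of the trick (constants chosen for illustration only, not taken from the table above):

	/* diff / 3 without a divide: multiply by (2^k / 3) + 1, shift by k. */
	unsigned k = 23;
	unsigned inv = (1U << k) / 3 + 1;	/* 2796203 */
	unsigned diff = 6;
	unsigned regind = (diff * inv) >> k;	/* 2 == 6 / 3 */

The rounding error introduced by the +1 never reaches the integer part for the small offsets that occur within a run, so the exact quotient comes out.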
-JEMALLOC_INLINE prof_ctx_t *
-arena_prof_ctx_get(const void *ptr)
+JEMALLOC_INLINE prof_tctx_t *
+arena_prof_tctx_get(const void *ptr)
{
- prof_ctx_t *ret;
+ prof_tctx_t *ret;
arena_chunk_t *chunk;
- size_t pageind, mapbits;
cassert(config_prof);
assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- mapbits = arena_mapbits_get(chunk, pageind);
- assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
- if ((mapbits & CHUNK_MAP_LARGE) == 0) {
- if (prof_promote)
- ret = (prof_ctx_t *)(uintptr_t)1U;
+ if (likely(chunk != ptr)) {
+ size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+ size_t mapbits = arena_mapbits_get(chunk, pageind);
+ assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+ if (likely((mapbits & CHUNK_MAP_LARGE) == 0))
+ ret = (prof_tctx_t *)(uintptr_t)1U;
else {
- arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
- (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
- LG_PAGE));
- size_t binind = arena_ptr_small_binind_get(ptr,
- mapbits);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
- unsigned regind;
-
- regind = arena_run_regind(run, bin_info, ptr);
- ret = *(prof_ctx_t **)((uintptr_t)run +
- bin_info->ctx0_offset + (regind *
- sizeof(prof_ctx_t *)));
+ arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk,
+ pageind);
+ ret = atomic_read_p(&elm->prof_tctx_pun);
}
} else
- ret = arena_mapp_get(chunk, pageind)->prof_ctx;
+ ret = huge_prof_tctx_get(ptr);
return (ret);
}
JEMALLOC_INLINE void
-arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
+arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
{
arena_chunk_t *chunk;
- size_t pageind;
cassert(config_prof);
assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
-
- if (usize > SMALL_MAXCLASS || (prof_promote &&
- ((uintptr_t)ctx != (uintptr_t)1U || arena_mapbits_large_get(chunk,
- pageind) != 0))) {
- assert(arena_mapbits_large_get(chunk, pageind) != 0);
- arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
- } else {
- assert(arena_mapbits_large_get(chunk, pageind) == 0);
- if (prof_promote == false) {
- size_t mapbits = arena_mapbits_get(chunk, pageind);
- arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
- (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
- LG_PAGE));
- size_t binind;
- arena_bin_info_t *bin_info;
- unsigned regind;
-
- binind = arena_ptr_small_binind_get(ptr, mapbits);
- bin_info = &arena_bin_info[binind];
- regind = arena_run_regind(run, bin_info, ptr);
-
- *((prof_ctx_t **)((uintptr_t)run +
- bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t
- *)))) = ctx;
+ if (likely(chunk != ptr)) {
+ size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+
+ assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
+
+ if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx >
+ (uintptr_t)1U)) {
+ arena_chunk_map_misc_t *elm;
+
+ assert(arena_mapbits_large_get(chunk, pageind) != 0);
+
+ elm = arena_miscelm_get(chunk, pageind);
+ atomic_write_p(&elm->prof_tctx_pun, tctx);
+ } else {
+ /*
+ * tctx must always be initialized for large runs.
+ * Assert that the surrounding conditional logic is
+ * equivalent to checking whether ptr refers to a large
+ * run.
+ */
+ assert(arena_mapbits_large_get(chunk, pageind) == 0);
}
+ } else
+ huge_prof_tctx_set(ptr, tctx);
+}
+
+JEMALLOC_INLINE void
+arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+ prof_tctx_t *old_tctx)
+{
+
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr &&
+ (uintptr_t)old_tctx > (uintptr_t)1U))) {
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (likely(chunk != ptr)) {
+ size_t pageind;
+ arena_chunk_map_misc_t *elm;
+
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
+ LG_PAGE;
+ assert(arena_mapbits_allocated_get(chunk, pageind) !=
+ 0);
+ assert(arena_mapbits_large_get(chunk, pageind) != 0);
+
+ elm = arena_miscelm_get(chunk, pageind);
+ atomic_write_p(&elm->prof_tctx_pun,
+ (prof_tctx_t *)(uintptr_t)1U);
+ } else
+ huge_prof_tctx_reset(ptr);
}
}
JEMALLOC_ALWAYS_INLINE void *
-arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache)
+arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
+ tcache_t *tcache)
{
- tcache_t *tcache;
assert(size != 0);
- assert(size <= arena_maxclass);
- if (size <= SMALL_MAXCLASS) {
- if (try_tcache && (tcache = tcache_get(true)) != NULL)
- return (tcache_alloc_small(tcache, size, zero));
- else {
- return (arena_malloc_small(choose_arena(arena), size,
+ arena = arena_choose(tsd, arena);
+ if (unlikely(arena == NULL))
+ return (NULL);
+
+ if (likely(size <= SMALL_MAXCLASS)) {
+ if (likely(tcache != NULL)) {
+ return (tcache_alloc_small(tsd, arena, tcache, size,
zero));
- }
- } else {
+ } else
+ return (arena_malloc_small(arena, size, zero));
+ } else if (likely(size <= large_maxclass)) {
/*
* Initialize tcache after checking size in order to avoid
* infinite recursion during tcache initialization.
*/
- if (try_tcache && size <= tcache_maxclass && (tcache =
- tcache_get(true)) != NULL)
- return (tcache_alloc_large(tcache, size, zero));
- else {
- return (arena_malloc_large(choose_arena(arena), size,
+ if (likely(tcache != NULL) && size <= tcache_maxclass) {
+ return (tcache_alloc_large(tsd, arena, tcache, size,
zero));
- }
- }
+ } else
+ return (arena_malloc_large(arena, size, zero));
+ } else
+ return (huge_malloc(tsd, arena, size, zero, tcache));
+}
+
+JEMALLOC_ALWAYS_INLINE arena_t *
+arena_aalloc(const void *ptr)
+{
+ arena_chunk_t *chunk;
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (likely(chunk != ptr))
+ return (extent_node_arena_get(&chunk->node));
+ else
+ return (huge_aalloc(ptr));
}
/* Return the size of the allocation pointed to by ptr. */
@@ -980,81 +1206,139 @@ arena_salloc(const void *ptr, bool demote)
{
size_t ret;
arena_chunk_t *chunk;
- size_t pageind, binind;
+ size_t pageind;
+ szind_t binind;
assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
- binind = arena_mapbits_binind_get(chunk, pageind);
- if (binind == BININD_INVALID || (config_prof && demote == false &&
- prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) {
- /*
- * Large allocation. In the common case (demote == true), and
- * as this is an inline function, most callers will only end up
- * looking at binind to determine that ptr is a small
- * allocation.
- */
- assert(((uintptr_t)ptr & PAGE_MASK) == 0);
- ret = arena_mapbits_large_size_get(chunk, pageind);
- assert(ret != 0);
- assert(pageind + (ret>>LG_PAGE) <= chunk_npages);
- assert(ret == PAGE || arena_mapbits_large_size_get(chunk,
- pageind+(ret>>LG_PAGE)-1) == 0);
- assert(binind == arena_mapbits_binind_get(chunk,
- pageind+(ret>>LG_PAGE)-1));
- assert(arena_mapbits_dirty_get(chunk, pageind) ==
- arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1));
- } else {
- /*
- * Small allocation (possibly promoted to a large object due to
- * prof_promote).
- */
- assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
- arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
- pageind)) == binind);
- ret = arena_bin_info[binind].reg_size;
- }
+ if (likely(chunk != ptr)) {
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+ assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
+ binind = arena_mapbits_binind_get(chunk, pageind);
+ if (unlikely(binind == BININD_INVALID || (config_prof && !demote
+ && arena_mapbits_large_get(chunk, pageind) != 0))) {
+ /*
+ * Large allocation. In the common case (demote), and
+ * as this is an inline function, most callers will only
+ * end up looking at binind to determine that ptr is a
+ * small allocation.
+ */
+ assert(config_cache_oblivious || ((uintptr_t)ptr &
+ PAGE_MASK) == 0);
+ ret = arena_mapbits_large_size_get(chunk, pageind) -
+ large_pad;
+ assert(ret != 0);
+ assert(pageind + ((ret+large_pad)>>LG_PAGE) <=
+ chunk_npages);
+ assert(arena_mapbits_dirty_get(chunk, pageind) ==
+ arena_mapbits_dirty_get(chunk,
+ pageind+((ret+large_pad)>>LG_PAGE)-1));
+ } else {
+ /*
+ * Small allocation (possibly promoted to a large
+ * object).
+ */
+ assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
+ arena_ptr_small_binind_get(ptr,
+ arena_mapbits_get(chunk, pageind)) == binind);
+ ret = index2size(binind);
+ }
+ } else
+ ret = huge_salloc(ptr);
return (ret);
}
JEMALLOC_ALWAYS_INLINE void
-arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache)
+arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
{
+ arena_chunk_t *chunk;
size_t pageind, mapbits;
- tcache_t *tcache;
- assert(arena != NULL);
- assert(chunk->arena == arena);
assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- mapbits = arena_mapbits_get(chunk, pageind);
- assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
- if ((mapbits & CHUNK_MAP_LARGE) == 0) {
- /* Small allocation. */
- if (try_tcache && (tcache = tcache_get(false)) != NULL) {
- size_t binind;
-
- binind = arena_ptr_small_binind_get(ptr, mapbits);
- tcache_dalloc_small(tcache, ptr, binind);
- } else
- arena_dalloc_small(arena, chunk, ptr, pageind);
- } else {
- size_t size = arena_mapbits_large_size_get(chunk, pageind);
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (likely(chunk != ptr)) {
+ pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+ mapbits = arena_mapbits_get(chunk, pageind);
+ assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
+ if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) {
+ /* Small allocation. */
+ if (likely(tcache != NULL)) {
+ szind_t binind = arena_ptr_small_binind_get(ptr,
+ mapbits);
+ tcache_dalloc_small(tsd, tcache, ptr, binind);
+ } else {
+ arena_dalloc_small(extent_node_arena_get(
+ &chunk->node), chunk, ptr, pageind);
+ }
+ } else {
+ size_t size = arena_mapbits_large_size_get(chunk,
+ pageind);
+
+ assert(config_cache_oblivious || ((uintptr_t)ptr &
+ PAGE_MASK) == 0);
+
+ if (likely(tcache != NULL) && size - large_pad <=
+ tcache_maxclass) {
+ tcache_dalloc_large(tsd, tcache, ptr, size -
+ large_pad);
+ } else {
+ arena_dalloc_large(extent_node_arena_get(
+ &chunk->node), chunk, ptr);
+ }
+ }
+ } else
+ huge_dalloc(tsd, ptr, tcache);
+}
- assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+JEMALLOC_ALWAYS_INLINE void
+arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+{
+ arena_chunk_t *chunk;
- if (try_tcache && size <= tcache_maxclass && (tcache =
- tcache_get(false)) != NULL) {
- tcache_dalloc_large(tcache, ptr, size);
- } else
- arena_dalloc_large(arena, chunk, ptr);
- }
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (likely(chunk != ptr)) {
+ if (config_prof && opt_prof) {
+ size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
+ LG_PAGE;
+ assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
+ if (arena_mapbits_large_get(chunk, pageind) != 0) {
+ /*
+ * Make sure to use promoted size, not request
+ * size.
+ */
+ size = arena_mapbits_large_size_get(chunk,
+ pageind) - large_pad;
+ }
+ }
+ assert(s2u(size) == s2u(arena_salloc(ptr, false)));
+
+ if (likely(size <= SMALL_MAXCLASS)) {
+ /* Small allocation. */
+ if (likely(tcache != NULL)) {
+ szind_t binind = size2index(size);
+ tcache_dalloc_small(tsd, tcache, ptr, binind);
+ } else {
+ size_t pageind = ((uintptr_t)ptr -
+ (uintptr_t)chunk) >> LG_PAGE;
+ arena_dalloc_small(extent_node_arena_get(
+ &chunk->node), chunk, ptr, pageind);
+ }
+ } else {
+ assert(config_cache_oblivious || ((uintptr_t)ptr &
+ PAGE_MASK) == 0);
+
+ if (likely(tcache != NULL) && size <= tcache_maxclass)
+ tcache_dalloc_large(tsd, tcache, ptr, size);
+ else {
+ arena_dalloc_large(extent_node_arena_get(
+ &chunk->node), chunk, ptr);
+ }
+ }
+ } else
+ huge_dalloc(tsd, ptr, tcache);
}
# endif /* JEMALLOC_ARENA_INLINE_B */
#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic.h
index 11a7b47fe..a9aad35d1 100644
--- a/deps/jemalloc/include/jemalloc/internal/atomic.h
+++ b/deps/jemalloc/include/jemalloc/internal/atomic.h
@@ -11,6 +11,7 @@
#define atomic_read_uint64(p) atomic_add_uint64(p, 0)
#define atomic_read_uint32(p) atomic_add_uint32(p, 0)
+#define atomic_read_p(p) atomic_add_p(p, NULL)
#define atomic_read_z(p) atomic_add_z(p, 0)
#define atomic_read_u(p) atomic_add_u(p, 0)
@@ -18,113 +19,244 @@
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
+/*
+ * All arithmetic functions return the arithmetic result of the atomic
+ * operation. Some atomic operation APIs return the value prior to mutation, in
+ * which case the following functions must redundantly compute the result so
+ * that it can be returned. These functions are normally inlined, so the extra
+ * operations can be optimized away if the return values aren't used by the
+ * callers.
+ *
+ * <t> atomic_read_<t>(<t> *p) { return (*p); }
+ * <t> atomic_add_<t>(<t> *p, <t> x) { return (*p + x); }
+ * <t> atomic_sub_<t>(<t> *p, <t> x) { return (*p - x); }
+ * bool atomic_cas_<t>(<t> *p, <t> c, <t> s)
+ * {
+ * if (*p != c)
+ * return (true);
+ * *p = s;
+ * return (false);
+ * }
+ * void atomic_write_<t>(<t> *p, <t> x) { *p = x; }
+ */
+
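[Editor's note] As documented above, atomic_cas_<t>() returns false on success and true on failure, so callers typically retry until the swap lands. A minimal usage sketch (not part of this header; in practice atomic_add_z() covers this particular case in one call):

static void
counter_incr(size_t *counter)
{
	size_t old;

	/* Re-read and retry until no other thread raced the update. */
	do {
		old = atomic_read_z(counter);
	} while (atomic_cas_z(counter, old, old + 1));
}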
#ifndef JEMALLOC_ENABLE_INLINE
uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
+bool atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s);
+void atomic_write_uint64(uint64_t *p, uint64_t x);
uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
+bool atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s);
+void atomic_write_uint32(uint32_t *p, uint32_t x);
+void *atomic_add_p(void **p, void *x);
+void *atomic_sub_p(void **p, void *x);
+bool atomic_cas_p(void **p, void *c, void *s);
+void atomic_write_p(void **p, const void *x);
size_t atomic_add_z(size_t *p, size_t x);
size_t atomic_sub_z(size_t *p, size_t x);
+bool atomic_cas_z(size_t *p, size_t c, size_t s);
+void atomic_write_z(size_t *p, size_t x);
unsigned atomic_add_u(unsigned *p, unsigned x);
unsigned atomic_sub_u(unsigned *p, unsigned x);
+bool atomic_cas_u(unsigned *p, unsigned c, unsigned s);
+void atomic_write_u(unsigned *p, unsigned x);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
/******************************************************************************/
/* 64-bit operations. */
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
-# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
+# if (defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
+ uint64_t t = x;
- return (__sync_add_and_fetch(p, x));
+ asm volatile (
+ "lock; xaddq %0, %1;"
+ : "+r" (t), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+
+ return (t + x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
+ uint64_t t;
- return (__sync_sub_and_fetch(p, x));
+ x = (uint64_t)(-(int64_t)x);
+ t = x;
+ asm volatile (
+ "lock; xaddq %0, %1;"
+ : "+r" (t), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+
+ return (t + x);
}
-#elif (defined(_MSC_VER))
+
+JEMALLOC_INLINE bool
+atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
+{
+ uint8_t success;
+
+ asm volatile (
+ "lock; cmpxchgq %4, %0;"
+ "sete %1;"
+ : "=m" (*p), "=a" (success) /* Outputs. */
+ : "m" (*p), "a" (c), "r" (s) /* Inputs. */
+ : "memory" /* Clobbers. */
+ );
+
+ return (!(bool)success);
+}
+
+JEMALLOC_INLINE void
+atomic_write_uint64(uint64_t *p, uint64_t x)
+{
+
+ asm volatile (
+ "xchgq %1, %0;" /* Lock is implied by xchgq. */
+ : "=m" (*p), "+r" (x) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ : "memory" /* Clobbers. */
+ );
+}
+# elif (defined(JEMALLOC_C11ATOMICS))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
-
- return (InterlockedExchangeAdd64(p, x));
+ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
+ return (atomic_fetch_add(a, x) + x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
+ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
+ return (atomic_fetch_sub(a, x) - x);
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
+{
+ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
+ return (!atomic_compare_exchange_strong(a, &c, s));
+}
- return (InterlockedExchangeAdd64(p, -((int64_t)x)));
+JEMALLOC_INLINE void
+atomic_write_uint64(uint64_t *p, uint64_t x)
+{
+ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
+ atomic_store(a, x);
}
-#elif (defined(JEMALLOC_OSATOMIC))
+# elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
- return (OSAtomicAdd64((int64_t)x, (int64_t *)p));
+ /*
+ * atomic_fetchadd_64() doesn't exist, but we only ever use this
+ * function on LP64 systems, so atomic_fetchadd_long() will do.
+ */
+ assert(sizeof(uint64_t) == sizeof(unsigned long));
+
+ return (atomic_fetchadd_long(p, (unsigned long)x) + x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
- return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
+ assert(sizeof(uint64_t) == sizeof(unsigned long));
+
+ return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x);
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
+{
+
+ assert(sizeof(uint64_t) == sizeof(unsigned long));
+
+ return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s));
+}
+
+JEMALLOC_INLINE void
+atomic_write_uint64(uint64_t *p, uint64_t x)
+{
+
+ assert(sizeof(uint64_t) == sizeof(unsigned long));
+
+ atomic_store_rel_long(p, x);
}
-# elif (defined(__amd64__) || defined(__x86_64__))
+# elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
- asm volatile (
- "lock; xaddq %0, %1;"
- : "+r" (x), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
-
- return (x);
+ return (OSAtomicAdd64((int64_t)x, (int64_t *)p));
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
- x = (uint64_t)(-(int64_t)x);
- asm volatile (
- "lock; xaddq %0, %1;"
- : "+r" (x), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
+ return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
+{
- return (x);
+ return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p));
}
-# elif (defined(JEMALLOC_ATOMIC9))
+
+JEMALLOC_INLINE void
+atomic_write_uint64(uint64_t *p, uint64_t x)
+{
+ uint64_t o;
+
+ /* The documented OSAtomic*() API does not expose an atomic exchange. */
+ do {
+ o = atomic_read_uint64(p);
+ } while (atomic_cas_uint64(p, o, x));
+}
+# elif (defined(_MSC_VER))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
- /*
- * atomic_fetchadd_64() doesn't exist, but we only ever use this
- * function on LP64 systems, so atomic_fetchadd_long() will do.
- */
- assert(sizeof(uint64_t) == sizeof(unsigned long));
-
- return (atomic_fetchadd_long(p, (unsigned long)x) + x);
+ return (InterlockedExchangeAdd64(p, x) + x);
}
JEMALLOC_INLINE uint64_t
atomic_sub_uint64(uint64_t *p, uint64_t x)
{
- assert(sizeof(uint64_t) == sizeof(unsigned long));
+ return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x);
+}
- return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x);
+JEMALLOC_INLINE bool
+atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
+{
+ uint64_t o;
+
+ o = InterlockedCompareExchange64(p, s, c);
+ return (o != c);
}
-# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
+
+JEMALLOC_INLINE void
+atomic_write_uint64(uint64_t *p, uint64_t x)
+{
+
+ InterlockedExchange64(p, x);
+}
+# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \
+ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
@@ -138,6 +270,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x)
return (__sync_sub_and_fetch(p, x));
}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s)
+{
+
+ return (!__sync_bool_compare_and_swap(p, c, s));
+}
+
+JEMALLOC_INLINE void
+atomic_write_uint64(uint64_t *p, uint64_t x)
+{
+
+ __sync_lock_test_and_set(p, x);
+}
# else
# error "Missing implementation for 64-bit atomic operations"
# endif
@@ -145,90 +291,184 @@ atomic_sub_uint64(uint64_t *p, uint64_t x)
/******************************************************************************/
/* 32-bit operations. */
-#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
+#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
+ uint32_t t = x;
- return (__sync_add_and_fetch(p, x));
+ asm volatile (
+ "lock; xaddl %0, %1;"
+ : "+r" (t), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+
+ return (t + x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
+ uint32_t t;
- return (__sync_sub_and_fetch(p, x));
+ x = (uint32_t)(-(int32_t)x);
+ t = x;
+ asm volatile (
+ "lock; xaddl %0, %1;"
+ : "+r" (t), "=m" (*p) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ );
+
+ return (t + x);
}
-#elif (defined(_MSC_VER))
+
+JEMALLOC_INLINE bool
+atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
+{
+ uint8_t success;
+
+ asm volatile (
+ "lock; cmpxchgl %4, %0;"
+ "sete %1;"
+ : "=m" (*p), "=a" (success) /* Outputs. */
+ : "m" (*p), "a" (c), "r" (s) /* Inputs. */
+ : "memory"
+ );
+
+ return (!(bool)success);
+}
+
+JEMALLOC_INLINE void
+atomic_write_uint32(uint32_t *p, uint32_t x)
+{
+
+ asm volatile (
+ "xchgl %1, %0;" /* Lock is implied by xchgl. */
+ : "=m" (*p), "+r" (x) /* Outputs. */
+ : "m" (*p) /* Inputs. */
+ : "memory" /* Clobbers. */
+ );
+}
+# elif (defined(JEMALLOC_C11ATOMICS))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
-
- return (InterlockedExchangeAdd(p, x));
+ volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
+ return (atomic_fetch_add(a, x) + x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
+ volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
+ return (atomic_fetch_sub(a, x) - x);
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
+{
+ volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
+ return (!atomic_compare_exchange_strong(a, &c, s));
+}
- return (InterlockedExchangeAdd(p, -((int32_t)x)));
+JEMALLOC_INLINE void
+atomic_write_uint32(uint32_t *p, uint32_t x)
+{
+ volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
+ atomic_store(a, x);
}
-#elif (defined(JEMALLOC_OSATOMIC))
+#elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
- return (OSAtomicAdd32((int32_t)x, (int32_t *)p));
+ return (atomic_fetchadd_32(p, x) + x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
- return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
+ return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x);
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
+{
+
+ return (!atomic_cmpset_32(p, c, s));
+}
+
+JEMALLOC_INLINE void
+atomic_write_uint32(uint32_t *p, uint32_t x)
+{
+
+ atomic_store_rel_32(p, x);
}
-#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
+#elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
- asm volatile (
- "lock; xaddl %0, %1;"
- : "+r" (x), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
-
- return (x);
+ return (OSAtomicAdd32((int32_t)x, (int32_t *)p));
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
- x = (uint32_t)(-(int32_t)x);
- asm volatile (
- "lock; xaddl %0, %1;"
- : "+r" (x), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
+ return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
+}
- return (x);
+JEMALLOC_INLINE bool
+atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
+{
+
+ return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p));
}
-#elif (defined(JEMALLOC_ATOMIC9))
+
+JEMALLOC_INLINE void
+atomic_write_uint32(uint32_t *p, uint32_t x)
+{
+ uint32_t o;
+
+ /* The documented OSAtomic*() API does not expose an atomic exchange. */
+ do {
+ o = atomic_read_uint32(p);
+ } while (atomic_cas_uint32(p, o, x));
+}
+#elif (defined(_MSC_VER))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
- return (atomic_fetchadd_32(p, x) + x);
+ return (InterlockedExchangeAdd(p, x) + x);
}
JEMALLOC_INLINE uint32_t
atomic_sub_uint32(uint32_t *p, uint32_t x)
{
- return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x);
+ return (InterlockedExchangeAdd(p, -((int32_t)x)) - x);
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
+{
+ uint32_t o;
+
+ o = InterlockedCompareExchange(p, s, c);
+ return (o != c);
}
-#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
+
+JEMALLOC_INLINE void
+atomic_write_uint32(uint32_t *p, uint32_t x)
+{
+
+ InterlockedExchange(p, x);
+}
+#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \
+ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
JEMALLOC_INLINE uint32_t
atomic_add_uint32(uint32_t *p, uint32_t x)
{
@@ -242,11 +482,73 @@ atomic_sub_uint32(uint32_t *p, uint32_t x)
return (__sync_sub_and_fetch(p, x));
}
+
+JEMALLOC_INLINE bool
+atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s)
+{
+
+ return (!__sync_bool_compare_and_swap(p, c, s));
+}
+
+JEMALLOC_INLINE void
+atomic_write_uint32(uint32_t *p, uint32_t x)
+{
+
+ __sync_lock_test_and_set(p, x);
+}
#else
# error "Missing implementation for 32-bit atomic operations"
#endif
/******************************************************************************/
+/* Pointer operations. */
+JEMALLOC_INLINE void *
+atomic_add_p(void **p, void *x)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+ return ((void *)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
+#elif (LG_SIZEOF_PTR == 2)
+ return ((void *)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
+#endif
+}
+
+JEMALLOC_INLINE void *
+atomic_sub_p(void **p, void *x)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+ return ((void *)atomic_add_uint64((uint64_t *)p,
+ (uint64_t)-((int64_t)x)));
+#elif (LG_SIZEOF_PTR == 2)
+ return ((void *)atomic_add_uint32((uint32_t *)p,
+ (uint32_t)-((int32_t)x)));
+#endif
+}
+
+JEMALLOC_INLINE bool
+atomic_cas_p(void **p, void *c, void *s)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+ return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s));
+#elif (LG_SIZEOF_PTR == 2)
+ return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s));
+#endif
+}
+
+JEMALLOC_INLINE void
+atomic_write_p(void **p, const void *x)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+ atomic_write_uint64((uint64_t *)p, (uint64_t)x);
+#elif (LG_SIZEOF_PTR == 2)
+ atomic_write_uint32((uint32_t *)p, (uint32_t)x);
+#endif
+}
+
+/******************************************************************************/
/* size_t operations. */
JEMALLOC_INLINE size_t
atomic_add_z(size_t *p, size_t x)
@@ -272,6 +574,28 @@ atomic_sub_z(size_t *p, size_t x)
#endif
}
+JEMALLOC_INLINE bool
+atomic_cas_z(size_t *p, size_t c, size_t s)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+ return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s));
+#elif (LG_SIZEOF_PTR == 2)
+ return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s));
+#endif
+}
+
+JEMALLOC_INLINE void
+atomic_write_z(size_t *p, size_t x)
+{
+
+#if (LG_SIZEOF_PTR == 3)
+ atomic_write_uint64((uint64_t *)p, (uint64_t)x);
+#elif (LG_SIZEOF_PTR == 2)
+ atomic_write_uint32((uint32_t *)p, (uint32_t)x);
+#endif
+}
+
/******************************************************************************/
/* unsigned operations. */
JEMALLOC_INLINE unsigned
@@ -297,6 +621,29 @@ atomic_sub_u(unsigned *p, unsigned x)
(uint32_t)-((int32_t)x)));
#endif
}
+
+JEMALLOC_INLINE bool
+atomic_cas_u(unsigned *p, unsigned c, unsigned s)
+{
+
+#if (LG_SIZEOF_INT == 3)
+ return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s));
+#elif (LG_SIZEOF_INT == 2)
+ return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s));
+#endif
+}
+
+JEMALLOC_INLINE void
+atomic_write_u(unsigned *p, unsigned x)
+{
+
+#if (LG_SIZEOF_INT == 3)
+ atomic_write_uint64((uint64_t *)p, (uint64_t)x);
+#elif (LG_SIZEOF_INT == 2)
+ atomic_write_uint32((uint32_t *)p, (uint32_t)x);
+#endif
+}
+
/******************************************************************************/
#endif
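
One subtlety worth calling out in the new API: per the comment block at the top of this file, atomic_cas_*() returns false on success and true on failure, the inverse of a plain "did it swap" reading. A minimal standalone sketch (not part of the patch; the helper name is invented) of the usual retry-loop pattern built only on atomic_read_uint64() and atomic_cas_uint64():

    /* Hypothetical helper: atomically raise *p to at least x. */
    static void
    atomic_max_uint64(uint64_t *p, uint64_t x)
    {
        uint64_t cur;

        do {
            cur = atomic_read_uint64(p);
            if (cur >= x)
                return;    /* Current value is already large enough. */
            /* atomic_cas_uint64() returns false when the swap succeeds. */
        } while (atomic_cas_uint64(p, cur, x));
    }
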
diff --git a/deps/jemalloc/include/jemalloc/internal/base.h b/deps/jemalloc/include/jemalloc/internal/base.h
index 9cf75ffb0..39e46ee44 100644
--- a/deps/jemalloc/include/jemalloc/internal/base.h
+++ b/deps/jemalloc/include/jemalloc/internal/base.h
@@ -10,9 +10,7 @@
#ifdef JEMALLOC_H_EXTERNS
void *base_alloc(size_t size);
-void *base_calloc(size_t number, size_t size);
-extent_node_t *base_node_alloc(void);
-void base_node_dealloc(extent_node_t *node);
+void base_stats_get(size_t *allocated, size_t *resident, size_t *mapped);
bool base_boot(void);
void base_prefork(void);
void base_postfork_parent(void);
diff --git a/deps/jemalloc/include/jemalloc/internal/bitmap.h b/deps/jemalloc/include/jemalloc/internal/bitmap.h
index 605ebac58..fcc6005c7 100644
--- a/deps/jemalloc/include/jemalloc/internal/bitmap.h
+++ b/deps/jemalloc/include/jemalloc/internal/bitmap.h
@@ -3,6 +3,7 @@
/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS
+#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS)
typedef struct bitmap_level_s bitmap_level_t;
typedef struct bitmap_info_s bitmap_info_t;
@@ -14,6 +15,51 @@ typedef unsigned long bitmap_t;
#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS)
#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1)
+/* Number of groups required to store a given number of bits. */
+#define BITMAP_BITS2GROUPS(nbits) \
+ ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS)
+
+/*
+ * Number of groups required at a particular level for a given number of bits.
+ */
+#define BITMAP_GROUPS_L0(nbits) \
+ BITMAP_BITS2GROUPS(nbits)
+#define BITMAP_GROUPS_L1(nbits) \
+ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits))
+#define BITMAP_GROUPS_L2(nbits) \
+ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits))))
+#define BITMAP_GROUPS_L3(nbits) \
+ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \
+ BITMAP_BITS2GROUPS((nbits)))))
+
+/*
+ * Assuming the number of levels, number of groups required for a given number
+ * of bits.
+ */
+#define BITMAP_GROUPS_1_LEVEL(nbits) \
+ BITMAP_GROUPS_L0(nbits)
+#define BITMAP_GROUPS_2_LEVEL(nbits) \
+ (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits))
+#define BITMAP_GROUPS_3_LEVEL(nbits) \
+ (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits))
+#define BITMAP_GROUPS_4_LEVEL(nbits) \
+ (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits))
+
+/*
+ * Maximum number of groups required to support LG_BITMAP_MAXBITS.
+ */
+#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS)
+#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS)
+#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS)
+#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS)
+#else
+# error "Unsupported bitmap size"
+#endif
+
/* Maximum number of levels possible. */
#define BITMAP_MAX_LEVELS \
(LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \
@@ -93,7 +139,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
bitmap_t g;
assert(bit < binfo->nbits);
- assert(bitmap_get(bitmap, binfo, bit) == false);
+ assert(!bitmap_get(bitmap, binfo, bit));
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[goff];
g = *gp;
@@ -126,15 +172,15 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
bitmap_t g;
unsigned i;
- assert(bitmap_full(bitmap, binfo) == false);
+ assert(!bitmap_full(bitmap, binfo));
i = binfo->nlevels - 1;
g = bitmap[binfo->levels[i].group_offset];
- bit = ffsl(g) - 1;
+ bit = jemalloc_ffsl(g) - 1;
while (i > 0) {
i--;
g = bitmap[binfo->levels[i].group_offset + bit];
- bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1);
+ bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1);
}
bitmap_set(bitmap, binfo, bit);
@@ -158,7 +204,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0);
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
- assert(bitmap_get(bitmap, binfo, bit) == false);
+ assert(!bitmap_get(bitmap, binfo, bit));
/* Propagate group state transitions up the tree. */
if (propagate) {
unsigned i;
@@ -172,7 +218,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
== 0);
g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
- if (propagate == false)
+ if (!propagate)
break;
}
}
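
BITMAP_BITS2GROUPS() above is a ceiling division by the 2^LG_BITMAP_GROUP_NBITS group width, and each BITMAP_GROUPS_Lx() level applies that division once more to the level below it, which is how the per-level counts telescope into BITMAP_GROUPS_MAX. A standalone sketch of the same arithmetic (not from the patch), assuming 64-bit bitmap_t groups, i.e. LG_BITMAP_GROUP_NBITS == 6:

    #include <stdio.h>

    /* Same ceiling division the BITMAP_BITS2GROUPS() macro performs. */
    #define LG_GROUP_NBITS  6                       /* 64-bit groups. */
    #define GROUP_NBITS     (1UL << LG_GROUP_NBITS)
    #define BITS2GROUPS(n)  (((n) + GROUP_NBITS - 1) >> LG_GROUP_NBITS)

    int
    main(void)
    {
        unsigned long nbits = 512;                  /* Example bit count. */
        unsigned long l0 = BITS2GROUPS(nbits);      /* 8 leaf groups. */
        unsigned long l1 = BITS2GROUPS(l0);         /* 1 summary group. */

        /* A two-level bitmap for 512 bits needs l0 + l1 = 9 groups. */
        printf("%lu leaf + %lu summary = %lu groups\n", l0, l1, l0 + l1);
        return (0);
    }
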
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk.h b/deps/jemalloc/include/jemalloc/internal/chunk.h
index 87d8700da..5d1938353 100644
--- a/deps/jemalloc/include/jemalloc/internal/chunk.h
+++ b/deps/jemalloc/include/jemalloc/internal/chunk.h
@@ -5,7 +5,7 @@
* Size and alignment of memory chunks that are allocated by the OS's virtual
* memory system.
*/
-#define LG_CHUNK_DEFAULT 22
+#define LG_CHUNK_DEFAULT 21
/* Return the chunk address for allocation address a. */
#define CHUNK_ADDR2BASE(a) \
@@ -19,6 +19,16 @@
#define CHUNK_CEILING(s) \
(((s) + chunksize_mask) & ~chunksize_mask)
+#define CHUNK_HOOKS_INITIALIZER { \
+ NULL, \
+ NULL, \
+ NULL, \
+ NULL, \
+ NULL, \
+ NULL, \
+ NULL \
+}
+
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
@@ -30,23 +40,36 @@
extern size_t opt_lg_chunk;
extern const char *opt_dss;
-/* Protects stats_chunks; currently not used for any other purpose. */
-extern malloc_mutex_t chunks_mtx;
-/* Chunk statistics. */
-extern chunk_stats_t stats_chunks;
-
-extern rtree_t *chunks_rtree;
+extern rtree_t chunks_rtree;
extern size_t chunksize;
extern size_t chunksize_mask; /* (chunksize - 1). */
extern size_t chunk_npages;
-extern size_t map_bias; /* Number of arena chunk header pages. */
-extern size_t arena_maxclass; /* Max size class for arenas. */
-void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero,
- dss_prec_t dss_prec);
-void chunk_unmap(void *chunk, size_t size);
-void chunk_dealloc(void *chunk, size_t size, bool unmap);
+extern const chunk_hooks_t chunk_hooks_default;
+
+chunk_hooks_t chunk_hooks_get(arena_t *arena);
+chunk_hooks_t chunk_hooks_set(arena_t *arena,
+ const chunk_hooks_t *chunk_hooks);
+
+bool chunk_register(const void *chunk, const extent_node_t *node);
+void chunk_deregister(const void *chunk, const extent_node_t *node);
+void *chunk_alloc_base(size_t size);
+void *chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *new_addr, size_t size, size_t alignment, bool *zero,
+ bool dalloc_node);
+void *chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit);
+void chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *chunk, size_t size, bool committed);
+void chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *chunk, size_t size, bool zeroed, bool committed);
+void chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *chunk, size_t size, bool committed);
+bool chunk_purge_arena(arena_t *arena, void *chunk, size_t offset,
+ size_t length);
+bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks,
+ void *chunk, size_t size, size_t offset, size_t length);
bool chunk_boot(void);
void chunk_prefork(void);
void chunk_postfork_parent(void);
@@ -56,6 +79,19 @@ void chunk_postfork_child(void);
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
+#ifndef JEMALLOC_ENABLE_INLINE
+extent_node_t *chunk_lookup(const void *chunk, bool dependent);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_))
+JEMALLOC_INLINE extent_node_t *
+chunk_lookup(const void *ptr, bool dependent)
+{
+
+ return (rtree_get(&chunks_rtree, (uintptr_t)ptr, dependent));
+}
+#endif
+
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
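
The LG_CHUNK_DEFAULT change above drops the default chunk from 4 MiB (1 << 22) to 2 MiB (1 << 21); CHUNK_ADDR2BASE() and CHUNK_CEILING() keep masking on that (runtime-configurable) granularity. A standalone sketch of the mask arithmetic (not from the patch), hard-coding a 2 MiB chunk instead of using the runtime chunksize/chunksize_mask variables:

    #include <stdint.h>
    #include <stdio.h>

    #define LG_CHUNK    21                          /* LG_CHUNK_DEFAULT here. */
    #define CHUNKSIZE   ((uintptr_t)1 << LG_CHUNK)
    #define CHUNK_MASK  (CHUNKSIZE - 1)

    int
    main(void)
    {
        uintptr_t addr = 0x12345678;                /* Arbitrary address. */

        /* Chunk base: clear the low LG_CHUNK bits, as CHUNK_ADDR2BASE() does. */
        uintptr_t base = addr & ~CHUNK_MASK;
        /* Offset within the chunk: keep only the low bits. */
        uintptr_t off = addr & CHUNK_MASK;
        /* Round a size up to a chunk multiple, as CHUNK_CEILING() does. */
        uintptr_t sz = (3u * 1024 * 1024 + CHUNK_MASK) & ~CHUNK_MASK;

        printf("base=%#lx off=%#lx ceil(3MiB)=%lu MiB\n", (unsigned long)base,
            (unsigned long)off, (unsigned long)(sz >> 20));
        return (0);
    }
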
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h
index 4535ce09c..388f46be0 100644
--- a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h
+++ b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h
@@ -23,7 +23,8 @@ extern const char *dss_prec_names[];
dss_prec_t chunk_dss_prec_get(void);
bool chunk_dss_prec_set(dss_prec_t dss_prec);
-void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero);
+void *chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size,
+ size_t alignment, bool *zero, bool *commit);
bool chunk_in_dss(void *chunk);
bool chunk_dss_boot(void);
void chunk_dss_prefork(void);
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h
index f24abac75..7d8014c58 100644
--- a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h
+++ b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h
@@ -9,10 +9,9 @@
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-bool pages_purge(void *addr, size_t length);
-
-void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero);
-bool chunk_dealloc_mmap(void *chunk, size_t size);
+void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero,
+ bool *commit);
+bool chunk_dalloc_mmap(void *chunk, size_t size);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/ckh.h b/deps/jemalloc/include/jemalloc/internal/ckh.h
index 58712a6a7..75c1c979f 100644
--- a/deps/jemalloc/include/jemalloc/internal/ckh.h
+++ b/deps/jemalloc/include/jemalloc/internal/ckh.h
@@ -66,13 +66,13 @@ struct ckh_s {
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
+bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
ckh_keycomp_t *keycomp);
-void ckh_delete(ckh_t *ckh);
+void ckh_delete(tsd_t *tsd, ckh_t *ckh);
size_t ckh_count(ckh_t *ckh);
bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
-bool ckh_insert(ckh_t *ckh, const void *key, const void *data);
-bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key,
+bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data);
+bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
void **data);
bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data);
void ckh_string_hash(const void *key, size_t r_hash[2]);
diff --git a/deps/jemalloc/include/jemalloc/internal/ctl.h b/deps/jemalloc/include/jemalloc/internal/ctl.h
index 0ffecc5f2..751c14b5b 100644
--- a/deps/jemalloc/include/jemalloc/internal/ctl.h
+++ b/deps/jemalloc/include/jemalloc/internal/ctl.h
@@ -34,6 +34,7 @@ struct ctl_arena_stats_s {
bool initialized;
unsigned nthreads;
const char *dss;
+ ssize_t lg_dirty_mult;
size_t pactive;
size_t pdirty;
arena_stats_t astats;
@@ -46,22 +47,15 @@ struct ctl_arena_stats_s {
malloc_bin_stats_t bstats[NBINS];
malloc_large_stats_t *lstats; /* nlclasses elements. */
+ malloc_huge_stats_t *hstats; /* nhclasses elements. */
};
struct ctl_stats_s {
size_t allocated;
size_t active;
+ size_t metadata;
+ size_t resident;
size_t mapped;
- struct {
- size_t current; /* stats_chunks.curchunks */
- uint64_t total; /* stats_chunks.nchunks */
- size_t high; /* stats_chunks.highchunks */
- } chunks;
- struct {
- size_t allocated; /* huge_allocated */
- uint64_t nmalloc; /* huge_nmalloc */
- uint64_t ndalloc; /* huge_ndalloc */
- } huge;
unsigned narenas;
ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */
};
diff --git a/deps/jemalloc/include/jemalloc/internal/extent.h b/deps/jemalloc/include/jemalloc/internal/extent.h
index ba95ca816..386d50ef4 100644
--- a/deps/jemalloc/include/jemalloc/internal/extent.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent.h
@@ -7,25 +7,53 @@ typedef struct extent_node_s extent_node_t;
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
-/* Tree of extents. */
+/* Tree of extents. Use accessor functions for en_* fields. */
struct extent_node_s {
- /* Linkage for the size/address-ordered tree. */
- rb_node(extent_node_t) link_szad;
+ /* Arena from which this extent came, if any. */
+ arena_t *en_arena;
- /* Linkage for the address-ordered tree. */
- rb_node(extent_node_t) link_ad;
+ /* Pointer to the extent that this tree node is responsible for. */
+ void *en_addr;
+
+ /* Total region size. */
+ size_t en_size;
+
+ /*
+ * The zeroed flag is used by chunk recycling code to track whether
+ * memory is zero-filled.
+ */
+ bool en_zeroed;
+
+ /*
+ * True if physical memory is committed to the extent, whether
+ * explicitly or implicitly as on a system that overcommits and
+ * satisfies physical memory needs on demand via soft page faults.
+ */
+ bool en_committed;
+
+ /*
+ * The achunk flag is used to validate that huge allocation lookups
+ * don't return arena chunks.
+ */
+ bool en_achunk;
/* Profile counters, used for huge objects. */
- prof_ctx_t *prof_ctx;
+ prof_tctx_t *en_prof_tctx;
- /* Pointer to the extent that this tree node is responsible for. */
- void *addr;
+ /* Linkage for arena's runs_dirty and chunks_cache rings. */
+ arena_runs_dirty_link_t rd;
+ qr(extent_node_t) cc_link;
- /* Total region size. */
- size_t size;
+ union {
+ /* Linkage for the size/address-ordered tree. */
+ rb_node(extent_node_t) szad_link;
+
+ /* Linkage for arena's huge and node_cache lists. */
+ ql_elm(extent_node_t) ql_link;
+ };
- /* True if zero-filled; used by chunk recycling code. */
- bool zeroed;
+ /* Linkage for the address-ordered tree. */
+ rb_node(extent_node_t) ad_link;
};
typedef rb_tree(extent_node_t) extent_tree_t;
@@ -41,6 +69,171 @@ rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t)
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
+#ifndef JEMALLOC_ENABLE_INLINE
+arena_t *extent_node_arena_get(const extent_node_t *node);
+void *extent_node_addr_get(const extent_node_t *node);
+size_t extent_node_size_get(const extent_node_t *node);
+bool extent_node_zeroed_get(const extent_node_t *node);
+bool extent_node_committed_get(const extent_node_t *node);
+bool extent_node_achunk_get(const extent_node_t *node);
+prof_tctx_t *extent_node_prof_tctx_get(const extent_node_t *node);
+void extent_node_arena_set(extent_node_t *node, arena_t *arena);
+void extent_node_addr_set(extent_node_t *node, void *addr);
+void extent_node_size_set(extent_node_t *node, size_t size);
+void extent_node_zeroed_set(extent_node_t *node, bool zeroed);
+void extent_node_committed_set(extent_node_t *node, bool committed);
+void extent_node_achunk_set(extent_node_t *node, bool achunk);
+void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx);
+void extent_node_init(extent_node_t *node, arena_t *arena, void *addr,
+ size_t size, bool zeroed, bool committed);
+void extent_node_dirty_linkage_init(extent_node_t *node);
+void extent_node_dirty_insert(extent_node_t *node,
+ arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty);
+void extent_node_dirty_remove(extent_node_t *node);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_))
+JEMALLOC_INLINE arena_t *
+extent_node_arena_get(const extent_node_t *node)
+{
+
+ return (node->en_arena);
+}
+
+JEMALLOC_INLINE void *
+extent_node_addr_get(const extent_node_t *node)
+{
+
+ return (node->en_addr);
+}
+
+JEMALLOC_INLINE size_t
+extent_node_size_get(const extent_node_t *node)
+{
+
+ return (node->en_size);
+}
+
+JEMALLOC_INLINE bool
+extent_node_zeroed_get(const extent_node_t *node)
+{
+
+ return (node->en_zeroed);
+}
+
+JEMALLOC_INLINE bool
+extent_node_committed_get(const extent_node_t *node)
+{
+
+ assert(!node->en_achunk);
+ return (node->en_committed);
+}
+
+JEMALLOC_INLINE bool
+extent_node_achunk_get(const extent_node_t *node)
+{
+
+ return (node->en_achunk);
+}
+
+JEMALLOC_INLINE prof_tctx_t *
+extent_node_prof_tctx_get(const extent_node_t *node)
+{
+
+ return (node->en_prof_tctx);
+}
+
+JEMALLOC_INLINE void
+extent_node_arena_set(extent_node_t *node, arena_t *arena)
+{
+
+ node->en_arena = arena;
+}
+
+JEMALLOC_INLINE void
+extent_node_addr_set(extent_node_t *node, void *addr)
+{
+
+ node->en_addr = addr;
+}
+
+JEMALLOC_INLINE void
+extent_node_size_set(extent_node_t *node, size_t size)
+{
+
+ node->en_size = size;
+}
+
+JEMALLOC_INLINE void
+extent_node_zeroed_set(extent_node_t *node, bool zeroed)
+{
+
+ node->en_zeroed = zeroed;
+}
+
+JEMALLOC_INLINE void
+extent_node_committed_set(extent_node_t *node, bool committed)
+{
+
+ node->en_committed = committed;
+}
+
+JEMALLOC_INLINE void
+extent_node_achunk_set(extent_node_t *node, bool achunk)
+{
+
+ node->en_achunk = achunk;
+}
+
+JEMALLOC_INLINE void
+extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx)
+{
+
+ node->en_prof_tctx = tctx;
+}
+
+JEMALLOC_INLINE void
+extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size,
+ bool zeroed, bool committed)
+{
+
+ extent_node_arena_set(node, arena);
+ extent_node_addr_set(node, addr);
+ extent_node_size_set(node, size);
+ extent_node_zeroed_set(node, zeroed);
+ extent_node_committed_set(node, committed);
+ extent_node_achunk_set(node, false);
+ if (config_prof)
+ extent_node_prof_tctx_set(node, NULL);
+}
+
+JEMALLOC_INLINE void
+extent_node_dirty_linkage_init(extent_node_t *node)
+{
+
+ qr_new(&node->rd, rd_link);
+ qr_new(node, cc_link);
+}
+
+JEMALLOC_INLINE void
+extent_node_dirty_insert(extent_node_t *node,
+ arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty)
+{
+
+ qr_meld(runs_dirty, &node->rd, rd_link);
+ qr_meld(chunks_dirty, node, cc_link);
+}
+
+JEMALLOC_INLINE void
+extent_node_dirty_remove(extent_node_t *node)
+{
+
+ qr_remove(&node->rd, rd_link);
+ qr_remove(node, cc_link);
+}
+
+#endif
+
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
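
Since the en_* fields are meant to be touched only through the accessors, a new node is normally populated in one call to extent_node_init(), with the dirty-ring linkage set up separately when the node is about to be cached. A minimal usage sketch (not from the patch; the wrapper name and the 2 MiB size are invented, and the jemalloc internal headers are assumed):

    /* Sketch: describe a freshly mapped, committed, zeroed 2 MiB extent. */
    static void
    describe_extent(extent_node_t *node, arena_t *arena, void *addr)
    {

        extent_node_init(node, arena, addr, (size_t)2 << 20,
            true,       /* zeroed */
            true);      /* committed */

        /*
         * Required before the node can be linked into the arena's
         * runs_dirty/chunks_cache rings via extent_node_dirty_insert().
         */
        extent_node_dirty_linkage_init(node);

        assert(extent_node_size_get(node) == ((size_t)2 << 20));
        assert(!extent_node_achunk_get(node));
    }
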
diff --git a/deps/jemalloc/include/jemalloc/internal/hash.h b/deps/jemalloc/include/jemalloc/internal/hash.h
index c7183ede8..bcead337a 100644
--- a/deps/jemalloc/include/jemalloc/internal/hash.h
+++ b/deps/jemalloc/include/jemalloc/internal/hash.h
@@ -35,13 +35,14 @@ JEMALLOC_INLINE uint32_t
hash_rotl_32(uint32_t x, int8_t r)
{
- return (x << r) | (x >> (32 - r));
+ return ((x << r) | (x >> (32 - r)));
}
JEMALLOC_INLINE uint64_t
hash_rotl_64(uint64_t x, int8_t r)
{
- return (x << r) | (x >> (64 - r));
+
+ return ((x << r) | (x >> (64 - r)));
}
JEMALLOC_INLINE uint32_t
@@ -76,9 +77,9 @@ hash_fmix_64(uint64_t k)
{
k ^= k >> 33;
- k *= QU(0xff51afd7ed558ccdLLU);
+ k *= KQU(0xff51afd7ed558ccd);
k ^= k >> 33;
- k *= QU(0xc4ceb9fe1a85ec53LLU);
+ k *= KQU(0xc4ceb9fe1a85ec53);
k ^= k >> 33;
return (k);
@@ -247,8 +248,8 @@ hash_x64_128(const void *key, const int len, const uint32_t seed,
uint64_t h1 = seed;
uint64_t h2 = seed;
- const uint64_t c1 = QU(0x87c37b91114253d5LLU);
- const uint64_t c2 = QU(0x4cf5ad432745937fLLU);
+ const uint64_t c1 = KQU(0x87c37b91114253d5);
+ const uint64_t c2 = KQU(0x4cf5ad432745937f);
/* body */
{
diff --git a/deps/jemalloc/include/jemalloc/internal/huge.h b/deps/jemalloc/include/jemalloc/internal/huge.h
index a2b9c7791..ece7af980 100644
--- a/deps/jemalloc/include/jemalloc/internal/huge.h
+++ b/deps/jemalloc/include/jemalloc/internal/huge.h
@@ -9,34 +9,24 @@
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-/* Huge allocation statistics. */
-extern uint64_t huge_nmalloc;
-extern uint64_t huge_ndalloc;
-extern size_t huge_allocated;
-
-/* Protects chunk-related data structures. */
-extern malloc_mutex_t huge_mtx;
-
-void *huge_malloc(size_t size, bool zero, dss_prec_t dss_prec);
-void *huge_palloc(size_t size, size_t alignment, bool zero,
- dss_prec_t dss_prec);
-bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
- size_t extra);
-void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
- size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec);
+void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
+ tcache_t *tcache);
+void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment,
+ bool zero, tcache_t *tcache);
+bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min,
+ size_t usize_max, bool zero);
+void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
+ size_t usize, size_t alignment, bool zero, tcache_t *tcache);
#ifdef JEMALLOC_JET
typedef void (huge_dalloc_junk_t)(void *, size_t);
extern huge_dalloc_junk_t *huge_dalloc_junk;
#endif
-void huge_dalloc(void *ptr, bool unmap);
+void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+arena_t *huge_aalloc(const void *ptr);
size_t huge_salloc(const void *ptr);
-dss_prec_t huge_dss_prec_get(arena_t *arena);
-prof_ctx_t *huge_prof_ctx_get(const void *ptr);
-void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
-bool huge_boot(void);
-void huge_prefork(void);
-void huge_postfork_parent(void);
-void huge_postfork_child(void);
+prof_tctx_t *huge_prof_tctx_get(const void *ptr);
+void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
+void huge_prof_tctx_reset(const void *ptr);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
index df266abb7..8536a3eda 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
@@ -1,70 +1,13 @@
#ifndef JEMALLOC_INTERNAL_H
#define JEMALLOC_INTERNAL_H
-#include <math.h>
-#ifdef _WIN32
-# include <windows.h>
-# define ENOENT ERROR_PATH_NOT_FOUND
-# define EINVAL ERROR_BAD_ARGUMENTS
-# define EAGAIN ERROR_OUTOFMEMORY
-# define EPERM ERROR_WRITE_FAULT
-# define EFAULT ERROR_INVALID_ADDRESS
-# define ENOMEM ERROR_NOT_ENOUGH_MEMORY
-# undef ERANGE
-# define ERANGE ERROR_INVALID_DATA
-#else
-# include <sys/param.h>
-# include <sys/mman.h>
-# include <sys/syscall.h>
-# if !defined(SYS_write) && defined(__NR_write)
-# define SYS_write __NR_write
-# endif
-# include <sys/uio.h>
-# include <pthread.h>
-# include <errno.h>
-#endif
-#include <sys/types.h>
-
-#include <limits.h>
-#ifndef SIZE_T_MAX
-# define SIZE_T_MAX SIZE_MAX
-#endif
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stddef.h>
-#ifndef offsetof
-# define offsetof(type, member) ((size_t)&(((type *)NULL)->member))
-#endif
-#include <inttypes.h>
-#include <string.h>
-#include <strings.h>
-#include <ctype.h>
-#ifdef _MSC_VER
-# include <io.h>
-typedef intptr_t ssize_t;
-# define PATH_MAX 1024
-# define STDERR_FILENO 2
-# define __func__ __FUNCTION__
-/* Disable warnings about deprecated system functions */
-# pragma warning(disable: 4996)
-#else
-# include <unistd.h>
-#endif
-#include <fcntl.h>
#include "jemalloc_internal_defs.h"
+#include "jemalloc/internal/jemalloc_internal_decls.h"
#ifdef JEMALLOC_UTRACE
#include <sys/ktrace.h>
#endif
-#ifdef JEMALLOC_VALGRIND
-#include <valgrind/valgrind.h>
-#include <valgrind/memcheck.h>
-#endif
-
#define JEMALLOC_NO_DEMANGLE
#ifdef JEMALLOC_JET
# define JEMALLOC_N(n) jet_##n
@@ -85,7 +28,7 @@ static const bool config_debug =
false
#endif
;
-static const bool config_dss =
+static const bool have_dss =
#ifdef JEMALLOC_DSS
true
#else
@@ -127,8 +70,8 @@ static const bool config_prof_libunwind =
false
#endif
;
-static const bool config_mremap =
-#ifdef JEMALLOC_MREMAP
+static const bool maps_coalesce =
+#ifdef JEMALLOC_MAPS_COALESCE
true
#else
false
@@ -190,6 +133,17 @@ static const bool config_ivsalloc =
false
#endif
;
+static const bool config_cache_oblivious =
+#ifdef JEMALLOC_CACHE_OBLIVIOUS
+ true
+#else
+ false
+#endif
+ ;
+
+#ifdef JEMALLOC_C11ATOMICS
+#include <stdatomic.h>
+#endif
#ifdef JEMALLOC_ATOMIC9
#include <machine/atomic.h>
@@ -229,20 +183,48 @@ static const bool config_ivsalloc =
#include "jemalloc/internal/jemalloc_internal_macros.h"
+/* Size class index type. */
+typedef unsigned szind_t;
+
+/*
+ * Flags bits:
+ *
+ * a: arena
+ * t: tcache
+ * 0: unused
+ * z: zero
+ * n: alignment
+ *
+ * aaaaaaaa aaaatttt tttttttt 0znnnnnn
+ */
+#define MALLOCX_ARENA_MASK ((int)~0xfffff)
+#define MALLOCX_ARENA_MAX 0xffe
+#define MALLOCX_TCACHE_MASK ((int)~0xfff000ffU)
+#define MALLOCX_TCACHE_MAX 0xffd
#define MALLOCX_LG_ALIGN_MASK ((int)0x3f)
-#define ALLOCM_LG_ALIGN_MASK ((int)0x3f)
+/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */
+#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \
+ (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK))
+#define MALLOCX_ALIGN_GET(flags) \
+ (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1))
+#define MALLOCX_ZERO_GET(flags) \
+ ((bool)(flags & MALLOCX_ZERO))
+
+#define MALLOCX_TCACHE_GET(flags) \
+ (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> 8)) - 2)
+#define MALLOCX_ARENA_GET(flags) \
+ (((unsigned)(((unsigned)flags) >> 20)) - 1)
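
The getters above undo the biases applied when a flags word is built: the arena index is stored plus one in the top twelve bits, the tcache index plus two in the next twelve, bit 6 is the zero flag, and the low six bits hold lg(alignment). A small sketch of decoding a value laid out that way (not from the patch; the flags word is hand-assembled for illustration, and MALLOCX_ZERO is assumed to come from the public header as the bit-6 flag):

    /*
     * Sketch: decode a hand-assembled flags word (arena 3, tcache 5,
     * zeroed, 16-byte alignment) with the getter macros above.
     */
    static void
    mallocx_flags_example(void)
    {
        int flags = ((3 + 1) << 20) | ((5 + 2) << 8) | (1 << 6) | 4;

        assert(MALLOCX_ARENA_GET(flags) == 3);
        assert(MALLOCX_TCACHE_GET(flags) == 5);
        assert(MALLOCX_ZERO_GET(flags));            /* Bit 6 ("z"). */
        assert(MALLOCX_ALIGN_GET(flags) == 16);     /* 1 << 4. */
    }
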
/* Smallest size class to support. */
-#define LG_TINY_MIN 3
#define TINY_MIN (1U << LG_TINY_MIN)
/*
- * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
* classes).
*/
#ifndef LG_QUANTUM
# if (defined(__i386__) || defined(_M_IX86))
-# define LG_QUANTUM 3
+# define LG_QUANTUM 4
# endif
# ifdef __ia64__
# define LG_QUANTUM 4
@@ -250,11 +232,11 @@ static const bool config_ivsalloc =
# ifdef __alpha__
# define LG_QUANTUM 4
# endif
-# ifdef __sparc64__
+# if (defined(__sparc64__) || defined(__sparcv9))
# define LG_QUANTUM 4
# endif
# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
-# define LG_QUANTUM 3
+# define LG_QUANTUM 4
# endif
# ifdef __arm__
# define LG_QUANTUM 3
@@ -268,6 +250,9 @@ static const bool config_ivsalloc =
# ifdef __mips__
# define LG_QUANTUM 3
# endif
+# ifdef __or1k__
+# define LG_QUANTUM 3
+# endif
# ifdef __powerpc__
# define LG_QUANTUM 4
# endif
@@ -280,8 +265,12 @@ static const bool config_ivsalloc =
# ifdef __tile__
# define LG_QUANTUM 4
# endif
+# ifdef __le32__
+# define LG_QUANTUM 4
+# endif
# ifndef LG_QUANTUM
-# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS"
+# error "Unknown minimum alignment for architecture; specify via "
+ "--with-lg-quantum"
# endif
#endif
@@ -321,12 +310,11 @@ static const bool config_ivsalloc =
#define CACHELINE_CEILING(s) \
(((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
-/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */
+/* Page size. LG_PAGE is determined by the configure script. */
#ifdef PAGE_MASK
# undef PAGE_MASK
#endif
-#define LG_PAGE STATIC_PAGE_SHIFT
-#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT))
+#define PAGE ((size_t)(1U << LG_PAGE))
#define PAGE_MASK ((size_t)(PAGE - 1))
/* Return the smallest pagesize multiple that is >= s. */
@@ -345,7 +333,7 @@ static const bool config_ivsalloc =
#define ALIGNMENT_CEILING(s, alignment) \
(((s) + (alignment - 1)) & (-(alignment)))
-/* Declare a variable length array */
+/* Declare a variable-length array. */
#if __STDC_VERSION__ < 199901L
# ifdef _MSC_VER
# include <malloc.h>
@@ -358,86 +346,12 @@ static const bool config_ivsalloc =
# endif
# endif
# define VARIABLE_ARRAY(type, name, count) \
- type *name = alloca(sizeof(type) * count)
-#else
-# define VARIABLE_ARRAY(type, name, count) type name[count]
-#endif
-
-#ifdef JEMALLOC_VALGRIND
-/*
- * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions
- * so that when Valgrind reports errors, there are no extra stack frames
- * in the backtraces.
- *
- * The size that is reported to valgrind must be consistent through a chain of
- * malloc..realloc..realloc calls. Request size isn't recorded anywhere in
- * jemalloc, so it is critical that all callers of these macros provide usize
- * rather than request size. As a result, buffer overflow detection is
- * technically weakened for the standard API, though it is generally accepted
- * practice to consider any extra bytes reported by malloc_usable_size() as
- * usable space.
- */
-#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \
- if (config_valgrind && opt_valgrind && cond) \
- VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \
-} while (0)
-#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \
- old_rzsize, zero) do { \
- if (config_valgrind && opt_valgrind) { \
- size_t rzsize = p2rz(ptr); \
- \
- if (ptr == old_ptr) { \
- VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \
- usize, rzsize); \
- if (zero && old_usize < usize) { \
- VALGRIND_MAKE_MEM_DEFINED( \
- (void *)((uintptr_t)ptr + \
- old_usize), usize - old_usize); \
- } \
- } else { \
- if (old_ptr != NULL) { \
- VALGRIND_FREELIKE_BLOCK(old_ptr, \
- old_rzsize); \
- } \
- if (ptr != NULL) { \
- size_t copy_size = (old_usize < usize) \
- ? old_usize : usize; \
- size_t tail_size = usize - copy_size; \
- VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \
- rzsize, false); \
- if (copy_size > 0) { \
- VALGRIND_MAKE_MEM_DEFINED(ptr, \
- copy_size); \
- } \
- if (zero && tail_size > 0) { \
- VALGRIND_MAKE_MEM_DEFINED( \
- (void *)((uintptr_t)ptr + \
- copy_size), tail_size); \
- } \
- } \
- } \
- } \
-} while (0)
-#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \
- if (config_valgrind && opt_valgrind) \
- VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \
-} while (0)
+ type *name = alloca(sizeof(type) * (count))
#else
-#define RUNNING_ON_VALGRIND ((unsigned)0)
-#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \
- do {} while (0)
-#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \
- do {} while (0)
-#define VALGRIND_FREELIKE_BLOCK(addr, rzB) do {} while (0)
-#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) do {} while (0)
-#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) do {} while (0)
-#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) do {} while (0)
-#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0)
-#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \
- old_rzsize, zero) do {} while (0)
-#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0)
+# define VARIABLE_ARRAY(type, name, count) type name[(count)]
#endif
+#include "jemalloc/internal/valgrind.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prng.h"
@@ -452,9 +366,10 @@ static const bool config_ivsalloc =
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
-#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/quarantine.h"
@@ -464,6 +379,7 @@ static const bool config_ivsalloc =
/******************************************************************************/
#define JEMALLOC_H_STRUCTS
+#include "jemalloc/internal/valgrind.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prng.h"
@@ -472,68 +388,83 @@ static const bool config_ivsalloc =
#include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/tsd.h"
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/bitmap.h"
+#define JEMALLOC_ARENA_STRUCTS_A
+#include "jemalloc/internal/arena.h"
+#undef JEMALLOC_ARENA_STRUCTS_A
#include "jemalloc/internal/extent.h"
+#define JEMALLOC_ARENA_STRUCTS_B
#include "jemalloc/internal/arena.h"
+#undef JEMALLOC_ARENA_STRUCTS_B
#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
-#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/quarantine.h"
#include "jemalloc/internal/prof.h"
-typedef struct {
- uint64_t allocated;
- uint64_t deallocated;
-} thread_allocated_t;
-/*
- * The JEMALLOC_ARG_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro
- * argument.
- */
-#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_ARG_CONCAT({0, 0})
+#include "jemalloc/internal/tsd.h"
#undef JEMALLOC_H_STRUCTS
/******************************************************************************/
#define JEMALLOC_H_EXTERNS
extern bool opt_abort;
-extern bool opt_junk;
+extern const char *opt_junk;
+extern bool opt_junk_alloc;
+extern bool opt_junk_free;
extern size_t opt_quarantine;
extern bool opt_redzone;
extern bool opt_utrace;
-extern bool opt_valgrind;
extern bool opt_xmalloc;
extern bool opt_zero;
extern size_t opt_narenas;
+extern bool in_valgrind;
+
/* Number of CPUs. */
extern unsigned ncpus;
-/* Protects arenas initialization (arenas, arenas_total). */
-extern malloc_mutex_t arenas_lock;
/*
- * Arenas that are used to service external requests. Not all elements of the
- * arenas array are necessarily used; arenas are created lazily as needed.
- *
- * arenas[0..narenas_auto) are used for automatic multiplexing of threads and
- * arenas. arenas[narenas_auto..narenas_total) are only used if the application
- * takes some action to create them and allocate from them.
+ * index2size_tab encodes the same information as could be computed (at
+ * unacceptable cost in some code paths) by index2size_compute().
*/
-extern arena_t **arenas;
-extern unsigned narenas_total;
-extern unsigned narenas_auto; /* Read-only after initialization. */
-
+extern size_t const index2size_tab[NSIZES];
+/*
+ * size2index_tab is a compact lookup table that rounds request sizes up to
+ * size classes. In order to reduce cache footprint, the table is compressed,
+ * and all accesses are via size2index().
+ */
+extern uint8_t const size2index_tab[];
+
+arena_t *a0get(void);
+void *a0malloc(size_t size);
+void a0dalloc(void *ptr);
+void *bootstrap_malloc(size_t size);
+void *bootstrap_calloc(size_t num, size_t size);
+void bootstrap_free(void *ptr);
arena_t *arenas_extend(unsigned ind);
-void arenas_cleanup(void *arg);
-arena_t *choose_arena_hard(void);
+arena_t *arena_init(unsigned ind);
+unsigned narenas_total_get(void);
+arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing);
+arena_t *arena_choose_hard(tsd_t *tsd);
+void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind);
+unsigned arena_nbound(unsigned ind);
+void thread_allocated_cleanup(tsd_t *tsd);
+void thread_deallocated_cleanup(tsd_t *tsd);
+void arena_cleanup(tsd_t *tsd);
+void arenas_cache_cleanup(tsd_t *tsd);
+void narenas_cache_cleanup(tsd_t *tsd);
+void arenas_cache_bypass_cleanup(tsd_t *tsd);
void jemalloc_prefork(void);
void jemalloc_postfork_parent(void);
void jemalloc_postfork_child(void);
+#include "jemalloc/internal/valgrind.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prng.h"
@@ -542,24 +473,26 @@ void jemalloc_postfork_child(void);
#include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ctl.h"
#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/tsd.h"
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
-#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/quarantine.h"
#include "jemalloc/internal/prof.h"
+#include "jemalloc/internal/tsd.h"
#undef JEMALLOC_H_EXTERNS
/******************************************************************************/
#define JEMALLOC_H_INLINES
+#include "jemalloc/internal/valgrind.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/prng.h"
@@ -572,26 +505,158 @@ void jemalloc_postfork_child(void);
#include "jemalloc/internal/mb.h"
#include "jemalloc/internal/extent.h"
#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *)
-
+szind_t size2index_compute(size_t size);
+szind_t size2index_lookup(size_t size);
+szind_t size2index(size_t size);
+size_t index2size_compute(szind_t index);
+size_t index2size_lookup(szind_t index);
+size_t index2size(szind_t index);
+size_t s2u_compute(size_t size);
+size_t s2u_lookup(size_t size);
size_t s2u(size_t size);
size_t sa2u(size_t size, size_t alignment);
-unsigned narenas_total_get(void);
-arena_t *choose_arena(arena_t *arena);
+arena_t *arena_choose(tsd_t *tsd, arena_t *arena);
+arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing,
+ bool refresh_if_missing);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
-/*
- * Map of pthread_self() --> arenas[???], used for selecting an arena to use
- * for allocations.
- */
-malloc_tsd_externs(arenas, arena_t *)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, arenas, arena_t *, NULL,
- arenas_cleanup)
+JEMALLOC_INLINE szind_t
+size2index_compute(size_t size)
+{
+
+#if (NTBINS != 0)
+ if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
+ size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
+ size_t lg_ceil = lg_floor(pow2_ceil(size));
+ return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin);
+ }
+#endif
+ {
+ size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
+ (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
+ : lg_floor((size<<1)-1);
+ size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
+ x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
+ size_t grp = shift << LG_SIZE_CLASS_GROUP;
+
+ size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
+ ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
+
+ size_t delta_inverse_mask = ZI(-1) << lg_delta;
+ size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) &
+ ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+ size_t index = NTBINS + grp + mod;
+ return (index);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE szind_t
+size2index_lookup(size_t size)
+{
+
+ assert(size <= LOOKUP_MAXCLASS);
+ {
+ size_t ret = ((size_t)(size2index_tab[(size-1) >>
+ LG_TINY_MIN]));
+ assert(ret == size2index_compute(size));
+ return (ret);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE szind_t
+size2index(size_t size)
+{
+
+ assert(size > 0);
+ if (likely(size <= LOOKUP_MAXCLASS))
+ return (size2index_lookup(size));
+ return (size2index_compute(size));
+}
+
+JEMALLOC_INLINE size_t
+index2size_compute(szind_t index)
+{
+
+#if (NTBINS > 0)
+ if (index < NTBINS)
+ return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index));
+#endif
+ {
+ size_t reduced_index = index - NTBINS;
+ size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP;
+ size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) -
+ 1);
+
+ size_t grp_size_mask = ~((!!grp)-1);
+ size_t grp_size = ((ZU(1) << (LG_QUANTUM +
+ (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
+
+ size_t shift = (grp == 0) ? 1 : grp;
+ size_t lg_delta = shift + (LG_QUANTUM-1);
+ size_t mod_size = (mod+1) << lg_delta;
+
+ size_t usize = grp_size + mod_size;
+ return (usize);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+index2size_lookup(szind_t index)
+{
+ size_t ret = (size_t)index2size_tab[index];
+ assert(ret == index2size_compute(index));
+ return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+index2size(szind_t index)
+{
+
+ assert(index < NSIZES);
+ return (index2size_lookup(index));
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+s2u_compute(size_t size)
+{
+
+#if (NTBINS > 0)
+ if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
+ size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
+ size_t lg_ceil = lg_floor(pow2_ceil(size));
+ return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) :
+ (ZU(1) << lg_ceil));
+ }
+#endif
+ {
+ size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ?
+ (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1))
+ : lg_floor((size<<1)-1);
+ size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
+ ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
+ size_t delta = ZU(1) << lg_delta;
+ size_t delta_mask = delta - 1;
+ size_t usize = (size + delta_mask) & ~delta_mask;
+ return (usize);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+s2u_lookup(size_t size)
+{
+ size_t ret = index2size_lookup(size2index_lookup(size));
+
+ assert(ret == s2u_compute(size));
+ return (ret);
+}
/*
* Compute usable size that would result from allocating an object with the
@@ -601,11 +666,10 @@ JEMALLOC_ALWAYS_INLINE size_t
s2u(size_t size)
{
- if (size <= SMALL_MAXCLASS)
- return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size);
- if (size <= arena_maxclass)
- return (PAGE_CEILING(size));
- return (CHUNK_CEILING(size));
+ assert(size > 0);
+ if (likely(size <= LOOKUP_MAXCLASS))
+ return (s2u_lookup(size));
+ return (s2u_compute(size));
}
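
Concretely, s2u_compute() spaces size classes so that each doubling contains 2^LG_SIZE_CLASS_GROUP classes and rounds the request up within that spacing. A standalone sketch of that rounding (not from the patch), assuming LG_QUANTUM == 4 (the x86-64 value defined above) and LG_SIZE_CLASS_GROUP == 2, approximating lg_floor() with a GCC/Clang builtin and omitting the tiny-class branch:

    #include <stdio.h>

    /* Simplified mirror of the s2u_compute() large/small rounding above. */
    static size_t
    round_up_to_size_class(size_t size)
    {
        int x = 63 - __builtin_clzll(((unsigned long long)size << 1) - 1);
        int lg_delta = (x < 2 + 4 + 1) ? 4 : x - 2 - 1;
        size_t delta_mask = ((size_t)1 << lg_delta) - 1;

        return ((size + delta_mask) & ~delta_mask);
    }

    int
    main(void)
    {

        /* 136 lies between the 128 and 160 classes; the spacing there is 32. */
        printf("s2u(136) -> %zu\n", round_up_to_size_class(136));
        return (0);
    }
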
/*
@@ -619,108 +683,128 @@ sa2u(size_t size, size_t alignment)
assert(alignment != 0 && ((alignment - 1) & alignment) == 0);
- /*
- * Round size up to the nearest multiple of alignment.
- *
- * This done, we can take advantage of the fact that for each small
- * size class, every object is aligned at the smallest power of two
- * that is non-zero in the base two representation of the size. For
- * example:
- *
- * Size | Base 2 | Minimum alignment
- * -----+----------+------------------
- * 96 | 1100000 | 32
- * 144 | 10100000 | 32
- * 192 | 11000000 | 64
- */
- usize = ALIGNMENT_CEILING(size, alignment);
- /*
- * (usize < size) protects against the combination of maximal
- * alignment and size greater than maximal alignment.
- */
- if (usize < size) {
- /* size_t overflow. */
- return (0);
+ /* Try for a small size class. */
+ if (size <= SMALL_MAXCLASS && alignment < PAGE) {
+ /*
+ * Round size up to the nearest multiple of alignment.
+ *
+ * This done, we can take advantage of the fact that for each
+ * small size class, every object is aligned at the smallest
+ * power of two that is non-zero in the base two representation
+ * of the size. For example:
+ *
+ * Size | Base 2 | Minimum alignment
+ * -----+----------+------------------
+ * 96 | 1100000 | 32
+ * 144 | 10100000 | 32
+ * 192 | 11000000 | 64
+ */
+ usize = s2u(ALIGNMENT_CEILING(size, alignment));
+ if (usize < LARGE_MINCLASS)
+ return (usize);
}
- if (usize <= arena_maxclass && alignment <= PAGE) {
- if (usize <= SMALL_MAXCLASS)
- return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size);
- return (PAGE_CEILING(usize));
- } else {
- size_t run_size;
-
+ /* Try for a large size class. */
+ if (likely(size <= large_maxclass) && likely(alignment < chunksize)) {
/*
* We can't achieve subpage alignment, so round up alignment
- * permanently; it makes later calculations simpler.
+ * to the minimum that can actually be supported.
*/
alignment = PAGE_CEILING(alignment);
- usize = PAGE_CEILING(size);
- /*
- * (usize < size) protects against very large sizes within
- * PAGE of SIZE_T_MAX.
- *
- * (usize + alignment < usize) protects against the
- * combination of maximal alignment and usize large enough
- * to cause overflow. This is similar to the first overflow
- * check above, but it needs to be repeated due to the new
- * usize value, which may now be *equal* to maximal
- * alignment, whereas before we only detected overflow if the
- * original size was *greater* than maximal alignment.
- */
- if (usize < size || usize + alignment < usize) {
- /* size_t overflow. */
- return (0);
- }
+
+ /* Make sure result is a large size class. */
+ usize = (size <= LARGE_MINCLASS) ? LARGE_MINCLASS : s2u(size);
/*
* Calculate the size of the over-size run that arena_palloc()
* would need to allocate in order to guarantee the alignment.
- * If the run wouldn't fit within a chunk, round up to a huge
- * allocation size.
*/
- run_size = usize + alignment - PAGE;
- if (run_size <= arena_maxclass)
- return (PAGE_CEILING(usize));
- return (CHUNK_CEILING(usize));
+ if (usize + large_pad + alignment - PAGE <= arena_maxrun)
+ return (usize);
}
-}
-JEMALLOC_INLINE unsigned
-narenas_total_get(void)
-{
- unsigned narenas;
+ /* Huge size class. Beware of size_t overflow. */
- malloc_mutex_lock(&arenas_lock);
- narenas = narenas_total;
- malloc_mutex_unlock(&arenas_lock);
+ /*
+ * We can't achieve subchunk alignment, so round up alignment to the
+ * minimum that can actually be supported.
+ */
+ alignment = CHUNK_CEILING(alignment);
+ if (alignment == 0) {
+ /* size_t overflow. */
+ return (0);
+ }
+
+ /* Make sure result is a huge size class. */
+ if (size <= chunksize)
+ usize = chunksize;
+ else {
+ usize = s2u(size);
+ if (usize < size) {
+ /* size_t overflow. */
+ return (0);
+ }
+ }
- return (narenas);
+ /*
+ * Calculate the multi-chunk mapping that huge_palloc() would need in
+ * order to guarantee the alignment.
+ */
+ if (usize + alignment - PAGE < usize) {
+ /* size_t overflow. */
+ return (0);
+ }
+ return (usize);
}
/* Choose an arena based on a per-thread value. */
JEMALLOC_INLINE arena_t *
-choose_arena(arena_t *arena)
+arena_choose(tsd_t *tsd, arena_t *arena)
{
arena_t *ret;
if (arena != NULL)
return (arena);
- if ((ret = *arenas_tsd_get()) == NULL) {
- ret = choose_arena_hard();
- assert(ret != NULL);
- }
+ if (unlikely((ret = tsd_arena_get(tsd)) == NULL))
+ ret = arena_choose_hard(tsd);
return (ret);
}
+
+JEMALLOC_INLINE arena_t *
+arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing,
+ bool refresh_if_missing)
+{
+ arena_t *arena;
+ arena_t **arenas_cache = tsd_arenas_cache_get(tsd);
+
+ /* init_if_missing requires refresh_if_missing. */
+ assert(!init_if_missing || refresh_if_missing);
+
+ if (unlikely(arenas_cache == NULL)) {
+ /* arenas_cache hasn't been initialized yet. */
+ return (arena_get_hard(tsd, ind, init_if_missing));
+ }
+ if (unlikely(ind >= tsd_narenas_cache_get(tsd))) {
+ /*
+ * ind is invalid, cache is old (too small), or arena to be
+ * initialized.
+ */
+ return (refresh_if_missing ? arena_get_hard(tsd, ind,
+ init_if_missing) : NULL);
+ }
+ arena = arenas_cache[ind];
+ if (likely(arena != NULL) || !refresh_if_missing)
+ return (arena);
+ return (arena_get_hard(tsd, ind, init_if_missing));
+}
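/*
 * Illustrative aside (not part of the upstream patch): the generic shape of
 * the arena_get() fast path above — consult a per-thread cache array, and
 * fall back to a slow path when the cache is missing, too small, or the slot
 * is empty.  Names here are stand-ins, not jemalloc's.
 */
typedef struct arena_demo_s arena_demo_t;

arena_demo_t	*arena_fetch_slow_demo(unsigned ind);	/* assumed slow path */

static arena_demo_t *
arena_fetch_demo(arena_demo_t **cache, unsigned ncached, unsigned ind)
{

	if (cache == NULL || ind >= ncached)
		return (arena_fetch_slow_demo(ind));
	if (cache[ind] != NULL)
		return (cache[ind]);
	return (arena_fetch_slow_demo(ind));
}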
#endif
#include "jemalloc/internal/bitmap.h"
-#include "jemalloc/internal/rtree.h"
/*
- * Include arena.h twice in order to resolve circular dependencies with
- * tcache.h.
+ * Include portions of arena.h interleaved with tcache.h in order to resolve
+ * circular dependencies.
*/
#define JEMALLOC_ARENA_INLINE_A
#include "jemalloc/internal/arena.h"
@@ -733,133 +817,155 @@ choose_arena(arena_t *arena)
#include "jemalloc/internal/quarantine.h"
#ifndef JEMALLOC_ENABLE_INLINE
-void *imalloct(size_t size, bool try_tcache, arena_t *arena);
-void *imalloc(size_t size);
-void *icalloct(size_t size, bool try_tcache, arena_t *arena);
-void *icalloc(size_t size);
-void *ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena);
-void *ipalloc(size_t usize, size_t alignment, bool zero);
+arena_t *iaalloc(const void *ptr);
size_t isalloc(const void *ptr, bool demote);
+void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache,
+ bool is_metadata, arena_t *arena);
+void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena);
+void *imalloc(tsd_t *tsd, size_t size);
+void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena);
+void *icalloc(tsd_t *tsd, size_t size);
+void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ tcache_t *tcache, bool is_metadata, arena_t *arena);
+void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ tcache_t *tcache, arena_t *arena);
+void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero);
size_t ivsalloc(const void *ptr, bool demote);
size_t u2rz(size_t usize);
size_t p2rz(const void *ptr);
-void idalloct(void *ptr, bool try_tcache);
-void idalloc(void *ptr);
-void iqalloct(void *ptr, bool try_tcache);
-void iqalloc(void *ptr);
-void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
- size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
+void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata);
+void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void idalloc(tsd_t *tsd, void *ptr);
+void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
+void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
+void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+ size_t extra, size_t alignment, bool zero, tcache_t *tcache,
arena_t *arena);
-void *iralloct(void *ptr, size_t size, size_t extra, size_t alignment,
- bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena);
-void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment,
- bool zero);
-bool ixalloc(void *ptr, size_t size, size_t extra, size_t alignment,
- bool zero);
-malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t)
+void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+ size_t alignment, bool zero, tcache_t *tcache, arena_t *arena);
+void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+ size_t alignment, bool zero);
+bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra,
+ size_t alignment, bool zero);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+JEMALLOC_ALWAYS_INLINE arena_t *
+iaalloc(const void *ptr)
+{
+
+ assert(ptr != NULL);
+
+ return (arena_aalloc(ptr));
+}
+
+/*
+ * Typical usage:
+ * void *ptr = [...]
+ * size_t sz = isalloc(ptr, config_prof);
+ */
+JEMALLOC_ALWAYS_INLINE size_t
+isalloc(const void *ptr, bool demote)
+{
+
+ assert(ptr != NULL);
+ /* Demotion only makes sense if config_prof is true. */
+ assert(config_prof || !demote);
+
+ return (arena_salloc(ptr, demote));
+}
+
JEMALLOC_ALWAYS_INLINE void *
-imalloct(size_t size, bool try_tcache, arena_t *arena)
+iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata,
+ arena_t *arena)
{
+ void *ret;
assert(size != 0);
- if (size <= arena_maxclass)
- return (arena_malloc(arena, size, false, try_tcache));
- else
- return (huge_malloc(size, false, huge_dss_prec_get(arena)));
+ ret = arena_malloc(tsd, arena, size, zero, tcache);
+ if (config_stats && is_metadata && likely(ret != NULL)) {
+ arena_metadata_allocated_add(iaalloc(ret), isalloc(ret,
+ config_prof));
+ }
+ return (ret);
}
JEMALLOC_ALWAYS_INLINE void *
-imalloc(size_t size)
+imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena)
{
- return (imalloct(size, true, NULL));
+ return (iallocztm(tsd, size, false, tcache, false, arena));
}
JEMALLOC_ALWAYS_INLINE void *
-icalloct(size_t size, bool try_tcache, arena_t *arena)
+imalloc(tsd_t *tsd, size_t size)
{
- if (size <= arena_maxclass)
- return (arena_malloc(arena, size, true, try_tcache));
- else
- return (huge_malloc(size, true, huge_dss_prec_get(arena)));
+ return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL));
}
JEMALLOC_ALWAYS_INLINE void *
-icalloc(size_t size)
+icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena)
{
- return (icalloct(size, true, NULL));
+ return (iallocztm(tsd, size, true, tcache, false, arena));
}
JEMALLOC_ALWAYS_INLINE void *
-ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena)
+icalloc(tsd_t *tsd, size_t size)
+{
+
+ return (iallocztm(tsd, size, true, tcache_get(tsd, true), false, NULL));
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ tcache_t *tcache, bool is_metadata, arena_t *arena)
{
void *ret;
assert(usize != 0);
assert(usize == sa2u(usize, alignment));
- if (usize <= arena_maxclass && alignment <= PAGE)
- ret = arena_malloc(arena, usize, zero, try_tcache);
- else {
- if (usize <= arena_maxclass) {
- ret = arena_palloc(choose_arena(arena), usize,
- alignment, zero);
- } else if (alignment <= chunksize)
- ret = huge_malloc(usize, zero, huge_dss_prec_get(arena));
- else
- ret = huge_palloc(usize, alignment, zero, huge_dss_prec_get(arena));
- }
-
+ ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache);
assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
+ if (config_stats && is_metadata && likely(ret != NULL)) {
+ arena_metadata_allocated_add(iaalloc(ret), isalloc(ret,
+ config_prof));
+ }
return (ret);
}
JEMALLOC_ALWAYS_INLINE void *
-ipalloc(size_t usize, size_t alignment, bool zero)
+ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
+ tcache_t *tcache, arena_t *arena)
{
- return (ipalloct(usize, alignment, zero, true, NULL));
+ return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena));
}
-/*
- * Typical usage:
- * void *ptr = [...]
- * size_t sz = isalloc(ptr, config_prof);
- */
-JEMALLOC_ALWAYS_INLINE size_t
-isalloc(const void *ptr, bool demote)
+JEMALLOC_ALWAYS_INLINE void *
+ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero)
{
- size_t ret;
- arena_chunk_t *chunk;
-
- assert(ptr != NULL);
- /* Demotion only makes sense if config_prof is true. */
- assert(config_prof || demote == false);
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr)
- ret = arena_salloc(ptr, demote);
- else
- ret = huge_salloc(ptr);
-
- return (ret);
+ return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd,
+ NULL), false, NULL));
}
JEMALLOC_ALWAYS_INLINE size_t
ivsalloc(const void *ptr, bool demote)
{
+ extent_node_t *node;
/* Return 0 if ptr is not within a chunk managed by jemalloc. */
- if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0)
+ node = chunk_lookup(ptr, false);
+ if (node == NULL)
return (0);
+ /* Only arena chunks should be looked up via interior pointers. */
+ assert(extent_node_addr_get(node) == ptr ||
+ extent_node_achunk_get(node));
return (isalloc(ptr, demote));
}
@@ -870,7 +976,7 @@ u2rz(size_t usize)
size_t ret;
if (usize <= SMALL_MAXCLASS) {
- size_t binind = SMALL_SIZE2BIN(usize);
+ szind_t binind = size2index(usize);
ret = arena_bin_info[binind].redzone_size;
} else
ret = 0;
@@ -887,47 +993,62 @@ p2rz(const void *ptr)
}
JEMALLOC_ALWAYS_INLINE void
-idalloct(void *ptr, bool try_tcache)
+idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata)
{
- arena_chunk_t *chunk;
assert(ptr != NULL);
+ if (config_stats && is_metadata) {
+ arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr,
+ config_prof));
+ }
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr)
- arena_dalloc(chunk->arena, chunk, ptr, try_tcache);
- else
- huge_dalloc(ptr, true);
+ arena_dalloc(tsd, ptr, tcache);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache)
+{
+
+ idalloctm(tsd, ptr, tcache, false);
}
JEMALLOC_ALWAYS_INLINE void
-idalloc(void *ptr)
+idalloc(tsd_t *tsd, void *ptr)
{
- idalloct(ptr, true);
+ idalloctm(tsd, ptr, tcache_get(tsd, false), false);
}
JEMALLOC_ALWAYS_INLINE void
-iqalloct(void *ptr, bool try_tcache)
+iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
{
- if (config_fill && opt_quarantine)
- quarantine(ptr);
+ if (config_fill && unlikely(opt_quarantine))
+ quarantine(tsd, ptr);
else
- idalloct(ptr, try_tcache);
+ idalloctm(tsd, ptr, tcache, false);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
+{
+
+ arena_sdalloc(tsd, ptr, size, tcache);
}
JEMALLOC_ALWAYS_INLINE void
-iqalloc(void *ptr)
+isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
{
- iqalloct(ptr, true);
+ if (config_fill && unlikely(opt_quarantine))
+ quarantine(tsd, ptr);
+ else
+ isdalloct(tsd, ptr, size, tcache);
}
JEMALLOC_ALWAYS_INLINE void *
-iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
- size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
- arena_t *arena)
+iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
+ size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena)
{
void *p;
size_t usize, copysize;
@@ -935,7 +1056,7 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
usize = sa2u(size + extra, alignment);
if (usize == 0)
return (NULL);
- p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena);
+ p = ipalloct(tsd, usize, alignment, zero, tcache, arena);
if (p == NULL) {
if (extra == 0)
return (NULL);
@@ -943,7 +1064,7 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
usize = sa2u(size, alignment);
if (usize == 0)
return (NULL);
- p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena);
+ p = ipalloct(tsd, usize, alignment, zero, tcache, arena);
if (p == NULL)
return (NULL);
}
@@ -953,72 +1074,57 @@ iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
*/
copysize = (size < oldsize) ? size : oldsize;
memcpy(p, ptr, copysize);
- iqalloct(ptr, try_tcache_dalloc);
+ isqalloc(tsd, ptr, oldsize, tcache);
return (p);
}
JEMALLOC_ALWAYS_INLINE void *
-iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
- bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena)
+iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
+ bool zero, tcache_t *tcache, arena_t *arena)
{
- size_t oldsize;
assert(ptr != NULL);
assert(size != 0);
- oldsize = isalloc(ptr, config_prof);
-
if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
!= 0) {
/*
* Existing object alignment is inadequate; allocate new space
* and copy.
*/
- return (iralloct_realign(ptr, oldsize, size, extra, alignment,
- zero, try_tcache_alloc, try_tcache_dalloc, arena));
+ return (iralloct_realign(tsd, ptr, oldsize, size, 0, alignment,
+ zero, tcache, arena));
}
- if (size + extra <= arena_maxclass) {
- return (arena_ralloc(arena, ptr, oldsize, size, extra,
- alignment, zero, try_tcache_alloc,
- try_tcache_dalloc));
- } else {
- return (huge_ralloc(ptr, oldsize, size, extra,
- alignment, zero, try_tcache_dalloc, huge_dss_prec_get(arena)));
- }
+ return (arena_ralloc(tsd, arena, ptr, oldsize, size, alignment, zero,
+ tcache));
}
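/*
 * Illustrative aside (not part of the upstream patch): the alignment test
 * used by iralloct() and ixalloc() above, shown standalone.  A pointer
 * already satisfies a power-of-two alignment iff its low bits are zero under
 * (alignment - 1); the callers treat alignment == 0 as "no requirement".
 */
#include <stdbool.h>
#include <stdint.h>

static bool
is_sufficiently_aligned_demo(const void *ptr, size_t alignment)
{

	return (alignment == 0 ||
	    ((uintptr_t)ptr & ((uintptr_t)alignment - 1)) == 0);
}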
JEMALLOC_ALWAYS_INLINE void *
-iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero)
+iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
+ bool zero)
{
- return (iralloct(ptr, size, extra, alignment, zero, true, true, NULL));
+ return (iralloct(tsd, ptr, oldsize, size, alignment, zero,
+ tcache_get(tsd, true), NULL));
}
JEMALLOC_ALWAYS_INLINE bool
-ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero)
+ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment,
+ bool zero)
{
- size_t oldsize;
assert(ptr != NULL);
assert(size != 0);
- oldsize = isalloc(ptr, config_prof);
if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
!= 0) {
/* Existing object alignment is inadequate. */
return (true);
}
- if (size <= arena_maxclass)
- return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero));
- else
- return (huge_ralloc_no_move(ptr, oldsize, size, extra));
+ return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero));
}
-
-malloc_tsd_externs(thread_allocated, thread_allocated_t)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, thread_allocated, thread_allocated_t,
- THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup)
#endif
#include "jemalloc/internal/prof.h"
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
new file mode 100644
index 000000000..a601d6ebb
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -0,0 +1,64 @@
+#ifndef JEMALLOC_INTERNAL_DECLS_H
+#define JEMALLOC_INTERNAL_DECLS_H
+
+#include <math.h>
+#ifdef _WIN32
+# include <windows.h>
+# include "msvc_compat/windows_extra.h"
+
+#else
+# include <sys/param.h>
+# include <sys/mman.h>
+# if !defined(__pnacl__) && !defined(__native_client__)
+# include <sys/syscall.h>
+# if !defined(SYS_write) && defined(__NR_write)
+# define SYS_write __NR_write
+# endif
+# include <sys/uio.h>
+# endif
+# include <pthread.h>
+# include <errno.h>
+#endif
+#include <sys/types.h>
+
+#include <limits.h>
+#ifndef SIZE_T_MAX
+# define SIZE_T_MAX SIZE_MAX
+#endif
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#ifndef offsetof
+# define offsetof(type, member) ((size_t)&(((type *)NULL)->member))
+#endif
+#include <string.h>
+#include <strings.h>
+#include <ctype.h>
+#ifdef _MSC_VER
+# include <io.h>
+typedef intptr_t ssize_t;
+# define PATH_MAX 1024
+# define STDERR_FILENO 2
+# define __func__ __FUNCTION__
+# ifdef JEMALLOC_HAS_RESTRICT
+# define restrict __restrict
+# endif
+/* Disable warnings about deprecated system functions. */
+# pragma warning(disable: 4996)
+#if _MSC_VER < 1800
+static int
+isblank(int c)
+{
+
+ return (c == '\t' || c == ' ');
+}
+#endif
+#else
+# include <unistd.h>
+#endif
+#include <fcntl.h>
+
+#endif /* JEMALLOC_INTERNAL_DECLS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in
index c166fbd9e..b0f8caaf8 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -22,6 +22,9 @@
*/
#undef CPU_SPINWAIT
+/* Defined if C11 atomics are available. */
+#undef JEMALLOC_C11ATOMICS
+
/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */
#undef JEMALLOC_ATOMIC9
@@ -35,7 +38,7 @@
* Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and
* __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite
* __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the
- * functions are defined in libgcc instead of being inlines)
+ * functions are defined in libgcc instead of being inlines).
*/
#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4
@@ -43,17 +46,37 @@
* Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and
* __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite
* __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the
- * functions are defined in libgcc instead of being inlines)
+ * functions are defined in libgcc instead of being inlines).
*/
#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8
/*
+ * Defined if __builtin_clz() and __builtin_clzl() are available.
+ */
+#undef JEMALLOC_HAVE_BUILTIN_CLZ
+
+/*
+ * Defined if madvise(2) is available.
+ */
+#undef JEMALLOC_HAVE_MADVISE
+
+/*
* Defined if OSSpin*() functions are available, as provided by Darwin, and
* documented in the spinlock(3) manual page.
*/
#undef JEMALLOC_OSSPIN
/*
+ * Defined if secure_getenv(3) is available.
+ */
+#undef JEMALLOC_HAVE_SECURE_GETENV
+
+/*
+ * Defined if issetugid(2) is available.
+ */
+#undef JEMALLOC_HAVE_ISSETUGID
+
+/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
* bootstrapping will cause recursion into the pthreads library. Therefore, if
@@ -76,9 +99,6 @@
*/
#undef JEMALLOC_MUTEX_INIT_CB
-/* Defined if sbrk() is supported. */
-#undef JEMALLOC_HAVE_SBRK
-
/* Non-empty if the tls_model attribute is supported. */
#undef JEMALLOC_TLS_MODEL
@@ -137,8 +157,26 @@
/* Support lazy locking (avoid locking unless a second thread is launched). */
#undef JEMALLOC_LAZY_LOCK
-/* One page is 2^STATIC_PAGE_SHIFT bytes. */
-#undef STATIC_PAGE_SHIFT
+/* Minimum size class to support is 2^LG_TINY_MIN bytes. */
+#undef LG_TINY_MIN
+
+/*
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+#undef LG_QUANTUM
+
+/* One page is 2^LG_PAGE bytes. */
+#undef LG_PAGE
+
+/*
+ * If defined, adjacent virtual memory mappings with identical attributes
+ * automatically coalesce, and they fragment when changes are made to subranges.
+ * This is the normal order of things for mmap()/munmap(), but on Windows
+ * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
+ * mappings do *not* coalesce/fragment.
+ */
+#undef JEMALLOC_MAPS_COALESCE
/*
* If defined, use munmap() to unmap freed chunks, rather than storing them for
@@ -147,23 +185,29 @@
*/
#undef JEMALLOC_MUNMAP
-/*
- * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is
- * disabled by default because it is Linux-specific and it will cause virtual
- * memory map holes, much like munmap(2) does.
- */
-#undef JEMALLOC_MREMAP
-
/* TLS is used to map arenas and magazine caches to threads. */
#undef JEMALLOC_TLS
/*
+ * ffs()/ffsl() functions to use for bitmapping. Don't use these directly;
+ * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h.
+ */
+#undef JEMALLOC_INTERNAL_FFSL
+#undef JEMALLOC_INTERNAL_FFS
+
+/*
* JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside
* within jemalloc-owned chunks before dereferencing them.
*/
#undef JEMALLOC_IVSALLOC
/*
+ * If defined, explicitly attempt to more uniformly distribute large allocation
+ * pointer alignments across all cache indices.
+ */
+#undef JEMALLOC_CACHE_OBLIVIOUS
+
+/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#undef JEMALLOC_ZONE
@@ -182,9 +226,7 @@
#undef JEMALLOC_PURGE_MADVISE_DONTNEED
#undef JEMALLOC_PURGE_MADVISE_FREE
-/*
- * Define if operating system has alloca.h header.
- */
+/* Define if operating system has alloca.h header. */
#undef JEMALLOC_HAS_ALLOCA_H
/* C99 restrict keyword supported. */
@@ -202,4 +244,19 @@
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
#undef LG_SIZEOF_INTMAX_T
+/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
+#undef JEMALLOC_GLIBC_MALLOC_HOOK
+
+/* glibc memalign hook. */
+#undef JEMALLOC_GLIBC_MEMALIGN_HOOK
+
+/* Adaptive mutex support in pthreads. */
+#undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
+
+/*
+ * If defined, jemalloc symbols are not exported (doesn't work when
+ * JEMALLOC_PREFIX is not defined).
+ */
+#undef JEMALLOC_EXPORT
+
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
index 4e2392302..a08ba772e 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
@@ -39,9 +39,15 @@
#endif
#define ZU(z) ((size_t)z)
+#define ZI(z) ((ssize_t)z)
#define QU(q) ((uint64_t)q)
#define QI(q) ((int64_t)q)
+#define KZU(z) ZU(z##ULL)
+#define KZI(z) ZI(z##LL)
+#define KQU(q) QU(q##ULL)
+#define KQI(q) QI(q##LL)
+
#ifndef __DECONST
# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/mutex.h b/deps/jemalloc/include/jemalloc/internal/mutex.h
index de44e1435..f051f2917 100644
--- a/deps/jemalloc/include/jemalloc/internal/mutex.h
+++ b/deps/jemalloc/include/jemalloc/internal/mutex.h
@@ -10,7 +10,7 @@ typedef struct malloc_mutex_s malloc_mutex_t;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL}
#else
-# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \
+# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \
defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP))
# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP
# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}
@@ -26,7 +26,11 @@ typedef struct malloc_mutex_s malloc_mutex_t;
struct malloc_mutex_s {
#ifdef _WIN32
+# if _WIN32_WINNT >= 0x0600
+ SRWLOCK lock;
+# else
CRITICAL_SECTION lock;
+# endif
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLock lock;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
@@ -70,7 +74,11 @@ malloc_mutex_lock(malloc_mutex_t *mutex)
if (isthreaded) {
#ifdef _WIN32
+# if _WIN32_WINNT >= 0x0600
+ AcquireSRWLockExclusive(&mutex->lock);
+# else
EnterCriticalSection(&mutex->lock);
+# endif
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLockLock(&mutex->lock);
#else
@@ -85,7 +93,11 @@ malloc_mutex_unlock(malloc_mutex_t *mutex)
if (isthreaded) {
#ifdef _WIN32
+# if _WIN32_WINNT >= 0x0600
+ ReleaseSRWLockExclusive(&mutex->lock);
+# else
LeaveCriticalSection(&mutex->lock);
+# endif
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLockUnlock(&mutex->lock);
#else
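/*
 * Illustrative aside (not part of the upstream patch): a standalone sketch of
 * the selection the mutex.h hunks above introduce — on Windows builds
 * targeting Vista or later (_WIN32_WINNT >= 0x0600), a slim reader/writer
 * lock is used in place of a critical section.  The demo_* names are
 * illustrative only.
 */
#ifdef _WIN32
#include <windows.h>
# if _WIN32_WINNT >= 0x0600
typedef SRWLOCK demo_lock_t;
#  define demo_lock(l)		AcquireSRWLockExclusive(l)
#  define demo_unlock(l)	ReleaseSRWLockExclusive(l)
# else
typedef CRITICAL_SECTION demo_lock_t;
#  define demo_lock(l)		EnterCriticalSection(l)
#  define demo_unlock(l)	LeaveCriticalSection(l)
# endif
#endif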
diff --git a/deps/jemalloc/include/jemalloc/internal/pages.h b/deps/jemalloc/include/jemalloc/internal/pages.h
new file mode 100644
index 000000000..da7eb9686
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/pages.h
@@ -0,0 +1,26 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void *pages_map(void *addr, size_t size);
+void pages_unmap(void *addr, size_t size);
+void *pages_trim(void *addr, size_t alloc_size, size_t leadsize,
+ size_t size);
+bool pages_commit(void *addr, size_t size);
+bool pages_decommit(void *addr, size_t size);
+bool pages_purge(void *addr, size_t size);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
diff --git a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt b/deps/jemalloc/include/jemalloc/internal/private_symbols.txt
index 93516d242..a90021aa6 100644
--- a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt
+++ b/deps/jemalloc/include/jemalloc/internal/private_symbols.txt
@@ -1,44 +1,76 @@
-a0calloc
-a0free
+a0dalloc
+a0get
a0malloc
+arena_aalloc
arena_alloc_junk_small
arena_bin_index
arena_bin_info
+arena_bitselm_get
arena_boot
+arena_choose
+arena_choose_hard
+arena_chunk_alloc_huge
+arena_chunk_cache_maybe_insert
+arena_chunk_cache_maybe_remove
+arena_chunk_dalloc_huge
+arena_chunk_ralloc_huge_expand
+arena_chunk_ralloc_huge_shrink
+arena_chunk_ralloc_huge_similar
+arena_cleanup
arena_dalloc
arena_dalloc_bin
-arena_dalloc_bin_locked
+arena_dalloc_bin_junked_locked
arena_dalloc_junk_large
arena_dalloc_junk_small
arena_dalloc_large
-arena_dalloc_large_locked
+arena_dalloc_large_junked_locked
arena_dalloc_small
arena_dss_prec_get
arena_dss_prec_set
+arena_get
+arena_get_hard
+arena_init
+arena_lg_dirty_mult_default_get
+arena_lg_dirty_mult_default_set
+arena_lg_dirty_mult_get
+arena_lg_dirty_mult_set
arena_malloc
arena_malloc_large
arena_malloc_small
arena_mapbits_allocated_get
arena_mapbits_binind_get
+arena_mapbits_decommitted_get
arena_mapbits_dirty_get
arena_mapbits_get
+arena_mapbits_internal_set
arena_mapbits_large_binind_set
arena_mapbits_large_get
arena_mapbits_large_set
arena_mapbits_large_size_get
+arena_mapbitsp_get
+arena_mapbitsp_read
+arena_mapbitsp_write
+arena_mapbits_size_decode
+arena_mapbits_size_encode
arena_mapbits_small_runind_get
arena_mapbits_small_set
arena_mapbits_unallocated_set
arena_mapbits_unallocated_size_get
arena_mapbits_unallocated_size_set
arena_mapbits_unzeroed_get
-arena_mapbits_unzeroed_set
-arena_mapbitsp_get
-arena_mapbitsp_read
-arena_mapbitsp_write
-arena_mapp_get
-arena_maxclass
+arena_maxrun
+arena_maybe_purge
+arena_metadata_allocated_add
+arena_metadata_allocated_get
+arena_metadata_allocated_sub
+arena_migrate
+arena_miscelm_get
+arena_miscelm_to_pageind
+arena_miscelm_to_rpages
+arena_nbound
arena_new
+arena_node_alloc
+arena_node_dalloc
arena_palloc
arena_postfork_child
arena_postfork_parent
@@ -46,50 +78,47 @@ arena_prefork
arena_prof_accum
arena_prof_accum_impl
arena_prof_accum_locked
-arena_prof_ctx_get
-arena_prof_ctx_set
arena_prof_promoted
+arena_prof_tctx_get
+arena_prof_tctx_reset
+arena_prof_tctx_set
arena_ptr_small_binind_get
arena_purge_all
arena_quarantine_junk_small
arena_ralloc
arena_ralloc_junk_large
arena_ralloc_no_move
+arena_rd_to_miscelm
arena_redzone_corruption
arena_run_regind
+arena_run_to_miscelm
arena_salloc
+arenas_cache_bypass_cleanup
+arenas_cache_cleanup
+arena_sdalloc
arena_stats_merge
arena_tcache_fill_small
-arenas
-arenas_booted
-arenas_cleanup
-arenas_extend
-arenas_initialized
-arenas_lock
-arenas_tls
-arenas_tsd
-arenas_tsd_boot
-arenas_tsd_cleanup_wrapper
-arenas_tsd_get
-arenas_tsd_get_wrapper
-arenas_tsd_init_head
-arenas_tsd_set
+atomic_add_p
atomic_add_u
atomic_add_uint32
atomic_add_uint64
atomic_add_z
+atomic_cas_p
+atomic_cas_u
+atomic_cas_uint32
+atomic_cas_uint64
+atomic_cas_z
+atomic_sub_p
atomic_sub_u
atomic_sub_uint32
atomic_sub_uint64
atomic_sub_z
base_alloc
base_boot
-base_calloc
-base_node_alloc
-base_node_dealloc
base_postfork_child
base_postfork_parent
base_prefork
+base_stats_get
bitmap_full
bitmap_get
bitmap_info_init
@@ -99,49 +128,54 @@ bitmap_set
bitmap_sfu
bitmap_size
bitmap_unset
+bootstrap_calloc
+bootstrap_free
+bootstrap_malloc
bt_init
buferror
-choose_arena
-choose_arena_hard
-chunk_alloc
+chunk_alloc_base
+chunk_alloc_cache
chunk_alloc_dss
chunk_alloc_mmap
+chunk_alloc_wrapper
chunk_boot
-chunk_dealloc
-chunk_dealloc_mmap
+chunk_dalloc_arena
+chunk_dalloc_cache
+chunk_dalloc_mmap
+chunk_dalloc_wrapper
+chunk_deregister
chunk_dss_boot
chunk_dss_postfork_child
chunk_dss_postfork_parent
chunk_dss_prec_get
chunk_dss_prec_set
chunk_dss_prefork
+chunk_hooks_default
+chunk_hooks_get
+chunk_hooks_set
chunk_in_dss
+chunk_lookup
chunk_npages
chunk_postfork_child
chunk_postfork_parent
chunk_prefork
-chunk_unmap
-chunks_mtx
-chunks_rtree
+chunk_purge_arena
+chunk_purge_wrapper
+chunk_register
chunksize
chunksize_mask
-ckh_bucket_search
+chunks_rtree
ckh_count
ckh_delete
-ckh_evict_reloc_insert
ckh_insert
-ckh_isearch
ckh_iter
ckh_new
ckh_pointer_hash
ckh_pointer_keycomp
-ckh_rebuild
ckh_remove
ckh_search
ckh_string_hash
ckh_string_keycomp
-ckh_try_bucket_insert
-ckh_try_insert
ctl_boot
ctl_bymib
ctl_byname
@@ -150,6 +184,23 @@ ctl_postfork_child
ctl_postfork_parent
ctl_prefork
dss_prec_names
+extent_node_achunk_get
+extent_node_achunk_set
+extent_node_addr_get
+extent_node_addr_set
+extent_node_arena_get
+extent_node_arena_set
+extent_node_dirty_insert
+extent_node_dirty_linkage_init
+extent_node_dirty_remove
+extent_node_init
+extent_node_prof_tctx_get
+extent_node_prof_tctx_set
+extent_node_size_get
+extent_node_size_set
+extent_node_zeroed_get
+extent_node_zeroed_set
+extent_tree_ad_empty
extent_tree_ad_first
extent_tree_ad_insert
extent_tree_ad_iter
@@ -166,6 +217,7 @@ extent_tree_ad_reverse_iter
extent_tree_ad_reverse_iter_recurse
extent_tree_ad_reverse_iter_start
extent_tree_ad_search
+extent_tree_szad_empty
extent_tree_szad_first
extent_tree_szad_insert
extent_tree_szad_iter
@@ -193,45 +245,49 @@ hash_rotl_64
hash_x64_128
hash_x86_128
hash_x86_32
-huge_allocated
-huge_boot
+huge_aalloc
huge_dalloc
huge_dalloc_junk
-huge_dss_prec_get
huge_malloc
-huge_mtx
-huge_ndalloc
-huge_nmalloc
huge_palloc
-huge_postfork_child
-huge_postfork_parent
-huge_prefork
-huge_prof_ctx_get
-huge_prof_ctx_set
+huge_prof_tctx_get
+huge_prof_tctx_reset
+huge_prof_tctx_set
huge_ralloc
huge_ralloc_no_move
huge_salloc
-iallocm
+iaalloc
+iallocztm
icalloc
icalloct
idalloc
idalloct
+idalloctm
imalloc
imalloct
+index2size
+index2size_compute
+index2size_lookup
+index2size_tab
+in_valgrind
ipalloc
ipalloct
+ipallocztm
iqalloc
-iqalloct
iralloc
iralloct
iralloct_realign
isalloc
+isdalloct
+isqalloc
isthreaded
ivsalloc
ixalloc
jemalloc_postfork_child
jemalloc_postfork_parent
jemalloc_prefork
+large_maxclass
+lg_floor
malloc_cprintf
malloc_mutex_init
malloc_mutex_lock
@@ -242,7 +298,8 @@ malloc_mutex_unlock
malloc_printf
malloc_snprintf
malloc_strtoumax
-malloc_tsd_boot
+malloc_tsd_boot0
+malloc_tsd_boot1
malloc_tsd_cleanup_register
malloc_tsd_dalloc
malloc_tsd_malloc
@@ -251,16 +308,18 @@ malloc_vcprintf
malloc_vsnprintf
malloc_write
map_bias
+map_misc_offset
mb_write
mutex_boot
-narenas_auto
-narenas_total
+narenas_cache_cleanup
narenas_total_get
ncpus
nhbins
opt_abort
opt_dss
opt_junk
+opt_junk_alloc
+opt_junk_free
opt_lg_chunk
opt_lg_dirty_mult
opt_lg_prof_interval
@@ -274,84 +333,99 @@ opt_prof_final
opt_prof_gdump
opt_prof_leak
opt_prof_prefix
+opt_prof_thread_active_init
opt_quarantine
opt_redzone
opt_stats_print
opt_tcache
opt_utrace
-opt_valgrind
opt_xmalloc
opt_zero
p2rz
+pages_commit
+pages_decommit
+pages_map
pages_purge
+pages_trim
+pages_unmap
pow2_ceil
+prof_active_get
+prof_active_get_unlocked
+prof_active_set
+prof_alloc_prep
+prof_alloc_rollback
prof_backtrace
prof_boot0
prof_boot1
prof_boot2
-prof_bt_count
-prof_ctx_get
-prof_ctx_set
+prof_dump_header
prof_dump_open
prof_free
+prof_free_sampled_object
prof_gdump
+prof_gdump_get
+prof_gdump_get_unlocked
+prof_gdump_set
+prof_gdump_val
prof_idump
prof_interval
prof_lookup
prof_malloc
+prof_malloc_sample_object
prof_mdump
prof_postfork_child
prof_postfork_parent
prof_prefork
-prof_promote
prof_realloc
+prof_reset
prof_sample_accum_update
prof_sample_threshold_update
-prof_tdata_booted
+prof_tctx_get
+prof_tctx_reset
+prof_tctx_set
prof_tdata_cleanup
prof_tdata_get
prof_tdata_init
-prof_tdata_initialized
-prof_tdata_tls
-prof_tdata_tsd
-prof_tdata_tsd_boot
-prof_tdata_tsd_cleanup_wrapper
-prof_tdata_tsd_get
-prof_tdata_tsd_get_wrapper
-prof_tdata_tsd_init_head
-prof_tdata_tsd_set
+prof_tdata_reinit
+prof_thread_active_get
+prof_thread_active_init_get
+prof_thread_active_init_set
+prof_thread_active_set
+prof_thread_name_get
+prof_thread_name_set
quarantine
quarantine_alloc_hook
-quarantine_boot
-quarantine_booted
+quarantine_alloc_hook_work
quarantine_cleanup
-quarantine_init
-quarantine_tls
-quarantine_tsd
-quarantine_tsd_boot
-quarantine_tsd_cleanup_wrapper
-quarantine_tsd_get
-quarantine_tsd_get_wrapper
-quarantine_tsd_init_head
-quarantine_tsd_set
register_zone
+rtree_child_read
+rtree_child_read_hard
+rtree_child_tryread
rtree_delete
rtree_get
-rtree_get_locked
rtree_new
-rtree_postfork_child
-rtree_postfork_parent
-rtree_prefork
+rtree_node_valid
rtree_set
+rtree_start_level
+rtree_subkey
+rtree_subtree_read
+rtree_subtree_read_hard
+rtree_subtree_tryread
+rtree_val_read
+rtree_val_write
s2u
+s2u_compute
+s2u_lookup
sa2u
set_errno
-small_size2bin
+size2index
+size2index_compute
+size2index_lookup
+size2index_tab
stats_cactive
stats_cactive_add
stats_cactive_get
stats_cactive_sub
-stats_chunks
stats_print
tcache_alloc_easy
tcache_alloc_large
@@ -359,55 +433,67 @@ tcache_alloc_small
tcache_alloc_small_hard
tcache_arena_associate
tcache_arena_dissociate
+tcache_arena_reassociate
tcache_bin_flush_large
tcache_bin_flush_small
tcache_bin_info
-tcache_boot0
-tcache_boot1
-tcache_booted
+tcache_boot
+tcache_cleanup
tcache_create
tcache_dalloc_large
tcache_dalloc_small
-tcache_destroy
-tcache_enabled_booted
+tcache_enabled_cleanup
tcache_enabled_get
-tcache_enabled_initialized
tcache_enabled_set
-tcache_enabled_tls
-tcache_enabled_tsd
-tcache_enabled_tsd_boot
-tcache_enabled_tsd_cleanup_wrapper
-tcache_enabled_tsd_get
-tcache_enabled_tsd_get_wrapper
-tcache_enabled_tsd_init_head
-tcache_enabled_tsd_set
tcache_event
tcache_event_hard
tcache_flush
tcache_get
-tcache_initialized
+tcache_get_hard
tcache_maxclass
+tcaches
tcache_salloc
+tcaches_create
+tcaches_destroy
+tcaches_flush
+tcaches_get
tcache_stats_merge
-tcache_thread_cleanup
-tcache_tls
-tcache_tsd
-tcache_tsd_boot
-tcache_tsd_cleanup_wrapper
-tcache_tsd_get
-tcache_tsd_get_wrapper
-tcache_tsd_init_head
-tcache_tsd_set
-thread_allocated_booted
-thread_allocated_initialized
-thread_allocated_tls
-thread_allocated_tsd
-thread_allocated_tsd_boot
-thread_allocated_tsd_cleanup_wrapper
-thread_allocated_tsd_get
-thread_allocated_tsd_get_wrapper
-thread_allocated_tsd_init_head
-thread_allocated_tsd_set
+thread_allocated_cleanup
+thread_deallocated_cleanup
+tsd_arena_get
+tsd_arena_set
+tsd_boot
+tsd_boot0
+tsd_boot1
+tsd_booted
+tsd_cleanup
+tsd_cleanup_wrapper
+tsd_fetch
+tsd_get
+tsd_wrapper_get
+tsd_wrapper_set
+tsd_initialized
tsd_init_check_recursion
tsd_init_finish
+tsd_init_head
+tsd_nominal
+tsd_quarantine_get
+tsd_quarantine_set
+tsd_set
+tsd_tcache_enabled_get
+tsd_tcache_enabled_set
+tsd_tcache_get
+tsd_tcache_set
+tsd_tls
+tsd_tsd
+tsd_prof_tdata_get
+tsd_prof_tdata_set
+tsd_thread_allocated_get
+tsd_thread_allocated_set
+tsd_thread_deallocated_get
+tsd_thread_deallocated_set
u2rz
+valgrind_freelike_block
+valgrind_make_mem_defined
+valgrind_make_mem_noaccess
+valgrind_make_mem_undefined
diff --git a/deps/jemalloc/include/jemalloc/internal/prng.h b/deps/jemalloc/include/jemalloc/internal/prng.h
index 7b2b06512..216d0ef47 100644
--- a/deps/jemalloc/include/jemalloc/internal/prng.h
+++ b/deps/jemalloc/include/jemalloc/internal/prng.h
@@ -15,7 +15,7 @@
* See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
*
* This choice of m has the disadvantage that the quality of the bits is
- * proportional to bit position. For example. the lowest bit has a cycle of 2,
+ * proportional to bit position. For example, the lowest bit has a cycle of 2,
* the next has a cycle of 4, etc. For this reason, we prefer to use the upper
* bits.
*
@@ -26,22 +26,22 @@
* const uint32_t a, c : See above discussion.
*/
#define prng32(r, lg_range, state, a, c) do { \
- assert(lg_range > 0); \
- assert(lg_range <= 32); \
+ assert((lg_range) > 0); \
+ assert((lg_range) <= 32); \
\
r = (state * (a)) + (c); \
state = r; \
- r >>= (32 - lg_range); \
+ r >>= (32 - (lg_range)); \
} while (false)
/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */
#define prng64(r, lg_range, state, a, c) do { \
- assert(lg_range > 0); \
- assert(lg_range <= 64); \
+ assert((lg_range) > 0); \
+ assert((lg_range) <= 64); \
\
r = (state * (a)) + (c); \
state = r; \
- r >>= (64 - lg_range); \
+ r >>= (64 - (lg_range)); \
} while (false)
#endif /* JEMALLOC_H_TYPES */
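/*
 * Illustrative aside (not part of the upstream patch): why the parentheses
 * added around lg_range above matter.  Without them, passing an expression
 * changes the arithmetic; a hypothetical pair of macros makes this concrete:
 *
 *   #define SHIFT_BAD(r, lg_range)	((r) >> (32 - lg_range))
 *   #define SHIFT_OK(r, lg_range)	((r) >> (32 - (lg_range)))
 *
 * SHIFT_BAD(x, a + b) expands to ((x) >> (32 - a + b)), i.e. a shift by
 * 32 - a + b, whereas SHIFT_OK(x, a + b) shifts by 32 - (a + b) as intended.
 */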
diff --git a/deps/jemalloc/include/jemalloc/internal/prof.h b/deps/jemalloc/include/jemalloc/internal/prof.h
index 6f162d21e..e5198c3e8 100644
--- a/deps/jemalloc/include/jemalloc/internal/prof.h
+++ b/deps/jemalloc/include/jemalloc/internal/prof.h
@@ -3,8 +3,8 @@
typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
-typedef struct prof_thr_cnt_s prof_thr_cnt_t;
-typedef struct prof_ctx_s prof_ctx_t;
+typedef struct prof_tctx_s prof_tctx_t;
+typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;
/* Option defaults. */
@@ -23,9 +23,6 @@ typedef struct prof_tdata_s prof_tdata_t;
*/
#define PROF_BT_MAX 128
-/* Maximum number of backtraces to store in each per thread LRU cache. */
-#define PROF_TCMAX 1024
-
/* Initial hash table size. */
#define PROF_CKH_MINITEMS 64
@@ -36,12 +33,18 @@ typedef struct prof_tdata_s prof_tdata_t;
#define PROF_PRINTF_BUFSIZE 128
/*
- * Number of mutexes shared among all ctx's. No space is allocated for these
+ * Number of mutexes shared among all gctx's. No space is allocated for these
* unless profiling is enabled, so it's okay to over-provision.
*/
#define PROF_NCTX_LOCKS 1024
/*
+ * Number of mutexes shared among all tdata's. No space is allocated for these
+ * unless profiling is enabled, so it's okay to over-provision.
+ */
+#define PROF_NTDATA_LOCKS 256
+
+/*
* prof_tdata pointers close to NULL are used to encode state information that
* is used for cleaning up during thread shutdown.
*/
@@ -63,141 +66,186 @@ struct prof_bt_s {
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
prof_bt_t *bt;
- unsigned nignore;
unsigned max;
} prof_unwind_data_t;
#endif
struct prof_cnt_s {
- /*
- * Profiling counters. An allocation/deallocation pair can operate on
- * different prof_thr_cnt_t objects that are linked into the same
- * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
- * negative. In principle it is possible for the *bytes counters to
- * overflow/underflow, but a general solution would require something
- * like 128-bit counters; this implementation doesn't bother to solve
- * that problem.
- */
- int64_t curobjs;
- int64_t curbytes;
+ /* Profiling counters. */
+ uint64_t curobjs;
+ uint64_t curbytes;
uint64_t accumobjs;
uint64_t accumbytes;
};
-struct prof_thr_cnt_s {
- /* Linkage into prof_ctx_t's cnts_ql. */
- ql_elm(prof_thr_cnt_t) cnts_link;
+typedef enum {
+ prof_tctx_state_initializing,
+ prof_tctx_state_nominal,
+ prof_tctx_state_dumping,
+ prof_tctx_state_purgatory /* Dumper must finish destroying. */
+} prof_tctx_state_t;
- /* Linkage into thread's LRU. */
- ql_elm(prof_thr_cnt_t) lru_link;
+struct prof_tctx_s {
+ /* Thread data for thread that performed the allocation. */
+ prof_tdata_t *tdata;
/*
- * Associated context. If a thread frees an object that it did not
- * allocate, it is possible that the context is not cached in the
- * thread's hash table, in which case it must be able to look up the
- * context, insert a new prof_thr_cnt_t into the thread's hash table,
- * and link it into the prof_ctx_t's cnts_ql.
+ * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
+ * defunct during teardown.
*/
- prof_ctx_t *ctx;
+ uint64_t thr_uid;
+ uint64_t thr_discrim;
+
+ /* Profiling counters, protected by tdata->lock. */
+ prof_cnt_t cnts;
+
+ /* Associated global context. */
+ prof_gctx_t *gctx;
/*
- * Threads use memory barriers to update the counters. Since there is
- * only ever one writer, the only challenge is for the reader to get a
- * consistent read of the counters.
- *
- * The writer uses this series of operations:
- *
- * 1) Increment epoch to an odd number.
- * 2) Update counters.
- * 3) Increment epoch to an even number.
- *
- * The reader must assure 1) that the epoch is even while it reads the
- * counters, and 2) that the epoch doesn't change between the time it
- * starts and finishes reading the counters.
+ * UID that distinguishes multiple tctx's created by the same thread,
+ * but coexisting in gctx->tctxs. There are two ways that such
+ * coexistence can occur:
+ * - A dumper thread can cause a tctx to be retained in the purgatory
+ * state.
+ * - Although a single "producer" thread must create all tctx's which
+ * share the same thr_uid, multiple "consumers" can each concurrently
+ * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only
+ * gets called once each time cnts.cur{objs,bytes} drop to 0, but this
+ * threshold can be hit again before the first consumer finishes
+ * executing prof_tctx_destroy().
*/
- unsigned epoch;
+ uint64_t tctx_uid;
- /* Profiling counters. */
- prof_cnt_t cnts;
-};
+ /* Linkage into gctx's tctxs. */
+ rb_node(prof_tctx_t) tctx_link;
-struct prof_ctx_s {
- /* Associated backtrace. */
- prof_bt_t *bt;
+ /*
+ * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
+ * sample vs destroy race.
+ */
+ bool prepared;
+
+ /* Current dump-related state, protected by gctx->lock. */
+ prof_tctx_state_t state;
+
+ /*
+ * Copy of cnts snapshotted during early dump phase, protected by
+ * dump_mtx.
+ */
+ prof_cnt_t dump_cnts;
+};
+typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
- /* Protects nlimbo, cnt_merged, and cnts_ql. */
+struct prof_gctx_s {
+ /* Protects nlimbo, cnt_summed, and tctxs. */
malloc_mutex_t *lock;
/*
- * Number of threads that currently cause this ctx to be in a state of
+ * Number of threads that currently cause this gctx to be in a state of
* limbo due to one of:
- * - Initializing per thread counters associated with this ctx.
- * - Preparing to destroy this ctx.
- * - Dumping a heap profile that includes this ctx.
+ * - Initializing this gctx.
+ * - Initializing per thread counters associated with this gctx.
+ * - Preparing to destroy this gctx.
+ * - Dumping a heap profile that includes this gctx.
* nlimbo must be 1 (single destroyer) in order to safely destroy the
- * ctx.
+ * gctx.
*/
unsigned nlimbo;
- /* Temporary storage for summation during dump. */
- prof_cnt_t cnt_summed;
-
- /* When threads exit, they merge their stats into cnt_merged. */
- prof_cnt_t cnt_merged;
-
/*
- * List of profile counters, one for each thread that has allocated in
+ * Tree of profile counters, one for each thread that has allocated in
* this context.
*/
- ql_head(prof_thr_cnt_t) cnts_ql;
+ prof_tctx_tree_t tctxs;
+
+ /* Linkage for tree of contexts to be dumped. */
+ rb_node(prof_gctx_t) dump_link;
+
+ /* Temporary storage for summation during dump. */
+ prof_cnt_t cnt_summed;
+
+ /* Associated backtrace. */
+ prof_bt_t bt;
- /* Linkage for list of contexts to be dumped. */
- ql_elm(prof_ctx_t) dump_link;
+ /* Backtrace vector, variable size, referred to by bt. */
+ void *vec[1];
};
-typedef ql_head(prof_ctx_t) prof_ctx_list_t;
+typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
struct prof_tdata_s {
+ malloc_mutex_t *lock;
+
+ /* Monotonically increasing unique thread identifier. */
+ uint64_t thr_uid;
+
/*
- * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a
- * cache of backtraces, with associated thread-specific prof_thr_cnt_t
- * objects. Other threads may read the prof_thr_cnt_t contents, but no
- * others will ever write them.
- *
- * Upon thread exit, the thread must merge all the prof_thr_cnt_t
- * counter data into the associated prof_ctx_t objects, and unlink/free
- * the prof_thr_cnt_t objects.
+ * Monotonically increasing discriminator among tdata structures
+ * associated with the same thr_uid.
*/
- ckh_t bt2cnt;
+ uint64_t thr_discrim;
- /* LRU for contents of bt2cnt. */
- ql_head(prof_thr_cnt_t) lru_ql;
+ /* Included in heap profile dumps if non-NULL. */
+ char *thread_name;
- /* Backtrace vector, used for calls to prof_backtrace(). */
- void **vec;
+ bool attached;
+ bool expired;
+
+ rb_node(prof_tdata_t) tdata_link;
+
+ /*
+ * Counter used to initialize prof_tctx_t's tctx_uid. No locking is
+ * necessary when incrementing this field, because only one thread ever
+ * does so.
+ */
+ uint64_t tctx_uid_next;
+
+ /*
+ * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
+ * backtraces for which it has non-zero allocation/deallocation counters
+ * associated with thread-specific prof_tctx_t objects. Other threads
+ * may write to prof_tctx_t contents when freeing associated objects.
+ */
+ ckh_t bt2tctx;
/* Sampling state. */
uint64_t prng_state;
- uint64_t threshold;
- uint64_t accum;
+ uint64_t bytes_until_sample;
/* State used to avoid dumping while operating on prof internals. */
bool enq;
bool enq_idump;
bool enq_gdump;
+
+ /*
+ * Set to true during an early dump phase for tdata's which are
+ * currently being dumped. New threads' tdata's have this initialized
+ * to false so that they aren't accidentally included in later dump
+ * phases.
+ */
+ bool dumping;
+
+ /*
+ * True if profiling is active for this tdata's thread
+ * (thread.prof.active mallctl).
+ */
+ bool active;
+
+ /* Temporary storage for summation during dump. */
+ prof_cnt_t cnt_summed;
+
+ /* Backtrace vector, used for calls to prof_backtrace(). */
+ void *vec[PROF_BT_MAX];
};
+typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern bool opt_prof;
-/*
- * Even if opt_prof is true, sampling can be temporarily disabled by setting
- * opt_prof_active to false. No locking is used when updating opt_prof_active,
- * so there are no guarantees regarding how long it will take for all threads
- * to notice state changes.
- */
extern bool opt_prof_active;
+extern bool opt_prof_thread_active_init;
extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
extern bool opt_prof_gdump; /* High-water memory dumping. */
@@ -211,6 +259,12 @@ extern char opt_prof_prefix[
#endif
1];
+/* Accessed via prof_active_[gs]et{_unlocked,}(). */
+extern bool prof_active;
+
+/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
+extern bool prof_gdump_val;
+
/*
* Profile dump interval, measured in bytes allocated. Each arena triggers a
* profile dump when it reaches this threshold. The effect is that the
@@ -221,391 +275,269 @@ extern char opt_prof_prefix[
extern uint64_t prof_interval;
/*
- * If true, promote small sampled objects to large objects, since small run
- * headers do not have embedded profile context pointers.
+ * Initialized as opt_lg_prof_sample, and potentially modified during profiling
+ * resets.
*/
-extern bool prof_promote;
+extern size_t lg_prof_sample;
+void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
+void prof_malloc_sample_object(const void *ptr, size_t usize,
+ prof_tctx_t *tctx);
+void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
void bt_init(prof_bt_t *bt, void **vec);
-void prof_backtrace(prof_bt_t *bt, unsigned nignore);
-prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
+void prof_backtrace(prof_bt_t *bt);
+prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
#ifdef JEMALLOC_JET
+size_t prof_tdata_count(void);
size_t prof_bt_count(void);
+const prof_cnt_t *prof_cnt_all(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
+typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *);
+extern prof_dump_header_t *prof_dump_header;
#endif
void prof_idump(void);
bool prof_mdump(const char *filename);
void prof_gdump(void);
-prof_tdata_t *prof_tdata_init(void);
-void prof_tdata_cleanup(void *arg);
+prof_tdata_t *prof_tdata_init(tsd_t *tsd);
+prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
+void prof_reset(tsd_t *tsd, size_t lg_sample);
+void prof_tdata_cleanup(tsd_t *tsd);
+const char *prof_thread_name_get(void);
+bool prof_active_get(void);
+bool prof_active_set(bool active);
+int prof_thread_name_set(tsd_t *tsd, const char *thread_name);
+bool prof_thread_active_get(void);
+bool prof_thread_active_set(bool active);
+bool prof_thread_active_init_get(void);
+bool prof_thread_active_init_set(bool active_init);
+bool prof_gdump_get(void);
+bool prof_gdump_set(bool active);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(void);
void prof_prefork(void);
void prof_postfork_parent(void);
void prof_postfork_child(void);
+void prof_sample_threshold_update(prof_tdata_t *tdata);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
-#define PROF_ALLOC_PREP(nignore, size, ret) do { \
- prof_tdata_t *prof_tdata; \
- prof_bt_t bt; \
- \
- assert(size == s2u(size)); \
- \
- prof_tdata = prof_tdata_get(true); \
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \
- if (prof_tdata != NULL) \
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- else \
- ret = NULL; \
- break; \
- } \
- \
- if (opt_prof_active == false) { \
- /* Sampling is currently inactive, so avoid sampling. */\
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- } else if (opt_lg_prof_sample == 0) { \
- /* Don't bother with sampling logic, since sampling */\
- /* interval is 1. */\
- bt_init(&bt, prof_tdata->vec); \
- prof_backtrace(&bt, nignore); \
- ret = prof_lookup(&bt); \
- } else { \
- if (prof_tdata->threshold == 0) { \
- /* Initialize. Seed the prng differently for */\
- /* each thread. */\
- prof_tdata->prng_state = \
- (uint64_t)(uintptr_t)&size; \
- prof_sample_threshold_update(prof_tdata); \
- } \
- \
- /* Determine whether to capture a backtrace based on */\
- /* whether size is enough for prof_accum to reach */\
- /* prof_tdata->threshold. However, delay updating */\
- /* these variables until prof_{m,re}alloc(), because */\
- /* we don't know for sure that the allocation will */\
- /* succeed. */\
- /* */\
- /* Use subtraction rather than addition to avoid */\
- /* potential integer overflow. */\
- if (size >= prof_tdata->threshold - \
- prof_tdata->accum) { \
- bt_init(&bt, prof_tdata->vec); \
- prof_backtrace(&bt, nignore); \
- ret = prof_lookup(&bt); \
- } else \
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- } \
-} while (0)
-
#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
-
-prof_tdata_t *prof_tdata_get(bool create);
-void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
-prof_ctx_t *prof_ctx_get(const void *ptr);
-void prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
-bool prof_sample_accum_update(size_t size);
-void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
-void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
- size_t old_usize, prof_ctx_t *old_ctx);
-void prof_free(const void *ptr, size_t size);
+bool prof_active_get_unlocked(void);
+bool prof_gdump_get_unlocked(void);
+prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create);
+bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
+ prof_tdata_t **tdata_out);
+prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
+ bool update);
+prof_tctx_t *prof_tctx_get(const void *ptr);
+void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
+void prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+ prof_tctx_t *tctx);
+void prof_malloc_sample_object(const void *ptr, size_t usize,
+ prof_tctx_t *tctx);
+void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
+void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
+ prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr,
+ size_t old_usize, prof_tctx_t *old_tctx);
+void prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
-/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
-malloc_tsd_externs(prof_tdata, prof_tdata_t *)
-malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
- prof_tdata_cleanup)
+JEMALLOC_ALWAYS_INLINE bool
+prof_active_get_unlocked(void)
+{
+
+ /*
+ * Even if opt_prof is true, sampling can be temporarily disabled by
+ * setting prof_active to false. No locking is used when reading
+ * prof_active in the fast path, so there are no guarantees regarding
+ * how long it will take for all threads to notice state changes.
+ */
+ return (prof_active);
+}
-JEMALLOC_INLINE prof_tdata_t *
-prof_tdata_get(bool create)
+JEMALLOC_ALWAYS_INLINE bool
+prof_gdump_get_unlocked(void)
{
- prof_tdata_t *prof_tdata;
+
+ /*
+ * No locking is used when reading prof_gdump_val in the fast path, so
+ * there are no guarantees regarding how long it will take for all
+ * threads to notice state changes.
+ */
+ return (prof_gdump_val);
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tdata_t *
+prof_tdata_get(tsd_t *tsd, bool create)
+{
+ prof_tdata_t *tdata;
cassert(config_prof);
- prof_tdata = *prof_tdata_tsd_get();
- if (create && prof_tdata == NULL)
- prof_tdata = prof_tdata_init();
+ tdata = tsd_prof_tdata_get(tsd);
+ if (create) {
+ if (unlikely(tdata == NULL)) {
+ if (tsd_nominal(tsd)) {
+ tdata = prof_tdata_init(tsd);
+ tsd_prof_tdata_set(tsd, tdata);
+ }
+ } else if (unlikely(tdata->expired)) {
+ tdata = prof_tdata_reinit(tsd, tdata);
+ tsd_prof_tdata_set(tsd, tdata);
+ }
+ assert(tdata == NULL || tdata->attached);
+ }
- return (prof_tdata);
+ return (tdata);
}
-JEMALLOC_INLINE void
-prof_sample_threshold_update(prof_tdata_t *prof_tdata)
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_tctx_get(const void *ptr)
{
- /*
- * The body of this function is compiled out unless heap profiling is
- * enabled, so that it is possible to compile jemalloc with floating
- * point support completely disabled. Avoiding floating point code is
- * important on memory-constrained systems, but it also enables a
- * workaround for versions of glibc that don't properly save/restore
- * floating point registers during dynamic lazy symbol loading (which
- * internally calls into whatever malloc implementation happens to be
- * integrated into the application). Note that some compilers (e.g.
- * gcc 4.8) may use floating point registers for fast memory moves, so
- * jemalloc must be compiled with such optimizations disabled (e.g.
- * -mno-sse) in order for the workaround to be complete.
- */
-#ifdef JEMALLOC_PROF
- uint64_t r;
- double u;
cassert(config_prof);
+ assert(ptr != NULL);
- /*
- * Compute sample threshold as a geometrically distributed random
- * variable with mean (2^opt_lg_prof_sample).
- *
- * __ __
- * | log(u) | 1
- * prof_tdata->threshold = | -------- |, where p = -------------------
- * | log(1-p) | opt_lg_prof_sample
- * 2
- *
- * For more information on the math, see:
- *
- * Non-Uniform Random Variate Generation
- * Luc Devroye
- * Springer-Verlag, New York, 1986
- * pp 500
- * (http://luc.devroye.org/rnbookindex.html)
- */
- prng64(r, 53, prof_tdata->prng_state,
- UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
- u = (double)r * (1.0/9007199254740992.0L);
- prof_tdata->threshold = (uint64_t)(log(u) /
- log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
- + (uint64_t)1U;
-#endif
+ return (arena_prof_tctx_get(ptr));
}
-JEMALLOC_INLINE prof_ctx_t *
-prof_ctx_get(const void *ptr)
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx)
{
- prof_ctx_t *ret;
- arena_chunk_t *chunk;
cassert(config_prof);
assert(ptr != NULL);
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- ret = arena_prof_ctx_get(ptr);
- } else
- ret = huge_prof_ctx_get(ptr);
-
- return (ret);
+ arena_prof_tctx_set(ptr, usize, tctx);
}
-JEMALLOC_INLINE void
-prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr,
+ prof_tctx_t *old_tctx)
{
- arena_chunk_t *chunk;
cassert(config_prof);
assert(ptr != NULL);
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- arena_prof_ctx_set(ptr, usize, ctx);
- } else
- huge_prof_ctx_set(ptr, ctx);
+ arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
}
-JEMALLOC_INLINE bool
-prof_sample_accum_update(size_t size)
+JEMALLOC_ALWAYS_INLINE bool
+prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
+ prof_tdata_t **tdata_out)
{
- prof_tdata_t *prof_tdata;
+ prof_tdata_t *tdata;
cassert(config_prof);
- /* Sampling logic is unnecessary if the interval is 1. */
- assert(opt_lg_prof_sample != 0);
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ tdata = prof_tdata_get(tsd, true);
+ if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ tdata = NULL;
+
+ if (tdata_out != NULL)
+ *tdata_out = tdata;
+
+ if (tdata == NULL)
return (true);
- /* Take care to avoid integer overflow. */
- if (size >= prof_tdata->threshold - prof_tdata->accum) {
- prof_tdata->accum -= (prof_tdata->threshold - size);
- /* Compute new sample threshold. */
- prof_sample_threshold_update(prof_tdata);
- while (prof_tdata->accum >= prof_tdata->threshold) {
- prof_tdata->accum -= prof_tdata->threshold;
- prof_sample_threshold_update(prof_tdata);
- }
- return (false);
- } else {
- prof_tdata->accum += size;
+ if (tdata->bytes_until_sample >= usize) {
+ if (update)
+ tdata->bytes_until_sample -= usize;
return (true);
+ } else {
+ /* Compute new sample threshold. */
+ if (update)
+ prof_sample_threshold_update(tdata);
+ return (!tdata->active);
}
}
-JEMALLOC_INLINE void
-prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update)
+{
+ prof_tctx_t *ret;
+ prof_tdata_t *tdata;
+ prof_bt_t bt;
+
+ assert(usize == s2u(usize));
+
+ if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
+ &tdata)))
+ ret = (prof_tctx_t *)(uintptr_t)1U;
+ else {
+ bt_init(&bt, tdata->vec);
+ prof_backtrace(&bt);
+ ret = prof_lookup(tsd, &bt);
+ }
+
+ return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
{
cassert(config_prof);
assert(ptr != NULL);
assert(usize == isalloc(ptr, true));
- if (opt_lg_prof_sample != 0) {
- if (prof_sample_accum_update(usize)) {
- /*
- * Don't sample. For malloc()-like allocation, it is
- * always possible to tell in advance how large an
- * object's usable size will be, so there should never
- * be a difference between the usize passed to
- * PROF_ALLOC_PREP() and prof_malloc().
- */
- assert((uintptr_t)cnt == (uintptr_t)1U);
- }
- }
-
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, usize, cnt->ctx);
-
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += usize;
- if (opt_prof_accum) {
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += usize;
- }
- /*********/
- mb_write();
- /*********/
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- } else
- prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
+ if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
+ prof_malloc_sample_object(ptr, usize, tctx);
+ else
+ prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U);
}
-JEMALLOC_INLINE void
-prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
- size_t old_usize, prof_ctx_t *old_ctx)
+JEMALLOC_ALWAYS_INLINE void
+prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
+ bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
+ prof_tctx_t *old_tctx)
{
- prof_thr_cnt_t *told_cnt;
+ bool sampled, old_sampled;
cassert(config_prof);
- assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
+ assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
- if (ptr != NULL) {
+ if (prof_active && !updated && ptr != NULL) {
assert(usize == isalloc(ptr, true));
- if (opt_lg_prof_sample != 0) {
- if (prof_sample_accum_update(usize)) {
- /*
- * Don't sample. The usize passed to
- * PROF_ALLOC_PREP() was larger than what
- * actually got allocated, so a backtrace was
- * captured for this allocation, even though
- * its actual usize was insufficient to cross
- * the sample threshold.
- */
- cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
- }
- }
- }
-
- if ((uintptr_t)old_ctx > (uintptr_t)1U) {
- told_cnt = prof_lookup(old_ctx->bt);
- if (told_cnt == NULL) {
+ if (prof_sample_accum_update(tsd, usize, true, NULL)) {
/*
- * It's too late to propagate OOM for this realloc(),
- * so operate directly on old_cnt->ctx->cnt_merged.
+ * Don't sample. The usize passed to prof_alloc_prep()
+ * was larger than what actually got allocated, so a
+ * backtrace was captured for this allocation, even
+ * though its actual usize was insufficient to cross the
+ * sample threshold.
*/
- malloc_mutex_lock(old_ctx->lock);
- old_ctx->cnt_merged.curobjs--;
- old_ctx->cnt_merged.curbytes -= old_usize;
- malloc_mutex_unlock(old_ctx->lock);
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+ tctx = (prof_tctx_t *)(uintptr_t)1U;
}
- } else
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
-
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, usize, cnt->ctx);
- cnt->epoch++;
- } else if (ptr != NULL)
- prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U) {
- told_cnt->cnts.curobjs--;
- told_cnt->cnts.curbytes -= old_usize;
}
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += usize;
- if (opt_prof_accum) {
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += usize;
- }
- }
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U)
- cnt->epoch++;
- /*********/
- mb_write(); /* Not strictly necessary. */
+
+ sampled = ((uintptr_t)tctx > (uintptr_t)1U);
+ old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
+
+ if (unlikely(sampled))
+ prof_malloc_sample_object(ptr, usize, tctx);
+ else
+ prof_tctx_reset(ptr, usize, old_ptr, old_tctx);
+
+ if (unlikely(old_sampled))
+ prof_free_sampled_object(tsd, old_usize, old_tctx);
}
-JEMALLOC_INLINE void
-prof_free(const void *ptr, size_t size)
+JEMALLOC_ALWAYS_INLINE void
+prof_free(tsd_t *tsd, const void *ptr, size_t usize)
{
- prof_ctx_t *ctx = prof_ctx_get(ptr);
+ prof_tctx_t *tctx = prof_tctx_get(ptr);
cassert(config_prof);
+ assert(usize == isalloc(ptr, true));
- if ((uintptr_t)ctx > (uintptr_t)1) {
- prof_thr_cnt_t *tcnt;
- assert(size == isalloc(ptr, true));
- tcnt = prof_lookup(ctx->bt);
-
- if (tcnt != NULL) {
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- tcnt->cnts.curobjs--;
- tcnt->cnts.curbytes -= size;
- /*********/
- mb_write();
- /*********/
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- } else {
- /*
- * OOM during free() cannot be propagated, so operate
- * directly on cnt->ctx->cnt_merged.
- */
- malloc_mutex_lock(ctx->lock);
- ctx->cnt_merged.curobjs--;
- ctx->cnt_merged.curbytes -= size;
- malloc_mutex_unlock(ctx->lock);
- }
- }
+ if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
+ prof_free_sampled_object(tsd, usize, tctx);
}
#endif
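
Note on the sampling fast path introduced above: prof_sample_accum_update() now keeps a per-thread bytes_until_sample budget and only triggers a backtrace capture (via prof_alloc_prep()/prof_lookup()) once the budget is exhausted. The following standalone C sketch illustrates that counter-based decision only; the tdata_sketch_t type, should_sample() helper, and the fixed reset value are illustrative, whereas the real code re-randomizes the threshold geometrically in prof_sample_threshold_update().

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
	uint64_t bytes_until_sample;	/* Remaining byte budget before next sample. */
} tdata_sketch_t;

/* Return true if an allocation of usize bytes should be sampled. */
static bool
should_sample(tdata_sketch_t *tdata, size_t usize)
{
	if (tdata->bytes_until_sample >= usize) {
		/* Fast path: consume budget, do not sample. */
		tdata->bytes_until_sample -= usize;
		return false;
	}
	/* Budget exhausted: reset it (illustrative fixed value) and sample. */
	tdata->bytes_until_sample = (uint64_t)1 << 19;
	return true;
}

int
main(void)
{
	tdata_sketch_t tdata = { (uint64_t)1 << 19 };
	size_t sizes[] = { 4096, 262144, 262144, 64, 1048576 };
	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		printf("alloc %7zu bytes -> %s\n", sizes[i],
		    should_sample(&tdata, sizes[i]) ? "sampled" : "not sampled");
	}
	return 0;
}
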
diff --git a/deps/jemalloc/include/jemalloc/internal/ql.h b/deps/jemalloc/include/jemalloc/internal/ql.h
index f70c5f6f3..1834bb855 100644
--- a/deps/jemalloc/include/jemalloc/internal/ql.h
+++ b/deps/jemalloc/include/jemalloc/internal/ql.h
@@ -1,6 +1,4 @@
-/*
- * List definitions.
- */
+/* List definitions. */
#define ql_head(a_type) \
struct { \
a_type *qlh_first; \
diff --git a/deps/jemalloc/include/jemalloc/internal/qr.h b/deps/jemalloc/include/jemalloc/internal/qr.h
index 602944b9b..0fbaec25e 100644
--- a/deps/jemalloc/include/jemalloc/internal/qr.h
+++ b/deps/jemalloc/include/jemalloc/internal/qr.h
@@ -40,8 +40,10 @@ struct { \
(a_qr_b)->a_field.qre_prev = t; \
} while (0)
-/* qr_meld() and qr_split() are functionally equivalent, so there's no need to
- * have two copies of the code. */
+/*
+ * qr_meld() and qr_split() are functionally equivalent, so there's no need to
+ * have two copies of the code.
+ */
#define qr_split(a_qr_a, a_qr_b, a_field) \
qr_meld((a_qr_a), (a_qr_b), a_field)
diff --git a/deps/jemalloc/include/jemalloc/internal/quarantine.h b/deps/jemalloc/include/jemalloc/internal/quarantine.h
index 16f677f73..ae607399f 100644
--- a/deps/jemalloc/include/jemalloc/internal/quarantine.h
+++ b/deps/jemalloc/include/jemalloc/internal/quarantine.h
@@ -29,36 +29,29 @@ struct quarantine_s {
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-quarantine_t *quarantine_init(size_t lg_maxobjs);
-void quarantine(void *ptr);
-void quarantine_cleanup(void *arg);
-bool quarantine_boot(void);
+void quarantine_alloc_hook_work(tsd_t *tsd);
+void quarantine(tsd_t *tsd, void *ptr);
+void quarantine_cleanup(tsd_t *tsd);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), quarantine, quarantine_t *)
-
void quarantine_alloc_hook(void);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_))
-malloc_tsd_externs(quarantine, quarantine_t *)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, quarantine, quarantine_t *, NULL,
- quarantine_cleanup)
-
JEMALLOC_ALWAYS_INLINE void
quarantine_alloc_hook(void)
{
- quarantine_t *quarantine;
+ tsd_t *tsd;
assert(config_fill && opt_quarantine);
- quarantine = *quarantine_tsd_get();
- if (quarantine == NULL)
- quarantine_init(LG_MAXOBJS_INIT);
+ tsd = tsd_fetch();
+ if (tsd_quarantine_get(tsd) == NULL)
+ quarantine_alloc_hook_work(tsd);
}
#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/rb.h b/deps/jemalloc/include/jemalloc/internal/rb.h
index 423802eb2..2ca8e5933 100644
--- a/deps/jemalloc/include/jemalloc/internal/rb.h
+++ b/deps/jemalloc/include/jemalloc/internal/rb.h
@@ -158,6 +158,8 @@ struct { \
#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \
a_attr void \
a_prefix##new(a_rbt_type *rbtree); \
+a_attr bool \
+a_prefix##empty(a_rbt_type *rbtree); \
a_attr a_type * \
a_prefix##first(a_rbt_type *rbtree); \
a_attr a_type * \
@@ -198,7 +200,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
* int (a_cmp *)(a_type *a_node, a_type *a_other);
* ^^^^^^
* or a_key
- * Interpretation of comparision function return values:
+ * Interpretation of comparison function return values:
* -1 : a_node < a_other
* 0 : a_node == a_other
* 1 : a_node > a_other
@@ -224,6 +226,13 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
* Args:
* tree: Pointer to an uninitialized red-black tree object.
*
+ * static bool
+ * ex_empty(ex_t *tree);
+ * Description: Determine whether tree is empty.
+ * Args:
+ * tree: Pointer to an initialized red-black tree object.
+ * Ret: True if tree is empty, false otherwise.
+ *
* static ex_node_t *
* ex_first(ex_t *tree);
* static ex_node_t *
@@ -309,6 +318,10 @@ a_attr void \
a_prefix##new(a_rbt_type *rbtree) { \
rb_new(a_type, a_field, rbtree); \
} \
+a_attr bool \
+a_prefix##empty(a_rbt_type *rbtree) { \
+ return (rbtree->rbt_root == &rbtree->rbt_nil); \
+} \
a_attr a_type * \
a_prefix##first(a_rbt_type *rbtree) { \
a_type *ret; \
@@ -580,7 +593,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
if (left != &rbtree->rbt_nil) { \
/* node has no successor, but it has a left child. */\
/* Splice node out, without losing the left child. */\
- assert(rbtn_red_get(a_type, a_field, node) == false); \
+ assert(!rbtn_red_get(a_type, a_field, node)); \
assert(rbtn_red_get(a_type, a_field, left)); \
rbtn_black_set(a_type, a_field, left); \
if (pathp == path) { \
@@ -616,8 +629,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
if (pathp->cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp->node, \
pathp[1].node); \
- assert(rbtn_red_get(a_type, a_field, pathp[1].node) \
- == false); \
+ assert(!rbtn_red_get(a_type, a_field, pathp[1].node)); \
if (rbtn_red_get(a_type, a_field, pathp->node)) { \
a_type *right = rbtn_right_get(a_type, a_field, \
pathp->node); \
@@ -681,7 +693,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
/* Balance restored, but rotation modified */\
- /* subree root, which may actually be the tree */\
+ /* subtree root, which may actually be the tree */\
/* root. */\
if (pathp == path) { \
/* Set root. */ \
@@ -849,7 +861,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
} \
/* Set root. */ \
rbtree->rbt_root = path->node; \
- assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \
+ assert(!rbtn_red_get(a_type, a_field, rbtree->rbt_root)); \
} \
a_attr a_type * \
a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \
diff --git a/deps/jemalloc/include/jemalloc/internal/rtree.h b/deps/jemalloc/include/jemalloc/internal/rtree.h
index bc74769f5..28ae9d1dd 100644
--- a/deps/jemalloc/include/jemalloc/internal/rtree.h
+++ b/deps/jemalloc/include/jemalloc/internal/rtree.h
@@ -1,170 +1,292 @@
/*
* This radix tree implementation is tailored to the singular purpose of
- * tracking which chunks are currently owned by jemalloc. This functionality
- * is mandatory for OS X, where jemalloc must be able to respond to object
- * ownership queries.
+ * associating metadata with chunks that are currently owned by jemalloc.
*
*******************************************************************************
*/
#ifdef JEMALLOC_H_TYPES
+typedef struct rtree_node_elm_s rtree_node_elm_t;
+typedef struct rtree_level_s rtree_level_t;
typedef struct rtree_s rtree_t;
/*
- * Size of each radix tree node (must be a power of 2). This impacts tree
- * depth.
+ * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the
+ * machine address width.
*/
-#define RTREE_NODESIZE (1U << 16)
+#define LG_RTREE_BITS_PER_LEVEL 4
+#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL)
+#define RTREE_HEIGHT_MAX \
+ ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL)
-typedef void *(rtree_alloc_t)(size_t);
-typedef void (rtree_dalloc_t)(void *);
+/* Used for two-stage lock-free node initialization. */
+#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1)
+
+/*
+ * The node allocation callback function's argument is the number of contiguous
+ * rtree_node_elm_t structures to allocate, and the resulting memory must be
+ * zeroed.
+ */
+typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t);
+typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *);
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
+struct rtree_node_elm_s {
+ union {
+ void *pun;
+ rtree_node_elm_t *child;
+ extent_node_t *val;
+ };
+};
+
+struct rtree_level_s {
+ /*
+ * A non-NULL subtree points to a subtree rooted along the hypothetical
+ * path to the leaf node corresponding to key 0. Depending on what keys
+ * have been used to store to the tree, an arbitrary combination of
+ * subtree pointers may remain NULL.
+ *
+ * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4.
+ * This results in a 3-level tree, and the leftmost leaf can be directly
+ * accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding
+ * 0x00000000) can be accessed via subtrees[1], and the remainder of the
+ * tree can be accessed via subtrees[0].
+ *
+ * levels[0] : [<unused> | 0x0001******** | 0x0002******** | ...]
+ *
+ * levels[1] : [<unused> | 0x00000001**** | 0x00000002**** | ... ]
+ *
+ * levels[2] : [val(0x000000000000) | val(0x000000000001) | ...]
+ *
+ * This has practical implications on x64, which currently uses only the
+ * lower 47 bits of virtual address space in userland, thus leaving
+ * subtrees[0] unused and avoiding a level of tree traversal.
+ */
+ union {
+ void *subtree_pun;
+ rtree_node_elm_t *subtree;
+ };
+ /* Number of key bits distinguished by this level. */
+ unsigned bits;
+ /*
+ * Cumulative number of key bits distinguished by traversing to
+ * corresponding tree level.
+ */
+ unsigned cumbits;
+};
+
struct rtree_s {
- rtree_alloc_t *alloc;
- rtree_dalloc_t *dalloc;
- malloc_mutex_t mutex;
- void **root;
- unsigned height;
- unsigned level2bits[1]; /* Dynamically sized. */
+ rtree_node_alloc_t *alloc;
+ rtree_node_dalloc_t *dalloc;
+ unsigned height;
+ /*
+ * Precomputed table used to convert from the number of leading 0 key
+ * bits to which subtree level to start at.
+ */
+ unsigned start_level[RTREE_HEIGHT_MAX];
+ rtree_level_t levels[RTREE_HEIGHT_MAX];
};
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-rtree_t *rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc);
+bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc,
+ rtree_node_dalloc_t *dalloc);
void rtree_delete(rtree_t *rtree);
-void rtree_prefork(rtree_t *rtree);
-void rtree_postfork_parent(rtree_t *rtree);
-void rtree_postfork_child(rtree_t *rtree);
+rtree_node_elm_t *rtree_subtree_read_hard(rtree_t *rtree,
+ unsigned level);
+rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree,
+ rtree_node_elm_t *elm, unsigned level);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
-#ifdef JEMALLOC_DEBUG
-uint8_t rtree_get_locked(rtree_t *rtree, uintptr_t key);
-#endif
-uint8_t rtree_get(rtree_t *rtree, uintptr_t key);
-bool rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val);
+unsigned rtree_start_level(rtree_t *rtree, uintptr_t key);
+uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level);
+
+bool rtree_node_valid(rtree_node_elm_t *node);
+rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm);
+rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
+ unsigned level);
+extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
+ bool dependent);
+void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
+ const extent_node_t *val);
+rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level);
+rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level);
+
+extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
+bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
-#define RTREE_GET_GENERATE(f) \
-/* The least significant bits of the key are ignored. */ \
-JEMALLOC_INLINE uint8_t \
-f(rtree_t *rtree, uintptr_t key) \
-{ \
- uint8_t ret; \
- uintptr_t subkey; \
- unsigned i, lshift, height, bits; \
- void **node, **child; \
- \
- RTREE_LOCK(&rtree->mutex); \
- for (i = lshift = 0, height = rtree->height, node = rtree->root;\
- i < height - 1; \
- i++, lshift += bits, node = child) { \
- bits = rtree->level2bits[i]; \
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \
- 3)) - bits); \
- child = (void**)node[subkey]; \
- if (child == NULL) { \
- RTREE_UNLOCK(&rtree->mutex); \
- return (0); \
- } \
- } \
- \
- /* \
- * node is a leaf, so it contains values rather than node \
- * pointers. \
- */ \
- bits = rtree->level2bits[i]; \
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \
- bits); \
- { \
- uint8_t *leaf = (uint8_t *)node; \
- ret = leaf[subkey]; \
- } \
- RTREE_UNLOCK(&rtree->mutex); \
- \
- RTREE_GET_VALIDATE \
- return (ret); \
+JEMALLOC_INLINE unsigned
+rtree_start_level(rtree_t *rtree, uintptr_t key)
+{
+ unsigned start_level;
+
+ if (unlikely(key == 0))
+ return (rtree->height - 1);
+
+ start_level = rtree->start_level[lg_floor(key) >>
+ LG_RTREE_BITS_PER_LEVEL];
+ assert(start_level < rtree->height);
+ return (start_level);
}
-#ifdef JEMALLOC_DEBUG
-# define RTREE_LOCK(l) malloc_mutex_lock(l)
-# define RTREE_UNLOCK(l) malloc_mutex_unlock(l)
-# define RTREE_GET_VALIDATE
-RTREE_GET_GENERATE(rtree_get_locked)
-# undef RTREE_LOCK
-# undef RTREE_UNLOCK
-# undef RTREE_GET_VALIDATE
-#endif
+JEMALLOC_INLINE uintptr_t
+rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level)
+{
-#define RTREE_LOCK(l)
-#define RTREE_UNLOCK(l)
-#ifdef JEMALLOC_DEBUG
- /*
- * Suppose that it were possible for a jemalloc-allocated chunk to be
- * munmap()ped, followed by a different allocator in another thread re-using
- * overlapping virtual memory, all without invalidating the cached rtree
- * value. The result would be a false positive (the rtree would claim that
- * jemalloc owns memory that it had actually discarded). This scenario
- * seems impossible, but the following assertion is a prudent sanity check.
- */
-# define RTREE_GET_VALIDATE \
- assert(rtree_get_locked(rtree, key) == ret);
-#else
-# define RTREE_GET_VALIDATE
-#endif
-RTREE_GET_GENERATE(rtree_get)
-#undef RTREE_LOCK
-#undef RTREE_UNLOCK
-#undef RTREE_GET_VALIDATE
+ return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
+ rtree->levels[level].cumbits)) & ((ZU(1) <<
+ rtree->levels[level].bits) - 1));
+}
JEMALLOC_INLINE bool
-rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val)
+rtree_node_valid(rtree_node_elm_t *node)
+{
+
+ return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_child_tryread(rtree_node_elm_t *elm)
+{
+ rtree_node_elm_t *child;
+
+	/* Double-checked read (first read may be stale). */
+ child = elm->child;
+ if (!rtree_node_valid(child))
+ child = atomic_read_p(&elm->pun);
+ return (child);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level)
+{
+ rtree_node_elm_t *child;
+
+ child = rtree_child_tryread(elm);
+ if (unlikely(!rtree_node_valid(child)))
+ child = rtree_child_read_hard(rtree, elm, level);
+ return (child);
+}
+
+JEMALLOC_INLINE extent_node_t *
+rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent)
+{
+
+ if (dependent) {
+ /*
+ * Reading a val on behalf of a pointer to a valid allocation is
+ * guaranteed to be a clean read even without synchronization,
+ * because the rtree update became visible in memory before the
+ * pointer came into existence.
+ */
+ return (elm->val);
+ } else {
+ /*
+ * An arbitrary read, e.g. on behalf of ivsalloc(), may not be
+ * dependent on a previous rtree write, which means a stale read
+ * could result if synchronization were omitted here.
+ */
+ return (atomic_read_p(&elm->pun));
+ }
+}
+
+JEMALLOC_INLINE void
+rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val)
+{
+
+ atomic_write_p(&elm->pun, val);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_subtree_tryread(rtree_t *rtree, unsigned level)
+{
+ rtree_node_elm_t *subtree;
+
+	/* Double-checked read (first read may be stale). */
+ subtree = rtree->levels[level].subtree;
+ if (!rtree_node_valid(subtree))
+ subtree = atomic_read_p(&rtree->levels[level].subtree_pun);
+ return (subtree);
+}
+
+JEMALLOC_INLINE rtree_node_elm_t *
+rtree_subtree_read(rtree_t *rtree, unsigned level)
+{
+ rtree_node_elm_t *subtree;
+
+ subtree = rtree_subtree_tryread(rtree, level);
+ if (unlikely(!rtree_node_valid(subtree)))
+ subtree = rtree_subtree_read_hard(rtree, level);
+ return (subtree);
+}
+
+JEMALLOC_INLINE extent_node_t *
+rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
{
uintptr_t subkey;
- unsigned i, lshift, height, bits;
- void **node, **child;
-
- malloc_mutex_lock(&rtree->mutex);
- for (i = lshift = 0, height = rtree->height, node = rtree->root;
- i < height - 1;
- i++, lshift += bits, node = child) {
- bits = rtree->level2bits[i];
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
- bits);
- child = (void**)node[subkey];
- if (child == NULL) {
- size_t size = ((i + 1 < height - 1) ? sizeof(void *)
- : (sizeof(uint8_t))) << rtree->level2bits[i+1];
- child = (void**)rtree->alloc(size);
- if (child == NULL) {
- malloc_mutex_unlock(&rtree->mutex);
- return (true);
- }
- memset(child, 0, size);
- node[subkey] = child;
+ unsigned i, start_level;
+ rtree_node_elm_t *node, *child;
+
+ start_level = rtree_start_level(rtree, key);
+
+ for (i = start_level, node = rtree_subtree_tryread(rtree, start_level);
+ /**/; i++, node = child) {
+ if (!dependent && unlikely(!rtree_node_valid(node)))
+ return (NULL);
+ subkey = rtree_subkey(rtree, key, i);
+ if (i == rtree->height - 1) {
+ /*
+ * node is a leaf, so it contains values rather than
+ * child pointers.
+ */
+ return (rtree_val_read(rtree, &node[subkey],
+ dependent));
}
+ assert(i < rtree->height - 1);
+ child = rtree_child_tryread(&node[subkey]);
}
+ not_reached();
+}
- /* node is a leaf, so it contains values rather than node pointers. */
- bits = rtree->level2bits[i];
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits);
- {
- uint8_t *leaf = (uint8_t *)node;
- leaf[subkey] = val;
- }
- malloc_mutex_unlock(&rtree->mutex);
+JEMALLOC_INLINE bool
+rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val)
+{
+ uintptr_t subkey;
+ unsigned i, start_level;
+ rtree_node_elm_t *node, *child;
- return (false);
+ start_level = rtree_start_level(rtree, key);
+
+ node = rtree_subtree_read(rtree, start_level);
+ if (node == NULL)
+ return (true);
+ for (i = start_level; /**/; i++, node = child) {
+ subkey = rtree_subkey(rtree, key, i);
+ if (i == rtree->height - 1) {
+ /*
+ * node is a leaf, so it contains values rather than
+ * child pointers.
+ */
+ rtree_val_write(rtree, &node[subkey], val);
+ return (false);
+ }
+ assert(i + 1 < rtree->height);
+ child = rtree_child_read(rtree, &node[subkey], i);
+ if (child == NULL)
+ return (true);
+ }
+ not_reached();
}
#endif
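
The rtree_subkey() helper added above extracts one 16-bit chunk of the key per level, counted from the top of the pointer width. The sketch below reproduces only that masking arithmetic under stated assumptions: 64-bit pointers (LG_SIZEOF_PTR == 3), 16 bits per level (LG_RTREE_BITS_PER_LEVEL == 4), and a walk that starts at cumulative bits 32, mirroring the case in the rtree_level_s comment where the unused top 16 bits of an x86-64 user-space address skip a level. The key value is illustrative.

#include <stdint.h>
#include <stdio.h>

#define PTR_BITS	64
#define LEVEL_BITS	16

/* cumbits is the cumulative number of key bits consumed through this level. */
static uint64_t
subkey(uint64_t key, unsigned cumbits)
{
	return (key >> (PTR_BITS - cumbits)) & ((UINT64_C(1) << LEVEL_BITS) - 1);
}

int
main(void)
{
	uint64_t key = UINT64_C(0x00007f2a1c400000);	/* Illustrative chunk address. */
	for (unsigned cumbits = 32; cumbits <= PTR_BITS; cumbits += LEVEL_BITS) {
		printf("cumbits %2u -> subkey 0x%04llx\n", cumbits,
		    (unsigned long long)subkey(key, cumbits));
	}
	return 0;
}
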
diff --git a/deps/jemalloc/include/jemalloc/internal/size_classes.sh b/deps/jemalloc/include/jemalloc/internal/size_classes.sh
index 29c80c1fb..fc82036d3 100755
--- a/deps/jemalloc/include/jemalloc/internal/size_classes.sh
+++ b/deps/jemalloc/include/jemalloc/internal/size_classes.sh
@@ -1,17 +1,26 @@
#!/bin/sh
+#
+# Usage: size_classes.sh <lg_qarr> <lg_tmin> <lg_parr> <lg_g>
# The following limits are chosen such that they cover all supported platforms.
-# Range of quanta.
-lg_qmin=3
-lg_qmax=4
+# Pointer sizes.
+lg_zarr="2 3"
+
+# Quanta.
+lg_qarr=$1
# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)].
-lg_tmin=3
+lg_tmin=$2
+
+# Maximum lookup size.
+lg_kmax=12
+
+# Page sizes.
+lg_parr=`echo $3 | tr ',' ' '`
-# Range of page sizes.
-lg_pmin=12
-lg_pmax=16
+# Size class group size (number of size classes for each size doubling).
+lg_g=$4
pow2() {
e=$1
@@ -22,68 +31,224 @@ pow2() {
done
}
+lg() {
+ x=$1
+ lg_result=0
+ while [ ${x} -gt 1 ] ; do
+ lg_result=$((${lg_result} + 1))
+ x=$((${x} / 2))
+ done
+}
+
+size_class() {
+ index=$1
+ lg_grp=$2
+ lg_delta=$3
+ ndelta=$4
+ lg_p=$5
+ lg_kmax=$6
+
+ lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta}
+ if [ ${pow2_result} -lt ${ndelta} ] ; then
+ rem="yes"
+ else
+ rem="no"
+ fi
+
+ lg_size=${lg_grp}
+ if [ $((${lg_delta} + ${lg_ndelta})) -eq ${lg_grp} ] ; then
+ lg_size=$((${lg_grp} + 1))
+ else
+ lg_size=${lg_grp}
+ rem="yes"
+ fi
+
+ if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then
+ bin="yes"
+ else
+ bin="no"
+ fi
+ if [ ${lg_size} -lt ${lg_kmax} \
+ -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then
+ lg_delta_lookup=${lg_delta}
+ else
+ lg_delta_lookup="no"
+ fi
+ printf ' SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup}
+ # Defined upon return:
+ # - lg_delta_lookup (${lg_delta} or "no")
+ # - bin ("yes" or "no")
+}
+
+sep_line() {
+ echo " \\"
+}
+
+size_classes() {
+ lg_z=$1
+ lg_q=$2
+ lg_t=$3
+ lg_p=$4
+ lg_g=$5
+
+ pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result}
+ pow2 ${lg_g}; g=${pow2_result}
+
+ echo "#define SIZE_CLASSES \\"
+ echo " /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\"
+
+ ntbins=0
+ nlbins=0
+ lg_tiny_maxclass='"NA"'
+ nbins=0
+
+ # Tiny size classes.
+ ndelta=0
+ index=0
+ lg_grp=${lg_t}
+ lg_delta=${lg_grp}
+ while [ ${lg_grp} -lt ${lg_q} ] ; do
+ size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+ if [ ${lg_delta_lookup} != "no" ] ; then
+ nlbins=$((${index} + 1))
+ fi
+ if [ ${bin} != "no" ] ; then
+ nbins=$((${index} + 1))
+ fi
+ ntbins=$((${ntbins} + 1))
+ lg_tiny_maxclass=${lg_grp} # Final written value is correct.
+ index=$((${index} + 1))
+ lg_delta=${lg_grp}
+ lg_grp=$((${lg_grp} + 1))
+ done
+
+ # First non-tiny group.
+ if [ ${ntbins} -gt 0 ] ; then
+ sep_line
+ # The first size class has an unusual encoding, because the size has to be
+ # split between grp and delta*ndelta.
+ lg_grp=$((${lg_grp} - 1))
+ ndelta=1
+ size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+ index=$((${index} + 1))
+ lg_grp=$((${lg_grp} + 1))
+ lg_delta=$((${lg_delta} + 1))
+ fi
+ while [ ${ndelta} -lt ${g} ] ; do
+ size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+ index=$((${index} + 1))
+ ndelta=$((${ndelta} + 1))
+ done
+
+ # All remaining groups.
+ lg_grp=$((${lg_grp} + ${lg_g}))
+ while [ ${lg_grp} -lt ${ptr_bits} ] ; do
+ sep_line
+ ndelta=1
+ if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then
+ ndelta_limit=$((${g} - 1))
+ else
+ ndelta_limit=${g}
+ fi
+ while [ ${ndelta} -le ${ndelta_limit} ] ; do
+ size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+ if [ ${lg_delta_lookup} != "no" ] ; then
+ nlbins=$((${index} + 1))
+ # Final written value is correct:
+ lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
+ fi
+ if [ ${bin} != "no" ] ; then
+ nbins=$((${index} + 1))
+ # Final written value is correct:
+ small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
+ if [ ${lg_g} -gt 0 ] ; then
+ lg_large_minclass=$((${lg_grp} + 1))
+ else
+ lg_large_minclass=$((${lg_grp} + 2))
+ fi
+ fi
+ # Final written value is correct:
+ huge_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
+ index=$((${index} + 1))
+ ndelta=$((${ndelta} + 1))
+ done
+ lg_grp=$((${lg_grp} + 1))
+ lg_delta=$((${lg_delta} + 1))
+ done
+ echo
+ nsizes=${index}
+
+ # Defined upon completion:
+ # - ntbins
+ # - nlbins
+ # - nbins
+ # - nsizes
+ # - lg_tiny_maxclass
+ # - lookup_maxclass
+ # - small_maxclass
+ # - lg_large_minclass
+ # - huge_maxclass
+}
+
cat <<EOF
/* This file was automatically generated by size_classes.sh. */
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
+/*
+ * This header requires LG_SIZEOF_PTR, LG_TINY_MIN, LG_QUANTUM, and LG_PAGE to
+ * be defined prior to inclusion, and it in turn defines:
+ *
+ * LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling.
+ * SIZE_CLASSES: Complete table of
+ * SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup)
+ * tuples.
+ * index: Size class index.
+ * lg_grp: Lg group base size (no deltas added).
+ * lg_delta: Lg delta to previous size class.
+ * ndelta: Delta multiplier. size == 1<<lg_grp + ndelta<<lg_delta
+ * bin: 'yes' if a small bin size class, 'no' otherwise.
+ * lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no'
+ * otherwise.
+ * NTBINS: Number of tiny bins.
+ * NLBINS: Number of bins supported by the lookup table.
+ * NBINS: Number of small size class bins.
+ * NSIZES: Number of size classes.
+ * LG_TINY_MAXCLASS: Lg of maximum tiny size class.
+ * LOOKUP_MAXCLASS: Maximum size class included in lookup table.
+ * SMALL_MAXCLASS: Maximum small size class.
+ * LG_LARGE_MINCLASS: Lg of minimum large size class.
+ * HUGE_MAXCLASS: Maximum (huge) size class.
+ */
+
+#define LG_SIZE_CLASS_GROUP ${lg_g}
+
EOF
-lg_q=${lg_qmin}
-while [ ${lg_q} -le ${lg_qmax} ] ; do
- lg_t=${lg_tmin}
- while [ ${lg_t} -le ${lg_q} ] ; do
- lg_p=${lg_pmin}
- while [ ${lg_p} -le ${lg_pmax} ] ; do
- echo "#if (LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
- echo "#define SIZE_CLASSES_DEFINED"
- pow2 ${lg_q}; q=${pow2_result}
- pow2 ${lg_t}; t=${pow2_result}
- pow2 ${lg_p}; p=${pow2_result}
- bin=0
- psz=0
- sz=${t}
- delta=$((${sz} - ${psz}))
- echo "/* SIZE_CLASS(bin, delta, sz) */"
- echo "#define SIZE_CLASSES \\"
-
- # Tiny size classes.
- while [ ${sz} -lt ${q} ] ; do
- echo " SIZE_CLASS(${bin}, ${delta}, ${sz}) \\"
- bin=$((${bin} + 1))
- psz=${sz}
- sz=$((${sz} + ${sz}))
- delta=$((${sz} - ${psz}))
- done
- # Quantum-multiple size classes. For each doubling of sz, as many as 4
- # size classes exist. Their spacing is the greater of:
- # - q
- # - sz/4, where sz is a power of 2
- while [ ${sz} -lt ${p} ] ; do
- if [ ${sz} -ge $((${q} * 4)) ] ; then
- i=$((${sz} / 4))
- else
- i=${q}
- fi
- next_2pow=$((${sz} * 2))
- while [ ${sz} -lt $next_2pow ] ; do
- echo " SIZE_CLASS(${bin}, ${delta}, ${sz}) \\"
- bin=$((${bin} + 1))
- psz=${sz}
- sz=$((${sz} + ${i}))
- delta=$((${sz} - ${psz}))
- done
+for lg_z in ${lg_zarr} ; do
+ for lg_q in ${lg_qarr} ; do
+ lg_t=${lg_tmin}
+ while [ ${lg_t} -le ${lg_q} ] ; do
+ # Iterate through page sizes and compute how many bins there are.
+ for lg_p in ${lg_parr} ; do
+ echo "#if (LG_SIZEOF_PTR == ${lg_z} && LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
+ size_classes ${lg_z} ${lg_q} ${lg_t} ${lg_p} ${lg_g}
+ echo "#define SIZE_CLASSES_DEFINED"
+ echo "#define NTBINS ${ntbins}"
+ echo "#define NLBINS ${nlbins}"
+ echo "#define NBINS ${nbins}"
+ echo "#define NSIZES ${nsizes}"
+ echo "#define LG_TINY_MAXCLASS ${lg_tiny_maxclass}"
+ echo "#define LOOKUP_MAXCLASS ${lookup_maxclass}"
+ echo "#define SMALL_MAXCLASS ${small_maxclass}"
+ echo "#define LG_LARGE_MINCLASS ${lg_large_minclass}"
+ echo "#define HUGE_MAXCLASS ${huge_maxclass}"
+ echo "#endif"
+ echo
done
- echo
- echo "#define NBINS ${bin}"
- echo "#define SMALL_MAXCLASS ${psz}"
- echo "#endif"
- echo
- lg_p=$((${lg_p} + 1))
+ lg_t=$((${lg_t} + 1))
done
- lg_t=$((${lg_t} + 1))
done
- lg_q=$((${lg_q} + 1))
done
cat <<EOF
@@ -92,11 +257,10 @@ cat <<EOF
#endif
#undef SIZE_CLASSES_DEFINED
/*
- * The small_size2bin lookup table uses uint8_t to encode each bin index, so we
+ * The size2index_tab lookup table uses uint8_t to encode each bin index, so we
* cannot support more than 256 small size classes. Further constrain NBINS to
- * 255 to support prof_promote, since all small size classes, plus a "not
- * small" size class must be stored in 8 bits of arena_chunk_map_t's bits
- * field.
+ * 255 since all small size classes, plus a "not small" size class must be
+ * stored in 8 bits of arena_chunk_map_bits_t's bits field.
*/
#if (NBINS > 255)
# error "Too many small size classes"
diff --git a/deps/jemalloc/include/jemalloc/internal/stats.h b/deps/jemalloc/include/jemalloc/internal/stats.h
index 27f68e368..c91dba99d 100644
--- a/deps/jemalloc/include/jemalloc/internal/stats.h
+++ b/deps/jemalloc/include/jemalloc/internal/stats.h
@@ -4,6 +4,7 @@
typedef struct tcache_bin_stats_s tcache_bin_stats_t;
typedef struct malloc_bin_stats_s malloc_bin_stats_t;
typedef struct malloc_large_stats_s malloc_large_stats_t;
+typedef struct malloc_huge_stats_s malloc_huge_stats_t;
typedef struct arena_stats_s arena_stats_t;
typedef struct chunk_stats_s chunk_stats_t;
@@ -21,12 +22,6 @@ struct tcache_bin_stats_s {
struct malloc_bin_stats_s {
/*
- * Current number of bytes allocated, including objects currently
- * cached by tcache.
- */
- size_t allocated;
-
- /*
* Total number of allocation/deallocation requests served directly by
* the bin. Note that tcache may allocate an object, then recycle it
* many times, resulting many increments to nrequests, but only one
@@ -42,6 +37,12 @@ struct malloc_bin_stats_s {
*/
uint64_t nrequests;
+ /*
+ * Current number of regions of this size class, including regions
+ * currently cached by tcache.
+ */
+ size_t curregs;
+
/* Number of tcache fills from this bin. */
uint64_t nfills;
@@ -78,10 +79,25 @@ struct malloc_large_stats_s {
*/
uint64_t nrequests;
- /* Current number of runs of this size class. */
+ /*
+ * Current number of runs of this size class, including runs currently
+ * cached by tcache.
+ */
size_t curruns;
};
+struct malloc_huge_stats_s {
+ /*
+ * Total number of allocation/deallocation requests served directly by
+ * the arena.
+ */
+ uint64_t nmalloc;
+ uint64_t ndalloc;
+
+ /* Current number of (multi-)chunk allocations of this size class. */
+ size_t curhchunks;
+};
+
struct arena_stats_s {
/* Number of bytes currently mapped. */
size_t mapped;
@@ -95,34 +111,28 @@ struct arena_stats_s {
uint64_t nmadvise;
uint64_t purged;
+ /*
+ * Number of bytes currently mapped purely for metadata purposes, and
+ * number of bytes currently allocated for internal metadata.
+ */
+ size_t metadata_mapped;
+ size_t metadata_allocated; /* Protected via atomic_*_z(). */
+
/* Per-size-category statistics. */
size_t allocated_large;
uint64_t nmalloc_large;
uint64_t ndalloc_large;
uint64_t nrequests_large;
- /*
- * One element for each possible size class, including sizes that
- * overlap with bin size classes. This is necessary because ipalloc()
- * sometimes has to use such large objects in order to assure proper
- * alignment.
- */
- malloc_large_stats_t *lstats;
-};
-
-struct chunk_stats_s {
- /* Number of chunks that were allocated. */
- uint64_t nchunks;
+ size_t allocated_huge;
+ uint64_t nmalloc_huge;
+ uint64_t ndalloc_huge;
- /* High-water mark for number of chunks allocated. */
- size_t highchunks;
+ /* One element for each large size class. */
+ malloc_large_stats_t *lstats;
- /*
- * Current number of chunks allocated. This value isn't maintained for
- * any other purpose, so keep track of it in order to be able to set
- * highchunks.
- */
- size_t curchunks;
+ /* One element for each huge size class. */
+ malloc_huge_stats_t *hstats;
};
#endif /* JEMALLOC_H_STRUCTS */
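
The arena_stats_s changes above add metadata accounting (metadata_mapped, metadata_allocated) alongside the existing per-size-category counters. A hedged sketch of how an application might observe the new totals through the jemalloc 4.x public API follows; the "epoch", "stats.allocated", and "stats.metadata" mallctl names are taken from the jemalloc manual and should be treated as assumptions if your build differs.

#include <stdint.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	uint64_t epoch = 1;
	size_t esz = sizeof(epoch);
	size_t allocated, metadata, sz = sizeof(size_t);

	/* Refresh the cached statistics snapshot before reading it. */
	mallctl("epoch", &epoch, &esz, &epoch, esz);

	if (mallctl("stats.allocated", &allocated, &sz, NULL, 0) == 0 &&
	    mallctl("stats.metadata", &metadata, &sz, NULL, 0) == 0) {
		printf("application bytes: %zu, allocator metadata bytes: %zu\n",
		    allocated, metadata);
	}
	return 0;
}
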
diff --git a/deps/jemalloc/include/jemalloc/internal/tcache.h b/deps/jemalloc/include/jemalloc/internal/tcache.h
index c3d4b58d4..5079cd266 100644
--- a/deps/jemalloc/include/jemalloc/internal/tcache.h
+++ b/deps/jemalloc/include/jemalloc/internal/tcache.h
@@ -4,6 +4,7 @@
typedef struct tcache_bin_info_s tcache_bin_info_t;
typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t;
+typedef struct tcaches_s tcaches_t;
/*
* tcache pointers close to NULL are used to encode state information that is
@@ -16,6 +17,11 @@ typedef struct tcache_s tcache_t;
#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY
/*
+ * Absolute minimum number of cache slots for each small bin.
+ */
+#define TCACHE_NSLOTS_SMALL_MIN 20
+
+/*
* Absolute maximum number of cache slots for each small bin in the thread
* cache. This is an additional constraint beyond that imposed as: twice the
* number of regions per run for this size class.
@@ -69,10 +75,9 @@ struct tcache_bin_s {
struct tcache_s {
ql_elm(tcache_t) link; /* Used for aggregating stats. */
- uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */
- arena_t *arena; /* This thread's arena. */
+ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */
unsigned ev_cnt; /* Event count since incremental GC. */
- unsigned next_gc_bin; /* Next bin to GC. */
+ szind_t next_gc_bin; /* Next bin to GC. */
tcache_bin_t tbins[1]; /* Dynamically sized. */
/*
* The pointer stacks associated with tbins follow as a contiguous
@@ -82,6 +87,14 @@ struct tcache_s {
*/
};
+/* Linkage for list of available (previously used) explicit tcache IDs. */
+struct tcaches_s {
+ union {
+ tcache_t *tcache;
+ tcaches_t *next;
+ };
+};
+
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
@@ -95,84 +108,90 @@ extern tcache_bin_info_t *tcache_bin_info;
* Number of tcache bins. There are NBINS small-object bins, plus 0 or more
* large-object bins.
*/
-extern size_t nhbins;
+extern size_t nhbins;
/* Maximum cached size class. */
-extern size_t tcache_maxclass;
+extern size_t tcache_maxclass;
+
+/*
+ * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
+ * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are
+ * completely disjoint from this data structure. tcaches starts off as a sparse
+ * array, so it has no physical memory footprint until individual pages are
+ * touched. This allows the entire array to be allocated the first time an
+ * explicit tcache is created without a disproportionate impact on memory usage.
+ */
+extern tcaches_t *tcaches;
size_t tcache_salloc(const void *ptr);
-void tcache_event_hard(tcache_t *tcache);
-void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin,
- size_t binind);
-void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
- tcache_t *tcache);
-void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
- tcache_t *tcache);
+void tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
+void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+ tcache_bin_t *tbin, szind_t binind);
+void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
+ szind_t binind, unsigned rem);
+void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
+ unsigned rem, tcache_t *tcache);
void tcache_arena_associate(tcache_t *tcache, arena_t *arena);
-void tcache_arena_dissociate(tcache_t *tcache);
-tcache_t *tcache_create(arena_t *arena);
-void tcache_destroy(tcache_t *tcache);
-void tcache_thread_cleanup(void *arg);
+void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena,
+ arena_t *newarena);
+void tcache_arena_dissociate(tcache_t *tcache, arena_t *arena);
+tcache_t *tcache_get_hard(tsd_t *tsd);
+tcache_t *tcache_create(tsd_t *tsd, arena_t *arena);
+void tcache_cleanup(tsd_t *tsd);
+void tcache_enabled_cleanup(tsd_t *tsd);
void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
-bool tcache_boot0(void);
-bool tcache_boot1(void);
+bool tcaches_create(tsd_t *tsd, unsigned *r_ind);
+void tcaches_flush(tsd_t *tsd, unsigned ind);
+void tcaches_destroy(tsd_t *tsd, unsigned ind);
+bool tcache_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *)
-malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t)
-
-void tcache_event(tcache_t *tcache);
+void tcache_event(tsd_t *tsd, tcache_t *tcache);
void tcache_flush(void);
bool tcache_enabled_get(void);
-tcache_t *tcache_get(bool create);
+tcache_t *tcache_get(tsd_t *tsd, bool create);
void tcache_enabled_set(bool enabled);
void *tcache_alloc_easy(tcache_bin_t *tbin);
-void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero);
-void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero);
-void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind);
-void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size);
+void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+ size_t size, bool zero);
+void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+ size_t size, bool zero);
+void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr,
+ szind_t binind);
+void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr,
+ size_t size);
+tcache_t *tcaches_get(tsd_t *tsd, unsigned ind);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
-/* Map of thread-specific caches. */
-malloc_tsd_externs(tcache, tcache_t *)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL,
- tcache_thread_cleanup)
-/* Per thread flag that allows thread caches to be disabled. */
-malloc_tsd_externs(tcache_enabled, tcache_enabled_t)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t,
- tcache_enabled_default, malloc_tsd_no_cleanup)
-
JEMALLOC_INLINE void
tcache_flush(void)
{
- tcache_t *tcache;
+ tsd_t *tsd;
cassert(config_tcache);
- tcache = *tcache_tsd_get();
- if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX)
- return;
- tcache_destroy(tcache);
- tcache = NULL;
- tcache_tsd_set(&tcache);
+ tsd = tsd_fetch();
+ tcache_cleanup(tsd);
}
JEMALLOC_INLINE bool
tcache_enabled_get(void)
{
+ tsd_t *tsd;
tcache_enabled_t tcache_enabled;
cassert(config_tcache);
- tcache_enabled = *tcache_enabled_tsd_get();
+ tsd = tsd_fetch();
+ tcache_enabled = tsd_tcache_enabled_get(tsd);
if (tcache_enabled == tcache_enabled_default) {
tcache_enabled = (tcache_enabled_t)opt_tcache;
- tcache_enabled_tsd_set(&tcache_enabled);
+ tsd_tcache_enabled_set(tsd, tcache_enabled);
}
return ((bool)tcache_enabled);
@@ -181,85 +200,41 @@ tcache_enabled_get(void)
JEMALLOC_INLINE void
tcache_enabled_set(bool enabled)
{
+ tsd_t *tsd;
tcache_enabled_t tcache_enabled;
- tcache_t *tcache;
cassert(config_tcache);
+ tsd = tsd_fetch();
+
tcache_enabled = (tcache_enabled_t)enabled;
- tcache_enabled_tsd_set(&tcache_enabled);
- tcache = *tcache_tsd_get();
- if (enabled) {
- if (tcache == TCACHE_STATE_DISABLED) {
- tcache = NULL;
- tcache_tsd_set(&tcache);
- }
- } else /* disabled */ {
- if (tcache > TCACHE_STATE_MAX) {
- tcache_destroy(tcache);
- tcache = NULL;
- }
- if (tcache == NULL) {
- tcache = TCACHE_STATE_DISABLED;
- tcache_tsd_set(&tcache);
- }
- }
+ tsd_tcache_enabled_set(tsd, tcache_enabled);
+
+ if (!enabled)
+ tcache_cleanup(tsd);
}
JEMALLOC_ALWAYS_INLINE tcache_t *
-tcache_get(bool create)
+tcache_get(tsd_t *tsd, bool create)
{
tcache_t *tcache;
- if (config_tcache == false)
- return (NULL);
- if (config_lazy_lock && isthreaded == false)
+ if (!config_tcache)
return (NULL);
- tcache = *tcache_tsd_get();
- if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) {
- if (tcache == TCACHE_STATE_DISABLED)
- return (NULL);
- if (tcache == NULL) {
- if (create == false) {
- /*
- * Creating a tcache here would cause
- * allocation as a side effect of free().
- * Ordinarily that would be okay since
- * tcache_create() failure is a soft failure
- * that doesn't propagate. However, if TLS
- * data are freed via free() as in glibc,
- * subtle corruption could result from setting
- * a TLS variable after its backing memory is
- * freed.
- */
- return (NULL);
- }
- if (tcache_enabled_get() == false) {
- tcache_enabled_set(false); /* Memoize. */
- return (NULL);
- }
- return (tcache_create(choose_arena(NULL)));
- }
- if (tcache == TCACHE_STATE_PURGATORY) {
- /*
- * Make a note that an allocator function was called
- * after tcache_thread_cleanup() was called.
- */
- tcache = TCACHE_STATE_REINCARNATED;
- tcache_tsd_set(&tcache);
- return (NULL);
- }
- if (tcache == TCACHE_STATE_REINCARNATED)
- return (NULL);
- not_reached();
+ tcache = tsd_tcache_get(tsd);
+ if (!create)
+ return (tcache);
+ if (unlikely(tcache == NULL) && tsd_nominal(tsd)) {
+ tcache = tcache_get_hard(tsd);
+ tsd_tcache_set(tsd, tcache);
}
return (tcache);
}
JEMALLOC_ALWAYS_INLINE void
-tcache_event(tcache_t *tcache)
+tcache_event(tsd_t *tsd, tcache_t *tcache)
{
if (TCACHE_GC_INCR == 0)
@@ -267,8 +242,8 @@ tcache_event(tcache_t *tcache)
tcache->ev_cnt++;
assert(tcache->ev_cnt <= TCACHE_GC_INCR);
- if (tcache->ev_cnt == TCACHE_GC_INCR)
- tcache_event_hard(tcache);
+ if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR))
+ tcache_event_hard(tsd, tcache);
}
JEMALLOC_ALWAYS_INLINE void *
@@ -276,85 +251,87 @@ tcache_alloc_easy(tcache_bin_t *tbin)
{
void *ret;
- if (tbin->ncached == 0) {
+ if (unlikely(tbin->ncached == 0)) {
tbin->low_water = -1;
return (NULL);
}
tbin->ncached--;
- if ((int)tbin->ncached < tbin->low_water)
+ if (unlikely((int)tbin->ncached < tbin->low_water))
tbin->low_water = tbin->ncached;
ret = tbin->avail[tbin->ncached];
return (ret);
}
JEMALLOC_ALWAYS_INLINE void *
-tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
+tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
+ bool zero)
{
void *ret;
- size_t binind;
+ szind_t binind;
+ size_t usize;
tcache_bin_t *tbin;
- binind = SMALL_SIZE2BIN(size);
+ binind = size2index(size);
assert(binind < NBINS);
tbin = &tcache->tbins[binind];
- size = arena_bin_info[binind].reg_size;
+ usize = index2size(binind);
ret = tcache_alloc_easy(tbin);
- if (ret == NULL) {
- ret = tcache_alloc_small_hard(tcache, tbin, binind);
+ if (unlikely(ret == NULL)) {
+ ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind);
if (ret == NULL)
return (NULL);
}
- assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size);
+ assert(tcache_salloc(ret) == usize);
- if (zero == false) {
+ if (likely(!zero)) {
if (config_fill) {
- if (opt_junk) {
+ if (unlikely(opt_junk_alloc)) {
arena_alloc_junk_small(ret,
&arena_bin_info[binind], false);
- } else if (opt_zero)
- memset(ret, 0, size);
+ } else if (unlikely(opt_zero))
+ memset(ret, 0, usize);
}
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
} else {
- if (config_fill && opt_junk) {
+ if (config_fill && unlikely(opt_junk_alloc)) {
arena_alloc_junk_small(ret, &arena_bin_info[binind],
true);
}
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- memset(ret, 0, size);
+ memset(ret, 0, usize);
}
if (config_stats)
tbin->tstats.nrequests++;
if (config_prof)
- tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
- tcache_event(tcache);
+ tcache->prof_accumbytes += usize;
+ tcache_event(tsd, tcache);
return (ret);
}
JEMALLOC_ALWAYS_INLINE void *
-tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
+tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
+ bool zero)
{
void *ret;
- size_t binind;
+ szind_t binind;
+ size_t usize;
tcache_bin_t *tbin;
- size = PAGE_CEILING(size);
- assert(size <= tcache_maxclass);
- binind = NBINS + (size >> LG_PAGE) - 1;
+ binind = size2index(size);
+ usize = index2size(binind);
+ assert(usize <= tcache_maxclass);
assert(binind < nhbins);
tbin = &tcache->tbins[binind];
ret = tcache_alloc_easy(tbin);
- if (ret == NULL) {
+ if (unlikely(ret == NULL)) {
/*
* Only allocate one large object at a time, because it's quite
* expensive to create one and not use it.
*/
- ret = arena_malloc_large(tcache->arena, size, zero);
+ ret = arena_malloc_large(arena, usize, zero);
if (ret == NULL)
return (NULL);
} else {
- if (config_prof && prof_promote && size == PAGE) {
+ if (config_prof && usize == LARGE_MINCLASS) {
arena_chunk_t *chunk =
(arena_chunk_t *)CHUNK_ADDR2BASE(ret);
size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
@@ -362,57 +339,54 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
arena_mapbits_large_binind_set(chunk, pageind,
BININD_INVALID);
}
- if (zero == false) {
+ if (likely(!zero)) {
if (config_fill) {
- if (opt_junk)
- memset(ret, 0xa5, size);
- else if (opt_zero)
- memset(ret, 0, size);
+ if (unlikely(opt_junk_alloc))
+ memset(ret, 0xa5, usize);
+ else if (unlikely(opt_zero))
+ memset(ret, 0, usize);
}
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- } else {
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- memset(ret, 0, size);
- }
+ } else
+ memset(ret, 0, usize);
if (config_stats)
tbin->tstats.nrequests++;
if (config_prof)
- tcache->prof_accumbytes += size;
+ tcache->prof_accumbytes += usize;
}
- tcache_event(tcache);
+ tcache_event(tsd, tcache);
return (ret);
}
JEMALLOC_ALWAYS_INLINE void
-tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind)
+tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind)
{
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
assert(tcache_salloc(ptr) <= SMALL_MAXCLASS);
- if (config_fill && opt_junk)
+ if (config_fill && unlikely(opt_junk_free))
arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
tbin = &tcache->tbins[binind];
tbin_info = &tcache_bin_info[binind];
- if (tbin->ncached == tbin_info->ncached_max) {
- tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >>
- 1), tcache);
+ if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
+ tcache_bin_flush_small(tsd, tcache, tbin, binind,
+ (tbin_info->ncached_max >> 1));
}
assert(tbin->ncached < tbin_info->ncached_max);
tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
- tcache_event(tcache);
+ tcache_event(tsd, tcache);
}
JEMALLOC_ALWAYS_INLINE void
-tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
+tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size)
{
- size_t binind;
+ szind_t binind;
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
@@ -420,22 +394,31 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
assert(tcache_salloc(ptr) > SMALL_MAXCLASS);
assert(tcache_salloc(ptr) <= tcache_maxclass);
- binind = NBINS + (size >> LG_PAGE) - 1;
+ binind = size2index(size);
- if (config_fill && opt_junk)
- memset(ptr, 0x5a, size);
+ if (config_fill && unlikely(opt_junk_free))
+ arena_dalloc_junk_large(ptr, size);
tbin = &tcache->tbins[binind];
tbin_info = &tcache_bin_info[binind];
- if (tbin->ncached == tbin_info->ncached_max) {
- tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >>
- 1), tcache);
+ if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
+ tcache_bin_flush_large(tsd, tbin, binind,
+ (tbin_info->ncached_max >> 1), tcache);
}
assert(tbin->ncached < tbin_info->ncached_max);
tbin->avail[tbin->ncached] = ptr;
tbin->ncached++;
- tcache_event(tcache);
+ tcache_event(tsd, tcache);
+}
+
+JEMALLOC_ALWAYS_INLINE tcache_t *
+tcaches_get(tsd_t *tsd, unsigned ind)
+{
+ tcaches_t *elm = &tcaches[ind];
+ if (unlikely(elm->tcache == NULL))
+ elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL));
+ return (elm->tcache);
}
#endif
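
The tcaches_t machinery above backs the explicit-tcache feature described in the comment on the tcaches array (tcache.{create,flush,destroy} mallctls plus the MALLOCX_TCACHE() flag). The sketch below shows the intended usage from application code, based on the jemalloc 4.x public API; error handling is minimal and the flow is illustrative rather than normative.

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	unsigned tc;
	size_t sz = sizeof(tc);

	/* Create an explicit tcache; its index is returned through oldp. */
	if (mallctl("tcache.create", &tc, &sz, NULL, 0) != 0) {
		fprintf(stderr, "tcache.create failed\n");
		return 1;
	}

	/* Allocate and free through the explicit tcache rather than the
	 * automatic per-thread one. */
	void *p = mallocx(64, MALLOCX_TCACHE(tc));
	if (p != NULL)
		dallocx(p, MALLOCX_TCACHE(tc));

	/* Return cached objects to the arena, then retire the tcache index. */
	mallctl("tcache.flush", NULL, NULL, &tc, sizeof(tc));
	mallctl("tcache.destroy", NULL, NULL, &tc, sizeof(tc));
	return 0;
}
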
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h
index 9fb4a23ec..eed7aa013 100644
--- a/deps/jemalloc/include/jemalloc/internal/tsd.h
+++ b/deps/jemalloc/include/jemalloc/internal/tsd.h
@@ -2,7 +2,7 @@
#ifdef JEMALLOC_H_TYPES
/* Maximum number of malloc_tsd users with cleanup functions. */
-#define MALLOC_TSD_CLEANUPS_MAX 8
+#define MALLOC_TSD_CLEANUPS_MAX 2
typedef bool (*malloc_tsd_cleanup_t)(void);
@@ -12,9 +12,18 @@ typedef struct tsd_init_block_s tsd_init_block_t;
typedef struct tsd_init_head_s tsd_init_head_t;
#endif
+typedef struct tsd_s tsd_t;
+
+typedef enum {
+ tsd_state_uninitialized,
+ tsd_state_nominal,
+ tsd_state_purgatory,
+ tsd_state_reincarnated
+} tsd_state_t;
+
/*
* TLS/TSD-agnostic macro-based implementation of thread-specific data. There
- * are four macros that support (at least) three use cases: file-private,
+ * are five macros that support (at least) three use cases: file-private,
* library-private, and library-private inlined. Following is an example
* library-private tsd variable:
*
@@ -24,34 +33,36 @@ typedef struct tsd_init_head_s tsd_init_head_t;
* int y;
* } example_t;
* #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0})
- * malloc_tsd_protos(, example, example_t *)
- * malloc_tsd_externs(example, example_t *)
+ * malloc_tsd_types(example_, example_t)
+ * malloc_tsd_protos(, example_, example_t)
+ * malloc_tsd_externs(example_, example_t)
* In example.c:
- * malloc_tsd_data(, example, example_t *, EX_INITIALIZER)
- * malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER,
+ * malloc_tsd_data(, example_, example_t, EX_INITIALIZER)
+ * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER,
* example_tsd_cleanup)
*
* The result is a set of generated functions, e.g.:
*
* bool example_tsd_boot(void) {...}
- * example_t **example_tsd_get() {...}
- * void example_tsd_set(example_t **val) {...}
+ * example_t *example_tsd_get() {...}
+ * void example_tsd_set(example_t *val) {...}
*
* Note that all of the functions deal in terms of (a_type *) rather than
- * (a_type)  so that it is possible to support non-pointer types (unlike
+ * (a_type) so that it is possible to support non-pointer types (unlike
* pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is
- * cast to (void *). This means that the cleanup function needs to cast *and*
- * dereference the function argument, e.g.:
+ * cast to (void *). This means that the cleanup function needs to cast the
+ * function argument to (a_type *), then dereference the resulting pointer to
+ * access fields, e.g.
*
* void
* example_tsd_cleanup(void *arg)
* {
- * example_t *example = *(example_t **)arg;
+ * example_t *example = (example_t *)arg;
*
+ * example->x = 42;
* [...]
- * if ([want the cleanup function to be called again]) {
- * example_tsd_set(&example);
- * }
+ * if ([want the cleanup function to be called again])
+ * example_tsd_set(example);
* }
*
* If example_tsd_set() is called within example_tsd_cleanup(), it will be
@@ -60,63 +71,96 @@ typedef struct tsd_init_head_s tsd_init_head_t;
* non-NULL.
*/
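
For readers skimming the macro changes, here is a hedged sketch of what code written against the generated example_tsd_* functions from the comment above looks like. The function bodies below are faked with a plain __thread variable purely so the sketch compiles on its own; in jemalloc they are produced by malloc_tsd_data()/malloc_tsd_funcs(), and EX_INITIALIZER is simplified to a brace initializer.

#include <stdbool.h>
#include <stdio.h>

typedef struct {
	int x;
	int y;
} example_t;
#define EX_INITIALIZER {0, 0}

static __thread example_t example_tls = EX_INITIALIZER;	/* GCC/Clang __thread assumed */

static bool example_tsd_boot(void) { return (false); }		/* pretend boot succeeded */
static example_t *example_tsd_get(void) { return (&example_tls); }
static void example_tsd_set(example_t *val) { example_tls = *val; }

int
main(void)
{
	example_t ex = {1, 2};

	if (example_tsd_boot())
		return (1);
	example_tsd_set(&ex);			/* copies the value into TSD */
	example_tsd_get()->y = 42;		/* mutates through the returned pointer */
	printf("%d %d\n", example_tsd_get()->x, example_tsd_get()->y);
	return (0);
}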
+/* malloc_tsd_types(). */
+#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
+#define malloc_tsd_types(a_name, a_type)
+#elif (defined(JEMALLOC_TLS))
+#define malloc_tsd_types(a_name, a_type)
+#elif (defined(_WIN32))
+#define malloc_tsd_types(a_name, a_type) \
+typedef struct { \
+ bool initialized; \
+ a_type val; \
+} a_name##tsd_wrapper_t;
+#else
+#define malloc_tsd_types(a_name, a_type) \
+typedef struct { \
+ bool initialized; \
+ a_type val; \
+} a_name##tsd_wrapper_t;
+#endif
+
/* malloc_tsd_protos(). */
#define malloc_tsd_protos(a_attr, a_name, a_type) \
a_attr bool \
-a_name##_tsd_boot(void); \
+a_name##tsd_boot0(void); \
+a_attr void \
+a_name##tsd_boot1(void); \
+a_attr bool \
+a_name##tsd_boot(void); \
a_attr a_type * \
-a_name##_tsd_get(void); \
+a_name##tsd_get(void); \
a_attr void \
-a_name##_tsd_set(a_type *val);
+a_name##tsd_set(a_type *val);
/* malloc_tsd_externs(). */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#define malloc_tsd_externs(a_name, a_type) \
-extern __thread a_type a_name##_tls; \
-extern __thread bool a_name##_initialized; \
-extern bool a_name##_booted;
+extern __thread a_type a_name##tsd_tls; \
+extern __thread bool a_name##tsd_initialized; \
+extern bool a_name##tsd_booted;
#elif (defined(JEMALLOC_TLS))
#define malloc_tsd_externs(a_name, a_type) \
-extern __thread a_type a_name##_tls; \
-extern pthread_key_t a_name##_tsd; \
-extern bool a_name##_booted;
+extern __thread a_type a_name##tsd_tls; \
+extern pthread_key_t a_name##tsd_tsd; \
+extern bool a_name##tsd_booted;
#elif (defined(_WIN32))
#define malloc_tsd_externs(a_name, a_type) \
-extern DWORD a_name##_tsd; \
-extern bool a_name##_booted;
+extern DWORD a_name##tsd_tsd; \
+extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \
+extern bool a_name##tsd_booted;
#else
#define malloc_tsd_externs(a_name, a_type) \
-extern pthread_key_t a_name##_tsd; \
-extern tsd_init_head_t a_name##_tsd_init_head; \
-extern bool a_name##_booted;
+extern pthread_key_t a_name##tsd_tsd; \
+extern tsd_init_head_t a_name##tsd_init_head; \
+extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \
+extern bool a_name##tsd_booted;
#endif
/* malloc_tsd_data(). */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
a_attr __thread a_type JEMALLOC_TLS_MODEL \
- a_name##_tls = a_initializer; \
+ a_name##tsd_tls = a_initializer; \
a_attr __thread bool JEMALLOC_TLS_MODEL \
- a_name##_initialized = false; \
-a_attr bool a_name##_booted = false;
+ a_name##tsd_initialized = false; \
+a_attr bool a_name##tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
a_attr __thread a_type JEMALLOC_TLS_MODEL \
- a_name##_tls = a_initializer; \
-a_attr pthread_key_t a_name##_tsd; \
-a_attr bool a_name##_booted = false;
+ a_name##tsd_tls = a_initializer; \
+a_attr pthread_key_t a_name##tsd_tsd; \
+a_attr bool a_name##tsd_booted = false;
#elif (defined(_WIN32))
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
-a_attr DWORD a_name##_tsd; \
-a_attr bool a_name##_booted = false;
+a_attr DWORD a_name##tsd_tsd; \
+a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \
+ false, \
+ a_initializer \
+}; \
+a_attr bool a_name##tsd_booted = false;
#else
#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
-a_attr pthread_key_t a_name##_tsd; \
-a_attr tsd_init_head_t a_name##_tsd_init_head = { \
+a_attr pthread_key_t a_name##tsd_tsd; \
+a_attr tsd_init_head_t a_name##tsd_init_head = { \
ql_head_initializer(blocks), \
MALLOC_MUTEX_INITIALIZER \
}; \
-a_attr bool a_name##_booted = false;
+a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \
+ false, \
+ a_initializer \
+}; \
+a_attr bool a_name##tsd_booted = false;
#endif
/* malloc_tsd_funcs(). */
@@ -125,75 +169,100 @@ a_attr bool a_name##_booted = false;
a_cleanup) \
/* Initialization/cleanup. */ \
a_attr bool \
-a_name##_tsd_cleanup_wrapper(void) \
+a_name##tsd_cleanup_wrapper(void) \
{ \
\
- if (a_name##_initialized) { \
- a_name##_initialized = false; \
- a_cleanup(&a_name##_tls); \
+ if (a_name##tsd_initialized) { \
+ a_name##tsd_initialized = false; \
+ a_cleanup(&a_name##tsd_tls); \
} \
- return (a_name##_initialized); \
+ return (a_name##tsd_initialized); \
} \
a_attr bool \
-a_name##_tsd_boot(void) \
+a_name##tsd_boot0(void) \
{ \
\
if (a_cleanup != malloc_tsd_no_cleanup) { \
malloc_tsd_cleanup_register( \
- &a_name##_tsd_cleanup_wrapper); \
+ &a_name##tsd_cleanup_wrapper); \
} \
- a_name##_booted = true; \
+ a_name##tsd_booted = true; \
return (false); \
} \
+a_attr void \
+a_name##tsd_boot1(void) \
+{ \
+ \
+ /* Do nothing. */ \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ return (a_name##tsd_boot0()); \
+} \
/* Get/set. */ \
a_attr a_type * \
-a_name##_tsd_get(void) \
+a_name##tsd_get(void) \
{ \
\
- assert(a_name##_booted); \
- return (&a_name##_tls); \
+ assert(a_name##tsd_booted); \
+ return (&a_name##tsd_tls); \
} \
a_attr void \
-a_name##_tsd_set(a_type *val) \
+a_name##tsd_set(a_type *val) \
{ \
\
- assert(a_name##_booted); \
- a_name##_tls = (*val); \
+ assert(a_name##tsd_booted); \
+ a_name##tsd_tls = (*val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
- a_name##_initialized = true; \
+ a_name##tsd_initialized = true; \
}
#elif (defined(JEMALLOC_TLS))
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
a_cleanup) \
/* Initialization/cleanup. */ \
a_attr bool \
-a_name##_tsd_boot(void) \
+a_name##tsd_boot0(void) \
{ \
\
if (a_cleanup != malloc_tsd_no_cleanup) { \
- if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \
+ if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \
+ 0) \
return (true); \
} \
- a_name##_booted = true; \
+ a_name##tsd_booted = true; \
return (false); \
} \
+a_attr void \
+a_name##tsd_boot1(void) \
+{ \
+ \
+ /* Do nothing. */ \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ return (a_name##tsd_boot0()); \
+} \
/* Get/set. */ \
a_attr a_type * \
-a_name##_tsd_get(void) \
+a_name##tsd_get(void) \
{ \
\
- assert(a_name##_booted); \
- return (&a_name##_tls); \
+ assert(a_name##tsd_booted); \
+ return (&a_name##tsd_tls); \
} \
a_attr void \
-a_name##_tsd_set(a_type *val) \
+a_name##tsd_set(a_type *val) \
{ \
\
- assert(a_name##_booted); \
- a_name##_tls = (*val); \
+ assert(a_name##tsd_booted); \
+ a_name##tsd_tls = (*val); \
if (a_cleanup != malloc_tsd_no_cleanup) { \
- if (pthread_setspecific(a_name##_tsd, \
- (void *)(&a_name##_tls))) { \
+ if (pthread_setspecific(a_name##tsd_tsd, \
+ (void *)(&a_name##tsd_tls))) { \
malloc_write("<jemalloc>: Error" \
" setting TSD for "#a_name"\n"); \
if (opt_abort) \
@@ -204,27 +273,21 @@ a_name##_tsd_set(a_type *val) \
#elif (defined(_WIN32))
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
a_cleanup) \
-/* Data structure. */ \
-typedef struct { \
- bool initialized; \
- a_type val; \
-} a_name##_tsd_wrapper_t; \
/* Initialization/cleanup. */ \
a_attr bool \
-a_name##_tsd_cleanup_wrapper(void) \
+a_name##tsd_cleanup_wrapper(void) \
{ \
- a_name##_tsd_wrapper_t *wrapper; \
+ DWORD error = GetLastError(); \
+ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \
+ TlsGetValue(a_name##tsd_tsd); \
+ SetLastError(error); \
\
- wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \
if (wrapper == NULL) \
return (false); \
if (a_cleanup != malloc_tsd_no_cleanup && \
wrapper->initialized) { \
- a_type val = wrapper->val; \
- a_type tsd_static_data = a_initializer; \
wrapper->initialized = false; \
- wrapper->val = tsd_static_data; \
- a_cleanup(&val); \
+ a_cleanup(&wrapper->val); \
if (wrapper->initialized) { \
/* Trigger another cleanup round. */ \
return (true); \
@@ -233,63 +296,95 @@ a_name##_tsd_cleanup_wrapper(void) \
malloc_tsd_dalloc(wrapper); \
return (false); \
} \
-a_attr bool \
-a_name##_tsd_boot(void) \
+a_attr void \
+a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \
{ \
\
- a_name##_tsd = TlsAlloc(); \
- if (a_name##_tsd == TLS_OUT_OF_INDEXES) \
- return (true); \
- if (a_cleanup != malloc_tsd_no_cleanup) { \
- malloc_tsd_cleanup_register( \
- &a_name##_tsd_cleanup_wrapper); \
+ if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \
+ malloc_write("<jemalloc>: Error setting" \
+ " TSD for "#a_name"\n"); \
+ abort(); \
} \
- a_name##_booted = true; \
- return (false); \
} \
-/* Get/set. */ \
-a_attr a_name##_tsd_wrapper_t * \
-a_name##_tsd_get_wrapper(void) \
+a_attr a_name##tsd_wrapper_t * \
+a_name##tsd_wrapper_get(void) \
{ \
- a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \
- TlsGetValue(a_name##_tsd); \
+ DWORD error = GetLastError(); \
+ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \
+ TlsGetValue(a_name##tsd_tsd); \
+ SetLastError(error); \
\
- if (wrapper == NULL) { \
- wrapper = (a_name##_tsd_wrapper_t *) \
- malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \
+ if (unlikely(wrapper == NULL)) { \
+ wrapper = (a_name##tsd_wrapper_t *) \
+ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \
if (wrapper == NULL) { \
malloc_write("<jemalloc>: Error allocating" \
" TSD for "#a_name"\n"); \
abort(); \
} else { \
- static a_type tsd_static_data = a_initializer; \
wrapper->initialized = false; \
- wrapper->val = tsd_static_data; \
- } \
- if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \
- malloc_write("<jemalloc>: Error setting" \
- " TSD for "#a_name"\n"); \
- abort(); \
+ wrapper->val = a_initializer; \
} \
+ a_name##tsd_wrapper_set(wrapper); \
} \
return (wrapper); \
} \
+a_attr bool \
+a_name##tsd_boot0(void) \
+{ \
+ \
+ a_name##tsd_tsd = TlsAlloc(); \
+ if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \
+ return (true); \
+ if (a_cleanup != malloc_tsd_no_cleanup) { \
+ malloc_tsd_cleanup_register( \
+ &a_name##tsd_cleanup_wrapper); \
+ } \
+ a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \
+ a_name##tsd_booted = true; \
+ return (false); \
+} \
+a_attr void \
+a_name##tsd_boot1(void) \
+{ \
+ a_name##tsd_wrapper_t *wrapper; \
+ wrapper = (a_name##tsd_wrapper_t *) \
+ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \
+ if (wrapper == NULL) { \
+ malloc_write("<jemalloc>: Error allocating" \
+ " TSD for "#a_name"\n"); \
+ abort(); \
+ } \
+ memcpy(wrapper, &a_name##tsd_boot_wrapper, \
+ sizeof(a_name##tsd_wrapper_t)); \
+ a_name##tsd_wrapper_set(wrapper); \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ if (a_name##tsd_boot0()) \
+ return (true); \
+ a_name##tsd_boot1(); \
+ return (false); \
+} \
+/* Get/set. */ \
a_attr a_type * \
-a_name##_tsd_get(void) \
+a_name##tsd_get(void) \
{ \
- a_name##_tsd_wrapper_t *wrapper; \
+ a_name##tsd_wrapper_t *wrapper; \
\
- assert(a_name##_booted); \
- wrapper = a_name##_tsd_get_wrapper(); \
+ assert(a_name##tsd_booted); \
+ wrapper = a_name##tsd_wrapper_get(); \
return (&wrapper->val); \
} \
a_attr void \
-a_name##_tsd_set(a_type *val) \
+a_name##tsd_set(a_type *val) \
{ \
- a_name##_tsd_wrapper_t *wrapper; \
+ a_name##tsd_wrapper_t *wrapper; \
\
- assert(a_name##_booted); \
- wrapper = a_name##_tsd_get_wrapper(); \
+ assert(a_name##tsd_booted); \
+ wrapper = a_name##tsd_wrapper_get(); \
wrapper->val = *(val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
wrapper->initialized = true; \
@@ -297,16 +392,11 @@ a_name##_tsd_set(a_type *val) \
#else
#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
a_cleanup) \
-/* Data structure. */ \
-typedef struct { \
- bool initialized; \
- a_type val; \
-} a_name##_tsd_wrapper_t; \
/* Initialization/cleanup. */ \
a_attr void \
-a_name##_tsd_cleanup_wrapper(void *arg) \
+a_name##tsd_cleanup_wrapper(void *arg) \
{ \
- a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\
+ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \
\
if (a_cleanup != malloc_tsd_no_cleanup && \
wrapper->initialized) { \
@@ -314,7 +404,7 @@ a_name##_tsd_cleanup_wrapper(void *arg) \
a_cleanup(&wrapper->val); \
if (wrapper->initialized) { \
/* Trigger another cleanup round. */ \
- if (pthread_setspecific(a_name##_tsd, \
+ if (pthread_setspecific(a_name##tsd_tsd, \
(void *)wrapper)) { \
malloc_write("<jemalloc>: Error" \
" setting TSD for "#a_name"\n"); \
@@ -326,67 +416,97 @@ a_name##_tsd_cleanup_wrapper(void *arg) \
} \
malloc_tsd_dalloc(wrapper); \
} \
-a_attr bool \
-a_name##_tsd_boot(void) \
+a_attr void \
+a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \
{ \
\
- if (pthread_key_create(&a_name##_tsd, \
- a_name##_tsd_cleanup_wrapper) != 0) \
- return (true); \
- a_name##_booted = true; \
- return (false); \
+ if (pthread_setspecific(a_name##tsd_tsd, \
+ (void *)wrapper)) { \
+ malloc_write("<jemalloc>: Error setting" \
+ " TSD for "#a_name"\n"); \
+ abort(); \
+ } \
} \
-/* Get/set. */ \
-a_attr a_name##_tsd_wrapper_t * \
-a_name##_tsd_get_wrapper(void) \
+a_attr a_name##tsd_wrapper_t * \
+a_name##tsd_wrapper_get(void) \
{ \
- a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \
- pthread_getspecific(a_name##_tsd); \
+ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \
+ pthread_getspecific(a_name##tsd_tsd); \
\
- if (wrapper == NULL) { \
+ if (unlikely(wrapper == NULL)) { \
tsd_init_block_t block; \
wrapper = tsd_init_check_recursion( \
- &a_name##_tsd_init_head, &block); \
+ &a_name##tsd_init_head, &block); \
if (wrapper) \
return (wrapper); \
- wrapper = (a_name##_tsd_wrapper_t *) \
- malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \
+ wrapper = (a_name##tsd_wrapper_t *) \
+ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \
block.data = wrapper; \
if (wrapper == NULL) { \
malloc_write("<jemalloc>: Error allocating" \
" TSD for "#a_name"\n"); \
abort(); \
} else { \
- static a_type tsd_static_data = a_initializer; \
wrapper->initialized = false; \
- wrapper->val = tsd_static_data; \
- } \
- if (pthread_setspecific(a_name##_tsd, \
- (void *)wrapper)) { \
- malloc_write("<jemalloc>: Error setting" \
- " TSD for "#a_name"\n"); \
- abort(); \
+ wrapper->val = a_initializer; \
} \
- tsd_init_finish(&a_name##_tsd_init_head, &block); \
+ a_name##tsd_wrapper_set(wrapper); \
+ tsd_init_finish(&a_name##tsd_init_head, &block); \
} \
return (wrapper); \
} \
+a_attr bool \
+a_name##tsd_boot0(void) \
+{ \
+ \
+ if (pthread_key_create(&a_name##tsd_tsd, \
+ a_name##tsd_cleanup_wrapper) != 0) \
+ return (true); \
+ a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \
+ a_name##tsd_booted = true; \
+ return (false); \
+} \
+a_attr void \
+a_name##tsd_boot1(void) \
+{ \
+ a_name##tsd_wrapper_t *wrapper; \
+ wrapper = (a_name##tsd_wrapper_t *) \
+ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \
+ if (wrapper == NULL) { \
+ malloc_write("<jemalloc>: Error allocating" \
+ " TSD for "#a_name"\n"); \
+ abort(); \
+ } \
+ memcpy(wrapper, &a_name##tsd_boot_wrapper, \
+ sizeof(a_name##tsd_wrapper_t)); \
+ a_name##tsd_wrapper_set(wrapper); \
+} \
+a_attr bool \
+a_name##tsd_boot(void) \
+{ \
+ \
+ if (a_name##tsd_boot0()) \
+ return (true); \
+ a_name##tsd_boot1(); \
+ return (false); \
+} \
+/* Get/set. */ \
a_attr a_type * \
-a_name##_tsd_get(void) \
+a_name##tsd_get(void) \
{ \
- a_name##_tsd_wrapper_t *wrapper; \
+ a_name##tsd_wrapper_t *wrapper; \
\
- assert(a_name##_booted); \
- wrapper = a_name##_tsd_get_wrapper(); \
+ assert(a_name##tsd_booted); \
+ wrapper = a_name##tsd_wrapper_get(); \
return (&wrapper->val); \
} \
a_attr void \
-a_name##_tsd_set(a_type *val) \
+a_name##tsd_set(a_type *val) \
{ \
- a_name##_tsd_wrapper_t *wrapper; \
+ a_name##tsd_wrapper_t *wrapper; \
\
- assert(a_name##_booted); \
- wrapper = a_name##_tsd_get_wrapper(); \
+ assert(a_name##tsd_booted); \
+ wrapper = a_name##tsd_wrapper_get(); \
wrapper->val = *(val); \
if (a_cleanup != malloc_tsd_no_cleanup) \
wrapper->initialized = true; \
@@ -410,25 +530,136 @@ struct tsd_init_head_s {
};
#endif
+#define MALLOC_TSD \
+/* O(name, type) */ \
+ O(tcache, tcache_t *) \
+ O(thread_allocated, uint64_t) \
+ O(thread_deallocated, uint64_t) \
+ O(prof_tdata, prof_tdata_t *) \
+ O(arena, arena_t *) \
+ O(arenas_cache, arena_t **) \
+ O(narenas_cache, unsigned) \
+ O(arenas_cache_bypass, bool) \
+ O(tcache_enabled, tcache_enabled_t) \
+ O(quarantine, quarantine_t *) \
+
+#define TSD_INITIALIZER { \
+ tsd_state_uninitialized, \
+ NULL, \
+ 0, \
+ 0, \
+ NULL, \
+ NULL, \
+ NULL, \
+ 0, \
+ false, \
+ tcache_enabled_default, \
+ NULL \
+}
+
+struct tsd_s {
+ tsd_state_t state;
+#define O(n, t) \
+ t n;
+MALLOC_TSD
+#undef O
+};
+
+static const tsd_t tsd_initializer = TSD_INITIALIZER;
+
+malloc_tsd_types(, tsd_t)
+
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
void *malloc_tsd_malloc(size_t size);
void malloc_tsd_dalloc(void *wrapper);
-void malloc_tsd_no_cleanup(void *);
+void malloc_tsd_no_cleanup(void *arg);
void malloc_tsd_cleanup_register(bool (*f)(void));
-void malloc_tsd_boot(void);
+bool malloc_tsd_boot0(void);
+void malloc_tsd_boot1(void);
#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
!defined(_WIN32))
void *tsd_init_check_recursion(tsd_init_head_t *head,
tsd_init_block_t *block);
void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block);
#endif
+void tsd_cleanup(void *arg);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
+#ifndef JEMALLOC_ENABLE_INLINE
+malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t)
+
+tsd_t *tsd_fetch(void);
+bool tsd_nominal(tsd_t *tsd);
+#define O(n, t) \
+t *tsd_##n##p_get(tsd_t *tsd); \
+t tsd_##n##_get(tsd_t *tsd); \
+void tsd_##n##_set(tsd_t *tsd, t n);
+MALLOC_TSD
+#undef O
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_))
+malloc_tsd_externs(, tsd_t)
+malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup)
+
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_fetch(void)
+{
+ tsd_t *tsd = tsd_get();
+
+ if (unlikely(tsd->state != tsd_state_nominal)) {
+ if (tsd->state == tsd_state_uninitialized) {
+ tsd->state = tsd_state_nominal;
+ /* Trigger cleanup handler registration. */
+ tsd_set(tsd);
+ } else if (tsd->state == tsd_state_purgatory) {
+ tsd->state = tsd_state_reincarnated;
+ tsd_set(tsd);
+ } else
+ assert(tsd->state == tsd_state_reincarnated);
+ }
+
+ return (tsd);
+}
+
+JEMALLOC_INLINE bool
+tsd_nominal(tsd_t *tsd)
+{
+
+ return (tsd->state == tsd_state_nominal);
+}
+
+#define O(n, t) \
+JEMALLOC_ALWAYS_INLINE t * \
+tsd_##n##p_get(tsd_t *tsd) \
+{ \
+ \
+ return (&tsd->n); \
+} \
+ \
+JEMALLOC_ALWAYS_INLINE t \
+tsd_##n##_get(tsd_t *tsd) \
+{ \
+ \
+ return (*tsd_##n##p_get(tsd)); \
+} \
+ \
+JEMALLOC_ALWAYS_INLINE void \
+tsd_##n##_set(tsd_t *tsd, t n) \
+{ \
+ \
+ assert(tsd->state == tsd_state_nominal); \
+ tsd->n = n; \
+}
+MALLOC_TSD
+#undef O
+#endif
+
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
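
The new tsd_t above is built from a single X-macro list: MALLOC_TSD is expanded once with O() emitting struct fields, and again emitting the tsd_##n##_get()/tsd_##n##_set() accessors, so adding a per-thread field is a one-line change. A hedged, standalone sketch of the same pattern follows; TOY_TSD, toy_t and the field names are illustrative only, not jemalloc's.

#include <stdio.h>

#define TOY_TSD \
/* O(name, type) */ \
	O(thread_allocated,	unsigned long) \
	O(thread_deallocated,	unsigned long)

typedef struct {
#define O(n, t)	t n;
TOY_TSD
#undef O
} toy_t;

/* Second expansion: one typed getter and one setter per field. */
#define O(n, t) \
static t toy_##n##_get(toy_t *tsd) { return (tsd->n); } \
static void toy_##n##_set(toy_t *tsd, t v) { tsd->n = v; }
TOY_TSD
#undef O

int
main(void)
{
	toy_t tsd = {0, 0};

	toy_thread_allocated_set(&tsd, 4096);
	toy_thread_deallocated_set(&tsd, 0);
	printf("%lu %lu\n", toy_thread_allocated_get(&tsd),
	    toy_thread_deallocated_get(&tsd));
	return (0);
}

In the real header, tsd_fetch() lazily moves the structure from tsd_state_uninitialized to tsd_state_nominal on first use, which is why the generated setters assert the nominal state.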
diff --git a/deps/jemalloc/include/jemalloc/internal/util.h b/deps/jemalloc/include/jemalloc/internal/util.h
index 6b938f746..b2ea740fd 100644
--- a/deps/jemalloc/include/jemalloc/internal/util.h
+++ b/deps/jemalloc/include/jemalloc/internal/util.h
@@ -1,6 +1,36 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
+#ifdef _WIN32
+# ifdef _WIN64
+# define FMT64_PREFIX "ll"
+# define FMTPTR_PREFIX "ll"
+# else
+# define FMT64_PREFIX "ll"
+# define FMTPTR_PREFIX ""
+# endif
+# define FMTd32 "d"
+# define FMTu32 "u"
+# define FMTx32 "x"
+# define FMTd64 FMT64_PREFIX "d"
+# define FMTu64 FMT64_PREFIX "u"
+# define FMTx64 FMT64_PREFIX "x"
+# define FMTdPTR FMTPTR_PREFIX "d"
+# define FMTuPTR FMTPTR_PREFIX "u"
+# define FMTxPTR FMTPTR_PREFIX "x"
+#else
+# include <inttypes.h>
+# define FMTd32 PRId32
+# define FMTu32 PRIu32
+# define FMTx32 PRIx32
+# define FMTd64 PRId64
+# define FMTu64 PRIu64
+# define FMTx64 PRIx64
+# define FMTdPTR PRIdPTR
+# define FMTuPTR PRIuPTR
+# define FMTxPTR PRIxPTR
+#endif
+
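
The FMT* tokens above mirror the standard PRI* macros from <inttypes.h> (with hand-rolled "ll" prefixes on Windows) and are spliced into format strings by string-literal concatenation. A small hedged example of the intended call-site shape, defined here directly in terms of PRI* so it stands alone:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define FMTu64	PRIu64			/* stands in for the util.h definition */
#define FMTxPTR	PRIxPTR

int
main(void)
{
	uint64_t nbytes = UINT64_C(1) << 33;
	uintptr_t addr = (uintptr_t)&nbytes;

	/* String literals concatenate around the token, as in malloc_printf(). */
	printf("allocated %" FMTu64 " bytes at 0x%" FMTxPTR "\n", nbytes, addr);
	return (0);
}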
/* Size of stack-allocated buffer passed to buferror(). */
#define BUFERROR_BUF 64
@@ -22,9 +52,33 @@
* uninitialized.
*/
#ifdef JEMALLOC_CC_SILENCE
-# define JEMALLOC_CC_SILENCE_INIT(v) = v
+# define JEMALLOC_CC_SILENCE_INIT(v) = v
#else
-# define JEMALLOC_CC_SILENCE_INIT(v)
+# define JEMALLOC_CC_SILENCE_INIT(v)
+#endif
+
+#define JEMALLOC_GNUC_PREREQ(major, minor) \
+ (!defined(__clang__) && \
+ (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))))
+#ifndef __has_builtin
+# define __has_builtin(builtin) (0)
+#endif
+#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \
+ (defined(__clang__) && __has_builtin(builtin))
+
+#ifdef __GNUC__
+# define likely(x) __builtin_expect(!!(x), 1)
+# define unlikely(x) __builtin_expect(!!(x), 0)
+# if JEMALLOC_GNUC_PREREQ(4, 6) || \
+ JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable)
+# define unreachable() __builtin_unreachable()
+# else
+# define unreachable()
+# endif
+#else
+# define likely(x) !!(x)
+# define unlikely(x) !!(x)
+# define unreachable()
#endif
/*
@@ -33,7 +87,7 @@
*/
#ifndef assert
#define assert(e) do { \
- if (config_debug && !(e)) { \
+ if (unlikely(config_debug && !(e))) { \
malloc_printf( \
"<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \
__FILE__, __LINE__, #e); \
@@ -50,6 +104,7 @@
__FILE__, __LINE__); \
abort(); \
} \
+ unreachable(); \
} while (0)
#endif
@@ -65,14 +120,14 @@
#ifndef assert_not_implemented
#define assert_not_implemented(e) do { \
- if (config_debug && !(e)) \
+ if (unlikely(config_debug && !(e))) \
not_implemented(); \
} while (0)
#endif
/* Use to assert a particular configuration, e.g., cassert(config_debug). */
#define cassert(c) do { \
- if ((c) == false) \
+ if (unlikely(!(c))) \
not_reached(); \
} while (0)
@@ -96,25 +151,47 @@ void malloc_write(const char *s);
int malloc_vsnprintf(char *str, size_t size, const char *format,
va_list ap);
int malloc_snprintf(char *str, size_t size, const char *format, ...)
- JEMALLOC_ATTR(format(printf, 3, 4));
+ JEMALLOC_FORMAT_PRINTF(3, 4);
void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
const char *format, va_list ap);
void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
- const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
-void malloc_printf(const char *format, ...)
- JEMALLOC_ATTR(format(printf, 1, 2));
+ const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4);
+void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
+int jemalloc_ffsl(long bitmap);
+int jemalloc_ffs(int bitmap);
size_t pow2_ceil(size_t x);
+size_t lg_floor(size_t x);
void set_errno(int errnum);
int get_errno(void);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_))
+
+/* Sanity check. */
+#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS)
+# error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure
+#endif
+
+JEMALLOC_ALWAYS_INLINE int
+jemalloc_ffsl(long bitmap)
+{
+
+ return (JEMALLOC_INTERNAL_FFSL(bitmap));
+}
+
+JEMALLOC_ALWAYS_INLINE int
+jemalloc_ffs(int bitmap)
+{
+
+ return (JEMALLOC_INTERNAL_FFS(bitmap));
+}
+
/* Compute the smallest power of 2 that is >= x. */
JEMALLOC_INLINE size_t
pow2_ceil(size_t x)
@@ -133,7 +210,82 @@ pow2_ceil(size_t x)
return (x);
}
-/* Sets error code */
+#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
+JEMALLOC_INLINE size_t
+lg_floor(size_t x)
+{
+ size_t ret;
+
+ assert(x != 0);
+
+ asm ("bsr %1, %0"
+ : "=r"(ret) // Outputs.
+ : "r"(x) // Inputs.
+ );
+ return (ret);
+}
+#elif (defined(_MSC_VER))
+JEMALLOC_INLINE size_t
+lg_floor(size_t x)
+{
+ unsigned long ret;
+
+ assert(x != 0);
+
+#if (LG_SIZEOF_PTR == 3)
+ _BitScanReverse64(&ret, x);
+#elif (LG_SIZEOF_PTR == 2)
+ _BitScanReverse(&ret, x);
+#else
+# error "Unsupported type sizes for lg_floor()"
+#endif
+ return (ret);
+}
+#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
+JEMALLOC_INLINE size_t
+lg_floor(size_t x)
+{
+
+ assert(x != 0);
+
+#if (LG_SIZEOF_PTR == LG_SIZEOF_INT)
+ return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x));
+#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG)
+ return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x));
+#else
+# error "Unsupported type sizes for lg_floor()"
+#endif
+}
+#else
+JEMALLOC_INLINE size_t
+lg_floor(size_t x)
+{
+
+ assert(x != 0);
+
+ x |= (x >> 1);
+ x |= (x >> 2);
+ x |= (x >> 4);
+ x |= (x >> 8);
+ x |= (x >> 16);
+#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG)
+ x |= (x >> 32);
+ if (x == KZU(0xffffffffffffffff))
+ return (63);
+ x++;
+ return (jemalloc_ffsl(x) - 2);
+#elif (LG_SIZEOF_PTR == 2)
+ if (x == KZU(0xffffffff))
+ return (31);
+ x++;
+ return (jemalloc_ffs(x) - 2);
+#else
+# error "Unsupported type sizes for lg_floor()"
+#endif
+}
+#endif
+
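
The portable lg_floor() fallback above smears the highest set bit downward so x becomes 2^(k+1)-1 for k = lg_floor(x), then increments and uses ffs to recover k. A hedged standalone check of that arithmetic against a naive shift loop, assuming 32-bit values as in the LG_SIZEOF_PTR == 2 branch:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <strings.h>		/* ffs() */

static unsigned
lg_floor_fallback(uint32_t x)	/* mirrors the LG_SIZEOF_PTR == 2 branch */
{
	assert(x != 0);
	x |= (x >> 1);
	x |= (x >> 2);
	x |= (x >> 4);
	x |= (x >> 8);
	x |= (x >> 16);		/* x is now 2^(lg_floor+1) - 1 */
	if (x == 0xffffffffU)
		return (31);
	x++;			/* exactly 2^(lg_floor+1) */
	return ((unsigned)ffs((int)x) - 2);
}

static unsigned
lg_floor_naive(uint32_t x)
{
	unsigned lg = 0;

	while (x >>= 1)
		lg++;
	return (lg);
}

int
main(void)
{
	uint32_t x;

	for (x = 1; x < 1000000; x++)
		assert(lg_floor_fallback(x) == lg_floor_naive(x));
	printf("lg_floor(4096) = %u\n", lg_floor_fallback(4096));
	return (0);
}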
+/* Set error code. */
JEMALLOC_INLINE void
set_errno(int errnum)
{
@@ -145,7 +297,7 @@ set_errno(int errnum)
#endif
}
-/* Get last error code */
+/* Get last error code. */
JEMALLOC_INLINE int
get_errno(void)
{
diff --git a/deps/jemalloc/include/jemalloc/internal/valgrind.h b/deps/jemalloc/include/jemalloc/internal/valgrind.h
new file mode 100644
index 000000000..a3380df92
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/valgrind.h
@@ -0,0 +1,112 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#ifdef JEMALLOC_VALGRIND
+#include <valgrind/valgrind.h>
+
+/*
+ * The size that is reported to Valgrind must be consistent through a chain of
+ * malloc..realloc..realloc calls. Request size isn't recorded anywhere in
+ * jemalloc, so it is critical that all callers of these macros provide usize
+ * rather than request size. As a result, buffer overflow detection is
+ * technically weakened for the standard API, though it is generally accepted
+ * practice to consider any extra bytes reported by malloc_usable_size() as
+ * usable space.
+ */
+#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do { \
+ if (unlikely(in_valgrind)) \
+ valgrind_make_mem_noaccess(ptr, usize); \
+} while (0)
+#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do { \
+ if (unlikely(in_valgrind)) \
+ valgrind_make_mem_undefined(ptr, usize); \
+} while (0)
+#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do { \
+ if (unlikely(in_valgrind)) \
+ valgrind_make_mem_defined(ptr, usize); \
+} while (0)
+/*
+ * The VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_RESIZEINPLACE_BLOCK() macro
+ * calls must be embedded in macros rather than in functions so that when
+ * Valgrind reports errors, there are no extra stack frames in the backtraces.
+ */
+#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \
+ if (unlikely(in_valgrind && cond)) \
+ VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \
+} while (0)
+#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \
+ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \
+ zero) do { \
+ if (unlikely(in_valgrind)) { \
+ size_t rzsize = p2rz(ptr); \
+ \
+ if (!maybe_moved || ptr == old_ptr) { \
+ VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \
+ usize, rzsize); \
+ if (zero && old_usize < usize) { \
+ valgrind_make_mem_defined( \
+ (void *)((uintptr_t)ptr + \
+ old_usize), usize - old_usize); \
+ } \
+ } else { \
+ if (!old_ptr_maybe_null || old_ptr != NULL) { \
+ valgrind_freelike_block(old_ptr, \
+ old_rzsize); \
+ } \
+ if (!ptr_maybe_null || ptr != NULL) { \
+ size_t copy_size = (old_usize < usize) \
+ ? old_usize : usize; \
+ size_t tail_size = usize - copy_size; \
+ VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \
+ rzsize, false); \
+ if (copy_size > 0) { \
+ valgrind_make_mem_defined(ptr, \
+ copy_size); \
+ } \
+ if (zero && tail_size > 0) { \
+ valgrind_make_mem_defined( \
+ (void *)((uintptr_t)ptr + \
+ copy_size), tail_size); \
+ } \
+ } \
+ } \
+ } \
+} while (0)
+#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \
+ if (unlikely(in_valgrind)) \
+ valgrind_freelike_block(ptr, rzsize); \
+} while (0)
+#else
+#define RUNNING_ON_VALGRIND ((unsigned)0)
+#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0)
+#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0)
+#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0)
+#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0)
+#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \
+ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \
+ zero) do {} while (0)
+#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0)
+#endif
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#ifdef JEMALLOC_VALGRIND
+void valgrind_make_mem_noaccess(void *ptr, size_t usize);
+void valgrind_make_mem_undefined(void *ptr, size_t usize);
+void valgrind_make_mem_defined(void *ptr, size_t usize);
+void valgrind_freelike_block(void *ptr, size_t usize);
+#endif
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
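
All of the JEMALLOC_VALGRIND_* macros in the new header follow one convention: call sites invoke them unconditionally, and when JEMALLOC_VALGRIND is not defined they collapse to empty do {} while (0) statements, keeping #ifdefs out of the allocator proper. A hedged sketch of that pattern with hypothetical TOY_TRACK_* names (not jemalloc macros):

#include <stdio.h>
#include <stdlib.h>

/* Define TOY_TRACKING to emit trace lines, mirroring JEMALLOC_VALGRIND. */
#ifdef TOY_TRACKING
#define TOY_TRACK_MALLOC(ptr, usize) do { \
	printf("alloc %p (%zu bytes)\n", (void *)(ptr), (size_t)(usize)); \
} while (0)
#define TOY_TRACK_FREE(ptr) do { \
	printf("free  %p\n", (void *)(ptr)); \
} while (0)
#else
#define TOY_TRACK_MALLOC(ptr, usize) do {} while (0)
#define TOY_TRACK_FREE(ptr) do {} while (0)
#endif

int
main(void)
{
	size_t usize = 64;
	void *p = malloc(usize);

	TOY_TRACK_MALLOC(p, usize);	/* no-op unless TOY_TRACKING is defined */
	TOY_TRACK_FREE(p);
	free(p);
	return (0);
}

The do {} while (0) wrapper is what lets the no-op form sit safely inside an unbraced if/else at the call site.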