summaryrefslogtreecommitdiff
path: root/deps/jemalloc/include/jemalloc/internal
diff options
context:
space:
mode:
Diffstat (limited to 'deps/jemalloc/include/jemalloc/internal')
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena_externs.h26
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h157
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena_stats.h64
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena_structs_b.h9
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena_types.h12
-rw-r--r--deps/jemalloc/include/jemalloc/internal/atomic.h11
-rw-r--r--deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h6
-rw-r--r--deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h18
-rw-r--r--deps/jemalloc/include/jemalloc/internal/background_thread_externs.h1
-rw-r--r--deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h7
-rw-r--r--deps/jemalloc/include/jemalloc/internal/background_thread_structs.h1
-rw-r--r--deps/jemalloc/include/jemalloc/internal/base_structs.h4
-rw-r--r--deps/jemalloc/include/jemalloc/internal/bin.h25
-rw-r--r--deps/jemalloc/include/jemalloc/internal/bin_stats.h3
-rw-r--r--deps/jemalloc/include/jemalloc/internal/bin_types.h17
-rw-r--r--deps/jemalloc/include/jemalloc/internal/bit_util.h74
-rw-r--r--deps/jemalloc/include/jemalloc/internal/bitmap.h6
-rw-r--r--deps/jemalloc/include/jemalloc/internal/cache_bin.h35
-rw-r--r--deps/jemalloc/include/jemalloc/internal/ctl.h9
-rw-r--r--deps/jemalloc/include/jemalloc/internal/emitter.h349
-rw-r--r--deps/jemalloc/include/jemalloc/internal/extent_externs.h12
-rw-r--r--deps/jemalloc/include/jemalloc/internal/extent_inlines.h98
-rw-r--r--deps/jemalloc/include/jemalloc/internal/extent_structs.h59
-rw-r--r--deps/jemalloc/include/jemalloc/internal/extent_types.h10
-rw-r--r--deps/jemalloc/include/jemalloc/internal/hash.h65
-rw-r--r--deps/jemalloc/include/jemalloc/internal/hook.h163
-rw-r--r--deps/jemalloc/include/jemalloc/internal/hooks.h19
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h3
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in44
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h6
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h14
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h3
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h57
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h73
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h75
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in25
-rw-r--r--deps/jemalloc/include/jemalloc/internal/large_externs.h10
-rw-r--r--deps/jemalloc/include/jemalloc/internal/malloc_io.h2
-rw-r--r--deps/jemalloc/include/jemalloc/internal/mutex.h74
-rw-r--r--deps/jemalloc/include/jemalloc/internal/mutex_prof.h25
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prof_externs.h15
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h14
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h76
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prof_structs.h1
-rw-r--r--deps/jemalloc/include/jemalloc/internal/quantum.h77
-rw-r--r--deps/jemalloc/include/jemalloc/internal/rtree.h70
-rw-r--r--deps/jemalloc/include/jemalloc/internal/rtree_tsd.h2
-rw-r--r--deps/jemalloc/include/jemalloc/internal/safety_check.h26
-rw-r--r--deps/jemalloc/include/jemalloc/internal/sc.h333
-rw-r--r--deps/jemalloc/include/jemalloc/internal/seq.h55
-rwxr-xr-xdeps/jemalloc/include/jemalloc/internal/size_classes.sh361
-rw-r--r--deps/jemalloc/include/jemalloc/internal/stats.h3
-rw-r--r--deps/jemalloc/include/jemalloc/internal/sz.h173
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tcache_externs.h4
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tcache_inlines.h24
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tcache_structs.h19
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tcache_types.h7
-rw-r--r--deps/jemalloc/include/jemalloc/internal/test_hooks.h19
-rw-r--r--deps/jemalloc/include/jemalloc/internal/ticker.h13
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tsd.h155
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tsd_generic.h6
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h7
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tsd_tls.h7
-rw-r--r--deps/jemalloc/include/jemalloc/internal/witness.h31
64 files changed, 2112 insertions, 1057 deletions
diff --git a/deps/jemalloc/include/jemalloc/internal/arena_externs.h b/deps/jemalloc/include/jemalloc/internal/arena_externs.h
index 4b3732b41..a4523ae0c 100644
--- a/deps/jemalloc/include/jemalloc/internal/arena_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena_externs.h
@@ -3,8 +3,8 @@
#include "jemalloc/internal/bin.h"
#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/hook.h"
#include "jemalloc/internal/pages.h"
-#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/stats.h"
extern ssize_t opt_dirty_decay_ms;
@@ -16,13 +16,17 @@ extern const char *percpu_arena_mode_names[];
extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
extern malloc_mutex_t arenas_lock;
+extern size_t opt_oversize_threshold;
+extern size_t oversize_threshold;
+
void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms,
ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy);
void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
- bin_stats_t *bstats, arena_stats_large_t *lstats);
+ bin_stats_t *bstats, arena_stats_large_t *lstats,
+ arena_stats_extents_t *estats);
void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena,
extent_hooks_t **r_extent_hooks, extent_t *extent);
#ifdef JEMALLOC_JET
@@ -56,16 +60,17 @@ void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
szind_t ind, bool zero);
void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
size_t alignment, bool zero, tcache_t *tcache);
-void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize);
+void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize);
void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
bool slow_path);
-void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena,
- extent_t *extent, void *ptr);
+void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
+ szind_t binind, extent_t *extent, void *ptr);
void arena_dalloc_small(tsdn_t *tsdn, void *ptr);
bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
- size_t extra, bool zero);
+ size_t extra, bool zero, size_t *newsize);
void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
- size_t size, size_t alignment, bool zero, tcache_t *tcache);
+ size_t size, size_t alignment, bool zero, tcache_t *tcache,
+ hook_ralloc_args_t *hook_args);
dss_prec_t arena_dss_prec_get(arena_t *arena);
bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
ssize_t arena_dirty_decay_ms_default_get(void);
@@ -79,7 +84,12 @@ void arena_nthreads_inc(arena_t *arena, bool internal);
void arena_nthreads_dec(arena_t *arena, bool internal);
size_t arena_extent_sn_next(arena_t *arena);
arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
-void arena_boot(void);
+bool arena_init_huge(void);
+bool arena_is_huge(unsigned arena_ind);
+arena_t *arena_choose_huge(tsd_t *tsd);
+bin_t *arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind,
+ unsigned *binshard);
+void arena_boot(sc_data_t *sc_data);
void arena_prefork0(tsdn_t *tsdn, arena_t *arena);
void arena_prefork1(tsdn_t *tsdn, arena_t *arena);
void arena_prefork2(tsdn_t *tsdn, arena_t *arena);
diff --git a/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h b/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h
index 2b7e77e72..dd926575f 100644
--- a/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h
@@ -4,10 +4,36 @@
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/ticker.h"
+JEMALLOC_ALWAYS_INLINE bool
+arena_has_default_hooks(arena_t *arena) {
+ return (extent_hooks_get(arena) == &extent_hooks_default);
+}
+
+JEMALLOC_ALWAYS_INLINE arena_t *
+arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) {
+ if (arena != NULL) {
+ return arena;
+ }
+
+ /*
+ * For huge allocations, use the dedicated huge arena if both are true:
+ * 1) is using auto arena selection (i.e. arena == NULL), and 2) the
+ * thread is not assigned to a manual arena.
+ */
+ if (unlikely(size >= oversize_threshold)) {
+ arena_t *tsd_arena = tsd_arena_get(tsd);
+ if (tsd_arena == NULL || arena_is_auto(tsd_arena)) {
+ return arena_choose_huge(tsd);
+ }
+ }
+
+ return arena_choose(tsd, NULL);
+}
+
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
cassert(config_prof);
@@ -28,7 +54,7 @@ arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
}
JEMALLOC_ALWAYS_INLINE void
-arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, UNUSED size_t usize,
+arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) {
cassert(config_prof);
assert(ptr != NULL);
@@ -47,7 +73,7 @@ arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, UNUSED size_t usize,
}
static inline void
-arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, UNUSED prof_tctx_t *tctx) {
+arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) {
cassert(config_prof);
assert(ptr != NULL);
@@ -57,6 +83,32 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, UNUSED prof_tctx_t *tctx) {
large_prof_tctx_reset(tsdn, extent);
}
+JEMALLOC_ALWAYS_INLINE nstime_t
+arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr,
+ alloc_ctx_t *alloc_ctx) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ extent_t *extent = iealloc(tsdn, ptr);
+ /*
+ * Unlike arena_prof_prof_tctx_{get, set}, we only call this once we're
+ * sure we have a sampled allocation.
+ */
+ assert(!extent_slab_get(extent));
+ return large_prof_alloc_time_get(extent);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx,
+ nstime_t t) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ extent_t *extent = iealloc(tsdn, ptr);
+ assert(!extent_slab_get(extent));
+ large_prof_alloc_time_set(extent, t);
+}
+
JEMALLOC_ALWAYS_INLINE void
arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) {
tsd_t *tsd;
@@ -83,14 +135,33 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) {
arena_decay_ticks(tsdn, arena, 1);
}
+/* Purge a single extent to retained / unmapped directly. */
+JEMALLOC_ALWAYS_INLINE void
+arena_decay_extent(tsdn_t *tsdn,arena_t *arena, extent_hooks_t **r_extent_hooks,
+ extent_t *extent) {
+ size_t extent_size = extent_size_get(extent);
+ extent_dalloc_wrapper(tsdn, arena,
+ r_extent_hooks, extent);
+ if (config_stats) {
+ /* Update stats accordingly. */
+ arena_stats_lock(tsdn, &arena->stats);
+ arena_stats_add_u64(tsdn, &arena->stats,
+ &arena->decay_dirty.stats->nmadvise, 1);
+ arena_stats_add_u64(tsdn, &arena->stats,
+ &arena->decay_dirty.stats->purged, extent_size >> LG_PAGE);
+ arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped,
+ extent_size);
+ arena_stats_unlock(tsdn, &arena->stats);
+ }
+}
+
JEMALLOC_ALWAYS_INLINE void *
arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
tcache_t *tcache, bool slow_path) {
assert(!tsdn_null(tsdn) || tcache == NULL);
- assert(size != 0);
if (likely(tcache != NULL)) {
- if (likely(size <= SMALL_MAXCLASS)) {
+ if (likely(size <= SC_SMALL_MAXCLASS)) {
return tcache_alloc_small(tsdn_tsd(tsdn), arena,
tcache, size, ind, zero, slow_path);
}
@@ -119,7 +190,7 @@ arena_salloc(tsdn_t *tsdn, const void *ptr) {
szind_t szind = rtree_szind_read(tsdn, &extents_rtree, rtree_ctx,
(uintptr_t)ptr, true);
- assert(szind != NSIZES);
+ assert(szind != SC_NSIZES);
return sz_index2size(szind);
}
@@ -152,12 +223,22 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) {
/* Only slab members should be looked up via interior pointers. */
assert(extent_addr_get(extent) == ptr || extent_slab_get(extent));
- assert(szind != NSIZES);
+ assert(szind != SC_NSIZES);
return sz_index2size(szind);
}
static inline void
+arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) {
+ if (config_prof && unlikely(szind < SC_NBINS)) {
+ arena_dalloc_promoted(tsdn, ptr, NULL, true);
+ } else {
+ extent_t *extent = iealloc(tsdn, ptr);
+ large_dalloc(tsdn, extent);
+ }
+}
+
+static inline void
arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
assert(ptr != NULL);
@@ -173,7 +254,7 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
extent_t *extent = rtree_extent_read(tsdn, &extents_rtree,
rtree_ctx, (uintptr_t)ptr, true);
assert(szind == extent_szind_get(extent));
- assert(szind < NSIZES);
+ assert(szind < SC_NSIZES);
assert(slab == extent_slab_get(extent));
}
@@ -181,6 +262,21 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
/* Small allocation. */
arena_dalloc_small(tsdn, ptr);
} else {
+ arena_dalloc_large_no_tcache(tsdn, ptr, szind);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind,
+ bool slow_path) {
+ if (szind < nhbins) {
+ if (config_prof && unlikely(szind < SC_NBINS)) {
+ arena_dalloc_promoted(tsdn, ptr, tcache, slow_path);
+ } else {
+ tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind,
+ slow_path);
+ }
+ } else {
extent_t *extent = iealloc(tsdn, ptr);
large_dalloc(tsdn, extent);
}
@@ -203,7 +299,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
if (alloc_ctx != NULL) {
szind = alloc_ctx->szind;
slab = alloc_ctx->slab;
- assert(szind != NSIZES);
+ assert(szind != SC_NSIZES);
} else {
rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
@@ -215,7 +311,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
extent_t *extent = rtree_extent_read(tsdn, &extents_rtree,
rtree_ctx, (uintptr_t)ptr, true);
assert(szind == extent_szind_get(extent));
- assert(szind < NSIZES);
+ assert(szind < SC_NSIZES);
assert(slab == extent_slab_get(extent));
}
@@ -224,25 +320,14 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
slow_path);
} else {
- if (szind < nhbins) {
- if (config_prof && unlikely(szind < NBINS)) {
- arena_dalloc_promoted(tsdn, ptr, tcache,
- slow_path);
- } else {
- tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
- szind, slow_path);
- }
- } else {
- extent_t *extent = iealloc(tsdn, ptr);
- large_dalloc(tsdn, extent);
- }
+ arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path);
}
}
static inline void
arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
assert(ptr != NULL);
- assert(size <= LARGE_MAXCLASS);
+ assert(size <= SC_LARGE_MAXCLASS);
szind_t szind;
bool slab;
@@ -252,7 +337,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
* object, so base szind and slab on the given size.
*/
szind = sz_size2index(size);
- slab = (szind < NBINS);
+ slab = (szind < SC_NBINS);
}
if ((config_prof && opt_prof) || config_debug) {
@@ -264,7 +349,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
(uintptr_t)ptr, true, &szind, &slab);
assert(szind == sz_size2index(size));
- assert((config_prof && opt_prof) || slab == (szind < NBINS));
+ assert((config_prof && opt_prof) || slab == (szind < SC_NBINS));
if (config_debug) {
extent_t *extent = rtree_extent_read(tsdn,
@@ -278,8 +363,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
/* Small allocation. */
arena_dalloc_small(tsdn, ptr);
} else {
- extent_t *extent = iealloc(tsdn, ptr);
- large_dalloc(tsdn, extent);
+ arena_dalloc_large_no_tcache(tsdn, ptr, szind);
}
}
@@ -288,7 +372,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
alloc_ctx_t *alloc_ctx, bool slow_path) {
assert(!tsdn_null(tsdn) || tcache == NULL);
assert(ptr != NULL);
- assert(size <= LARGE_MAXCLASS);
+ assert(size <= SC_LARGE_MAXCLASS);
if (unlikely(tcache == NULL)) {
arena_sdalloc_no_tcache(tsdn, ptr, size);
@@ -297,7 +381,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
szind_t szind;
bool slab;
- UNUSED alloc_ctx_t local_ctx;
+ alloc_ctx_t local_ctx;
if (config_prof && opt_prof) {
if (alloc_ctx == NULL) {
/* Uncommon case and should be a static check. */
@@ -318,7 +402,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
* object, so base szind and slab on the given size.
*/
szind = sz_size2index(size);
- slab = (szind < NBINS);
+ slab = (szind < SC_NBINS);
}
if (config_debug) {
@@ -336,18 +420,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
slow_path);
} else {
- if (szind < nhbins) {
- if (config_prof && unlikely(szind < NBINS)) {
- arena_dalloc_promoted(tsdn, ptr, tcache,
- slow_path);
- } else {
- tcache_dalloc_large(tsdn_tsd(tsdn),
- tcache, ptr, szind, slow_path);
- }
- } else {
- extent_t *extent = iealloc(tsdn, ptr);
- large_dalloc(tsdn, extent);
- }
+ arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path);
}
}
diff --git a/deps/jemalloc/include/jemalloc/internal/arena_stats.h b/deps/jemalloc/include/jemalloc/internal/arena_stats.h
index 5f3dca8b1..23949ed92 100644
--- a/deps/jemalloc/include/jemalloc/internal/arena_stats.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena_stats.h
@@ -4,7 +4,9 @@
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mutex_prof.h"
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sc.h"
+
+JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
/*
* In those architectures that support 64-bit atomics, we use atomic updates for
@@ -33,6 +35,13 @@ struct arena_stats_large_s {
* periodically merges into this counter.
*/
arena_stats_u64_t nrequests; /* Partially derived. */
+ /*
+ * Number of tcache fills / flushes for large (similarly, periodically
+ * merged). Note that there is no large tcache batch-fill currently
+ * (i.e. only fill 1 at a time); however flush may be batched.
+ */
+ arena_stats_u64_t nfills; /* Partially derived. */
+ arena_stats_u64_t nflushes; /* Partially derived. */
/* Current number of allocations of this size class. */
size_t curlextents; /* Derived. */
@@ -48,6 +57,22 @@ struct arena_stats_decay_s {
arena_stats_u64_t purged;
};
+typedef struct arena_stats_extents_s arena_stats_extents_t;
+struct arena_stats_extents_s {
+ /*
+ * Stats for a given index in the range [0, SC_NPSIZES] in an extents_t.
+ * We track both bytes and # of extents: two extents in the same bucket
+ * may have different sizes if adjacent size classes differ by more than
+ * a page, so bytes cannot always be derived from # of extents.
+ */
+ atomic_zu_t ndirty;
+ atomic_zu_t dirty_bytes;
+ atomic_zu_t nmuzzy;
+ atomic_zu_t muzzy_bytes;
+ atomic_zu_t nretained;
+ atomic_zu_t retained_bytes;
+};
+
/*
* Arena stats. Note that fields marked "derived" are not directly maintained
* within the arena code; rather their values are derived during stats merge
@@ -69,6 +94,9 @@ struct arena_stats_s {
*/
atomic_zu_t retained; /* Derived. */
+ /* Number of extent_t structs allocated by base, but not being used. */
+ atomic_zu_t extent_avail;
+
arena_stats_decay_t decay_dirty;
arena_stats_decay_t decay_muzzy;
@@ -80,22 +108,27 @@ struct arena_stats_s {
atomic_zu_t allocated_large; /* Derived. */
arena_stats_u64_t nmalloc_large; /* Derived. */
arena_stats_u64_t ndalloc_large; /* Derived. */
+ arena_stats_u64_t nfills_large; /* Derived. */
+ arena_stats_u64_t nflushes_large; /* Derived. */
arena_stats_u64_t nrequests_large; /* Derived. */
+ /* VM space had to be leaked (undocumented). Normally 0. */
+ atomic_zu_t abandoned_vm;
+
/* Number of bytes cached in tcache associated with this arena. */
atomic_zu_t tcache_bytes; /* Derived. */
mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes];
/* One element for each large size class. */
- arena_stats_large_t lstats[NSIZES - NBINS];
+ arena_stats_large_t lstats[SC_NSIZES - SC_NBINS];
/* Arena uptime. */
nstime_t uptime;
};
static inline bool
-arena_stats_init(UNUSED tsdn_t *tsdn, arena_stats_t *arena_stats) {
+arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) {
if (config_debug) {
for (size_t i = 0; i < sizeof(arena_stats_t); i++) {
assert(((char *)arena_stats)[i] == 0);
@@ -147,11 +180,11 @@ arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
#endif
}
-UNUSED static inline void
+static inline void
arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
arena_stats_u64_t *p, uint64_t x) {
#ifdef JEMALLOC_ATOMIC_U64
- UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED);
+ uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED);
assert(r - x <= r);
#else
malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
@@ -176,7 +209,8 @@ arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) {
}
static inline size_t
-arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) {
+arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats,
+ atomic_zu_t *p) {
#ifdef JEMALLOC_ATOMIC_U64
return atomic_load_zu(p, ATOMIC_RELAXED);
#else
@@ -186,8 +220,8 @@ arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) {
}
static inline void
-arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p,
- size_t x) {
+arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats,
+ atomic_zu_t *p, size_t x) {
#ifdef JEMALLOC_ATOMIC_U64
atomic_fetch_add_zu(p, x, ATOMIC_RELAXED);
#else
@@ -198,10 +232,10 @@ arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p,
}
static inline void
-arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p,
- size_t x) {
+arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats,
+ atomic_zu_t *p, size_t x) {
#ifdef JEMALLOC_ATOMIC_U64
- UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED);
+ size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED);
assert(r - x <= r);
#else
malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
@@ -218,11 +252,12 @@ arena_stats_accum_zu(atomic_zu_t *dst, size_t src) {
}
static inline void
-arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
+arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
szind_t szind, uint64_t nrequests) {
arena_stats_lock(tsdn, arena_stats);
- arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind -
- NBINS].nrequests, nrequests);
+ arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS];
+ arena_stats_add_u64(tsdn, arena_stats, &lstats->nrequests, nrequests);
+ arena_stats_add_u64(tsdn, arena_stats, &lstats->nflushes, 1);
arena_stats_unlock(tsdn, arena_stats);
}
@@ -233,5 +268,4 @@ arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) {
arena_stats_unlock(tsdn, arena_stats);
}
-
#endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h b/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h
index 38bc95962..eeab57fd6 100644
--- a/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h
@@ -10,7 +10,7 @@
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/nstime.h"
#include "jemalloc/internal/ql.h"
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/smoothstep.h"
#include "jemalloc/internal/ticker.h"
@@ -90,6 +90,9 @@ struct arena_s {
*/
atomic_u_t nthreads[2];
+ /* Next bin shard for binding new threads. Synchronization: atomic. */
+ atomic_u_t binshard_next;
+
/*
* When percpu_arena is enabled, to amortize the cost of reading /
* updating the current CPU id, track the most recent thread accessing
@@ -113,7 +116,6 @@ struct arena_s {
/* Synchronization: internal. */
prof_accum_t prof_accum;
- uint64_t prof_accumbytes;
/*
* PRNG state for cache index randomization of large allocation base
@@ -196,6 +198,7 @@ struct arena_s {
* Synchronization: extent_avail_mtx.
*/
extent_tree_t extent_avail;
+ atomic_zu_t extent_avail_cnt;
malloc_mutex_t extent_avail_mtx;
/*
@@ -203,7 +206,7 @@ struct arena_s {
*
* Synchronization: internal.
*/
- bin_t bins[NBINS];
+ bins_t bins[SC_NBINS];
/*
* Base allocator, from which arena metadata are allocated.
diff --git a/deps/jemalloc/include/jemalloc/internal/arena_types.h b/deps/jemalloc/include/jemalloc/internal/arena_types.h
index 70001b5f1..624937e4f 100644
--- a/deps/jemalloc/include/jemalloc/internal/arena_types.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena_types.h
@@ -1,13 +1,15 @@
#ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H
#define JEMALLOC_INTERNAL_ARENA_TYPES_H
+#include "jemalloc/internal/sc.h"
+
/* Maximum number of regions in one slab. */
-#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN)
+#define LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN)
#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS)
/* Default decay times in milliseconds. */
#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000)
-#define MUZZY_DECAY_MS_DEFAULT ZD(10 * 1000)
+#define MUZZY_DECAY_MS_DEFAULT (0)
/* Number of event ticks between time checks. */
#define DECAY_NTICKS_PER_UPDATE 1000
@@ -40,4 +42,10 @@ typedef enum {
#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base)
#define PERCPU_ARENA_DEFAULT percpu_arena_disabled
+/*
+ * When allocation_size >= oversize_threshold, use the dedicated huge arena
+ * (unless have explicitly spicified arena index). 0 disables the feature.
+ */
+#define OVERSIZE_THRESHOLD_DEFAULT (8 << 20)
+
#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic.h
index adadb1a3a..a76f54cee 100644
--- a/deps/jemalloc/include/jemalloc/internal/atomic.h
+++ b/deps/jemalloc/include/jemalloc/internal/atomic.h
@@ -1,12 +1,19 @@
#ifndef JEMALLOC_INTERNAL_ATOMIC_H
#define JEMALLOC_INTERNAL_ATOMIC_H
-#define ATOMIC_INLINE static inline
+#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE
+#define JEMALLOC_U8_ATOMICS
#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS)
# include "jemalloc/internal/atomic_gcc_atomic.h"
+# if !defined(JEMALLOC_GCC_U8_ATOMIC_ATOMICS)
+# undef JEMALLOC_U8_ATOMICS
+# endif
#elif defined(JEMALLOC_GCC_SYNC_ATOMICS)
# include "jemalloc/internal/atomic_gcc_sync.h"
+# if !defined(JEMALLOC_GCC_U8_SYNC_ATOMICS)
+# undef JEMALLOC_U8_ATOMICS
+# endif
#elif defined(_MSC_VER)
# include "jemalloc/internal/atomic_msvc.h"
#elif defined(JEMALLOC_C11_ATOMICS)
@@ -66,6 +73,8 @@ JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR)
JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR)
+JEMALLOC_GENERATE_INT_ATOMICS(uint8_t, u8, 0)
+
JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
#ifdef JEMALLOC_ATOMIC_U64
diff --git a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h
index 6b73a14f8..471515e82 100644
--- a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h
+++ b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h
@@ -67,7 +67,8 @@ atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
- type *expected, type desired, atomic_memory_order_t success_mo, \
+ UNUSED type *expected, type desired, \
+ atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
return __atomic_compare_exchange(&a->repr, expected, &desired, \
true, atomic_enum_to_builtin(success_mo), \
@@ -76,7 +77,8 @@ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
- type *expected, type desired, atomic_memory_order_t success_mo, \
+ UNUSED type *expected, type desired, \
+ atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
return __atomic_compare_exchange(&a->repr, expected, &desired, \
false, \
diff --git a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h
index 30846e4d2..e02b7cbe3 100644
--- a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h
+++ b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h
@@ -27,8 +27,10 @@ atomic_fence(atomic_memory_order_t mo) {
asm volatile("" ::: "memory");
# if defined(__i386__) || defined(__x86_64__)
/* This is implicit on x86. */
-# elif defined(__ppc__)
+# elif defined(__ppc64__)
asm volatile("lwsync");
+# elif defined(__ppc__)
+ asm volatile("sync");
# elif defined(__sparc__) && defined(__arch64__)
if (mo == atomic_memory_order_acquire) {
asm volatile("membar #LoadLoad | #LoadStore");
@@ -113,8 +115,8 @@ atomic_store_##short_type(atomic_##short_type##_t *a, \
} \
\
ATOMIC_INLINE type \
-atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
- atomic_memory_order_t mo) { \
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
/* \
* Because of FreeBSD, we care about gcc 4.2, which doesn't have\
* an atomic exchange builtin. We fake it with a CAS loop. \
@@ -129,8 +131,9 @@ atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
- type *expected, type desired, atomic_memory_order_t success_mo, \
- atomic_memory_order_t failure_mo) { \
+ type *expected, type desired, \
+ atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
desired); \
if (prev == *expected) { \
@@ -142,8 +145,9 @@ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
} \
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
- type *expected, type desired, atomic_memory_order_t success_mo, \
- atomic_memory_order_t failure_mo) { \
+ type *expected, type desired, \
+ atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
desired); \
if (prev == *expected) { \
diff --git a/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h b/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h
index 3209aa49f..0f997e18b 100644
--- a/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h
@@ -8,7 +8,6 @@ extern atomic_b_t background_thread_enabled_state;
extern size_t n_background_threads;
extern size_t max_background_threads;
extern background_thread_info_t *background_thread_info;
-extern bool can_enable_background_thread;
bool background_thread_create(tsd_t *tsd, unsigned arena_ind);
bool background_threads_enable(tsd_t *tsd);
diff --git a/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h b/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h
index ef50231e8..f85e86fa3 100644
--- a/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h
+++ b/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h
@@ -15,7 +15,12 @@ background_thread_enabled_set(tsdn_t *tsdn, bool state) {
JEMALLOC_ALWAYS_INLINE background_thread_info_t *
arena_background_thread_info_get(arena_t *arena) {
unsigned arena_ind = arena_ind_get(arena);
- return &background_thread_info[arena_ind % ncpus];
+ return &background_thread_info[arena_ind % max_background_threads];
+}
+
+JEMALLOC_ALWAYS_INLINE background_thread_info_t *
+background_thread_info_get(size_t ind) {
+ return &background_thread_info[ind % max_background_threads];
}
JEMALLOC_ALWAYS_INLINE uint64_t
diff --git a/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h b/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h
index c1107dfe9..c02aa434c 100644
--- a/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h
+++ b/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h
@@ -9,6 +9,7 @@
#define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX
#define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT
+#define DEFAULT_NUM_BACKGROUND_THREAD 4
typedef enum {
background_thread_stopped,
diff --git a/deps/jemalloc/include/jemalloc/internal/base_structs.h b/deps/jemalloc/include/jemalloc/internal/base_structs.h
index 2102247ac..07f214eb2 100644
--- a/deps/jemalloc/include/jemalloc/internal/base_structs.h
+++ b/deps/jemalloc/include/jemalloc/internal/base_structs.h
@@ -3,7 +3,7 @@
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sc.h"
/* Embedded at the beginning of every block of base-managed virtual memory. */
struct base_block_s {
@@ -46,7 +46,7 @@ struct base_s {
base_block_t *blocks;
/* Heap of extents that track unused trailing space within blocks. */
- extent_heap_t avail[NSIZES];
+ extent_heap_t avail[SC_NSIZES];
/* Stats, only maintained if config_stats. */
size_t allocated;
diff --git a/deps/jemalloc/include/jemalloc/internal/bin.h b/deps/jemalloc/include/jemalloc/internal/bin.h
index 9b416ada7..8547e8930 100644
--- a/deps/jemalloc/include/jemalloc/internal/bin.h
+++ b/deps/jemalloc/include/jemalloc/internal/bin.h
@@ -1,10 +1,12 @@
#ifndef JEMALLOC_INTERNAL_BIN_H
#define JEMALLOC_INTERNAL_BIN_H
+#include "jemalloc/internal/bin_stats.h"
+#include "jemalloc/internal/bin_types.h"
#include "jemalloc/internal/extent_types.h"
#include "jemalloc/internal/extent_structs.h"
#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/bin_stats.h"
+#include "jemalloc/internal/sc.h"
/*
* A bin contains a set of extents that are currently being used for slab
@@ -41,6 +43,9 @@ struct bin_info_s {
/* Total number of regions in a slab for this bin's size class. */
uint32_t nregs;
+ /* Number of sharded bins in each arena for this size class. */
+ uint32_t n_shards;
+
/*
* Metadata used to manipulate bitmaps for slabs associated with this
* bin.
@@ -48,8 +53,7 @@ struct bin_info_s {
bitmap_info_t bitmap_info;
};
-extern const bin_info_t bin_infos[NBINS];
-
+extern bin_info_t bin_infos[SC_NBINS];
typedef struct bin_s bin_t;
struct bin_s {
@@ -78,6 +82,18 @@ struct bin_s {
bin_stats_t stats;
};
+/* A set of sharded bins of the same size class. */
+typedef struct bins_s bins_t;
+struct bins_s {
+ /* Sharded bins. Dynamically sized. */
+ bin_t *bin_shards;
+};
+
+void bin_shard_sizes_boot(unsigned bin_shards[SC_NBINS]);
+bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size,
+ size_t end_size, size_t nshards);
+void bin_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]);
+
/* Initializes a bin to empty. Returns true on error. */
bool bin_init(bin_t *bin);
@@ -90,7 +106,7 @@ void bin_postfork_child(tsdn_t *tsdn, bin_t *bin);
static inline void
bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) {
malloc_mutex_lock(tsdn, &bin->lock);
- malloc_mutex_prof_read(tsdn, &dst_bin_stats->mutex_data, &bin->lock);
+ malloc_mutex_prof_accum(tsdn, &dst_bin_stats->mutex_data, &bin->lock);
dst_bin_stats->nmalloc += bin->stats.nmalloc;
dst_bin_stats->ndalloc += bin->stats.ndalloc;
dst_bin_stats->nrequests += bin->stats.nrequests;
@@ -100,6 +116,7 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) {
dst_bin_stats->nslabs += bin->stats.nslabs;
dst_bin_stats->reslabs += bin->stats.reslabs;
dst_bin_stats->curslabs += bin->stats.curslabs;
+ dst_bin_stats->nonfull_slabs += bin->stats.nonfull_slabs;
malloc_mutex_unlock(tsdn, &bin->lock);
}
diff --git a/deps/jemalloc/include/jemalloc/internal/bin_stats.h b/deps/jemalloc/include/jemalloc/internal/bin_stats.h
index 86e673ec4..d04519c82 100644
--- a/deps/jemalloc/include/jemalloc/internal/bin_stats.h
+++ b/deps/jemalloc/include/jemalloc/internal/bin_stats.h
@@ -45,6 +45,9 @@ struct bin_stats_s {
/* Current number of slabs in this bin. */
size_t curslabs;
+ /* Current size of nonfull slabs heap in this bin. */
+ size_t nonfull_slabs;
+
mutex_prof_data_t mutex_data;
};
diff --git a/deps/jemalloc/include/jemalloc/internal/bin_types.h b/deps/jemalloc/include/jemalloc/internal/bin_types.h
new file mode 100644
index 000000000..3533606b9
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/bin_types.h
@@ -0,0 +1,17 @@
+#ifndef JEMALLOC_INTERNAL_BIN_TYPES_H
+#define JEMALLOC_INTERNAL_BIN_TYPES_H
+
+#include "jemalloc/internal/sc.h"
+
+#define BIN_SHARDS_MAX (1 << EXTENT_BITS_BINSHARD_WIDTH)
+#define N_BIN_SHARDS_DEFAULT 1
+
+/* Used in TSD static initializer only. Real init in arena_bind(). */
+#define TSD_BINSHARDS_ZERO_INITIALIZER {{UINT8_MAX}}
+
+typedef struct tsd_binshards_s tsd_binshards_t;
+struct tsd_binshards_s {
+ uint8_t binshard[SC_NBINS];
+};
+
+#endif /* JEMALLOC_INTERNAL_BIN_TYPES_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/bit_util.h b/deps/jemalloc/include/jemalloc/internal/bit_util.h
index 8d078a8a3..c045eb868 100644
--- a/deps/jemalloc/include/jemalloc/internal/bit_util.h
+++ b/deps/jemalloc/include/jemalloc/internal/bit_util.h
@@ -27,6 +27,25 @@ ffs_u(unsigned bitmap) {
return JEMALLOC_INTERNAL_FFS(bitmap);
}
+#ifdef JEMALLOC_INTERNAL_POPCOUNTL
+BIT_UTIL_INLINE unsigned
+popcount_lu(unsigned long bitmap) {
+ return JEMALLOC_INTERNAL_POPCOUNTL(bitmap);
+}
+#endif
+
+/*
+ * Clears first unset bit in bitmap, and returns
+ * place of bit. bitmap *must not* be 0.
+ */
+
+BIT_UTIL_INLINE size_t
+cfs_lu(unsigned long* bitmap) {
+ size_t bit = ffs_lu(*bitmap) - 1;
+ *bitmap ^= ZU(1) << bit;
+ return bit;
+}
+
BIT_UTIL_INLINE unsigned
ffs_zu(size_t bitmap) {
#if LG_SIZEOF_PTR == LG_SIZEOF_INT
@@ -63,6 +82,22 @@ ffs_u32(uint32_t bitmap) {
BIT_UTIL_INLINE uint64_t
pow2_ceil_u64(uint64_t x) {
+#if (defined(__amd64__) || defined(__x86_64__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ))
+ if(unlikely(x <= 1)) {
+ return x;
+ }
+ size_t msb_on_index;
+#if (defined(__amd64__) || defined(__x86_64__))
+ asm ("bsrq %1, %0"
+ : "=r"(msb_on_index) // Outputs.
+ : "r"(x-1) // Inputs.
+ );
+#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
+ msb_on_index = (63 ^ __builtin_clzll(x - 1));
+#endif
+ assert(msb_on_index < 63);
+ return 1ULL << (msb_on_index + 1);
+#else
x--;
x |= x >> 1;
x |= x >> 2;
@@ -72,10 +107,27 @@ pow2_ceil_u64(uint64_t x) {
x |= x >> 32;
x++;
return x;
+#endif
}
BIT_UTIL_INLINE uint32_t
pow2_ceil_u32(uint32_t x) {
+#if ((defined(__i386__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) && (!defined(__s390__)))
+ if(unlikely(x <= 1)) {
+ return x;
+ }
+ size_t msb_on_index;
+#if (defined(__i386__))
+ asm ("bsr %1, %0"
+ : "=r"(msb_on_index) // Outputs.
+ : "r"(x-1) // Inputs.
+ );
+#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
+ msb_on_index = (31 ^ __builtin_clz(x - 1));
+#endif
+ assert(msb_on_index < 31);
+ return 1U << (msb_on_index + 1);
+#else
x--;
x |= x >> 1;
x |= x >> 2;
@@ -84,6 +136,7 @@ pow2_ceil_u32(uint32_t x) {
x |= x >> 16;
x++;
return x;
+#endif
}
/* Compute the smallest power of 2 that is >= x. */
@@ -160,6 +213,27 @@ lg_floor(size_t x) {
}
#endif
+BIT_UTIL_INLINE unsigned
+lg_ceil(size_t x) {
+ return lg_floor(x) + ((x & (x - 1)) == 0 ? 0 : 1);
+}
+
#undef BIT_UTIL_INLINE
+/* A compile-time version of lg_floor and lg_ceil. */
+#define LG_FLOOR_1(x) 0
+#define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1))
+#define LG_FLOOR_4(x) (x < (1ULL << 2) ? LG_FLOOR_2(x) : 2 + LG_FLOOR_2(x >> 2))
+#define LG_FLOOR_8(x) (x < (1ULL << 4) ? LG_FLOOR_4(x) : 4 + LG_FLOOR_4(x >> 4))
+#define LG_FLOOR_16(x) (x < (1ULL << 8) ? LG_FLOOR_8(x) : 8 + LG_FLOOR_8(x >> 8))
+#define LG_FLOOR_32(x) (x < (1ULL << 16) ? LG_FLOOR_16(x) : 16 + LG_FLOOR_16(x >> 16))
+#define LG_FLOOR_64(x) (x < (1ULL << 32) ? LG_FLOOR_32(x) : 32 + LG_FLOOR_32(x >> 32))
+#if LG_SIZEOF_PTR == 2
+# define LG_FLOOR(x) LG_FLOOR_32((x))
+#else
+# define LG_FLOOR(x) LG_FLOOR_64((x))
+#endif
+
+#define LG_CEIL(x) (LG_FLOOR(x) + (((x) & ((x) - 1)) == 0 ? 0 : 1))
+
#endif /* JEMALLOC_INTERNAL_BIT_UTIL_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/bitmap.h b/deps/jemalloc/include/jemalloc/internal/bitmap.h
index ac990290a..c3f9cb490 100644
--- a/deps/jemalloc/include/jemalloc/internal/bitmap.h
+++ b/deps/jemalloc/include/jemalloc/internal/bitmap.h
@@ -3,18 +3,18 @@
#include "jemalloc/internal/arena_types.h"
#include "jemalloc/internal/bit_util.h"
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sc.h"
typedef unsigned long bitmap_t;
#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG
/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
-#if LG_SLAB_MAXREGS > LG_CEIL_NSIZES
+#if LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES)
/* Maximum bitmap bit count is determined by maximum regions per slab. */
# define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS
#else
/* Maximum bitmap bit count is determined by number of extent size classes. */
-# define LG_BITMAP_MAXBITS LG_CEIL_NSIZES
+# define LG_BITMAP_MAXBITS LG_CEIL(SC_NSIZES)
#endif
#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS)
diff --git a/deps/jemalloc/include/jemalloc/internal/cache_bin.h b/deps/jemalloc/include/jemalloc/internal/cache_bin.h
index 12f3ef2dd..d14556a3d 100644
--- a/deps/jemalloc/include/jemalloc/internal/cache_bin.h
+++ b/deps/jemalloc/include/jemalloc/internal/cache_bin.h
@@ -88,11 +88,21 @@ JEMALLOC_ALWAYS_INLINE void *
cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
void *ret;
- if (unlikely(bin->ncached == 0)) {
- bin->low_water = -1;
- *success = false;
- return NULL;
+ bin->ncached--;
+
+ /*
+ * Check for both bin->ncached == 0 and ncached < low_water
+ * in a single branch.
+ */
+ if (unlikely(bin->ncached <= bin->low_water)) {
+ bin->low_water = bin->ncached;
+ if (bin->ncached == -1) {
+ bin->ncached = 0;
+ *success = false;
+ return NULL;
+ }
}
+
/*
* success (instead of ret) should be checked upon the return of this
* function. We avoid checking (ret == NULL) because there is never a
@@ -101,14 +111,21 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
* cacheline).
*/
*success = true;
- ret = *(bin->avail - bin->ncached);
- bin->ncached--;
+ ret = *(bin->avail - (bin->ncached + 1));
- if (unlikely(bin->ncached < bin->low_water)) {
- bin->low_water = bin->ncached;
+ return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+cache_bin_dalloc_easy(cache_bin_t *bin, cache_bin_info_t *bin_info, void *ptr) {
+ if (unlikely(bin->ncached == bin_info->ncached_max)) {
+ return false;
}
+ assert(bin->ncached < bin_info->ncached_max);
+ bin->ncached++;
+ *(bin->avail - bin->ncached) = ptr;
- return ret;
+ return true;
}
#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/ctl.h b/deps/jemalloc/include/jemalloc/internal/ctl.h
index d927d9480..1d1aacc6f 100644
--- a/deps/jemalloc/include/jemalloc/internal/ctl.h
+++ b/deps/jemalloc/include/jemalloc/internal/ctl.h
@@ -5,7 +5,7 @@
#include "jemalloc/internal/malloc_io.h"
#include "jemalloc/internal/mutex_prof.h"
#include "jemalloc/internal/ql.h"
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/stats.h"
/* Maximum ctl tree depth. */
@@ -39,9 +39,12 @@ typedef struct ctl_arena_stats_s {
uint64_t nmalloc_small;
uint64_t ndalloc_small;
uint64_t nrequests_small;
+ uint64_t nfills_small;
+ uint64_t nflushes_small;
- bin_stats_t bstats[NBINS];
- arena_stats_large_t lstats[NSIZES - NBINS];
+ bin_stats_t bstats[SC_NBINS];
+ arena_stats_large_t lstats[SC_NSIZES - SC_NBINS];
+ arena_stats_extents_t estats[SC_NPSIZES];
} ctl_arena_stats_t;
typedef struct ctl_stats_s {
diff --git a/deps/jemalloc/include/jemalloc/internal/emitter.h b/deps/jemalloc/include/jemalloc/internal/emitter.h
index 3a2b2f7f2..542bc79c3 100644
--- a/deps/jemalloc/include/jemalloc/internal/emitter.h
+++ b/deps/jemalloc/include/jemalloc/internal/emitter.h
@@ -45,7 +45,9 @@ struct emitter_col_s {
int int_val;
unsigned unsigned_val;
uint32_t uint32_val;
+ uint32_t uint32_t_val;
uint64_t uint64_val;
+ uint64_t uint64_t_val;
size_t size_val;
ssize_t ssize_val;
const char *str_val;
@@ -60,17 +62,6 @@ struct emitter_row_s {
ql_head(emitter_col_t) cols;
};
-static inline void
-emitter_row_init(emitter_row_t *row) {
- ql_new(&row->cols);
-}
-
-static inline void
-emitter_col_init(emitter_col_t *col, emitter_row_t *row) {
- ql_elm_new(col, link);
- ql_tail_insert(&row->cols, col, link);
-}
-
typedef struct emitter_s emitter_t;
struct emitter_s {
emitter_output_t output;
@@ -80,18 +71,10 @@ struct emitter_s {
int nesting_depth;
/* True if we've already emitted a value at the given depth. */
bool item_at_depth;
+ /* True if we emitted a key and will emit corresponding value next. */
+ bool emitted_key;
};
-static inline void
-emitter_init(emitter_t *emitter, emitter_output_t emitter_output,
- void (*write_cb)(void *, const char *), void *cbopaque) {
- emitter->output = emitter_output;
- emitter->write_cb = write_cb;
- emitter->cbopaque = cbopaque;
- emitter->item_at_depth = false;
- emitter->nesting_depth = 0;
-}
-
/* Internal convenience function. Write to the emitter the given string. */
JEMALLOC_FORMAT_PRINTF(2, 3)
static inline void
@@ -103,22 +86,11 @@ emitter_printf(emitter_t *emitter, const char *format, ...) {
va_end(ap);
}
-/* Write to the emitter the given string, but only in table mode. */
-JEMALLOC_FORMAT_PRINTF(2, 3)
-static inline void
-emitter_table_printf(emitter_t *emitter, const char *format, ...) {
- if (emitter->output == emitter_output_table) {
- va_list ap;
- va_start(ap, format);
- malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap);
- va_end(ap);
- }
-}
-
-static inline void
+static inline const char * JEMALLOC_FORMAT_ARG(3)
emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier,
emitter_justify_t justify, int width) {
size_t written;
+ fmt_specifier++;
if (justify == emitter_justify_none) {
written = malloc_snprintf(out_fmt, out_size,
"%%%s", fmt_specifier);
@@ -131,6 +103,7 @@ emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier,
}
/* Only happens in case of bad format string, which *we* choose. */
assert(written < out_size);
+ return out_fmt;
}
/*
@@ -156,26 +129,27 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width,
char buf[BUF_SIZE];
#define EMIT_SIMPLE(type, format) \
- emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width); \
- emitter_printf(emitter, fmt, *(const type *)value); \
+ emitter_printf(emitter, \
+ emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width), \
+ *(const type *)value);
switch (value_type) {
case emitter_type_bool:
- emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width);
- emitter_printf(emitter, fmt, *(const bool *)value ?
- "true" : "false");
+ emitter_printf(emitter,
+ emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width),
+ *(const bool *)value ? "true" : "false");
break;
case emitter_type_int:
- EMIT_SIMPLE(int, "d")
+ EMIT_SIMPLE(int, "%d")
break;
case emitter_type_unsigned:
- EMIT_SIMPLE(unsigned, "u")
+ EMIT_SIMPLE(unsigned, "%u")
break;
case emitter_type_ssize:
- EMIT_SIMPLE(ssize_t, "zd")
+ EMIT_SIMPLE(ssize_t, "%zd")
break;
case emitter_type_size:
- EMIT_SIMPLE(size_t, "zu")
+ EMIT_SIMPLE(size_t, "%zu")
break;
case emitter_type_string:
str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"",
@@ -185,17 +159,17 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width,
* anywhere near the fmt size.
*/
assert(str_written < BUF_SIZE);
- emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width);
- emitter_printf(emitter, fmt, buf);
+ emitter_printf(emitter,
+ emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), buf);
break;
case emitter_type_uint32:
- EMIT_SIMPLE(uint32_t, FMTu32)
+ EMIT_SIMPLE(uint32_t, "%" FMTu32)
break;
case emitter_type_uint64:
- EMIT_SIMPLE(uint64_t, FMTu64)
+ EMIT_SIMPLE(uint64_t, "%" FMTu64)
break;
case emitter_type_title:
- EMIT_SIMPLE(char *const, "s");
+ EMIT_SIMPLE(char *const, "%s");
break;
default:
unreachable();
@@ -235,201 +209,278 @@ emitter_indent(emitter_t *emitter) {
static inline void
emitter_json_key_prefix(emitter_t *emitter) {
+ if (emitter->emitted_key) {
+ emitter->emitted_key = false;
+ return;
+ }
emitter_printf(emitter, "%s\n", emitter->item_at_depth ? "," : "");
emitter_indent(emitter);
}
+/******************************************************************************/
+/* Public functions for emitter_t. */
+
static inline void
-emitter_begin(emitter_t *emitter) {
- if (emitter->output == emitter_output_json) {
- assert(emitter->nesting_depth == 0);
- emitter_printf(emitter, "{");
- emitter_nest_inc(emitter);
- } else {
- // tabular init
- emitter_printf(emitter, "%s", "");
- }
+emitter_init(emitter_t *emitter, emitter_output_t emitter_output,
+ void (*write_cb)(void *, const char *), void *cbopaque) {
+ emitter->output = emitter_output;
+ emitter->write_cb = write_cb;
+ emitter->cbopaque = cbopaque;
+ emitter->item_at_depth = false;
+ emitter->emitted_key = false;
+ emitter->nesting_depth = 0;
}
+/******************************************************************************/
+/* JSON public API. */
+
+/*
+ * Emits a key (e.g. as appears in an object). The next json entity emitted will
+ * be the corresponding value.
+ */
static inline void
-emitter_end(emitter_t *emitter) {
+emitter_json_key(emitter_t *emitter, const char *json_key) {
if (emitter->output == emitter_output_json) {
- assert(emitter->nesting_depth == 1);
- emitter_nest_dec(emitter);
- emitter_printf(emitter, "\n}\n");
+ emitter_json_key_prefix(emitter);
+ emitter_printf(emitter, "\"%s\": ", json_key);
+ emitter->emitted_key = true;
}
}
-/*
- * Note emits a different kv pair as well, but only in table mode. Omits the
- * note if table_note_key is NULL.
- */
static inline void
-emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key,
- emitter_type_t value_type, const void *value,
- const char *table_note_key, emitter_type_t table_note_value_type,
- const void *table_note_value) {
+emitter_json_value(emitter_t *emitter, emitter_type_t value_type,
+ const void *value) {
if (emitter->output == emitter_output_json) {
- assert(emitter->nesting_depth > 0);
emitter_json_key_prefix(emitter);
- emitter_printf(emitter, "\"%s\": ", json_key);
- emitter_print_value(emitter, emitter_justify_none, -1,
- value_type, value);
- } else {
- emitter_indent(emitter);
- emitter_printf(emitter, "%s: ", table_key);
emitter_print_value(emitter, emitter_justify_none, -1,
value_type, value);
- if (table_note_key != NULL) {
- emitter_printf(emitter, " (%s: ", table_note_key);
- emitter_print_value(emitter, emitter_justify_none, -1,
- table_note_value_type, table_note_value);
- emitter_printf(emitter, ")");
- }
- emitter_printf(emitter, "\n");
+ emitter->item_at_depth = true;
}
- emitter->item_at_depth = true;
}
+/* Shorthand for calling emitter_json_key and then emitter_json_value. */
static inline void
-emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key,
+emitter_json_kv(emitter_t *emitter, const char *json_key,
emitter_type_t value_type, const void *value) {
- emitter_kv_note(emitter, json_key, table_key, value_type, value, NULL,
- emitter_type_bool, NULL);
+ emitter_json_key(emitter, json_key);
+ emitter_json_value(emitter, value_type, value);
}
static inline void
-emitter_json_kv(emitter_t *emitter, const char *json_key,
- emitter_type_t value_type, const void *value) {
+emitter_json_array_begin(emitter_t *emitter) {
if (emitter->output == emitter_output_json) {
- emitter_kv(emitter, json_key, NULL, value_type, value);
+ emitter_json_key_prefix(emitter);
+ emitter_printf(emitter, "[");
+ emitter_nest_inc(emitter);
}
}
+/* Shorthand for calling emitter_json_key and then emitter_json_array_begin. */
static inline void
-emitter_table_kv(emitter_t *emitter, const char *table_key,
- emitter_type_t value_type, const void *value) {
- if (emitter->output == emitter_output_table) {
- emitter_kv(emitter, NULL, table_key, value_type, value);
+emitter_json_array_kv_begin(emitter_t *emitter, const char *json_key) {
+ emitter_json_key(emitter, json_key);
+ emitter_json_array_begin(emitter);
+}
+
+static inline void
+emitter_json_array_end(emitter_t *emitter) {
+ if (emitter->output == emitter_output_json) {
+ assert(emitter->nesting_depth > 0);
+ emitter_nest_dec(emitter);
+ emitter_printf(emitter, "\n");
+ emitter_indent(emitter);
+ emitter_printf(emitter, "]");
}
}
static inline void
-emitter_dict_begin(emitter_t *emitter, const char *json_key,
- const char *table_header) {
+emitter_json_object_begin(emitter_t *emitter) {
if (emitter->output == emitter_output_json) {
emitter_json_key_prefix(emitter);
- emitter_printf(emitter, "\"%s\": {", json_key);
- emitter_nest_inc(emitter);
- } else {
- emitter_indent(emitter);
- emitter_printf(emitter, "%s\n", table_header);
+ emitter_printf(emitter, "{");
emitter_nest_inc(emitter);
}
}
+/* Shorthand for calling emitter_json_key and then emitter_json_object_begin. */
static inline void
-emitter_dict_end(emitter_t *emitter) {
+emitter_json_object_kv_begin(emitter_t *emitter, const char *json_key) {
+ emitter_json_key(emitter, json_key);
+ emitter_json_object_begin(emitter);
+}
+
+static inline void
+emitter_json_object_end(emitter_t *emitter) {
if (emitter->output == emitter_output_json) {
assert(emitter->nesting_depth > 0);
emitter_nest_dec(emitter);
emitter_printf(emitter, "\n");
emitter_indent(emitter);
emitter_printf(emitter, "}");
- } else {
- emitter_nest_dec(emitter);
}
}
+
+/******************************************************************************/
+/* Table public API. */
+
static inline void
-emitter_json_dict_begin(emitter_t *emitter, const char *json_key) {
- if (emitter->output == emitter_output_json) {
- emitter_dict_begin(emitter, json_key, NULL);
+emitter_table_dict_begin(emitter_t *emitter, const char *table_key) {
+ if (emitter->output == emitter_output_table) {
+ emitter_indent(emitter);
+ emitter_printf(emitter, "%s\n", table_key);
+ emitter_nest_inc(emitter);
}
}
static inline void
-emitter_json_dict_end(emitter_t *emitter) {
- if (emitter->output == emitter_output_json) {
- emitter_dict_end(emitter);
+emitter_table_dict_end(emitter_t *emitter) {
+ if (emitter->output == emitter_output_table) {
+ emitter_nest_dec(emitter);
}
}
static inline void
-emitter_table_dict_begin(emitter_t *emitter, const char *table_key) {
+emitter_table_kv_note(emitter_t *emitter, const char *table_key,
+ emitter_type_t value_type, const void *value,
+ const char *table_note_key, emitter_type_t table_note_value_type,
+ const void *table_note_value) {
if (emitter->output == emitter_output_table) {
- emitter_dict_begin(emitter, NULL, table_key);
+ emitter_indent(emitter);
+ emitter_printf(emitter, "%s: ", table_key);
+ emitter_print_value(emitter, emitter_justify_none, -1,
+ value_type, value);
+ if (table_note_key != NULL) {
+ emitter_printf(emitter, " (%s: ", table_note_key);
+ emitter_print_value(emitter, emitter_justify_none, -1,
+ table_note_value_type, table_note_value);
+ emitter_printf(emitter, ")");
+ }
+ emitter_printf(emitter, "\n");
}
+ emitter->item_at_depth = true;
}
static inline void
-emitter_table_dict_end(emitter_t *emitter) {
+emitter_table_kv(emitter_t *emitter, const char *table_key,
+ emitter_type_t value_type, const void *value) {
+ emitter_table_kv_note(emitter, table_key, value_type, value, NULL,
+ emitter_type_bool, NULL);
+}
+
+
+/* Write to the emitter the given string, but only in table mode. */
+JEMALLOC_FORMAT_PRINTF(2, 3)
+static inline void
+emitter_table_printf(emitter_t *emitter, const char *format, ...) {
if (emitter->output == emitter_output_table) {
- emitter_dict_end(emitter);
+ va_list ap;
+ va_start(ap, format);
+ malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap);
+ va_end(ap);
}
}
static inline void
-emitter_json_arr_begin(emitter_t *emitter, const char *json_key) {
- if (emitter->output == emitter_output_json) {
- emitter_json_key_prefix(emitter);
- emitter_printf(emitter, "\"%s\": [", json_key);
- emitter_nest_inc(emitter);
+emitter_table_row(emitter_t *emitter, emitter_row_t *row) {
+ if (emitter->output != emitter_output_table) {
+ return;
+ }
+ emitter_col_t *col;
+ ql_foreach(col, &row->cols, link) {
+ emitter_print_value(emitter, col->justify, col->width,
+ col->type, (const void *)&col->bool_val);
}
+ emitter_table_printf(emitter, "\n");
+}
+
+static inline void
+emitter_row_init(emitter_row_t *row) {
+ ql_new(&row->cols);
}
static inline void
-emitter_json_arr_end(emitter_t *emitter) {
+emitter_col_init(emitter_col_t *col, emitter_row_t *row) {
+ ql_elm_new(col, link);
+ ql_tail_insert(&row->cols, col, link);
+}
+
+
+/******************************************************************************/
+/*
+ * Generalized public API. Emits using either JSON or table, according to
+ * settings in the emitter_t. */
+
+/*
+ * Note emits a different kv pair as well, but only in table mode. Omits the
+ * note if table_note_key is NULL.
+ */
+static inline void
+emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key,
+ emitter_type_t value_type, const void *value,
+ const char *table_note_key, emitter_type_t table_note_value_type,
+ const void *table_note_value) {
if (emitter->output == emitter_output_json) {
- assert(emitter->nesting_depth > 0);
- emitter_nest_dec(emitter);
- emitter_printf(emitter, "\n");
- emitter_indent(emitter);
- emitter_printf(emitter, "]");
+ emitter_json_key(emitter, json_key);
+ emitter_json_value(emitter, value_type, value);
+ } else {
+ emitter_table_kv_note(emitter, table_key, value_type, value,
+ table_note_key, table_note_value_type, table_note_value);
}
+ emitter->item_at_depth = true;
}
static inline void
-emitter_json_arr_obj_begin(emitter_t *emitter) {
+emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key,
+ emitter_type_t value_type, const void *value) {
+ emitter_kv_note(emitter, json_key, table_key, value_type, value, NULL,
+ emitter_type_bool, NULL);
+}
+
+static inline void
+emitter_dict_begin(emitter_t *emitter, const char *json_key,
+ const char *table_header) {
if (emitter->output == emitter_output_json) {
- emitter_json_key_prefix(emitter);
- emitter_printf(emitter, "{");
- emitter_nest_inc(emitter);
+ emitter_json_key(emitter, json_key);
+ emitter_json_object_begin(emitter);
+ } else {
+ emitter_table_dict_begin(emitter, table_header);
}
}
static inline void
-emitter_json_arr_obj_end(emitter_t *emitter) {
+emitter_dict_end(emitter_t *emitter) {
if (emitter->output == emitter_output_json) {
- assert(emitter->nesting_depth > 0);
- emitter_nest_dec(emitter);
- emitter_printf(emitter, "\n");
- emitter_indent(emitter);
- emitter_printf(emitter, "}");
+ emitter_json_object_end(emitter);
+ } else {
+ emitter_table_dict_end(emitter);
}
}
static inline void
-emitter_json_arr_value(emitter_t *emitter, emitter_type_t value_type,
- const void *value) {
+emitter_begin(emitter_t *emitter) {
if (emitter->output == emitter_output_json) {
- emitter_json_key_prefix(emitter);
- emitter_print_value(emitter, emitter_justify_none, -1,
- value_type, value);
+ assert(emitter->nesting_depth == 0);
+ emitter_printf(emitter, "{");
+ emitter_nest_inc(emitter);
+ } else {
+ /*
+ * This guarantees that we always call write_cb at least once.
+ * This is useful if some invariant is established by each call
+ * to write_cb, but doesn't hold initially: e.g., some buffer
+ * holds a null-terminated string.
+ */
+ emitter_printf(emitter, "%s", "");
}
}
static inline void
-emitter_table_row(emitter_t *emitter, emitter_row_t *row) {
- if (emitter->output != emitter_output_table) {
- return;
- }
- emitter_col_t *col;
- ql_foreach(col, &row->cols, link) {
- emitter_print_value(emitter, col->justify, col->width,
- col->type, (const void *)&col->bool_val);
+emitter_end(emitter_t *emitter) {
+ if (emitter->output == emitter_output_json) {
+ assert(emitter->nesting_depth == 1);
+ emitter_nest_dec(emitter);
+ emitter_printf(emitter, "\n}\n");
}
- emitter_table_printf(emitter, "\n");
}
#endif /* JEMALLOC_INTERNAL_EMITTER_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/extent_externs.h b/deps/jemalloc/include/jemalloc/internal/extent_externs.h
index b8a4d026c..8aba57633 100644
--- a/deps/jemalloc/include/jemalloc/internal/extent_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent_externs.h
@@ -24,13 +24,17 @@ size_t extent_size_quantize_floor(size_t size);
size_t extent_size_quantize_ceil(size_t size);
#endif
-rb_proto(, extent_avail_, extent_tree_t, extent_t)
+ph_proto(, extent_avail_, extent_tree_t, extent_t)
ph_proto(, extent_heap_, extent_heap_t, extent_t)
bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state,
bool delay_coalesce);
extent_state_t extents_state_get(const extents_t *extents);
size_t extents_npages_get(extents_t *extents);
+/* Get the number of extents in the given page size index. */
+size_t extents_nextents_get(extents_t *extents, pszind_t ind);
+/* Get the sum total bytes of the extents in the given page size index. */
+size_t extents_nbytes_get(extents_t *extents, pszind_t ind);
extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena,
extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr,
size_t size, size_t pad, size_t alignment, bool slab, szind_t szind,
@@ -70,4 +74,10 @@ bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena,
bool extent_boot(void);
+void extent_util_stats_get(tsdn_t *tsdn, const void *ptr,
+ size_t *nfree, size_t *nregs, size_t *size);
+void extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr,
+ size_t *nfree, size_t *nregs, size_t *size,
+ size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr);
+
#endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/extent_inlines.h b/deps/jemalloc/include/jemalloc/internal/extent_inlines.h
index 77181df8d..77fa4c4a2 100644
--- a/deps/jemalloc/include/jemalloc/internal/extent_inlines.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent_inlines.h
@@ -6,6 +6,7 @@
#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/prng.h"
#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/sz.h"
static inline void
@@ -34,18 +35,19 @@ extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) {
(uintptr_t)extent2);
}
-static inline arena_t *
-extent_arena_get(const extent_t *extent) {
+static inline unsigned
+extent_arena_ind_get(const extent_t *extent) {
unsigned arena_ind = (unsigned)((extent->e_bits &
EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT);
- /*
- * The following check is omitted because we should never actually read
- * a NULL arena pointer.
- */
- if (false && arena_ind >= MALLOCX_ARENA_LIMIT) {
- return NULL;
- }
assert(arena_ind < MALLOCX_ARENA_LIMIT);
+
+ return arena_ind;
+}
+
+static inline arena_t *
+extent_arena_get(const extent_t *extent) {
+ unsigned arena_ind = extent_arena_ind_get(extent);
+
return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE);
}
@@ -53,14 +55,14 @@ static inline szind_t
extent_szind_get_maybe_invalid(const extent_t *extent) {
szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >>
EXTENT_BITS_SZIND_SHIFT);
- assert(szind <= NSIZES);
+ assert(szind <= SC_NSIZES);
return szind;
}
static inline szind_t
extent_szind_get(const extent_t *extent) {
szind_t szind = extent_szind_get_maybe_invalid(extent);
- assert(szind < NSIZES); /* Never call when "invalid". */
+ assert(szind < SC_NSIZES); /* Never call when "invalid". */
return szind;
}
@@ -69,6 +71,14 @@ extent_usize_get(const extent_t *extent) {
return sz_index2size(extent_szind_get(extent));
}
+static inline unsigned
+extent_binshard_get(const extent_t *extent) {
+ unsigned binshard = (unsigned)((extent->e_bits &
+ EXTENT_BITS_BINSHARD_MASK) >> EXTENT_BITS_BINSHARD_SHIFT);
+ assert(binshard < bin_infos[extent_szind_get(extent)].n_shards);
+ return binshard;
+}
+
static inline size_t
extent_sn_get(const extent_t *extent) {
return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >>
@@ -176,6 +186,11 @@ extent_prof_tctx_get(const extent_t *extent) {
ATOMIC_ACQUIRE);
}
+static inline nstime_t
+extent_prof_alloc_time_get(const extent_t *extent) {
+ return extent->e_alloc_time;
+}
+
static inline void
extent_arena_set(extent_t *extent, arena_t *arena) {
unsigned arena_ind = (arena != NULL) ? arena_ind_get(arena) : ((1U <<
@@ -185,12 +200,20 @@ extent_arena_set(extent_t *extent, arena_t *arena) {
}
static inline void
+extent_binshard_set(extent_t *extent, unsigned binshard) {
+ /* The assertion assumes szind is set already. */
+ assert(binshard < bin_infos[extent_szind_get(extent)].n_shards);
+ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK) |
+ ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT);
+}
+
+static inline void
extent_addr_set(extent_t *extent, void *addr) {
extent->e_addr = addr;
}
static inline void
-extent_addr_randomize(UNUSED tsdn_t *tsdn, extent_t *extent, size_t alignment) {
+extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) {
assert(extent_base_get(extent) == extent_addr_get(extent));
if (alignment < PAGE) {
@@ -234,7 +257,7 @@ extent_bsize_set(extent_t *extent, size_t bsize) {
static inline void
extent_szind_set(extent_t *extent, szind_t szind) {
- assert(szind <= NSIZES); /* NSIZES means "invalid". */
+ assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */
extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) |
((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT);
}
@@ -247,6 +270,16 @@ extent_nfree_set(extent_t *extent, unsigned nfree) {
}
static inline void
+extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) {
+ /* The assertion assumes szind is set already. */
+ assert(binshard < bin_infos[extent_szind_get(extent)].n_shards);
+ extent->e_bits = (extent->e_bits &
+ (~EXTENT_BITS_NFREE_MASK & ~EXTENT_BITS_BINSHARD_MASK)) |
+ ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT) |
+ ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT);
+}
+
+static inline void
extent_nfree_inc(extent_t *extent) {
assert(extent_slab_get(extent));
extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT);
@@ -259,6 +292,12 @@ extent_nfree_dec(extent_t *extent) {
}
static inline void
+extent_nfree_sub(extent_t *extent, uint64_t n) {
+ assert(extent_slab_get(extent));
+ extent->e_bits -= (n << EXTENT_BITS_NFREE_SHIFT);
+}
+
+static inline void
extent_sn_set(extent_t *extent, size_t sn) {
extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) |
((uint64_t)sn << EXTENT_BITS_SN_SHIFT);
@@ -300,9 +339,34 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) {
}
static inline void
+extent_prof_alloc_time_set(extent_t *extent, nstime_t t) {
+ nstime_copy(&extent->e_alloc_time, &t);
+}
+
+static inline bool
+extent_is_head_get(extent_t *extent) {
+ if (maps_coalesce) {
+ not_reached();
+ }
+
+ return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >>
+ EXTENT_BITS_IS_HEAD_SHIFT);
+}
+
+static inline void
+extent_is_head_set(extent_t *extent, bool is_head) {
+ if (maps_coalesce) {
+ not_reached();
+ }
+
+ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) |
+ ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT);
+}
+
+static inline void
extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size,
bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed,
- bool committed, bool dumpable) {
+ bool committed, bool dumpable, extent_head_state_t is_head) {
assert(addr == PAGE_ADDR2BASE(addr) || !slab);
extent_arena_set(extent, arena);
@@ -316,6 +380,10 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size,
extent_committed_set(extent, committed);
extent_dumpable_set(extent, dumpable);
ql_elm_new(extent, ql_link);
+ if (!maps_coalesce) {
+ extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? true :
+ false);
+ }
if (config_prof) {
extent_prof_tctx_set(extent, NULL);
}
@@ -327,7 +395,7 @@ extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) {
extent_addr_set(extent, addr);
extent_bsize_set(extent, bsize);
extent_slab_set(extent, false);
- extent_szind_set(extent, NSIZES);
+ extent_szind_set(extent, SC_NSIZES);
extent_sn_set(extent, sn);
extent_state_set(extent, extent_state_active);
extent_zeroed_set(extent, true);
diff --git a/deps/jemalloc/include/jemalloc/internal/extent_structs.h b/deps/jemalloc/include/jemalloc/internal/extent_structs.h
index 4873b9e9e..767cd8930 100644
--- a/deps/jemalloc/include/jemalloc/internal/extent_structs.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent_structs.h
@@ -2,11 +2,12 @@
#define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H
#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/bit_util.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/ph.h"
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sc.h"
typedef enum {
extent_state_active = 0,
@@ -28,9 +29,10 @@ struct extent_s {
* t: state
* i: szind
* f: nfree
+ * s: bin_shard
* n: sn
*
- * nnnnnnnn ... nnnnffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa
+ * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa
*
* arena_ind: Arena from which this extent came, or all 1 bits if
* unassociated.
@@ -75,6 +77,8 @@ struct extent_s {
*
* nfree: Number of free regions in slab.
*
+ * bin_shard: the shard of the bin from which this extent came.
+ *
* sn: Serial number (potentially non-unique).
*
* Serial numbers may wrap around if !opt_retain, but as long as
@@ -112,7 +116,7 @@ struct extent_s {
#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT)
#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT)
-#define EXTENT_BITS_SZIND_WIDTH LG_CEIL_NSIZES
+#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES)
#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT)
#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT)
@@ -120,7 +124,15 @@ struct extent_s {
#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT)
#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT)
-#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT)
+#define EXTENT_BITS_BINSHARD_WIDTH 6
+#define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT)
+#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT)
+
+#define EXTENT_BITS_IS_HEAD_WIDTH 1
+#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT)
+#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT)
+
+#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT)
#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT)
/* Pointer to the extent that this structure is responsible for. */
@@ -160,11 +172,13 @@ struct extent_s {
/* Small region slab metadata. */
arena_slab_data_t e_slab_data;
- /*
- * Profile counters, used for large objects. Points to a
- * prof_tctx_t.
- */
- atomic_p_t e_prof_tctx;
+ /* Profiling data, used for large objects. */
+ struct {
+ /* Time when this was allocated. */
+ nstime_t e_alloc_time;
+ /* Points to a prof_tctx_t. */
+ atomic_p_t e_prof_tctx;
+ };
};
};
typedef ql_head(extent_t) extent_list_t;
@@ -180,14 +194,16 @@ struct extents_s {
*
* Synchronization: mtx.
*/
- extent_heap_t heaps[NPSIZES+1];
+ extent_heap_t heaps[SC_NPSIZES + 1];
+ atomic_zu_t nextents[SC_NPSIZES + 1];
+ atomic_zu_t nbytes[SC_NPSIZES + 1];
/*
* Bitmap for which set bits correspond to non-empty heaps.
*
* Synchronization: mtx.
*/
- bitmap_t bitmap[BITMAP_GROUPS(NPSIZES+1)];
+ bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)];
/*
* LRU of all extents in heaps.
@@ -216,4 +232,25 @@ struct extents_s {
bool delay_coalesce;
};
+/*
+ * The following two structs are for experimental purposes. See
+ * experimental_utilization_query_ctl and
+ * experimental_utilization_batch_query_ctl in src/ctl.c.
+ */
+
+struct extent_util_stats_s {
+ size_t nfree;
+ size_t nregs;
+ size_t size;
+};
+
+struct extent_util_stats_verbose_s {
+ void *slabcur_addr;
+ size_t nfree;
+ size_t nregs;
+ size_t size;
+ size_t bin_nfree;
+ size_t bin_nregs;
+};
+
#endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/extent_types.h b/deps/jemalloc/include/jemalloc/internal/extent_types.h
index c0561d99f..96925cf95 100644
--- a/deps/jemalloc/include/jemalloc/internal/extent_types.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent_types.h
@@ -4,9 +4,10 @@
typedef struct extent_s extent_t;
typedef struct extents_s extents_t;
-#define EXTENT_HOOKS_INITIALIZER NULL
+typedef struct extent_util_stats_s extent_util_stats_t;
+typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t;
-#define EXTENT_GROW_MAX_PIND (NPSIZES - 1)
+#define EXTENT_HOOKS_INITIALIZER NULL
/*
* When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit)
@@ -14,4 +15,9 @@ typedef struct extents_s extents_t;
*/
#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6
+typedef enum {
+ EXTENT_NOT_HEAD,
+ EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. */
+} extent_head_state_t;
+
#endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/hash.h b/deps/jemalloc/include/jemalloc/internal/hash.h
index dcfc992df..0270034e8 100644
--- a/deps/jemalloc/include/jemalloc/internal/hash.h
+++ b/deps/jemalloc/include/jemalloc/internal/hash.h
@@ -104,8 +104,8 @@ hash_x86_32(const void *key, int len, uint32_t seed) {
uint32_t k1 = 0;
switch (len & 3) {
- case 3: k1 ^= tail[2] << 16;
- case 2: k1 ^= tail[1] << 8;
+ case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH
+ case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH
case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15);
k1 *= c2; h1 ^= k1;
}
@@ -119,7 +119,7 @@ hash_x86_32(const void *key, int len, uint32_t seed) {
return h1;
}
-UNUSED static inline void
+static inline void
hash_x86_128(const void *key, const int len, uint32_t seed,
uint64_t r_out[2]) {
const uint8_t * data = (const uint8_t *) key;
@@ -177,28 +177,29 @@ hash_x86_128(const void *key, const int len, uint32_t seed,
uint32_t k4 = 0;
switch (len & 15) {
- case 15: k4 ^= tail[14] << 16;
- case 14: k4 ^= tail[13] << 8;
+ case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH
+ case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH
case 13: k4 ^= tail[12] << 0;
k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4;
-
- case 12: k3 ^= tail[11] << 24;
- case 11: k3 ^= tail[10] << 16;
- case 10: k3 ^= tail[ 9] << 8;
+ JEMALLOC_FALLTHROUGH
+ case 12: k3 ^= tail[11] << 24; JEMALLOC_FALLTHROUGH
+ case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH
+ case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH
case 9: k3 ^= tail[ 8] << 0;
k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3;
-
- case 8: k2 ^= tail[ 7] << 24;
- case 7: k2 ^= tail[ 6] << 16;
- case 6: k2 ^= tail[ 5] << 8;
+ JEMALLOC_FALLTHROUGH
+ case 8: k2 ^= tail[ 7] << 24; JEMALLOC_FALLTHROUGH
+ case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH
+ case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH
case 5: k2 ^= tail[ 4] << 0;
k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2;
-
- case 4: k1 ^= tail[ 3] << 24;
- case 3: k1 ^= tail[ 2] << 16;
- case 2: k1 ^= tail[ 1] << 8;
+ JEMALLOC_FALLTHROUGH
+ case 4: k1 ^= tail[ 3] << 24; JEMALLOC_FALLTHROUGH
+ case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH
+ case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH
case 1: k1 ^= tail[ 0] << 0;
k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1;
+ JEMALLOC_FALLTHROUGH
}
}
@@ -220,7 +221,7 @@ hash_x86_128(const void *key, const int len, uint32_t seed,
r_out[1] = (((uint64_t) h4) << 32) | h3;
}
-UNUSED static inline void
+static inline void
hash_x64_128(const void *key, const int len, const uint32_t seed,
uint64_t r_out[2]) {
const uint8_t *data = (const uint8_t *) key;
@@ -260,22 +261,22 @@ hash_x64_128(const void *key, const int len, const uint32_t seed,
uint64_t k2 = 0;
switch (len & 15) {
- case 15: k2 ^= ((uint64_t)(tail[14])) << 48; /* falls through */
- case 14: k2 ^= ((uint64_t)(tail[13])) << 40; /* falls through */
- case 13: k2 ^= ((uint64_t)(tail[12])) << 32; /* falls through */
- case 12: k2 ^= ((uint64_t)(tail[11])) << 24; /* falls through */
- case 11: k2 ^= ((uint64_t)(tail[10])) << 16; /* falls through */
- case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; /* falls through */
+ case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH
+ case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH
+ case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH
+ case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH
+ case 11: k2 ^= ((uint64_t)(tail[10])) << 16; JEMALLOC_FALLTHROUGH
+ case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; JEMALLOC_FALLTHROUGH
case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0;
k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
- /* falls through */
- case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; /* falls through */
- case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; /* falls through */
- case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; /* falls through */
- case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; /* falls through */
- case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; /* falls through */
- case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; /* falls through */
- case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; /* falls through */
+ JEMALLOC_FALLTHROUGH
+ case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH
+ case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH
+ case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH
+ case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; JEMALLOC_FALLTHROUGH
+ case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH
+ case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH
+ case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; JEMALLOC_FALLTHROUGH
case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0;
k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
}
diff --git a/deps/jemalloc/include/jemalloc/internal/hook.h b/deps/jemalloc/include/jemalloc/internal/hook.h
new file mode 100644
index 000000000..ee246b1e0
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/hook.h
@@ -0,0 +1,163 @@
+#ifndef JEMALLOC_INTERNAL_HOOK_H
+#define JEMALLOC_INTERNAL_HOOK_H
+
+#include "jemalloc/internal/tsd.h"
+
+/*
+ * This API is *extremely* experimental, and may get ripped out, changed in API-
+ * and ABI-incompatible ways, be insufficiently or incorrectly documented, etc.
+ *
+ * It allows hooking the stateful parts of the API to see changes as they
+ * happen.
+ *
+ * Allocation hooks are called after the allocation is done, free hooks are
+ * called before the free is done, and expand hooks are called after the
+ * allocation is expanded.
+ *
+ * For realloc and rallocx, if the expansion happens in place, the expansion
+ * hook is called. If it is moved, then the alloc hook is called on the new
+ * location, and then the free hook is called on the old location (i.e. both
+ * hooks are invoked in between the alloc and the dalloc).
+ *
+ * If we return NULL from OOM, then usize might not be trustworthy. Calling
+ * realloc(NULL, size) only calls the alloc hook, and calling realloc(ptr, 0)
+ * only calls the free hook. (Calling realloc(NULL, 0) is treated as malloc(0),
+ * and only calls the alloc hook).
+ *
+ * Reentrancy:
+ * Reentrancy is guarded against from within the hook implementation. If you
+ * call allocator functions from within a hook, the hooks will not be invoked
+ * again.
+ * Threading:
+ * The installation of a hook synchronizes with all its uses. If you can
+ * prove the installation of a hook happens-before a jemalloc entry point,
+ * then the hook will get invoked (unless there's a racing removal).
+ *
+ * Hook insertion appears to be atomic at a per-thread level (i.e. if a thread
+ * allocates and has the alloc hook invoked, then a subsequent free on the
+ * same thread will also have the free hook invoked).
+ *
+ * The *removal* of a hook does *not* block until all threads are done with
+ * the hook. Hook authors have to be resilient to this, and need some
+ * out-of-band mechanism for cleaning up any dynamically allocated memory
+ * associated with their hook.
+ * Ordering:
+ * Order of hook execution is unspecified, and may be different than insertion
+ * order.
+ */
+
+#define HOOK_MAX 4
+
+enum hook_alloc_e {
+ hook_alloc_malloc,
+ hook_alloc_posix_memalign,
+ hook_alloc_aligned_alloc,
+ hook_alloc_calloc,
+ hook_alloc_memalign,
+ hook_alloc_valloc,
+ hook_alloc_mallocx,
+
+ /* The reallocating functions have both alloc and dalloc variants */
+ hook_alloc_realloc,
+ hook_alloc_rallocx,
+};
+/*
+ * We put the enum typedef after the enum, since this file may get included by
+ * jemalloc_cpp.cpp, and C++ disallows enum forward declarations.
+ */
+typedef enum hook_alloc_e hook_alloc_t;
+
+enum hook_dalloc_e {
+ hook_dalloc_free,
+ hook_dalloc_dallocx,
+ hook_dalloc_sdallocx,
+
+ /*
+ * The dalloc halves of reallocation (not called if in-place expansion
+ * happens).
+ */
+ hook_dalloc_realloc,
+ hook_dalloc_rallocx,
+};
+typedef enum hook_dalloc_e hook_dalloc_t;
+
+
+enum hook_expand_e {
+ hook_expand_realloc,
+ hook_expand_rallocx,
+ hook_expand_xallocx,
+};
+typedef enum hook_expand_e hook_expand_t;
+
+typedef void (*hook_alloc)(
+ void *extra, hook_alloc_t type, void *result, uintptr_t result_raw,
+ uintptr_t args_raw[3]);
+
+typedef void (*hook_dalloc)(
+ void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]);
+
+typedef void (*hook_expand)(
+ void *extra, hook_expand_t type, void *address, size_t old_usize,
+ size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]);
+
+typedef struct hooks_s hooks_t;
+struct hooks_s {
+ hook_alloc alloc_hook;
+ hook_dalloc dalloc_hook;
+ hook_expand expand_hook;
+ void *extra;
+};
+
+/*
+ * Begin implementation details; everything above this point might one day live
+ * in a public API. Everything below this point never will.
+ */
+
+/*
+ * The realloc pathways haven't gotten any refactoring love in a while, and it's
+ * fairly difficult to pass information from the entry point to the hooks. We
+ * put the informaiton the hooks will need into a struct to encapsulate
+ * everything.
+ *
+ * Much of these pathways are force-inlined, so that the compiler can avoid
+ * materializing this struct until we hit an extern arena function. For fairly
+ * goofy reasons, *many* of the realloc paths hit an extern arena function.
+ * These paths are cold enough that it doesn't matter; eventually, we should
+ * rewrite the realloc code to make the expand-in-place and the
+ * free-then-realloc paths more orthogonal, at which point we don't need to
+ * spread the hook logic all over the place.
+ */
+typedef struct hook_ralloc_args_s hook_ralloc_args_t;
+struct hook_ralloc_args_s {
+ /* I.e. as opposed to rallocx. */
+ bool is_realloc;
+ /*
+ * The expand hook takes 4 arguments, even if only 3 are actually used;
+ * we add an extra one in case the user decides to memcpy without
+ * looking too closely at the hooked function.
+ */
+ uintptr_t args[4];
+};
+
+/*
+ * Returns an opaque handle to be used when removing the hook. NULL means that
+ * we couldn't install the hook.
+ */
+bool hook_boot();
+
+void *hook_install(tsdn_t *tsdn, hooks_t *hooks);
+/* Uninstalls the hook with the handle previously returned from hook_install. */
+void hook_remove(tsdn_t *tsdn, void *opaque);
+
+/* Hooks */
+
+void hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw,
+ uintptr_t args_raw[3]);
+
+void hook_invoke_dalloc(hook_dalloc_t type, void *address,
+ uintptr_t args_raw[3]);
+
+void hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize,
+ size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]);
+
+#endif /* JEMALLOC_INTERNAL_HOOK_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/hooks.h b/deps/jemalloc/include/jemalloc/internal/hooks.h
deleted file mode 100644
index cd49afcb0..000000000
--- a/deps/jemalloc/include/jemalloc/internal/hooks.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_HOOKS_H
-#define JEMALLOC_INTERNAL_HOOKS_H
-
-extern JEMALLOC_EXPORT void (*hooks_arena_new_hook)();
-extern JEMALLOC_EXPORT void (*hooks_libc_hook)();
-
-#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn)
-
-#define open JEMALLOC_HOOK(open, hooks_libc_hook)
-#define read JEMALLOC_HOOK(read, hooks_libc_hook)
-#define write JEMALLOC_HOOK(write, hooks_libc_hook)
-#define readlink JEMALLOC_HOOK(readlink, hooks_libc_hook)
-#define close JEMALLOC_HOOK(close, hooks_libc_hook)
-#define creat JEMALLOC_HOOK(creat, hooks_libc_hook)
-#define secure_getenv JEMALLOC_HOOK(secure_getenv, hooks_libc_hook)
-/* Note that this is undef'd and re-define'd in src/prof.c. */
-#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook)
-
-#endif /* JEMALLOC_INTERNAL_HOOKS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
index be70df510..7d6053e21 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -31,6 +31,9 @@
# include <sys/uio.h>
# endif
# include <pthread.h>
+# ifdef __FreeBSD__
+# include <pthread_np.h>
+# endif
# include <signal.h>
# ifdef JEMALLOC_OS_UNFAIR_LOCK
# include <os/lock.h>
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in
index 8dad9a1db..c442a2191 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -48,25 +48,13 @@
/* Defined if GCC __atomic atomics are available. */
#undef JEMALLOC_GCC_ATOMIC_ATOMICS
+/* and the 8-bit variant support. */
+#undef JEMALLOC_GCC_U8_ATOMIC_ATOMICS
/* Defined if GCC __sync atomics are available. */
#undef JEMALLOC_GCC_SYNC_ATOMICS
-
-/*
- * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and
- * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite
- * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the
- * functions are defined in libgcc instead of being inlines).
- */
-#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4
-
-/*
- * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and
- * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite
- * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the
- * functions are defined in libgcc instead of being inlines).
- */
-#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8
+/* and the 8-bit variant support. */
+#undef JEMALLOC_GCC_U8_SYNC_ATOMICS
/*
* Defined if __builtin_clz() and __builtin_clzl() are available.
@@ -78,12 +66,6 @@
*/
#undef JEMALLOC_OS_UNFAIR_LOCK
-/*
- * Defined if OSSpin*() functions are available, as provided by Darwin, and
- * documented in the spinlock(3) manual page.
- */
-#undef JEMALLOC_OSSPIN
-
/* Defined if syscall(2) is usable. */
#undef JEMALLOC_USE_SYSCALL
@@ -153,6 +135,9 @@
/* JEMALLOC_STATS enables statistics calculation. */
#undef JEMALLOC_STATS
+/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
+#undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API
+
/* JEMALLOC_PROF enables allocation profiling. */
#undef JEMALLOC_PROF
@@ -234,6 +219,12 @@
#undef JEMALLOC_INTERNAL_FFS
/*
+ * popcount*() functions to use for bitmapping.
+ */
+#undef JEMALLOC_INTERNAL_POPCOUNTL
+#undef JEMALLOC_INTERNAL_POPCOUNT
+
+/*
* If defined, explicitly attempt to more uniformly distribute large allocation
* pointer alignments across all cache indices.
*/
@@ -246,6 +237,12 @@
#undef JEMALLOC_LOG
/*
+ * If defined, use readlinkat() (instead of readlink()) to follow
+ * /etc/malloc_conf.
+ */
+#undef JEMALLOC_READLINKAT
+
+/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#undef JEMALLOC_ZONE
@@ -363,4 +360,7 @@
*/
#undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
+/* Performs additional safety checks when defined. */
+#undef JEMALLOC_OPT_SAFETY_CHECKS
+
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h
index e10fb275d..d291170be 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h
@@ -2,7 +2,6 @@
#define JEMALLOC_INTERNAL_EXTERNS_H
#include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/tsd_types.h"
/* TSD checks this to set thread local slow state accordingly. */
@@ -11,6 +10,7 @@ extern bool malloc_slow;
/* Run-time options. */
extern bool opt_abort;
extern bool opt_abort_conf;
+extern bool opt_confirm_conf;
extern const char *opt_junk;
extern bool opt_junk_alloc;
extern bool opt_junk_free;
@@ -25,6 +25,9 @@ extern unsigned ncpus;
/* Number of arenas used for automatic multiplexing of threads and arenas. */
extern unsigned narenas_auto;
+/* Base index for manual arenas. */
+extern unsigned manual_arena_base;
+
/*
* Arenas that are used to service external requests. Not all elements of the
* arenas array are necessarily used; arenas are created lazily as needed.
@@ -49,5 +52,6 @@ void jemalloc_prefork(void);
void jemalloc_postfork_parent(void);
void jemalloc_postfork_child(void);
bool malloc_initialized(void);
+void je_sdallocx_noflags(void *ptr, size_t size);
#endif /* JEMALLOC_INTERNAL_EXTERNS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h
index c6a1f7eb2..ddde9b4e6 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h
@@ -4,13 +4,15 @@
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/bit_util.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/ticker.h"
JEMALLOC_ALWAYS_INLINE malloc_cpuid_t
malloc_getcpu(void) {
assert(have_percpu_arena);
-#if defined(JEMALLOC_HAVE_SCHED_GETCPU)
+#if defined(_WIN32)
+ return GetCurrentProcessorNumber();
+#elif defined(JEMALLOC_HAVE_SCHED_GETCPU)
return (malloc_cpuid_t)sched_getcpu();
#else
not_reached();
@@ -108,14 +110,14 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) {
JEMALLOC_ALWAYS_INLINE cache_bin_t *
tcache_small_bin_get(tcache_t *tcache, szind_t binind) {
- assert(binind < NBINS);
+ assert(binind < SC_NBINS);
return &tcache->bins_small[binind];
}
JEMALLOC_ALWAYS_INLINE cache_bin_t *
tcache_large_bin_get(tcache_t *tcache, szind_t binind) {
- assert(binind >= NBINS &&binind < nhbins);
- return &tcache->bins_large[binind - NBINS];
+ assert(binind >= SC_NBINS &&binind < nhbins);
+ return &tcache->bins_large[binind - SC_NBINS];
}
JEMALLOC_ALWAYS_INLINE bool
@@ -156,7 +158,7 @@ pre_reentrancy(tsd_t *tsd, arena_t *arena) {
if (fast) {
/* Prepare slow path for reentrancy. */
tsd_slow_update(tsd);
- assert(tsd->state == tsd_state_nominal_slow);
+ assert(tsd_state_get(tsd) == tsd_state_nominal_slow);
}
}
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h
index 2e76e5d8f..70d6e5788 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h
@@ -71,7 +71,8 @@ arena_ichoose(tsd_t *tsd, arena_t *arena) {
static inline bool
arena_is_auto(arena_t *arena) {
assert(narenas_auto > 0);
- return (arena_ind_get(arena) < narenas_auto);
+
+ return (arena_ind_get(arena) < manual_arena_base);
}
JEMALLOC_ALWAYS_INLINE extent_t *
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index b19a94207..adae014a1 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -1,6 +1,7 @@
#ifndef JEMALLOC_INTERNAL_INLINES_C_H
#define JEMALLOC_INTERNAL_INLINES_C_H
+#include "jemalloc/internal/hook.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/witness.h"
@@ -42,7 +43,6 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
bool is_internal, arena_t *arena, bool slow_path) {
void *ret;
- assert(size != 0);
assert(!is_internal || tcache == NULL);
assert(!is_internal || arena == NULL || arena_is_auto(arena));
if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) {
@@ -133,31 +133,20 @@ isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
JEMALLOC_ALWAYS_INLINE void *
iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
- size_t extra, size_t alignment, bool zero, tcache_t *tcache,
- arena_t *arena) {
+ size_t alignment, bool zero, tcache_t *tcache, arena_t *arena,
+ hook_ralloc_args_t *hook_args) {
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
WITNESS_RANK_CORE, 0);
void *p;
size_t usize, copysize;
- usize = sz_sa2u(size + extra, alignment);
- if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
+ usize = sz_sa2u(size, alignment);
+ if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
return NULL;
}
p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
if (p == NULL) {
- if (extra == 0) {
- return NULL;
- }
- /* Try again, without extra this time. */
- usize = sz_sa2u(size, alignment);
- if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
- return NULL;
- }
- p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
- if (p == NULL) {
- return NULL;
- }
+ return NULL;
}
/*
* Copy at most size bytes (not size+extra), since the caller has no
@@ -165,13 +154,26 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
*/
copysize = (size < oldsize) ? size : oldsize;
memcpy(p, ptr, copysize);
+ hook_invoke_alloc(hook_args->is_realloc
+ ? hook_alloc_realloc : hook_alloc_rallocx, p, (uintptr_t)p,
+ hook_args->args);
+ hook_invoke_dalloc(hook_args->is_realloc
+ ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args);
isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
return p;
}
+/*
+ * is_realloc threads through the knowledge of whether or not this call comes
+ * from je_realloc (as opposed to je_rallocx); this ensures that we pass the
+ * correct entry point into any hooks.
+ * Note that these functions are all force-inlined, so no actual bool gets
+ * passed-around anywhere.
+ */
JEMALLOC_ALWAYS_INLINE void *
iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
- bool zero, tcache_t *tcache, arena_t *arena) {
+ bool zero, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args)
+{
assert(ptr != NULL);
assert(size != 0);
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
@@ -183,24 +185,24 @@ iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
* Existing object alignment is inadequate; allocate new space
* and copy.
*/
- return iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment,
- zero, tcache, arena);
+ return iralloct_realign(tsdn, ptr, oldsize, size, alignment,
+ zero, tcache, arena, hook_args);
}
return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero,
- tcache);
+ tcache, hook_args);
}
JEMALLOC_ALWAYS_INLINE void *
iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
- bool zero) {
+ bool zero, hook_ralloc_args_t *hook_args) {
return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero,
- tcache_get(tsd), NULL);
+ tcache_get(tsd), NULL, hook_args);
}
JEMALLOC_ALWAYS_INLINE bool
ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
- size_t alignment, bool zero) {
+ size_t alignment, bool zero, size_t *newsize) {
assert(ptr != NULL);
assert(size != 0);
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
@@ -209,10 +211,12 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
!= 0) {
/* Existing object alignment is inadequate. */
+ *newsize = oldsize;
return true;
}
- return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero);
+ return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero,
+ newsize);
}
JEMALLOC_ALWAYS_INLINE int
@@ -228,7 +232,8 @@ iget_defrag_hint(tsdn_t *tsdn, void* ptr) {
extent_t *slab = iealloc(tsdn, ptr);
arena_t *arena = extent_arena_get(slab);
szind_t binind = extent_szind_get(slab);
- bin_t *bin = &arena->bins[binind];
+ unsigned binshard = extent_binshard_get(slab);
+ bin_t *bin = &arena->bins[binind].bin_shards[binshard];
malloc_mutex_lock(tsdn, &bin->lock);
/* don't bother moving allocations from the slab currently used for new allocations */
if (slab != bin->slabcur) {
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
index ed75d3768..d8ea06f6d 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
@@ -30,7 +30,7 @@
# define restrict
#endif
-/* Various function pointers are statick and immutable except during testing. */
+/* Various function pointers are static and immutable except during testing. */
#ifdef JEMALLOC_JET
# define JET_MUTABLE
#else
@@ -40,4 +40,75 @@
#define JEMALLOC_VA_ARGS_HEAD(head, ...) head
#define JEMALLOC_VA_ARGS_TAIL(head, ...) __VA_ARGS__
+#if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__clang__) \
+ && defined(JEMALLOC_HAVE_ATTR) && (__GNUC__ >= 7)
+#define JEMALLOC_FALLTHROUGH JEMALLOC_ATTR(fallthrough);
+#else
+#define JEMALLOC_FALLTHROUGH /* falls through */
+#endif
+
+/* Diagnostic suppression macros */
+#if defined(_MSC_VER) && !defined(__clang__)
+# define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push))
+# define JEMALLOC_DIAGNOSTIC_POP __pragma(warning(pop))
+# define JEMALLOC_DIAGNOSTIC_IGNORE(W) __pragma(warning(disable:W))
+# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
+# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS
+# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
+# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
+/* #pragma GCC diagnostic first appeared in gcc 4.6. */
+#elif (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && \
+ (__GNUC_MINOR__ > 5)))) || defined(__clang__)
+/*
+ * The JEMALLOC_PRAGMA__ macro is an implementation detail of the GCC and Clang
+ * diagnostic suppression macros and should not be used anywhere else.
+ */
+# define JEMALLOC_PRAGMA__(X) _Pragma(#X)
+# define JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_PRAGMA__(GCC diagnostic push)
+# define JEMALLOC_DIAGNOSTIC_POP JEMALLOC_PRAGMA__(GCC diagnostic pop)
+# define JEMALLOC_DIAGNOSTIC_IGNORE(W) \
+ JEMALLOC_PRAGMA__(GCC diagnostic ignored W)
+
+/*
+ * The -Wmissing-field-initializers warning is buggy in GCC versions < 5.1 and
+ * all clang versions up to version 7 (currently trunk, unreleased). This macro
+ * suppresses the warning for the affected compiler versions only.
+ */
+# if ((defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 5)) || \
+ defined(__clang__)
+# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS \
+ JEMALLOC_DIAGNOSTIC_IGNORE("-Wmissing-field-initializers")
+# else
+# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
+# endif
+
+# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS \
+ JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits")
+# define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER \
+ JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-parameter")
+# if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 7)
+# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN \
+ JEMALLOC_DIAGNOSTIC_IGNORE("-Walloc-size-larger-than=")
+# else
+# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
+# endif
+# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS \
+ JEMALLOC_DIAGNOSTIC_PUSH \
+ JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER
+#else
+# define JEMALLOC_DIAGNOSTIC_PUSH
+# define JEMALLOC_DIAGNOSTIC_POP
+# define JEMALLOC_DIAGNOSTIC_IGNORE(W)
+# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
+# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS
+# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
+# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
+#endif
+
+/*
+ * Disables spurious diagnostics for all headers. Since these headers are not
+ * included by users directly, it does not affect their diagnostic settings.
+ */
+JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
+
#endif /* JEMALLOC_INTERNAL_MACROS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h
index 1b750b122..e296c5a7e 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h
@@ -1,6 +1,8 @@
#ifndef JEMALLOC_INTERNAL_TYPES_H
#define JEMALLOC_INTERNAL_TYPES_H
+#include "jemalloc/internal/quantum.h"
+
/* Page size index type. */
typedef unsigned pszind_t;
@@ -50,79 +52,6 @@ typedef int malloc_cpuid_t;
/* Smallest size class to support. */
#define TINY_MIN (1U << LG_TINY_MIN)
-/*
- * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
- * classes).
- */
-#ifndef LG_QUANTUM
-# if (defined(__i386__) || defined(_M_IX86))
-# define LG_QUANTUM 4
-# endif
-# ifdef __ia64__
-# define LG_QUANTUM 4
-# endif
-# ifdef __alpha__
-# define LG_QUANTUM 4
-# endif
-# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__))
-# define LG_QUANTUM 4
-# endif
-# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
-# define LG_QUANTUM 4
-# endif
-# ifdef __arm__
-# define LG_QUANTUM 3
-# endif
-# ifdef __aarch64__
-# define LG_QUANTUM 4
-# endif
-# ifdef __hppa__
-# define LG_QUANTUM 4
-# endif
-# ifdef __m68k__
-# define LG_QUANTUM 3
-# endif
-# ifdef __mips__
-# define LG_QUANTUM 3
-# endif
-# ifdef __nios2__
-# define LG_QUANTUM 3
-# endif
-# ifdef __or1k__
-# define LG_QUANTUM 3
-# endif
-# ifdef __powerpc__
-# define LG_QUANTUM 4
-# endif
-# if defined(__riscv) || defined(__riscv__)
-# define LG_QUANTUM 4
-# endif
-# ifdef __s390__
-# define LG_QUANTUM 4
-# endif
-# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \
- defined(__SH4_SINGLE_ONLY__))
-# define LG_QUANTUM 4
-# endif
-# ifdef __tile__
-# define LG_QUANTUM 4
-# endif
-# ifdef __le32__
-# define LG_QUANTUM 4
-# endif
-# ifndef LG_QUANTUM
-# error "Unknown minimum alignment for architecture; specify via "
- "--with-lg-quantum"
-# endif
-#endif
-
-#define QUANTUM ((size_t)(1U << LG_QUANTUM))
-#define QUANTUM_MASK (QUANTUM - 1)
-
-/* Return the smallest quantum multiple that is >= a. */
-#define QUANTUM_CEILING(a) \
- (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
-
#define LONG ((size_t)(1U << LG_SIZEOF_LONG))
#define LONG_MASK (LONG - 1)
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in
index e621fbc85..3418cbfa2 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in
@@ -21,7 +21,7 @@
# include "../jemalloc@install_suffix@.h"
#endif
-#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
+#if defined(JEMALLOC_OSATOMIC)
#include <libkern/OSAtomic.h>
#endif
@@ -45,7 +45,7 @@
# include "jemalloc/internal/private_namespace_jet.h"
# endif
#endif
-#include "jemalloc/internal/hooks.h"
+#include "jemalloc/internal/test_hooks.h"
#ifdef JEMALLOC_DEFINE_MADVISE_FREE
# define JEMALLOC_MADV_FREE 8
@@ -161,7 +161,26 @@ static const bool config_log =
false
#endif
;
-#ifdef JEMALLOC_HAVE_SCHED_GETCPU
+/*
+ * Are extra safety checks enabled; things like checking the size of sized
+ * deallocations, double-frees, etc.
+ */
+static const bool config_opt_safety_checks =
+#ifdef JEMALLOC_OPT_SAFETY_CHECKS
+ true
+#elif defined(JEMALLOC_DEBUG)
+ /*
+ * This lets us only guard safety checks by one flag instead of two; fast
+ * checks can guard solely by config_opt_safety_checks and run in debug mode
+ * too.
+ */
+ true
+#else
+ false
+#endif
+ ;
+
+#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU)
/* Currently percpu_arena depends on sched_getcpu. */
#define JEMALLOC_PERCPU_ARENA
#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/large_externs.h b/deps/jemalloc/include/jemalloc/internal/large_externs.h
index 3f36282cd..a05019e8a 100644
--- a/deps/jemalloc/include/jemalloc/internal/large_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/large_externs.h
@@ -1,13 +1,16 @@
#ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H
#define JEMALLOC_INTERNAL_LARGE_EXTERNS_H
+#include "jemalloc/internal/hook.h"
+
void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero);
void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
bool zero);
bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min,
size_t usize_max, bool zero);
-void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize,
- size_t alignment, bool zero, tcache_t *tcache);
+void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize,
+ size_t alignment, bool zero, tcache_t *tcache,
+ hook_ralloc_args_t *hook_args);
typedef void (large_dalloc_junk_t)(void *, size_t);
extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk;
@@ -23,4 +26,7 @@ prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent);
void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx);
void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent);
+nstime_t large_prof_alloc_time_get(const extent_t *extent);
+void large_prof_alloc_time_set(extent_t *extent, nstime_t time);
+
#endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/malloc_io.h b/deps/jemalloc/include/jemalloc/internal/malloc_io.h
index bfe556b52..1d1a414e0 100644
--- a/deps/jemalloc/include/jemalloc/internal/malloc_io.h
+++ b/deps/jemalloc/include/jemalloc/internal/malloc_io.h
@@ -54,7 +54,7 @@ size_t malloc_vsnprintf(char *str, size_t size, const char *format,
size_t malloc_snprintf(char *str, size_t size, const char *format, ...)
JEMALLOC_FORMAT_PRINTF(3, 4);
/*
- * The caller can set write_cb and cbopaque to null to choose to print with the
+ * The caller can set write_cb to null to choose to print with the
* je_malloc_message hook.
*/
void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
diff --git a/deps/jemalloc/include/jemalloc/internal/mutex.h b/deps/jemalloc/include/jemalloc/internal/mutex.h
index 6520c2512..7c24f0725 100644
--- a/deps/jemalloc/include/jemalloc/internal/mutex.h
+++ b/deps/jemalloc/include/jemalloc/internal/mutex.h
@@ -37,14 +37,17 @@ struct malloc_mutex_s {
# endif
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
os_unfair_lock lock;
-#elif (defined(JEMALLOC_OSSPIN))
- OSSpinLock lock;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
pthread_mutex_t lock;
malloc_mutex_t *postponed_next;
#else
pthread_mutex_t lock;
#endif
+ /*
+ * Hint flag to avoid exclusive cache line contention
+ * during spin waiting
+ */
+ atomic_b_t locked;
};
/*
* We only touch witness when configured w/ debug. However we
@@ -84,10 +87,6 @@ struct malloc_mutex_s {
# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock)
# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock)
# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock))
-#elif (defined(JEMALLOC_OSSPIN))
-# define MALLOC_MUTEX_LOCK(m) OSSpinLockLock(&(m)->lock)
-# define MALLOC_MUTEX_UNLOCK(m) OSSpinLockUnlock(&(m)->lock)
-# define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock))
#else
# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock)
# define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock)
@@ -101,22 +100,37 @@ struct malloc_mutex_s {
#ifdef _WIN32
# define MALLOC_MUTEX_INITIALIZER
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
-# define MALLOC_MUTEX_INITIALIZER \
- {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \
- WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
-#elif (defined(JEMALLOC_OSSPIN))
-# define MALLOC_MUTEX_INITIALIZER \
- {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \
+# if defined(JEMALLOC_DEBUG)
+# define MALLOC_MUTEX_INITIALIZER \
+ {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \
+ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
+# else
+# define MALLOC_MUTEX_INITIALIZER \
+ {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
+# endif
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
-# define MALLOC_MUTEX_INITIALIZER \
- {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \
- WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
+# if (defined(JEMALLOC_DEBUG))
+# define MALLOC_MUTEX_INITIALIZER \
+ {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \
+ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
+# else
+# define MALLOC_MUTEX_INITIALIZER \
+ {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \
+ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
+# endif
+
#else
# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
+# if defined(JEMALLOC_DEBUG)
# define MALLOC_MUTEX_INITIALIZER \
- {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \
- WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
+ {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \
+ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
+# else
+# define MALLOC_MUTEX_INITIALIZER \
+ {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \
+ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
+# endif
#endif
#ifdef JEMALLOC_LAZY_LOCK
@@ -139,6 +153,7 @@ void malloc_mutex_lock_slow(malloc_mutex_t *mutex);
static inline void
malloc_mutex_lock_final(malloc_mutex_t *mutex) {
MALLOC_MUTEX_LOCK(mutex);
+ atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED);
}
static inline bool
@@ -164,6 +179,7 @@ malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
if (isthreaded) {
if (malloc_mutex_trylock_final(mutex)) {
+ atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED);
return true;
}
mutex_owner_stats_update(tsdn, mutex);
@@ -203,6 +219,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
if (isthreaded) {
if (malloc_mutex_trylock_final(mutex)) {
malloc_mutex_lock_slow(mutex);
+ atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED);
}
mutex_owner_stats_update(tsdn, mutex);
}
@@ -211,6 +228,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
static inline void
malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+ atomic_store_b(&mutex->locked, false, ATOMIC_RELAXED);
witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
if (isthreaded) {
MALLOC_MUTEX_UNLOCK(mutex);
@@ -245,4 +263,26 @@ malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data,
atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED);
}
+static inline void
+malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data,
+ malloc_mutex_t *mutex) {
+ mutex_prof_data_t *source = &mutex->prof_data;
+ /* Can only read holding the mutex. */
+ malloc_mutex_assert_owner(tsdn, mutex);
+
+ nstime_add(&data->tot_wait_time, &source->tot_wait_time);
+ if (nstime_compare(&source->max_wait_time, &data->max_wait_time) > 0) {
+ nstime_copy(&data->max_wait_time, &source->max_wait_time);
+ }
+ data->n_wait_times += source->n_wait_times;
+ data->n_spin_acquired += source->n_spin_acquired;
+ if (data->max_n_thds < source->max_n_thds) {
+ data->max_n_thds = source->max_n_thds;
+ }
+ /* n_wait_thds is not reported. */
+ atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED);
+ data->n_owner_switches += source->n_owner_switches;
+ data->n_lock_ops += source->n_lock_ops;
+}
+
#endif /* JEMALLOC_INTERNAL_MUTEX_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/mutex_prof.h b/deps/jemalloc/include/jemalloc/internal/mutex_prof.h
index ce183d335..2cb8fb0cb 100644
--- a/deps/jemalloc/include/jemalloc/internal/mutex_prof.h
+++ b/deps/jemalloc/include/jemalloc/internal/mutex_prof.h
@@ -35,22 +35,31 @@ typedef enum {
mutex_prof_num_arena_mutexes
} mutex_prof_arena_ind_t;
+/*
+ * The forth parameter is a boolean value that is true for derived rate counters
+ * and false for real ones.
+ */
#define MUTEX_PROF_UINT64_COUNTERS \
- OP(num_ops, uint64_t, "n_lock_ops") \
- OP(num_wait, uint64_t, "n_waiting") \
- OP(num_spin_acq, uint64_t, "n_spin_acq") \
- OP(num_owner_switch, uint64_t, "n_owner_switch") \
- OP(total_wait_time, uint64_t, "total_wait_ns") \
- OP(max_wait_time, uint64_t, "max_wait_ns")
+ OP(num_ops, uint64_t, "n_lock_ops", false, num_ops) \
+ OP(num_ops_ps, uint64_t, "(#/sec)", true, num_ops) \
+ OP(num_wait, uint64_t, "n_waiting", false, num_wait) \
+ OP(num_wait_ps, uint64_t, "(#/sec)", true, num_wait) \
+ OP(num_spin_acq, uint64_t, "n_spin_acq", false, num_spin_acq) \
+ OP(num_spin_acq_ps, uint64_t, "(#/sec)", true, num_spin_acq) \
+ OP(num_owner_switch, uint64_t, "n_owner_switch", false, num_owner_switch) \
+ OP(num_owner_switch_ps, uint64_t, "(#/sec)", true, num_owner_switch) \
+ OP(total_wait_time, uint64_t, "total_wait_ns", false, total_wait_time) \
+ OP(total_wait_time_ps, uint64_t, "(#/sec)", true, total_wait_time) \
+ OP(max_wait_time, uint64_t, "max_wait_ns", false, max_wait_time)
#define MUTEX_PROF_UINT32_COUNTERS \
- OP(max_num_thds, uint32_t, "max_n_thds")
+ OP(max_num_thds, uint32_t, "max_n_thds", false, max_num_thds)
#define MUTEX_PROF_COUNTERS \
MUTEX_PROF_UINT64_COUNTERS \
MUTEX_PROF_UINT32_COUNTERS
-#define OP(counter, type, human) mutex_counter_##counter,
+#define OP(counter, type, human, derived, base_counter) mutex_counter_##counter,
#define COUNTER_ENUM(counter_list, t) \
typedef enum { \
diff --git a/deps/jemalloc/include/jemalloc/internal/prof_externs.h b/deps/jemalloc/include/jemalloc/internal/prof_externs.h
index 04348696f..094f3e170 100644
--- a/deps/jemalloc/include/jemalloc/internal/prof_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/prof_externs.h
@@ -14,6 +14,7 @@ extern bool opt_prof_gdump; /* High-water memory dumping. */
extern bool opt_prof_final; /* Final profile dumping. */
extern bool opt_prof_leak; /* Dump leak summary at exit. */
extern bool opt_prof_accum; /* Report cumulative bytes. */
+extern bool opt_prof_log; /* Turn logging on at boot. */
extern char opt_prof_prefix[
/* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
@@ -45,7 +46,8 @@ extern size_t lg_prof_sample;
void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
prof_tctx_t *tctx);
-void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
+void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
+ prof_tctx_t *tctx);
void bt_init(prof_bt_t *bt, void **vec);
void prof_backtrace(prof_bt_t *bt);
prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
@@ -89,4 +91,15 @@ void prof_postfork_parent(tsdn_t *tsdn);
void prof_postfork_child(tsdn_t *tsdn);
void prof_sample_threshold_update(prof_tdata_t *tdata);
+bool prof_log_start(tsdn_t *tsdn, const char *filename);
+bool prof_log_stop(tsdn_t *tsdn);
+#ifdef JEMALLOC_JET
+size_t prof_log_bt_count(void);
+size_t prof_log_alloc_count(void);
+size_t prof_log_thr_count(void);
+bool prof_log_is_logging(void);
+bool prof_log_rep_check(void);
+void prof_log_dummy_set(bool new_value);
+#endif
+
#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h b/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h
index a6efb4851..471d9853c 100644
--- a/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h
+++ b/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h
@@ -4,7 +4,8 @@
#include "jemalloc/internal/mutex.h"
static inline bool
-prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) {
+prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum,
+ uint64_t accumbytes) {
cassert(config_prof);
bool overflow;
@@ -42,7 +43,8 @@ prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) {
}
static inline void
-prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) {
+prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum,
+ size_t usize) {
cassert(config_prof);
/*
@@ -55,15 +57,15 @@ prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) {
#ifdef JEMALLOC_ATOMIC_U64
a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED);
do {
- a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS -
- usize) : 0;
+ a1 = (a0 >= SC_LARGE_MINCLASS - usize)
+ ? a0 - (SC_LARGE_MINCLASS - usize) : 0;
} while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0,
a1, ATOMIC_RELAXED, ATOMIC_RELAXED));
#else
malloc_mutex_lock(tsdn, &prof_accum->mtx);
a0 = prof_accum->accumbytes;
- a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - usize) :
- 0;
+ a1 = (a0 >= SC_LARGE_MINCLASS - usize)
+ ? a0 - (SC_LARGE_MINCLASS - usize) : 0;
prof_accum->accumbytes = a1;
malloc_mutex_unlock(tsdn, &prof_accum->mtx);
#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h b/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h
index 6ff465ad7..8ba8a1e1f 100644
--- a/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h
+++ b/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h
@@ -1,6 +1,7 @@
#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H
#define JEMALLOC_INTERNAL_PROF_INLINES_B_H
+#include "jemalloc/internal/safety_check.h"
#include "jemalloc/internal/sz.h"
JEMALLOC_ALWAYS_INLINE bool
@@ -61,13 +62,54 @@ prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) {
arena_prof_tctx_reset(tsdn, ptr, tctx);
}
+JEMALLOC_ALWAYS_INLINE nstime_t
+prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ return arena_prof_alloc_time_get(tsdn, ptr, alloc_ctx);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx,
+ nstime_t t) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ arena_prof_alloc_time_set(tsdn, ptr, alloc_ctx, t);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_sample_check(tsd_t *tsd, size_t usize, bool update) {
+ ssize_t check = update ? 0 : usize;
+
+ int64_t bytes_until_sample = tsd_bytes_until_sample_get(tsd);
+ if (update) {
+ bytes_until_sample -= usize;
+ if (tsd_nominal(tsd)) {
+ tsd_bytes_until_sample_set(tsd, bytes_until_sample);
+ }
+ }
+ if (likely(bytes_until_sample >= check)) {
+ return true;
+ }
+
+ return false;
+}
+
JEMALLOC_ALWAYS_INLINE bool
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
- prof_tdata_t **tdata_out) {
+ prof_tdata_t **tdata_out) {
prof_tdata_t *tdata;
cassert(config_prof);
+ /* Fastpath: no need to load tdata */
+ if (likely(prof_sample_check(tsd, usize, update))) {
+ return true;
+ }
+
+ bool booted = tsd_prof_tdata_get(tsd);
tdata = prof_tdata_get(tsd, true);
if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
tdata = NULL;
@@ -81,21 +123,23 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
return true;
}
- if (likely(tdata->bytes_until_sample >= usize)) {
- if (update) {
- tdata->bytes_until_sample -= usize;
- }
+ /*
+ * If this was the first creation of tdata, then
+ * prof_tdata_get() reset bytes_until_sample, so decrement and
+ * check it again
+ */
+ if (!booted && prof_sample_check(tsd, usize, update)) {
return true;
- } else {
- if (tsd_reentrancy_level_get(tsd) > 0) {
- return true;
- }
- /* Compute new sample threshold. */
- if (update) {
- prof_sample_threshold_update(tdata);
- }
- return !tdata->active;
}
+
+ if (tsd_reentrancy_level_get(tsd) > 0) {
+ return true;
+ }
+ /* Compute new sample threshold. */
+ if (update) {
+ prof_sample_threshold_update(tdata);
+ }
+ return !tdata->active;
}
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
@@ -187,7 +231,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
* counters.
*/
if (unlikely(old_sampled)) {
- prof_free_sampled_object(tsd, old_usize, old_tctx);
+ prof_free_sampled_object(tsd, ptr, old_usize, old_tctx);
}
}
@@ -199,7 +243,7 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) {
assert(usize == isalloc(tsd_tsdn(tsd), ptr));
if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
- prof_free_sampled_object(tsd, usize, tctx);
+ prof_free_sampled_object(tsd, ptr, usize, tctx);
}
}
diff --git a/deps/jemalloc/include/jemalloc/internal/prof_structs.h b/deps/jemalloc/include/jemalloc/internal/prof_structs.h
index 0d58ae100..34ed4822b 100644
--- a/deps/jemalloc/include/jemalloc/internal/prof_structs.h
+++ b/deps/jemalloc/include/jemalloc/internal/prof_structs.h
@@ -169,7 +169,6 @@ struct prof_tdata_s {
/* Sampling state. */
uint64_t prng_state;
- uint64_t bytes_until_sample;
/* State used to avoid dumping while operating on prof internals. */
bool enq;
diff --git a/deps/jemalloc/include/jemalloc/internal/quantum.h b/deps/jemalloc/include/jemalloc/internal/quantum.h
new file mode 100644
index 000000000..821086e99
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/quantum.h
@@ -0,0 +1,77 @@
+#ifndef JEMALLOC_INTERNAL_QUANTUM_H
+#define JEMALLOC_INTERNAL_QUANTUM_H
+
+/*
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+#ifndef LG_QUANTUM
+# if (defined(__i386__) || defined(_M_IX86))
+# define LG_QUANTUM 4
+# endif
+# ifdef __ia64__
+# define LG_QUANTUM 4
+# endif
+# ifdef __alpha__
+# define LG_QUANTUM 4
+# endif
+# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__))
+# define LG_QUANTUM 4
+# endif
+# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
+# define LG_QUANTUM 4
+# endif
+# ifdef __arm__
+# define LG_QUANTUM 3
+# endif
+# ifdef __aarch64__
+# define LG_QUANTUM 4
+# endif
+# ifdef __hppa__
+# define LG_QUANTUM 4
+# endif
+# ifdef __m68k__
+# define LG_QUANTUM 3
+# endif
+# ifdef __mips__
+# define LG_QUANTUM 3
+# endif
+# ifdef __nios2__
+# define LG_QUANTUM 3
+# endif
+# ifdef __or1k__
+# define LG_QUANTUM 3
+# endif
+# ifdef __powerpc__
+# define LG_QUANTUM 4
+# endif
+# if defined(__riscv) || defined(__riscv__)
+# define LG_QUANTUM 4
+# endif
+# ifdef __s390__
+# define LG_QUANTUM 4
+# endif
+# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \
+ defined(__SH4_SINGLE_ONLY__))
+# define LG_QUANTUM 4
+# endif
+# ifdef __tile__
+# define LG_QUANTUM 4
+# endif
+# ifdef __le32__
+# define LG_QUANTUM 4
+# endif
+# ifndef LG_QUANTUM
+# error "Unknown minimum alignment for architecture; specify via "
+ "--with-lg-quantum"
+# endif
+#endif
+
+#define QUANTUM ((size_t)(1U << LG_QUANTUM))
+#define QUANTUM_MASK (QUANTUM - 1)
+
+/* Return the smallest quantum multiple that is >= a. */
+#define QUANTUM_CEILING(a) \
+ (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
+
+#endif /* JEMALLOC_INTERNAL_QUANTUM_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/rtree.h b/deps/jemalloc/include/jemalloc/internal/rtree.h
index b59d33a80..16ccbebee 100644
--- a/deps/jemalloc/include/jemalloc/internal/rtree.h
+++ b/deps/jemalloc/include/jemalloc/internal/rtree.h
@@ -4,7 +4,7 @@
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree_tsd.h"
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/tsd.h"
/*
@@ -31,7 +31,7 @@
# error Unsupported number of significant virtual address bits
#endif
/* Use compact leaf representation if virtual address encoding allows. */
-#if RTREE_NHIB >= LG_CEIL_NSIZES
+#if RTREE_NHIB >= LG_CEIL(SC_NSIZES)
# define RTREE_LEAF_COMPACT
#endif
@@ -170,8 +170,8 @@ rtree_subkey(uintptr_t key, unsigned level) {
*/
# ifdef RTREE_LEAF_COMPACT
JEMALLOC_ALWAYS_INLINE uintptr_t
-rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
- bool dependent) {
+rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree,
+ rtree_leaf_elm_t *elm, bool dependent) {
return (uintptr_t)atomic_load_p(&elm->le_bits, dependent
? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
}
@@ -208,7 +208,7 @@ rtree_leaf_elm_bits_slab_get(uintptr_t bits) {
# endif
JEMALLOC_ALWAYS_INLINE extent_t *
-rtree_leaf_elm_extent_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
+rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, bool dependent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
@@ -221,7 +221,7 @@ rtree_leaf_elm_extent_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
}
JEMALLOC_ALWAYS_INLINE szind_t
-rtree_leaf_elm_szind_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
+rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, bool dependent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
@@ -233,7 +233,7 @@ rtree_leaf_elm_szind_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
}
JEMALLOC_ALWAYS_INLINE bool
-rtree_leaf_elm_slab_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
+rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, bool dependent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
@@ -245,7 +245,7 @@ rtree_leaf_elm_slab_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
}
static inline void
-rtree_leaf_elm_extent_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
+rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, extent_t *extent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true);
@@ -259,9 +259,9 @@ rtree_leaf_elm_extent_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
}
static inline void
-rtree_leaf_elm_szind_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
+rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, szind_t szind) {
- assert(szind <= NSIZES);
+ assert(szind <= SC_NSIZES);
#ifdef RTREE_LEAF_COMPACT
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
@@ -277,7 +277,7 @@ rtree_leaf_elm_szind_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
}
static inline void
-rtree_leaf_elm_slab_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
+rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, bool slab) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
@@ -292,8 +292,8 @@ rtree_leaf_elm_slab_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
}
static inline void
-rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
- extent_t *extent, szind_t szind, bool slab) {
+rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree,
+ rtree_leaf_elm_t *elm, extent_t *extent, szind_t szind, bool slab) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = ((uintptr_t)szind << LG_VADDR) |
((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) |
@@ -313,7 +313,7 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
static inline void
rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, szind_t szind, bool slab) {
- assert(!slab || szind < NBINS);
+ assert(!slab || szind < SC_NBINS);
/*
* The caller implicitly assures that it is the only writer to the szind
@@ -429,7 +429,7 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key,
dependent);
if (!dependent && elm == NULL) {
- return NSIZES;
+ return SC_NSIZES;
}
return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
}
@@ -452,6 +452,42 @@ rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
return false;
}
+/*
+ * Try to read szind_slab from the L1 cache. Returns true on a hit,
+ * and fills in r_szind and r_slab. Otherwise returns false.
+ *
+ * Key is allowed to be NULL in order to save an extra branch on the
+ * fastpath. returns false in this case.
+ */
+JEMALLOC_ALWAYS_INLINE bool
+rtree_szind_slab_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+ uintptr_t key, szind_t *r_szind, bool *r_slab) {
+ rtree_leaf_elm_t *elm;
+
+ size_t slot = rtree_cache_direct_map(key);
+ uintptr_t leafkey = rtree_leafkey(key);
+ assert(leafkey != RTREE_LEAFKEY_INVALID);
+
+ if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) {
+ rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf;
+ assert(leaf != NULL);
+ uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1);
+ elm = &leaf[subkey];
+
+#ifdef RTREE_LEAF_COMPACT
+ uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree,
+ elm, true);
+ *r_szind = rtree_leaf_elm_bits_szind_get(bits);
+ *r_slab = rtree_leaf_elm_bits_slab_get(bits);
+#else
+ *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, true);
+ *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, true);
+#endif
+ return true;
+ } else {
+ return false;
+ }
+}
JEMALLOC_ALWAYS_INLINE bool
rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) {
@@ -474,7 +510,7 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
static inline void
rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key, szind_t szind, bool slab) {
- assert(!slab || szind < NBINS);
+ assert(!slab || szind < SC_NBINS);
rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true);
rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab);
@@ -486,7 +522,7 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true);
assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) !=
NULL);
- rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false);
+ rtree_leaf_elm_write(tsdn, rtree, elm, NULL, SC_NSIZES, false);
}
#endif /* JEMALLOC_INTERNAL_RTREE_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h b/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h
index 93a75173a..562e29297 100644
--- a/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h
+++ b/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h
@@ -26,7 +26,7 @@
* Zero initializer required for tsd initialization only. Proper initialization
* done via rtree_ctx_data_init().
*/
-#define RTREE_CTX_ZERO_INITIALIZER {{{0}}, {{0}}}
+#define RTREE_CTX_ZERO_INITIALIZER {{{0, 0}}, {{0, 0}}}
typedef struct rtree_leaf_elm_s rtree_leaf_elm_t;
diff --git a/deps/jemalloc/include/jemalloc/internal/safety_check.h b/deps/jemalloc/include/jemalloc/internal/safety_check.h
new file mode 100644
index 000000000..53339ac12
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/safety_check.h
@@ -0,0 +1,26 @@
+#ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H
+#define JEMALLOC_INTERNAL_SAFETY_CHECK_H
+
+void safety_check_fail(const char *format, ...);
+/* Can set to NULL for a default. */
+void safety_check_set_abort(void (*abort_fn)());
+
+JEMALLOC_ALWAYS_INLINE void
+safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) {
+ assert(usize < bumped_usize);
+ for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) {
+ *((unsigned char *)ptr + i) = 0xBC;
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void
+safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize)
+{
+ for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) {
+ if (unlikely(*((unsigned char *)ptr + i) != 0xBC)) {
+ safety_check_fail("Use after free error\n");
+ }
+ }
+}
+
+#endif /*JEMALLOC_INTERNAL_SAFETY_CHECK_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/sc.h b/deps/jemalloc/include/jemalloc/internal/sc.h
new file mode 100644
index 000000000..9a099d8b6
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/sc.h
@@ -0,0 +1,333 @@
+#ifndef JEMALLOC_INTERNAL_SC_H
+#define JEMALLOC_INTERNAL_SC_H
+
+#include "jemalloc/internal/jemalloc_internal_types.h"
+
+/*
+ * Size class computations:
+ *
+ * These are a little tricky; we'll first start by describing how things
+ * generally work, and then describe some of the details.
+ *
+ * Ignore the first few size classes for a moment. We can then split all the
+ * remaining size classes into groups. The size classes in a group are spaced
+ * such that they cover allocation request sizes in a power-of-2 range. The
+ * power of two is called the base of the group, and the size classes in it
+ * satisfy allocations in the half-open range (base, base * 2]. There are
+ * SC_NGROUP size classes in each group, equally spaced in the range, so that
+ * each one covers allocations for base / SC_NGROUP possible allocation sizes.
+ * We call that value (base / SC_NGROUP) the delta of the group. Each size class
+ * is delta larger than the one before it (including the initial size class in a
+ * group, which is delta larger than base, the largest size class in the
+ * previous group).
+ * To make the math all work out nicely, we require that SC_NGROUP is a power of
+ * two, and define it in terms of SC_LG_NGROUP. We'll often talk in terms of
+ * lg_base and lg_delta. For each of these groups then, we have that
+ * lg_delta == lg_base - SC_LG_NGROUP.
+ * The size classes in a group with a given lg_base and lg_delta (which, recall,
+ * can be computed from lg_base for these groups) are therefore:
+ * base + 1 * delta
+ * which covers allocations in (base, base + 1 * delta]
+ * base + 2 * delta
+ * which covers allocations in (base + 1 * delta, base + 2 * delta].
+ * base + 3 * delta
+ * which covers allocations in (base + 2 * delta, base + 3 * delta].
+ * ...
+ * base + SC_NGROUP * delta ( == 2 * base)
+ * which covers allocations in (base + (SC_NGROUP - 1) * delta, 2 * base].
+ * (Note that currently SC_NGROUP is always 4, so the "..." is empty in
+ * practice.)
+ * Note that the last size class in the group is the next power of two (after
+ * base), so that we've set up the induction correctly for the next group's
+ * selection of delta.
+ *
+ * Now, let's start considering the first few size classes. Two extra constants
+ * come into play here: LG_QUANTUM and SC_LG_TINY_MIN. LG_QUANTUM ensures
+ * correct platform alignment; all objects of size (1 << LG_QUANTUM) or larger
+ * are at least (1 << LG_QUANTUM) aligned; this can be used to ensure that we
+ * never return improperly aligned memory, by making (1 << LG_QUANTUM) equal the
+ * highest required alignment of a platform. For allocation sizes smaller than
+ * (1 << LG_QUANTUM) though, we can be more relaxed (since we don't support
+ * platforms with types with alignment larger than their size). To allow such
+ * allocations (without wasting space unnecessarily), we introduce tiny size
+ * classes; one per power of two, up until we hit the quantum size. There are
+ * therefore LG_QUANTUM - SC_LG_TINY_MIN such size classes.
+ *
+ * Next, we have a size class of size (1 << LG_QUANTUM). This can't be the
+ * start of a group in the sense we described above (covering a power of two
+ * range) since, if we divided into it to pick a value of delta, we'd get a
+ * delta smaller than (1 << LG_QUANTUM) for sizes >= (1 << LG_QUANTUM), which
+ * is against the rules.
+ *
+ * The first base we can divide by SC_NGROUP while still being at least
+ * (1 << LG_QUANTUM) is SC_NGROUP * (1 << LG_QUANTUM). We can get there by
+ * having SC_NGROUP size classes, spaced (1 << LG_QUANTUM) apart. These size
+ * classes are:
+ * 1 * (1 << LG_QUANTUM)
+ * 2 * (1 << LG_QUANTUM)
+ * 3 * (1 << LG_QUANTUM)
+ * ... (although, as above, this "..." is empty in practice)
+ * SC_NGROUP * (1 << LG_QUANTUM).
+ *
+ * There are SC_NGROUP of these size classes, so we can regard it as a sort of
+ * pseudo-group, even though it spans multiple powers of 2, is divided
+ * differently, and both starts and ends on a power of 2 (as opposed to just
+ * ending). SC_NGROUP is itself a power of two, so the first group after the
+ * pseudo-group has the power-of-two base SC_NGROUP * (1 << LG_QUANTUM), for a
+ * lg_base of LG_QUANTUM + SC_LG_NGROUP. We can divide this base into SC_NGROUP
+ * sizes without violating our LG_QUANTUM requirements, so we can safely set
+ * lg_delta = lg_base - SC_LG_GROUP (== LG_QUANTUM).
+ *
+ * So, in order, the size classes are:
+ *
+ * Tiny size classes:
+ * - Count: LG_QUANTUM - SC_LG_TINY_MIN.
+ * - Sizes:
+ * 1 << SC_LG_TINY_MIN
+ * 1 << (SC_LG_TINY_MIN + 1)
+ * 1 << (SC_LG_TINY_MIN + 2)
+ * ...
+ * 1 << (LG_QUANTUM - 1)
+ *
+ * Initial pseudo-group:
+ * - Count: SC_NGROUP
+ * - Sizes:
+ * 1 * (1 << LG_QUANTUM)
+ * 2 * (1 << LG_QUANTUM)
+ * 3 * (1 << LG_QUANTUM)
+ * ...
+ * SC_NGROUP * (1 << LG_QUANTUM)
+ *
+ * Regular group 0:
+ * - Count: SC_NGROUP
+ * - Sizes:
+ * (relative to lg_base of LG_QUANTUM + SC_LG_NGROUP and lg_delta of
+ * lg_base - SC_LG_NGROUP)
+ * (1 << lg_base) + 1 * (1 << lg_delta)
+ * (1 << lg_base) + 2 * (1 << lg_delta)
+ * (1 << lg_base) + 3 * (1 << lg_delta)
+ * ...
+ * (1 << lg_base) + SC_NGROUP * (1 << lg_delta) [ == (1 << (lg_base + 1)) ]
+ *
+ * Regular group 1:
+ * - Count: SC_NGROUP
+ * - Sizes:
+ * (relative to lg_base of LG_QUANTUM + SC_LG_NGROUP + 1 and lg_delta of
+ * lg_base - SC_LG_NGROUP)
+ * (1 << lg_base) + 1 * (1 << lg_delta)
+ * (1 << lg_base) + 2 * (1 << lg_delta)
+ * (1 << lg_base) + 3 * (1 << lg_delta)
+ * ...
+ * (1 << lg_base) + SC_NGROUP * (1 << lg_delta) [ == (1 << (lg_base + 1)) ]
+ *
+ * ...
+ *
+ * Regular group N:
+ * - Count: SC_NGROUP
+ * - Sizes:
+ * (relative to lg_base of LG_QUANTUM + SC_LG_NGROUP + N and lg_delta of
+ * lg_base - SC_LG_NGROUP)
+ * (1 << lg_base) + 1 * (1 << lg_delta)
+ * (1 << lg_base) + 2 * (1 << lg_delta)
+ * (1 << lg_base) + 3 * (1 << lg_delta)
+ * ...
+ * (1 << lg_base) + SC_NGROUP * (1 << lg_delta) [ == (1 << (lg_base + 1)) ]
+ *
+ *
+ * Representation of metadata:
+ * To make the math easy, we'll mostly work in lg quantities. We record lg_base,
+ * lg_delta, and ndelta (i.e. number of deltas above the base) on a
+ * per-size-class basis, and maintain the invariant that, across all size
+ * classes, size == (1 << lg_base) + ndelta * (1 << lg_delta).
+ *
+ * For regular groups (i.e. those with lg_base >= LG_QUANTUM + SC_LG_NGROUP),
+ * lg_delta is lg_base - SC_LG_NGROUP, and ndelta goes from 1 to SC_NGROUP.
+ *
+ * For the initial tiny size classes (if any), lg_base is lg(size class size).
+ * lg_delta is lg_base for the first size class, and lg_base - 1 for all
+ * subsequent ones. ndelta is always 0.
+ *
+ * For the pseudo-group, if there are no tiny size classes, then we set
+ * lg_base == LG_QUANTUM, lg_delta == LG_QUANTUM, and have ndelta range from 0
+ * to SC_NGROUP - 1. (Note that delta == base, so base + (SC_NGROUP - 1) * delta
+ * is just SC_NGROUP * base, or (1 << (SC_LG_NGROUP + LG_QUANTUM)), so we do
+ * indeed get a power of two that way). If there *are* tiny size classes, then
+ * the first size class needs to have lg_delta relative to the largest tiny size
+ * class. We therefore set lg_base == LG_QUANTUM - 1,
+ * lg_delta == LG_QUANTUM - 1, and ndelta == 1, keeping the rest of the
+ * pseudo-group the same.
+ *
+ *
+ * Other terminology:
+ * "Small" size classes mean those that are allocated out of bins, which is the
+ * same as those that are slab allocated.
+ * "Large" size classes are those that are not small. The cutoff for counting as
+ * large is page size * group size.
+ */
+
+/*
+ * Size class N + (1 << SC_LG_NGROUP) twice the size of size class N.
+ */
+#define SC_LG_NGROUP 2
+#define SC_LG_TINY_MIN 3
+
+#if SC_LG_TINY_MIN == 0
+/* The div module doesn't support division by 1, which this would require. */
+#error "Unsupported LG_TINY_MIN"
+#endif
+
+/*
+ * The definitions below are all determined by the above settings and system
+ * characteristics.
+ */
+#define SC_NGROUP (1ULL << SC_LG_NGROUP)
+#define SC_PTR_BITS ((1ULL << LG_SIZEOF_PTR) * 8)
+#define SC_NTINY (LG_QUANTUM - SC_LG_TINY_MIN)
+#define SC_LG_TINY_MAXCLASS (LG_QUANTUM > SC_LG_TINY_MIN ? LG_QUANTUM - 1 : -1)
+#define SC_NPSEUDO SC_NGROUP
+#define SC_LG_FIRST_REGULAR_BASE (LG_QUANTUM + SC_LG_NGROUP)
+/*
+ * We cap allocations to be less than 2 ** (ptr_bits - 1), so the highest base
+ * we need is 2 ** (ptr_bits - 2). (This also means that the last group is 1
+ * size class shorter than the others).
+ * We could probably save some space in arenas by capping this at LG_VADDR size.
+ */
+#define SC_LG_BASE_MAX (SC_PTR_BITS - 2)
+#define SC_NREGULAR (SC_NGROUP * \
+ (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1)
+#define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR)
+
+/* The number of size classes that are a multiple of the page size. */
+#define SC_NPSIZES ( \
+ /* Start with all the size classes. */ \
+ SC_NSIZES \
+ /* Subtract out those groups with too small a base. */ \
+ - (LG_PAGE - 1 - SC_LG_FIRST_REGULAR_BASE) * SC_NGROUP \
+ /* And the pseudo-group. */ \
+ - SC_NPSEUDO \
+ /* And the tiny group. */ \
+ - SC_NTINY \
+ /* Sizes where ndelta*delta is not a multiple of the page size. */ \
+ - (SC_LG_NGROUP * SC_NGROUP))
+/*
+ * Note that the last line is computed as the sum of the second column in the
+ * following table:
+ * lg(base) | count of sizes to exclude
+ * ------------------------------|-----------------------------
+ * LG_PAGE - 1 | SC_NGROUP - 1
+ * LG_PAGE | SC_NGROUP - 1
+ * LG_PAGE + 1 | SC_NGROUP - 2
+ * LG_PAGE + 2 | SC_NGROUP - 4
+ * ... | ...
+ * LG_PAGE + (SC_LG_NGROUP - 1) | SC_NGROUP - (SC_NGROUP / 2)
+ */
+
+/*
+ * We declare a size class is binnable if size < page size * group. Or, in other
+ * words, lg(size) < lg(page size) + lg(group size).
+ */
+#define SC_NBINS ( \
+ /* Sub-regular size classes. */ \
+ SC_NTINY + SC_NPSEUDO \
+ /* Groups with lg_regular_min_base <= lg_base <= lg_base_max */ \
+ + SC_NGROUP * (LG_PAGE + SC_LG_NGROUP - SC_LG_FIRST_REGULAR_BASE) \
+ /* Last SC of the last group hits the bound exactly; exclude it. */ \
+ - 1)
+
+/*
+ * The size2index_tab lookup table uses uint8_t to encode each bin index, so we
+ * cannot support more than 256 small size classes.
+ */
+#if (SC_NBINS > 256)
+# error "Too many small size classes"
+#endif
+
+/* The largest size class in the lookup table. */
+#define SC_LOOKUP_MAXCLASS ((size_t)1 << 12)
+
+/* Internal, only used for the definition of SC_SMALL_MAXCLASS. */
+#define SC_SMALL_MAX_BASE ((size_t)1 << (LG_PAGE + SC_LG_NGROUP - 1))
+#define SC_SMALL_MAX_DELTA ((size_t)1 << (LG_PAGE - 1))
+
+/* The largest size class allocated out of a slab. */
+#define SC_SMALL_MAXCLASS (SC_SMALL_MAX_BASE \
+ + (SC_NGROUP - 1) * SC_SMALL_MAX_DELTA)
+
+/* The smallest size class not allocated out of a slab. */
+#define SC_LARGE_MINCLASS ((size_t)1ULL << (LG_PAGE + SC_LG_NGROUP))
+#define SC_LG_LARGE_MINCLASS (LG_PAGE + SC_LG_NGROUP)
+
+/* Internal; only used for the definition of SC_LARGE_MAXCLASS. */
+#define SC_MAX_BASE ((size_t)1 << (SC_PTR_BITS - 2))
+#define SC_MAX_DELTA ((size_t)1 << (SC_PTR_BITS - 2 - SC_LG_NGROUP))
+
+/* The largest size class supported. */
+#define SC_LARGE_MAXCLASS (SC_MAX_BASE + (SC_NGROUP - 1) * SC_MAX_DELTA)
+
+typedef struct sc_s sc_t;
+struct sc_s {
+ /* Size class index, or -1 if not a valid size class. */
+ int index;
+ /* Lg group base size (no deltas added). */
+ int lg_base;
+ /* Lg delta to previous size class. */
+ int lg_delta;
+ /* Delta multiplier. size == 1<<lg_base + ndelta<<lg_delta */
+ int ndelta;
+ /*
+ * True if the size class is a multiple of the page size, false
+ * otherwise.
+ */
+ bool psz;
+ /*
+ * True if the size class is a small, bin, size class. False otherwise.
+ */
+ bool bin;
+ /* The slab page count if a small bin size class, 0 otherwise. */
+ int pgs;
+ /* Same as lg_delta if a lookup table size class, 0 otherwise. */
+ int lg_delta_lookup;
+};
+
+typedef struct sc_data_s sc_data_t;
+struct sc_data_s {
+ /* Number of tiny size classes. */
+ unsigned ntiny;
+ /* Number of bins supported by the lookup table. */
+ int nlbins;
+ /* Number of small size class bins. */
+ int nbins;
+ /* Number of size classes. */
+ int nsizes;
+ /* Number of bits required to store NSIZES. */
+ int lg_ceil_nsizes;
+ /* Number of size classes that are a multiple of (1U << LG_PAGE). */
+ unsigned npsizes;
+ /* Lg of maximum tiny size class (or -1, if none). */
+ int lg_tiny_maxclass;
+ /* Maximum size class included in lookup table. */
+ size_t lookup_maxclass;
+ /* Maximum small size class. */
+ size_t small_maxclass;
+ /* Lg of minimum large size class. */
+ int lg_large_minclass;
+ /* The minimum large size class. */
+ size_t large_minclass;
+ /* Maximum (large) size class. */
+ size_t large_maxclass;
+ /* True if the sc_data_t has been initialized (for debugging only). */
+ bool initialized;
+
+ sc_t sc[SC_NSIZES];
+};
+
+void sc_data_init(sc_data_t *data);
+/*
+ * Updates slab sizes in [begin, end] to be pgs pages in length, if possible.
+ * Otherwise, does its best to accomodate the request.
+ */
+void sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end,
+ int pgs);
+void sc_boot(sc_data_t *data);
+
+#endif /* JEMALLOC_INTERNAL_SC_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/seq.h b/deps/jemalloc/include/jemalloc/internal/seq.h
new file mode 100644
index 000000000..ef2df4c6e
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/seq.h
@@ -0,0 +1,55 @@
+#ifndef JEMALLOC_INTERNAL_SEQ_H
+#define JEMALLOC_INTERNAL_SEQ_H
+
+#include "jemalloc/internal/atomic.h"
+
+/*
+ * A simple seqlock implementation.
+ */
+
+#define seq_define(type, short_type) \
+typedef struct { \
+ atomic_zu_t seq; \
+ atomic_zu_t data[ \
+ (sizeof(type) + sizeof(size_t) - 1) / sizeof(size_t)]; \
+} seq_##short_type##_t; \
+ \
+/* \
+ * No internal synchronization -- the caller must ensure that there's \
+ * only a single writer at a time. \
+ */ \
+static inline void \
+seq_store_##short_type(seq_##short_type##_t *dst, type *src) { \
+ size_t buf[sizeof(dst->data) / sizeof(size_t)]; \
+ buf[sizeof(buf) / sizeof(size_t) - 1] = 0; \
+ memcpy(buf, src, sizeof(type)); \
+ size_t old_seq = atomic_load_zu(&dst->seq, ATOMIC_RELAXED); \
+ atomic_store_zu(&dst->seq, old_seq + 1, ATOMIC_RELAXED); \
+ atomic_fence(ATOMIC_RELEASE); \
+ for (size_t i = 0; i < sizeof(buf) / sizeof(size_t); i++) { \
+ atomic_store_zu(&dst->data[i], buf[i], ATOMIC_RELAXED); \
+ } \
+ atomic_store_zu(&dst->seq, old_seq + 2, ATOMIC_RELEASE); \
+} \
+ \
+/* Returns whether or not the read was consistent. */ \
+static inline bool \
+seq_try_load_##short_type(type *dst, seq_##short_type##_t *src) { \
+ size_t buf[sizeof(src->data) / sizeof(size_t)]; \
+ size_t seq1 = atomic_load_zu(&src->seq, ATOMIC_ACQUIRE); \
+ if (seq1 % 2 != 0) { \
+ return false; \
+ } \
+ for (size_t i = 0; i < sizeof(buf) / sizeof(size_t); i++) { \
+ buf[i] = atomic_load_zu(&src->data[i], ATOMIC_RELAXED); \
+ } \
+ atomic_fence(ATOMIC_ACQUIRE); \
+ size_t seq2 = atomic_load_zu(&src->seq, ATOMIC_RELAXED); \
+ if (seq1 != seq2) { \
+ return false; \
+ } \
+ memcpy(dst, buf, sizeof(type)); \
+ return true; \
+}
+
+#endif /* JEMALLOC_INTERNAL_SEQ_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/size_classes.sh b/deps/jemalloc/include/jemalloc/internal/size_classes.sh
deleted file mode 100755
index 998994d09..000000000
--- a/deps/jemalloc/include/jemalloc/internal/size_classes.sh
+++ /dev/null
@@ -1,361 +0,0 @@
-#!/bin/sh
-#
-# Usage: size_classes.sh <lg_qarr> <lg_tmin> <lg_parr> <lg_g>
-
-# The following limits are chosen such that they cover all supported platforms.
-
-# Pointer sizes.
-lg_zarr="2 3"
-
-# Quanta.
-lg_qarr=$1
-
-# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)].
-lg_tmin=$2
-
-# Maximum lookup size.
-lg_kmax=12
-
-# Page sizes.
-lg_parr=`echo $3 | tr ',' ' '`
-
-# Size class group size (number of size classes for each size doubling).
-lg_g=$4
-
-pow2() {
- e=$1
- pow2_result=1
- while [ ${e} -gt 0 ] ; do
- pow2_result=$((${pow2_result} + ${pow2_result}))
- e=$((${e} - 1))
- done
-}
-
-lg() {
- x=$1
- lg_result=0
- while [ ${x} -gt 1 ] ; do
- lg_result=$((${lg_result} + 1))
- x=$((${x} / 2))
- done
-}
-
-lg_ceil() {
- y=$1
- lg ${y}; lg_floor=${lg_result}
- pow2 ${lg_floor}; pow2_floor=${pow2_result}
- if [ ${pow2_floor} -lt ${y} ] ; then
- lg_ceil_result=$((${lg_floor} + 1))
- else
- lg_ceil_result=${lg_floor}
- fi
-}
-
-reg_size_compute() {
- lg_grp=$1
- lg_delta=$2
- ndelta=$3
-
- pow2 ${lg_grp}; grp=${pow2_result}
- pow2 ${lg_delta}; delta=${pow2_result}
- reg_size=$((${grp} + ${delta}*${ndelta}))
-}
-
-slab_size() {
- lg_p=$1
- lg_grp=$2
- lg_delta=$3
- ndelta=$4
-
- pow2 ${lg_p}; p=${pow2_result}
- reg_size_compute ${lg_grp} ${lg_delta} ${ndelta}
-
- # Compute smallest slab size that is an integer multiple of reg_size.
- try_slab_size=${p}
- try_nregs=$((${try_slab_size} / ${reg_size}))
- perfect=0
- while [ ${perfect} -eq 0 ] ; do
- perfect_slab_size=${try_slab_size}
- perfect_nregs=${try_nregs}
-
- try_slab_size=$((${try_slab_size} + ${p}))
- try_nregs=$((${try_slab_size} / ${reg_size}))
- if [ ${perfect_slab_size} -eq $((${perfect_nregs} * ${reg_size})) ] ; then
- perfect=1
- fi
- done
-
- slab_size_pgs=$((${perfect_slab_size} / ${p}))
-}
-
-size_class() {
- index=$1
- lg_grp=$2
- lg_delta=$3
- ndelta=$4
- lg_p=$5
- lg_kmax=$6
-
- if [ ${lg_delta} -ge ${lg_p} ] ; then
- psz="yes"
- else
- pow2 ${lg_p}; p=${pow2_result}
- pow2 ${lg_grp}; grp=${pow2_result}
- pow2 ${lg_delta}; delta=${pow2_result}
- sz=$((${grp} + ${delta} * ${ndelta}))
- npgs=$((${sz} / ${p}))
- if [ ${sz} -eq $((${npgs} * ${p})) ] ; then
- psz="yes"
- else
- psz="no"
- fi
- fi
-
- lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta}
- if [ ${pow2_result} -lt ${ndelta} ] ; then
- rem="yes"
- else
- rem="no"
- fi
-
- lg_size=${lg_grp}
- if [ $((${lg_delta} + ${lg_ndelta})) -eq ${lg_grp} ] ; then
- lg_size=$((${lg_grp} + 1))
- else
- lg_size=${lg_grp}
- rem="yes"
- fi
-
- if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then
- bin="yes"
- slab_size ${lg_p} ${lg_grp} ${lg_delta} ${ndelta}; pgs=${slab_size_pgs}
- else
- bin="no"
- pgs=0
- fi
- if [ ${lg_size} -lt ${lg_kmax} \
- -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then
- lg_delta_lookup=${lg_delta}
- else
- lg_delta_lookup="no"
- fi
- printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %3d, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${pgs} ${lg_delta_lookup}
- # Defined upon return:
- # - psz ("yes" or "no")
- # - bin ("yes" or "no")
- # - pgs
- # - lg_delta_lookup (${lg_delta} or "no")
-}
-
-sep_line() {
- echo " \\"
-}
-
-size_classes() {
- lg_z=$1
- lg_q=$2
- lg_t=$3
- lg_p=$4
- lg_g=$5
-
- pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result}
- pow2 ${lg_g}; g=${pow2_result}
-
- echo "#define SIZE_CLASSES \\"
- echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup */ \\"
-
- ntbins=0
- nlbins=0
- lg_tiny_maxclass='"NA"'
- nbins=0
- npsizes=0
-
- # Tiny size classes.
- ndelta=0
- index=0
- lg_grp=${lg_t}
- lg_delta=${lg_grp}
- while [ ${lg_grp} -lt ${lg_q} ] ; do
- size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
- if [ ${lg_delta_lookup} != "no" ] ; then
- nlbins=$((${index} + 1))
- fi
- if [ ${psz} = "yes" ] ; then
- npsizes=$((${npsizes} + 1))
- fi
- if [ ${bin} != "no" ] ; then
- nbins=$((${index} + 1))
- fi
- ntbins=$((${ntbins} + 1))
- lg_tiny_maxclass=${lg_grp} # Final written value is correct.
- index=$((${index} + 1))
- lg_delta=${lg_grp}
- lg_grp=$((${lg_grp} + 1))
- done
-
- # First non-tiny group.
- if [ ${ntbins} -gt 0 ] ; then
- sep_line
- # The first size class has an unusual encoding, because the size has to be
- # split between grp and delta*ndelta.
- lg_grp=$((${lg_grp} - 1))
- ndelta=1
- size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
- index=$((${index} + 1))
- lg_grp=$((${lg_grp} + 1))
- lg_delta=$((${lg_delta} + 1))
- if [ ${psz} = "yes" ] ; then
- npsizes=$((${npsizes} + 1))
- fi
- fi
- while [ ${ndelta} -lt ${g} ] ; do
- size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
- index=$((${index} + 1))
- ndelta=$((${ndelta} + 1))
- if [ ${psz} = "yes" ] ; then
- npsizes=$((${npsizes} + 1))
- fi
- done
-
- # All remaining groups.
- lg_grp=$((${lg_grp} + ${lg_g}))
- while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do
- sep_line
- ndelta=1
- if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then
- ndelta_limit=$((${g} - 1))
- else
- ndelta_limit=${g}
- fi
- while [ ${ndelta} -le ${ndelta_limit} ] ; do
- size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
- if [ ${lg_delta_lookup} != "no" ] ; then
- nlbins=$((${index} + 1))
- # Final written value is correct:
- lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
- fi
- if [ ${psz} = "yes" ] ; then
- npsizes=$((${npsizes} + 1))
- fi
- if [ ${bin} != "no" ] ; then
- nbins=$((${index} + 1))
- # Final written value is correct:
- small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
- if [ ${lg_g} -gt 0 ] ; then
- lg_large_minclass=$((${lg_grp} + 1))
- else
- lg_large_minclass=$((${lg_grp} + 2))
- fi
- fi
- # Final written value is correct:
- large_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
- index=$((${index} + 1))
- ndelta=$((${ndelta} + 1))
- done
- lg_grp=$((${lg_grp} + 1))
- lg_delta=$((${lg_delta} + 1))
- done
- echo
- nsizes=${index}
- lg_ceil ${nsizes}; lg_ceil_nsizes=${lg_ceil_result}
-
- # Defined upon completion:
- # - ntbins
- # - nlbins
- # - nbins
- # - nsizes
- # - lg_ceil_nsizes
- # - npsizes
- # - lg_tiny_maxclass
- # - lookup_maxclass
- # - small_maxclass
- # - lg_large_minclass
- # - large_maxclass
-}
-
-cat <<EOF
-#ifndef JEMALLOC_INTERNAL_SIZE_CLASSES_H
-#define JEMALLOC_INTERNAL_SIZE_CLASSES_H
-
-/* This file was automatically generated by size_classes.sh. */
-
-#include "jemalloc/internal/jemalloc_internal_types.h"
-
-/*
- * This header file defines:
- *
- * LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling.
- * LG_TINY_MIN: Lg of minimum size class to support.
- * SIZE_CLASSES: Complete table of SC(index, lg_grp, lg_delta, ndelta, psz,
- * bin, pgs, lg_delta_lookup) tuples.
- * index: Size class index.
- * lg_grp: Lg group base size (no deltas added).
- * lg_delta: Lg delta to previous size class.
- * ndelta: Delta multiplier. size == 1<<lg_grp + ndelta<<lg_delta
- * psz: 'yes' if a multiple of the page size, 'no' otherwise.
- * bin: 'yes' if a small bin size class, 'no' otherwise.
- * pgs: Slab page count if a small bin size class, 0 otherwise.
- * lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no'
- * otherwise.
- * NTBINS: Number of tiny bins.
- * NLBINS: Number of bins supported by the lookup table.
- * NBINS: Number of small size class bins.
- * NSIZES: Number of size classes.
- * LG_CEIL_NSIZES: Number of bits required to store NSIZES.
- * NPSIZES: Number of size classes that are a multiple of (1U << LG_PAGE).
- * LG_TINY_MAXCLASS: Lg of maximum tiny size class.
- * LOOKUP_MAXCLASS: Maximum size class included in lookup table.
- * SMALL_MAXCLASS: Maximum small size class.
- * LG_LARGE_MINCLASS: Lg of minimum large size class.
- * LARGE_MAXCLASS: Maximum (large) size class.
- */
-
-#define LG_SIZE_CLASS_GROUP ${lg_g}
-#define LG_TINY_MIN ${lg_tmin}
-
-EOF
-
-for lg_z in ${lg_zarr} ; do
- for lg_q in ${lg_qarr} ; do
- lg_t=${lg_tmin}
- while [ ${lg_t} -le ${lg_q} ] ; do
- # Iterate through page sizes and compute how many bins there are.
- for lg_p in ${lg_parr} ; do
- echo "#if (LG_SIZEOF_PTR == ${lg_z} && LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
- size_classes ${lg_z} ${lg_q} ${lg_t} ${lg_p} ${lg_g}
- echo "#define SIZE_CLASSES_DEFINED"
- echo "#define NTBINS ${ntbins}"
- echo "#define NLBINS ${nlbins}"
- echo "#define NBINS ${nbins}"
- echo "#define NSIZES ${nsizes}"
- echo "#define LG_CEIL_NSIZES ${lg_ceil_nsizes}"
- echo "#define NPSIZES ${npsizes}"
- echo "#define LG_TINY_MAXCLASS ${lg_tiny_maxclass}"
- echo "#define LOOKUP_MAXCLASS ${lookup_maxclass}"
- echo "#define SMALL_MAXCLASS ${small_maxclass}"
- echo "#define LG_LARGE_MINCLASS ${lg_large_minclass}"
- echo "#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS)"
- echo "#define LARGE_MAXCLASS ${large_maxclass}"
- echo "#endif"
- echo
- done
- lg_t=$((${lg_t} + 1))
- done
- done
-done
-
-cat <<EOF
-#ifndef SIZE_CLASSES_DEFINED
-# error "No size class definitions match configuration"
-#endif
-#undef SIZE_CLASSES_DEFINED
-/*
- * The size2index_tab lookup table uses uint8_t to encode each bin index, so we
- * cannot support more than 256 small size classes.
- */
-#if (NBINS > 256)
-# error "Too many small size classes"
-#endif
-
-#endif /* JEMALLOC_INTERNAL_SIZE_CLASSES_H */
-EOF
diff --git a/deps/jemalloc/include/jemalloc/internal/stats.h b/deps/jemalloc/include/jemalloc/internal/stats.h
index 852e34269..3b9e0eac1 100644
--- a/deps/jemalloc/include/jemalloc/internal/stats.h
+++ b/deps/jemalloc/include/jemalloc/internal/stats.h
@@ -10,7 +10,8 @@
OPTION('a', unmerged, config_stats, false) \
OPTION('b', bins, true, false) \
OPTION('l', large, true, false) \
- OPTION('x', mutex, true, false)
+ OPTION('x', mutex, true, false) \
+ OPTION('e', extents, true, false)
enum {
#define OPTION(o, v, d, s) stats_print_option_num_##v,
diff --git a/deps/jemalloc/include/jemalloc/internal/sz.h b/deps/jemalloc/include/jemalloc/internal/sz.h
index 979462898..68e558abf 100644
--- a/deps/jemalloc/include/jemalloc/internal/sz.h
+++ b/deps/jemalloc/include/jemalloc/internal/sz.h
@@ -3,7 +3,7 @@
#include "jemalloc/internal/bit_util.h"
#include "jemalloc/internal/pages.h"
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/util.h"
/*
@@ -26,18 +26,18 @@
* sz_pind2sz_tab encodes the same information as could be computed by
* sz_pind2sz_compute().
*/
-extern size_t const sz_pind2sz_tab[NPSIZES+1];
+extern size_t sz_pind2sz_tab[SC_NPSIZES + 1];
/*
* sz_index2size_tab encodes the same information as could be computed (at
* unacceptable cost in some code paths) by sz_index2size_compute().
*/
-extern size_t const sz_index2size_tab[NSIZES];
+extern size_t sz_index2size_tab[SC_NSIZES];
/*
* sz_size2index_tab is a compact lookup table that rounds request sizes up to
* size classes. In order to reduce cache footprint, the table is compressed,
* and all accesses are via sz_size2index().
*/
-extern uint8_t const sz_size2index_tab[];
+extern uint8_t sz_size2index_tab[];
static const size_t sz_large_pad =
#ifdef JEMALLOC_CACHE_OBLIVIOUS
@@ -47,49 +47,47 @@ static const size_t sz_large_pad =
#endif
;
+extern void sz_boot(const sc_data_t *sc_data);
+
JEMALLOC_ALWAYS_INLINE pszind_t
sz_psz2ind(size_t psz) {
- if (unlikely(psz > LARGE_MAXCLASS)) {
- return NPSIZES;
+ if (unlikely(psz > SC_LARGE_MAXCLASS)) {
+ return SC_NPSIZES;
}
- {
- pszind_t x = lg_floor((psz<<1)-1);
- pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x -
- (LG_SIZE_CLASS_GROUP + LG_PAGE);
- pszind_t grp = shift << LG_SIZE_CLASS_GROUP;
+ pszind_t x = lg_floor((psz<<1)-1);
+ pszind_t shift = (x < SC_LG_NGROUP + LG_PAGE) ?
+ 0 : x - (SC_LG_NGROUP + LG_PAGE);
+ pszind_t grp = shift << SC_LG_NGROUP;
- pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
- LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
+ pszind_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ?
+ LG_PAGE : x - SC_LG_NGROUP - 1;
- size_t delta_inverse_mask = ZU(-1) << lg_delta;
- pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) &
- ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+ size_t delta_inverse_mask = ZU(-1) << lg_delta;
+ pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) &
+ ((ZU(1) << SC_LG_NGROUP) - 1);
- pszind_t ind = grp + mod;
- return ind;
- }
+ pszind_t ind = grp + mod;
+ return ind;
}
static inline size_t
sz_pind2sz_compute(pszind_t pind) {
- if (unlikely(pind == NPSIZES)) {
- return LARGE_MAXCLASS + PAGE;
+ if (unlikely(pind == SC_NPSIZES)) {
+ return SC_LARGE_MAXCLASS + PAGE;
}
- {
- size_t grp = pind >> LG_SIZE_CLASS_GROUP;
- size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+ size_t grp = pind >> SC_LG_NGROUP;
+ size_t mod = pind & ((ZU(1) << SC_LG_NGROUP) - 1);
- size_t grp_size_mask = ~((!!grp)-1);
- size_t grp_size = ((ZU(1) << (LG_PAGE +
- (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
+ size_t grp_size_mask = ~((!!grp)-1);
+ size_t grp_size = ((ZU(1) << (LG_PAGE + (SC_LG_NGROUP-1))) << grp)
+ & grp_size_mask;
- size_t shift = (grp == 0) ? 1 : grp;
- size_t lg_delta = shift + (LG_PAGE-1);
- size_t mod_size = (mod+1) << lg_delta;
+ size_t shift = (grp == 0) ? 1 : grp;
+ size_t lg_delta = shift + (LG_PAGE-1);
+ size_t mod_size = (mod+1) << lg_delta;
- size_t sz = grp_size + mod_size;
- return sz;
- }
+ size_t sz = grp_size + mod_size;
+ return sz;
}
static inline size_t
@@ -101,70 +99,70 @@ sz_pind2sz_lookup(pszind_t pind) {
static inline size_t
sz_pind2sz(pszind_t pind) {
- assert(pind < NPSIZES+1);
+ assert(pind < SC_NPSIZES + 1);
return sz_pind2sz_lookup(pind);
}
static inline size_t
sz_psz2u(size_t psz) {
- if (unlikely(psz > LARGE_MAXCLASS)) {
- return LARGE_MAXCLASS + PAGE;
- }
- {
- size_t x = lg_floor((psz<<1)-1);
- size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
- LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
- size_t delta = ZU(1) << lg_delta;
- size_t delta_mask = delta - 1;
- size_t usize = (psz + delta_mask) & ~delta_mask;
- return usize;
+ if (unlikely(psz > SC_LARGE_MAXCLASS)) {
+ return SC_LARGE_MAXCLASS + PAGE;
}
+ size_t x = lg_floor((psz<<1)-1);
+ size_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ?
+ LG_PAGE : x - SC_LG_NGROUP - 1;
+ size_t delta = ZU(1) << lg_delta;
+ size_t delta_mask = delta - 1;
+ size_t usize = (psz + delta_mask) & ~delta_mask;
+ return usize;
}
static inline szind_t
sz_size2index_compute(size_t size) {
- if (unlikely(size > LARGE_MAXCLASS)) {
- return NSIZES;
+ if (unlikely(size > SC_LARGE_MAXCLASS)) {
+ return SC_NSIZES;
+ }
+
+ if (size == 0) {
+ return 0;
}
-#if (NTBINS != 0)
- if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
- szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
+#if (SC_NTINY != 0)
+ if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) {
+ szind_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1;
szind_t lg_ceil = lg_floor(pow2_ceil_zu(size));
return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin);
}
#endif
{
szind_t x = lg_floor((size<<1)-1);
- szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
- x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
- szind_t grp = shift << LG_SIZE_CLASS_GROUP;
+ szind_t shift = (x < SC_LG_NGROUP + LG_QUANTUM) ? 0 :
+ x - (SC_LG_NGROUP + LG_QUANTUM);
+ szind_t grp = shift << SC_LG_NGROUP;
- szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
- ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
+ szind_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1)
+ ? LG_QUANTUM : x - SC_LG_NGROUP - 1;
size_t delta_inverse_mask = ZU(-1) << lg_delta;
szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) &
- ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+ ((ZU(1) << SC_LG_NGROUP) - 1);
- szind_t index = NTBINS + grp + mod;
+ szind_t index = SC_NTINY + grp + mod;
return index;
}
}
JEMALLOC_ALWAYS_INLINE szind_t
sz_size2index_lookup(size_t size) {
- assert(size <= LOOKUP_MAXCLASS);
- {
- szind_t ret = (sz_size2index_tab[(size-1) >> LG_TINY_MIN]);
- assert(ret == sz_size2index_compute(size));
- return ret;
- }
+ assert(size <= SC_LOOKUP_MAXCLASS);
+ szind_t ret = (sz_size2index_tab[(size + (ZU(1) << SC_LG_TINY_MIN) - 1)
+ >> SC_LG_TINY_MIN]);
+ assert(ret == sz_size2index_compute(size));
+ return ret;
}
JEMALLOC_ALWAYS_INLINE szind_t
sz_size2index(size_t size) {
- assert(size > 0);
- if (likely(size <= LOOKUP_MAXCLASS)) {
+ if (likely(size <= SC_LOOKUP_MAXCLASS)) {
return sz_size2index_lookup(size);
}
return sz_size2index_compute(size);
@@ -172,20 +170,20 @@ sz_size2index(size_t size) {
static inline size_t
sz_index2size_compute(szind_t index) {
-#if (NTBINS > 0)
- if (index < NTBINS) {
- return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index));
+#if (SC_NTINY > 0)
+ if (index < SC_NTINY) {
+ return (ZU(1) << (SC_LG_TINY_MAXCLASS - SC_NTINY + 1 + index));
}
#endif
{
- size_t reduced_index = index - NTBINS;
- size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP;
- size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) -
+ size_t reduced_index = index - SC_NTINY;
+ size_t grp = reduced_index >> SC_LG_NGROUP;
+ size_t mod = reduced_index & ((ZU(1) << SC_LG_NGROUP) -
1);
size_t grp_size_mask = ~((!!grp)-1);
size_t grp_size = ((ZU(1) << (LG_QUANTUM +
- (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
+ (SC_LG_NGROUP-1))) << grp) & grp_size_mask;
size_t shift = (grp == 0) ? 1 : grp;
size_t lg_delta = shift + (LG_QUANTUM-1);
@@ -205,18 +203,22 @@ sz_index2size_lookup(szind_t index) {
JEMALLOC_ALWAYS_INLINE size_t
sz_index2size(szind_t index) {
- assert(index < NSIZES);
+ assert(index < SC_NSIZES);
return sz_index2size_lookup(index);
}
JEMALLOC_ALWAYS_INLINE size_t
sz_s2u_compute(size_t size) {
- if (unlikely(size > LARGE_MAXCLASS)) {
+ if (unlikely(size > SC_LARGE_MAXCLASS)) {
return 0;
}
-#if (NTBINS > 0)
- if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
- size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
+
+ if (size == 0) {
+ size++;
+ }
+#if (SC_NTINY > 0)
+ if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) {
+ size_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1;
size_t lg_ceil = lg_floor(pow2_ceil_zu(size));
return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) :
(ZU(1) << lg_ceil));
@@ -224,8 +226,8 @@ sz_s2u_compute(size_t size) {
#endif
{
size_t x = lg_floor((size<<1)-1);
- size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
- ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
+ size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1)
+ ? LG_QUANTUM : x - SC_LG_NGROUP - 1;
size_t delta = ZU(1) << lg_delta;
size_t delta_mask = delta - 1;
size_t usize = (size + delta_mask) & ~delta_mask;
@@ -247,8 +249,7 @@ sz_s2u_lookup(size_t size) {
*/
JEMALLOC_ALWAYS_INLINE size_t
sz_s2u(size_t size) {
- assert(size > 0);
- if (likely(size <= LOOKUP_MAXCLASS)) {
+ if (likely(size <= SC_LOOKUP_MAXCLASS)) {
return sz_s2u_lookup(size);
}
return sz_s2u_compute(size);
@@ -265,7 +266,7 @@ sz_sa2u(size_t size, size_t alignment) {
assert(alignment != 0 && ((alignment - 1) & alignment) == 0);
/* Try for a small size class. */
- if (size <= SMALL_MAXCLASS && alignment < PAGE) {
+ if (size <= SC_SMALL_MAXCLASS && alignment < PAGE) {
/*
* Round size up to the nearest multiple of alignment.
*
@@ -281,20 +282,20 @@ sz_sa2u(size_t size, size_t alignment) {
* 192 | 11000000 | 64
*/
usize = sz_s2u(ALIGNMENT_CEILING(size, alignment));
- if (usize < LARGE_MINCLASS) {
+ if (usize < SC_LARGE_MINCLASS) {
return usize;
}
}
/* Large size class. Beware of overflow. */
- if (unlikely(alignment > LARGE_MAXCLASS)) {
+ if (unlikely(alignment > SC_LARGE_MAXCLASS)) {
return 0;
}
/* Make sure result is a large size class. */
- if (size <= LARGE_MINCLASS) {
- usize = LARGE_MINCLASS;
+ if (size <= SC_LARGE_MINCLASS) {
+ usize = SC_LARGE_MINCLASS;
} else {
usize = sz_s2u(size);
if (usize < size) {
diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_externs.h b/deps/jemalloc/include/jemalloc/internal/tcache_externs.h
index 790367bd4..d63eafde8 100644
--- a/deps/jemalloc/include/jemalloc/internal/tcache_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/tcache_externs.h
@@ -1,15 +1,13 @@
#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
-#include "jemalloc/internal/size_classes.h"
-
extern bool opt_tcache;
extern ssize_t opt_lg_tcache_max;
extern cache_bin_info_t *tcache_bin_info;
/*
- * Number of tcache bins. There are NBINS small-object bins, plus 0 or more
+ * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more
* large-object bins.
*/
extern unsigned nhbins;
diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h b/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h
index 0f6ab8cb5..5eca20e89 100644
--- a/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h
+++ b/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h
@@ -3,7 +3,7 @@
#include "jemalloc/internal/bin.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/ticker.h"
#include "jemalloc/internal/util.h"
@@ -40,13 +40,13 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) {
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
- UNUSED size_t size, szind_t binind, bool zero, bool slow_path) {
+ size_t size, szind_t binind, bool zero, bool slow_path) {
void *ret;
cache_bin_t *bin;
bool tcache_success;
size_t usize JEMALLOC_CC_SILENCE_INIT(0);
- assert(binind < NBINS);
+ assert(binind < SC_NBINS);
bin = tcache_small_bin_get(tcache, binind);
ret = cache_bin_alloc_easy(bin, &tcache_success);
assert(tcache_success == (ret != NULL));
@@ -107,7 +107,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
cache_bin_t *bin;
bool tcache_success;
- assert(binind >= NBINS &&binind < nhbins);
+ assert(binind >= SC_NBINS &&binind < nhbins);
bin = tcache_large_bin_get(tcache, binind);
ret = cache_bin_alloc_easy(bin, &tcache_success);
assert(tcache_success == (ret != NULL));
@@ -166,7 +166,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
cache_bin_t *bin;
cache_bin_info_t *bin_info;
- assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS);
+ assert(tcache_salloc(tsd_tsdn(tsd), ptr)
+ <= SC_SMALL_MAXCLASS);
if (slow_path && config_fill && unlikely(opt_junk_free)) {
arena_dalloc_junk_small(ptr, &bin_infos[binind]);
@@ -174,13 +175,12 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bin = tcache_small_bin_get(tcache, binind);
bin_info = &tcache_bin_info[binind];
- if (unlikely(bin->ncached == bin_info->ncached_max)) {
+ if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) {
tcache_bin_flush_small(tsd, tcache, bin, binind,
(bin_info->ncached_max >> 1));
+ bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr);
+ assert(ret);
}
- assert(bin->ncached < bin_info->ncached_max);
- bin->ncached++;
- *(bin->avail - bin->ncached) = ptr;
tcache_event(tsd, tcache);
}
@@ -191,7 +191,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
cache_bin_t *bin;
cache_bin_info_t *bin_info;
- assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS);
+ assert(tcache_salloc(tsd_tsdn(tsd), ptr)
+ > SC_SMALL_MAXCLASS);
assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
if (slow_path && config_fill && unlikely(opt_junk_free)) {
@@ -215,6 +216,9 @@ JEMALLOC_ALWAYS_INLINE tcache_t *
tcaches_get(tsd_t *tsd, unsigned ind) {
tcaches_t *elm = &tcaches[ind];
if (unlikely(elm->tcache == NULL)) {
+ malloc_printf("<jemalloc>: invalid tcache id (%u).\n", ind);
+ abort();
+ } else if (unlikely(elm->tcache == TCACHES_ELM_NEED_REINIT)) {
elm->tcache = tcache_create_explicit(tsd);
}
return elm->tcache;
diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_structs.h b/deps/jemalloc/include/jemalloc/internal/tcache_structs.h
index 07b738705..172ef9040 100644
--- a/deps/jemalloc/include/jemalloc/internal/tcache_structs.h
+++ b/deps/jemalloc/include/jemalloc/internal/tcache_structs.h
@@ -1,10 +1,14 @@
#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
-#include "jemalloc/internal/ql.h"
-#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/cache_bin.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/ticker.h"
+#include "jemalloc/internal/tsd_types.h"
+
+/* Various uses of this struct need it to be a named type. */
+typedef ql_elm(tsd_t) tsd_link_t;
struct tcache_s {
/*
@@ -21,7 +25,7 @@ struct tcache_s {
* During tcache initialization, the avail pointer in each element of
* tbins is initialized to point to the proper offset within this array.
*/
- cache_bin_t bins_small[NBINS];
+ cache_bin_t bins_small[SC_NBINS];
/*
* This data is less hot; we can be a little less careful with our
@@ -29,6 +33,11 @@ struct tcache_s {
*/
/* Lets us track all the tcaches in an arena. */
ql_elm(tcache_t) link;
+
+ /* Logically scoped to tsd, but put here for cache layout reasons. */
+ ql_elm(tsd_t) tsd_link;
+ bool in_hook;
+
/*
* The descriptor lets the arena find our cache bins without seeing the
* tcache definition. This enables arenas to aggregate stats across
@@ -41,13 +50,13 @@ struct tcache_s {
/* Next bin to GC. */
szind_t next_gc_bin;
/* For small bins, fill (ncached_max >> lg_fill_div). */
- uint8_t lg_fill_div[NBINS];
+ uint8_t lg_fill_div[SC_NBINS];
/*
* We put the cache bins for large size classes at the end of the
* struct, since some of them might not get used. This might end up
* letting us avoid touching an extra page if we don't have to.
*/
- cache_bin_t bins_large[NSIZES-NBINS];
+ cache_bin_t bins_large[SC_NSIZES-SC_NBINS];
};
/* Linkage for list of available (previously used) explicit tcache IDs. */
diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_types.h b/deps/jemalloc/include/jemalloc/internal/tcache_types.h
index e49bc9d79..dce69382e 100644
--- a/deps/jemalloc/include/jemalloc/internal/tcache_types.h
+++ b/deps/jemalloc/include/jemalloc/internal/tcache_types.h
@@ -1,7 +1,7 @@
#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H
#define JEMALLOC_INTERNAL_TCACHE_TYPES_H
-#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sc.h"
typedef struct tcache_s tcache_t;
typedef struct tcaches_s tcaches_t;
@@ -45,7 +45,7 @@ typedef struct tcaches_s tcaches_t;
/* Number of tcache allocation/deallocation events between incremental GCs. */
#define TCACHE_GC_INCR \
- ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1))
+ ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1))
/* Used in TSD static initializer only. Real init in tcache_data_init(). */
#define TCACHE_ZERO_INITIALIZER {0}
@@ -53,4 +53,7 @@ typedef struct tcaches_s tcaches_t;
/* Used in TSD static initializer only. Will be initialized to opt_tcache. */
#define TCACHE_ENABLED_ZERO_INITIALIZER false
+/* Used for explicit tcache only. Means flushed but not destroyed. */
+#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1)
+
#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/test_hooks.h b/deps/jemalloc/include/jemalloc/internal/test_hooks.h
new file mode 100644
index 000000000..a6351e59a
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/test_hooks.h
@@ -0,0 +1,19 @@
+#ifndef JEMALLOC_INTERNAL_TEST_HOOKS_H
+#define JEMALLOC_INTERNAL_TEST_HOOKS_H
+
+extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)();
+extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)();
+
+#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn)
+
+#define open JEMALLOC_HOOK(open, test_hooks_libc_hook)
+#define read JEMALLOC_HOOK(read, test_hooks_libc_hook)
+#define write JEMALLOC_HOOK(write, test_hooks_libc_hook)
+#define readlink JEMALLOC_HOOK(readlink, test_hooks_libc_hook)
+#define close JEMALLOC_HOOK(close, test_hooks_libc_hook)
+#define creat JEMALLOC_HOOK(creat, test_hooks_libc_hook)
+#define secure_getenv JEMALLOC_HOOK(secure_getenv, test_hooks_libc_hook)
+/* Note that this is undef'd and re-define'd in src/prof.c. */
+#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook)
+
+#endif /* JEMALLOC_INTERNAL_TEST_HOOKS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/ticker.h b/deps/jemalloc/include/jemalloc/internal/ticker.h
index 4b3604708..52d0db4c8 100644
--- a/deps/jemalloc/include/jemalloc/internal/ticker.h
+++ b/deps/jemalloc/include/jemalloc/internal/ticker.h
@@ -75,4 +75,17 @@ ticker_tick(ticker_t *ticker) {
return ticker_ticks(ticker, 1);
}
+/*
+ * Try to tick. If ticker would fire, return true, but rely on
+ * slowpath to reset ticker.
+ */
+static inline bool
+ticker_trytick(ticker_t *ticker) {
+ --ticker->tick;
+ if (unlikely(ticker->tick < 0)) {
+ return true;
+ }
+ return false;
+}
+
#endif /* JEMALLOC_INTERNAL_TICKER_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h
index 0b9841aa7..9ba260045 100644
--- a/deps/jemalloc/include/jemalloc/internal/tsd.h
+++ b/deps/jemalloc/include/jemalloc/internal/tsd.h
@@ -3,6 +3,7 @@
#include "jemalloc/internal/arena_types.h"
#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/bin_types.h"
#include "jemalloc/internal/jemalloc_internal_externs.h"
#include "jemalloc/internal/prof_types.h"
#include "jemalloc/internal/ql.h"
@@ -68,17 +69,19 @@ typedef void (*test_callback_t)(int *);
O(offset_state, uint64_t, uint64_t) \
O(thread_allocated, uint64_t, uint64_t) \
O(thread_deallocated, uint64_t, uint64_t) \
+ O(bytes_until_sample, int64_t, int64_t) \
O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \
O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \
O(iarena, arena_t *, arena_t *) \
O(arena, arena_t *, arena_t *) \
O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\
+ O(binshards, tsd_binshards_t, tsd_binshards_t)\
O(tcache, tcache_t, tcache_t) \
O(witness_tsd, witness_tsd_t, witness_tsdn_t) \
MALLOC_TEST_TSD
#define TSD_INITIALIZER { \
- tsd_state_uninitialized, \
+ ATOMIC_INIT(tsd_state_uninitialized), \
TCACHE_ENABLED_ZERO_INITIALIZER, \
false, \
0, \
@@ -86,29 +89,97 @@ typedef void (*test_callback_t)(int *);
0, \
0, \
0, \
+ 0, \
NULL, \
RTREE_CTX_ZERO_INITIALIZER, \
NULL, \
NULL, \
NULL, \
+ TSD_BINSHARDS_ZERO_INITIALIZER, \
TCACHE_ZERO_INITIALIZER, \
WITNESS_TSD_INITIALIZER \
MALLOC_TEST_TSD_INITIALIZER \
}
+void *malloc_tsd_malloc(size_t size);
+void malloc_tsd_dalloc(void *wrapper);
+void malloc_tsd_cleanup_register(bool (*f)(void));
+tsd_t *malloc_tsd_boot0(void);
+void malloc_tsd_boot1(void);
+void tsd_cleanup(void *arg);
+tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal);
+void tsd_state_set(tsd_t *tsd, uint8_t new_state);
+void tsd_slow_update(tsd_t *tsd);
+void tsd_prefork(tsd_t *tsd);
+void tsd_postfork_parent(tsd_t *tsd);
+void tsd_postfork_child(tsd_t *tsd);
+
+/*
+ * Call ..._inc when your module wants to take all threads down the slow paths,
+ * and ..._dec when it no longer needs to.
+ */
+void tsd_global_slow_inc(tsdn_t *tsdn);
+void tsd_global_slow_dec(tsdn_t *tsdn);
+bool tsd_global_slow();
+
enum {
- tsd_state_nominal = 0, /* Common case --> jnz. */
- tsd_state_nominal_slow = 1, /* Initialized but on slow path. */
- /* the above 2 nominal states should be lower values. */
- tsd_state_nominal_max = 1, /* used for comparison only. */
- tsd_state_minimal_initialized = 2,
- tsd_state_purgatory = 3,
- tsd_state_reincarnated = 4,
- tsd_state_uninitialized = 5
+ /* Common case --> jnz. */
+ tsd_state_nominal = 0,
+ /* Initialized but on slow path. */
+ tsd_state_nominal_slow = 1,
+ /*
+ * Some thread has changed global state in such a way that all nominal
+ * threads need to recompute their fast / slow status the next time they
+ * get a chance.
+ *
+ * Any thread can change another thread's status *to* recompute, but
+ * threads are the only ones who can change their status *from*
+ * recompute.
+ */
+ tsd_state_nominal_recompute = 2,
+ /*
+ * The above nominal states should be lower values. We use
+ * tsd_nominal_max to separate nominal states from threads in the
+ * process of being born / dying.
+ */
+ tsd_state_nominal_max = 2,
+
+ /*
+ * A thread might free() during its death as its only allocator action;
+ * in such scenarios, we need tsd, but set up in such a way that no
+ * cleanup is necessary.
+ */
+ tsd_state_minimal_initialized = 3,
+ /* States during which we know we're in thread death. */
+ tsd_state_purgatory = 4,
+ tsd_state_reincarnated = 5,
+ /*
+ * What it says on the tin; tsd that hasn't been initialized. Note
+ * that even when the tsd struct lives in TLS, when need to keep track
+ * of stuff like whether or not our pthread destructors have been
+ * scheduled, so this really truly is different than the nominal state.
+ */
+ tsd_state_uninitialized = 6
};
-/* Manually limit tsd_state_t to a single byte. */
-typedef uint8_t tsd_state_t;
+/*
+ * Some TSD accesses can only be done in a nominal state. To enforce this, we
+ * wrap TSD member access in a function that asserts on TSD state, and mangle
+ * field names to prevent touching them accidentally.
+ */
+#define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n
+
+#ifdef JEMALLOC_U8_ATOMICS
+# define tsd_state_t atomic_u8_t
+# define tsd_atomic_load atomic_load_u8
+# define tsd_atomic_store atomic_store_u8
+# define tsd_atomic_exchange atomic_exchange_u8
+#else
+# define tsd_state_t atomic_u32_t
+# define tsd_atomic_load atomic_load_u32
+# define tsd_atomic_store atomic_store_u32
+# define tsd_atomic_exchange atomic_exchange_u32
+#endif
/* The actual tsd. */
struct tsd_s {
@@ -117,13 +188,29 @@ struct tsd_s {
* module. Access any thread-local state through the getters and
* setters below.
*/
- tsd_state_t state;
+
+ /*
+ * We manually limit the state to just a single byte. Unless the 8-bit
+ * atomics are unavailable (which is rare).
+ */
+ tsd_state_t state;
#define O(n, t, nt) \
- t use_a_getter_or_setter_instead_##n;
+ t TSD_MANGLE(n);
MALLOC_TSD
#undef O
};
+JEMALLOC_ALWAYS_INLINE uint8_t
+tsd_state_get(tsd_t *tsd) {
+ /*
+ * This should be atomic. Unfortunately, compilers right now can't tell
+ * that this can be done as a memory comparison, and forces a load into
+ * a register that hurts fast-path performance.
+ */
+ /* return atomic_load_u8(&tsd->state, ATOMIC_RELAXED); */
+ return *(uint8_t *)&tsd->state;
+}
+
/*
* Wrapper around tsd_t that makes it possible to avoid implicit conversion
* between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
@@ -150,15 +237,6 @@ tsdn_tsd(tsdn_t *tsdn) {
return &tsdn->tsd;
}
-void *malloc_tsd_malloc(size_t size);
-void malloc_tsd_dalloc(void *wrapper);
-void malloc_tsd_cleanup_register(bool (*f)(void));
-tsd_t *malloc_tsd_boot0(void);
-void malloc_tsd_boot1(void);
-void tsd_cleanup(void *arg);
-tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal);
-void tsd_slow_update(tsd_t *tsd);
-
/*
* We put the platform-specific data declarations and inlines into their own
* header files to avoid cluttering this file. They define tsd_boot0,
@@ -182,7 +260,7 @@ void tsd_slow_update(tsd_t *tsd);
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE t * \
tsd_##n##p_get_unsafe(tsd_t *tsd) { \
- return &tsd->use_a_getter_or_setter_instead_##n; \
+ return &tsd->TSD_MANGLE(n); \
}
MALLOC_TSD
#undef O
@@ -191,10 +269,16 @@ MALLOC_TSD
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE t * \
tsd_##n##p_get(tsd_t *tsd) { \
- assert(tsd->state == tsd_state_nominal || \
- tsd->state == tsd_state_nominal_slow || \
- tsd->state == tsd_state_reincarnated || \
- tsd->state == tsd_state_minimal_initialized); \
+ /* \
+ * Because the state might change asynchronously if it's \
+ * nominal, we need to make sure that we only read it once. \
+ */ \
+ uint8_t state = tsd_state_get(tsd); \
+ assert(state == tsd_state_nominal || \
+ state == tsd_state_nominal_slow || \
+ state == tsd_state_nominal_recompute || \
+ state == tsd_state_reincarnated || \
+ state == tsd_state_minimal_initialized); \
return tsd_##n##p_get_unsafe(tsd); \
}
MALLOC_TSD
@@ -229,8 +313,8 @@ MALLOC_TSD
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE void \
tsd_##n##_set(tsd_t *tsd, t val) { \
- assert(tsd->state != tsd_state_reincarnated && \
- tsd->state != tsd_state_minimal_initialized); \
+ assert(tsd_state_get(tsd) != tsd_state_reincarnated && \
+ tsd_state_get(tsd) != tsd_state_minimal_initialized); \
*tsd_##n##p_get(tsd) = val; \
}
MALLOC_TSD
@@ -238,13 +322,18 @@ MALLOC_TSD
JEMALLOC_ALWAYS_INLINE void
tsd_assert_fast(tsd_t *tsd) {
+ /*
+ * Note that our fastness assertion does *not* include global slowness
+ * counters; it's not in general possible to ensure that they won't
+ * change asynchronously from underneath us.
+ */
assert(!malloc_slow && tsd_tcache_enabled_get(tsd) &&
tsd_reentrancy_level_get(tsd) == 0);
}
JEMALLOC_ALWAYS_INLINE bool
tsd_fast(tsd_t *tsd) {
- bool fast = (tsd->state == tsd_state_nominal);
+ bool fast = (tsd_state_get(tsd) == tsd_state_nominal);
if (fast) {
tsd_assert_fast(tsd);
}
@@ -261,7 +350,7 @@ tsd_fetch_impl(bool init, bool minimal) {
}
assert(tsd != NULL);
- if (unlikely(tsd->state != tsd_state_nominal)) {
+ if (unlikely(tsd_state_get(tsd) != tsd_state_nominal)) {
return tsd_fetch_slow(tsd, minimal);
}
assert(tsd_fast(tsd));
@@ -281,7 +370,7 @@ JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_internal_fetch(void) {
tsd_t *tsd = tsd_fetch_min();
/* Use reincarnated state to prevent full initialization. */
- tsd->state = tsd_state_reincarnated;
+ tsd_state_set(tsd, tsd_state_reincarnated);
return tsd;
}
@@ -293,7 +382,7 @@ tsd_fetch(void) {
static inline bool
tsd_nominal(tsd_t *tsd) {
- return (tsd->state <= tsd_state_nominal_max);
+ return (tsd_state_get(tsd) <= tsd_state_nominal_max);
}
JEMALLOC_ALWAYS_INLINE tsdn_t *
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_generic.h b/deps/jemalloc/include/jemalloc/internal/tsd_generic.h
index 1e52ef767..cf73c0c71 100644
--- a/deps/jemalloc/include/jemalloc/internal/tsd_generic.h
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_generic.h
@@ -77,7 +77,10 @@ tsd_wrapper_get(bool init) {
abort();
} else {
wrapper->initialized = false;
+ JEMALLOC_DIAGNOSTIC_PUSH
+ JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
tsd_t initializer = TSD_INITIALIZER;
+ JEMALLOC_DIAGNOSTIC_POP
wrapper->val = initializer;
}
tsd_wrapper_set(wrapper);
@@ -107,7 +110,10 @@ tsd_boot1(void) {
tsd_boot_wrapper.initialized = false;
tsd_cleanup(&tsd_boot_wrapper.val);
wrapper->initialized = false;
+ JEMALLOC_DIAGNOSTIC_PUSH
+ JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
tsd_t initializer = TSD_INITIALIZER;
+ JEMALLOC_DIAGNOSTIC_POP
wrapper->val = initializer;
tsd_wrapper_set(wrapper);
}
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
index beb467a67..65852d5c1 100644
--- a/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
@@ -3,8 +3,10 @@
#endif
#define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H
-extern __thread tsd_t tsd_tls;
-extern __thread bool tsd_initialized;
+#define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL
+
+extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls;
+extern JEMALLOC_TSD_TYPE_ATTR(bool) tsd_initialized;
extern bool tsd_booted;
/* Initialization/cleanup. */
@@ -47,7 +49,6 @@ tsd_get_allocates(void) {
/* Get/set. */
JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_get(bool init) {
- assert(tsd_booted);
return &tsd_tls;
}
JEMALLOC_ALWAYS_INLINE void
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_tls.h b/deps/jemalloc/include/jemalloc/internal/tsd_tls.h
index 0de64b7b8..7d6c805be 100644
--- a/deps/jemalloc/include/jemalloc/internal/tsd_tls.h
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_tls.h
@@ -3,7 +3,9 @@
#endif
#define JEMALLOC_INTERNAL_TSD_TLS_H
-extern __thread tsd_t tsd_tls;
+#define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL
+
+extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls;
extern pthread_key_t tsd_tsd;
extern bool tsd_booted;
@@ -39,8 +41,7 @@ tsd_get_allocates(void) {
/* Get/set. */
JEMALLOC_ALWAYS_INLINE tsd_t *
-tsd_get(UNUSED bool init) {
- assert(tsd_booted);
+tsd_get(bool init) {
return &tsd_tls;
}
diff --git a/deps/jemalloc/include/jemalloc/internal/witness.h b/deps/jemalloc/include/jemalloc/internal/witness.h
index 7ace8ae4a..fff9e98cb 100644
--- a/deps/jemalloc/include/jemalloc/internal/witness.h
+++ b/deps/jemalloc/include/jemalloc/internal/witness.h
@@ -27,9 +27,9 @@
#define WITNESS_RANK_PROF_BT2GCTX 6U
#define WITNESS_RANK_PROF_TDATAS 7U
#define WITNESS_RANK_PROF_TDATA 8U
-#define WITNESS_RANK_PROF_GCTX 9U
-
-#define WITNESS_RANK_BACKGROUND_THREAD 10U
+#define WITNESS_RANK_PROF_LOG 9U
+#define WITNESS_RANK_PROF_GCTX 10U
+#define WITNESS_RANK_BACKGROUND_THREAD 11U
/*
* Used as an argument to witness_assert_depth_to_rank() in order to validate
@@ -37,18 +37,19 @@
* witness_assert_depth_to_rank() is inclusive rather than exclusive, this
* definition can have the same value as the minimally ranked core lock.
*/
-#define WITNESS_RANK_CORE 11U
-
-#define WITNESS_RANK_DECAY 11U
-#define WITNESS_RANK_TCACHE_QL 12U
-#define WITNESS_RANK_EXTENT_GROW 13U
-#define WITNESS_RANK_EXTENTS 14U
-#define WITNESS_RANK_EXTENT_AVAIL 15U
-
-#define WITNESS_RANK_EXTENT_POOL 16U
-#define WITNESS_RANK_RTREE 17U
-#define WITNESS_RANK_BASE 18U
-#define WITNESS_RANK_ARENA_LARGE 19U
+#define WITNESS_RANK_CORE 12U
+
+#define WITNESS_RANK_DECAY 12U
+#define WITNESS_RANK_TCACHE_QL 13U
+#define WITNESS_RANK_EXTENT_GROW 14U
+#define WITNESS_RANK_EXTENTS 15U
+#define WITNESS_RANK_EXTENT_AVAIL 16U
+
+#define WITNESS_RANK_EXTENT_POOL 17U
+#define WITNESS_RANK_RTREE 18U
+#define WITNESS_RANK_BASE 19U
+#define WITNESS_RANK_ARENA_LARGE 20U
+#define WITNESS_RANK_HOOK 21U
#define WITNESS_RANK_LEAF 0xffffffffU
#define WITNESS_RANK_BIN WITNESS_RANK_LEAF